# 🔮 PRISM Engine - Full 14 Lenses

One notebook. All lenses. No imports needed.

**Run cells 1-4 in order, then explore.**

In [None]:
#@title ⚙️ **CELL 1: SETUP** (run first) { display-mode: "form" }
#@markdown Mounts Drive, installs packages, loads all 14 lenses.

# Mount Drive
# Runs only inside Google Colab: `google.colab` is not available elsewhere.
from google.colab import drive
drive.mount('/content/drive')

# Config - edit if your path differs
# Root folder used below for sys.path and by later cells for data/output paths.
PRISM_PATH = '/content/drive/MyDrive/prism-engine/prism-engine'

# Imports
import sys, os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from scipy import stats
from collections import defaultdict
import warnings
# Silences every warning for the rest of the session, including numerical
# warnings (e.g. divide-by-zero) raised inside the lenses.
warnings.filterwarnings('ignore')

# Put the project folder first on the import path.
sys.path.insert(0, PRISM_PATH)

# ============================================================================
# ALL 14 LENSES - SELF-CONTAINED
# ============================================================================

class BaseLens:
    """Common interface for lenses: a ``name``, a ``description`` and ``analyze``."""

    name = "base"
    description = "Base lens class"

    def analyze(self, panel):
        """Analyze ``panel``; the base implementation always raises NotImplementedError."""
        raise NotImplementedError


# --- LENS 1: MAGNITUDE ---
class MagnitudeLens(BaseLens):
    """Score each column by the L2 norm of its z-scored values."""
    name = "magnitude"

    def analyze(self, panel):
        """Return ``{'importance': Series}`` holding one L2 norm per column.

        NOTE(review): every column is z-scored with its own sample standard
        deviation (pandas default ddof=1), so the sum of squared z-scores is
        n - 1 for any non-constant column without gaps. The scores therefore
        come out (nearly) identical across columns -- confirm this is the
        intended measure.
        """
        centered = panel - panel.mean()
        z = centered / panel.std()
        squared_total = z.pow(2).sum()
        return {'importance': np.sqrt(squared_total)}


# --- LENS 2: PCA ---
class PCALens(BaseLens):
    """Score columns by their summed absolute loadings on the leading components."""
    name = "pca"

    def analyze(self, panel, n_components=5):
        """Run PCA through an SVD of the z-scored panel.

        Returns a dict with ``importance`` (Series indexed by column: sum of
        absolute singular-value-scaled loadings over the first
        ``n_components`` components) and ``explained_variance`` (variance
        share of those components).
        """
        zscores = (panel - panel.mean()) / panel.std()
        # NaNs produced by the z-scoring are replaced with 0 before the SVD.
        X = zscores.fillna(0).values
        _, sing_vals, components = np.linalg.svd(X, full_matrices=False)

        variances = (sing_vals ** 2) / (len(X) - 1)
        top_loadings = components[:n_components].T * sing_vals[:n_components]
        scores = np.abs(top_loadings).sum(axis=1)

        return {
            'importance': pd.Series(scores, index=panel.columns),
            'explained_variance': variances[:n_components] / variances.sum(),
        }


# --- LENS 3: GRANGER CAUSALITY ---
class GrangerLens(BaseLens):
    """Importance by Granger-causal influence on other series."""
    name = "granger"

    def analyze(self, panel, max_lag=5):
        """Score each column by how much its lags help predict the others.

        For every ordered (source, target) pair two least-squares models
        without intercept are fitted: target on its own ``max_lag`` lags
        (restricted) and target on its own lags plus the source's lags
        (unrestricted). The F-statistic of the improvement is added to the
        source's score whenever it exceeds 2 (a rough significance cut-off).

        Args:
            panel: DataFrame with one column per series.
            max_lag: Number of lags in each model; must be >= 1.

        Returns:
            dict with ``importance`` (min-max scaled influence) and
            ``granger_influence`` (raw summed F-statistics), both Series
            indexed by column. All scores are zero when fewer than 20 usable
            rows remain or the residual degrees of freedom are not positive.

        Raises:
            ValueError: if ``max_lag`` is smaller than 1.
        """
        if max_lag < 1:
            raise ValueError("max_lag must be >= 1")

        cols = panel.columns.tolist()
        influence = pd.Series(0.0, index=cols)

        n = len(panel) - max_lag
        dof = n - 2 * max_lag
        if n >= 20 and dof > 0:
            # Lag matrices depend on a single column, so build each one once.
            lagged = {c: self._lag_matrix(panel[c].values, max_lag) for c in cols}

            for target in cols:
                Y = panel[target].values[max_lag:]
                X_restricted = lagged[target]
                # The restricted fit depends only on the target: fit it once
                # instead of once per source.
                rss_r = self._rss(X_restricted, Y)
                if rss_r is None:
                    continue

                for source in cols:
                    if source == target:
                        continue
                    X_unrestricted = np.column_stack([X_restricted, lagged[source]])
                    rss_u = self._rss(X_unrestricted, Y)
                    if rss_u is None:
                        continue

                    # A perfect unrestricted fit divides by zero; numpy returns
                    # inf/nan here, which is filtered out just below.
                    with np.errstate(divide='ignore', invalid='ignore'):
                        f_stat = ((rss_r - rss_u) / max_lag) / (rss_u / dof)
                    # Non-finite values would poison the min-max scaling.
                    if np.isfinite(f_stat) and f_stat > 2:  # Rough significance
                        influence[source] += f_stat

        importance = (influence - influence.min()) / (influence.max() - influence.min() + 1e-10)
        return {'importance': importance, 'granger_influence': influence}

    @staticmethod
    def _lag_matrix(values, max_lag):
        """Stack lags 1..max_lag of ``values`` as columns, aligned with values[max_lag:]."""
        size = len(values)
        return np.column_stack(
            [values[max_lag - i - 1:size - i - 1] for i in range(max_lag)]
        )

    @staticmethod
    def _rss(design, y):
        """Residual sum of squares of the least-squares fit, or None if the solver fails."""
        try:
            coef = np.linalg.lstsq(design, y, rcond=None)[0]
        except np.linalg.LinAlgError:
            return None
        # Kept as a numpy scalar so a later division by zero yields inf, not an exception.
        return np.sum((y - design @ coef) ** 2)


# --- LENS 4: DMD (Dynamic Mode Decomposition) ---
class DMDLens(BaseLens):
    """Importance by contribution to dominant dynamic modes."""
    name = "dmd"

    def analyze(self, panel, n_modes=5):
        """Run a rank-truncated Dynamic Mode Decomposition on the z-scored panel.

        Consecutive rows form the snapshot pair (X1 -> X2). The truncation
        rank is the smaller of ``n_modes`` and the numerical rank of X1, so
        singular values that are zero up to round-off are never inverted.

        Args:
            panel: DataFrame with one column per series.
            n_modes: Maximum number of DMD modes to keep.

        Returns:
            dict with ``importance`` (per-column sum of absolute mode entries)
            and ``eigenvalues`` (DMD eigenvalues). When no usable singular
            value remains, importance is all zeros and eigenvalues is empty.
        """
        X = ((panel - panel.mean()) / panel.std()).fillna(0).values
        X1, X2 = X[:-1].T, X[1:].T

        # DMD via SVD
        U, S, Vh = np.linalg.svd(X1, full_matrices=False)

        # Singular values come back in descending order, so S[0] is the largest.
        if S.size:
            tol = S[0] * max(X1.shape) * np.finfo(float).eps
            numerical_rank = int(np.sum(S > tol))
        else:
            numerical_rank = 0

        r = min(n_modes, numerical_rank)
        if r < 1:
            return {
                'importance': pd.Series(0.0, index=panel.columns),
                'eigenvalues': np.array([], dtype=complex),
            }
        U, S, Vh = U[:, :r], S[:r], Vh[:r, :]

        # X2 @ V @ diag(1/S); dividing the columns of V by S avoids building the diagonal.
        X2_proj = X2 @ (Vh.T / S)

        # DMD matrix
        Atilde = U.T @ X2_proj
        eigs, W = np.linalg.eig(Atilde)

        # Modes
        Phi = X2_proj @ W

        # Importance: sum of mode amplitudes
        importance = pd.Series(np.abs(Phi).sum(axis=1), index=panel.columns)
        return {'importance': importance, 'eigenvalues': eigs}


# --- LENS 5: INFLUENCE ---
class InfluenceLens(BaseLens):
    """Importance by volatility × deviation from mean."""
    name = "influence"

    def analyze(self, panel, window=20):
        """Average (rolling std × |value - rolling mean|) per column, min-max scaled.

        Returns ``{'importance': Series}`` indexed by column.
        """
        roller = panel.rolling(window=window)
        rolling_std = roller.std()
        abs_deviation = (panel - roller.mean()).abs()
        raw = (rolling_std * abs_deviation).mean()

        lowest, highest = raw.min(), raw.max()
        # The small constant keeps the division defined when all scores are equal.
        return {'importance': (raw - lowest) / (highest - lowest + 1e-10)}


# --- LENS 6: MUTUAL INFORMATION ---
class MutualInfoLens(BaseLens):
    """Importance by mutual information with other series."""
    name = "mutual_info"

    def analyze(self, panel, n_bins=20):
        """Sum, per column, the histogram-based mutual information with every other column.

        Args:
            panel: DataFrame with one column per series.
            n_bins: Number of edges used to discretize each column and number
                of bins of the joint histogram.

        Returns:
            dict with ``importance`` (min-max scaled totals) and
            ``mutual_info`` (raw totals, in nats), both Series indexed by
            column.
        """
        cols = panel.columns.tolist()
        mi_scores = pd.Series(0.0, index=cols)

        # Discretize every column once instead of once per pair.
        binned = []
        for col in cols:
            values = panel[col].values
            edges = np.linspace(values.min(), values.max(), n_bins)
            binned.append(np.digitize(values, edges))

        for i, col1 in enumerate(cols):
            for j in range(i + 1, len(cols)):
                mi = self._mutual_information(binned[i], binned[j], n_bins)
                # MI is symmetric, so each pair is computed once and credited to both.
                mi_scores[col1] += mi
                mi_scores[cols[j]] += mi

        importance = (mi_scores - mi_scores.min()) / (mi_scores.max() - mi_scores.min() + 1e-10)
        return {'importance': importance, 'mutual_info': mi_scores}

    @staticmethod
    def _mutual_information(x_bins, y_bins, n_bins):
        """Mutual information (natural log) of two discretized series."""
        joint = np.histogram2d(x_bins, y_bins, bins=n_bins)[0]
        total = joint.sum()
        if total == 0:
            return 0.0
        pxy = joint / total
        px = pxy.sum(axis=1)
        py = pxy.sum(axis=0)

        # Only occupied cells contribute; there both marginals are positive too.
        occupied = pxy > 0
        independent = np.outer(px, py)
        return float(np.sum(pxy[occupied] * np.log(pxy[occupied] / independent[occupied])))


# --- LENS 7: CLUSTERING ---
class ClusteringLens(BaseLens):
    """Importance by centrality within correlation clusters."""
    name = "clustering"

    def analyze(self, panel):
        """Score each column by its mean absolute correlation with the other columns.

        Returns:
            dict with ``importance`` (Series indexed by column) and
            ``correlation_matrix`` (absolute correlation DataFrame).
            Undefined correlations (NaN) count as 0, and a single-column
            panel scores 0 instead of NaN.
        """
        corr = panel.corr().abs()

        # Work on a copy so the returned correlation matrix is untouched.
        off_diag = corr.to_numpy(dtype=float, copy=True)
        # Zero the diagonal explicitly rather than subtracting 1 from the
        # column sum: a constant column has a NaN diagonal, so subtracting 1
        # gave it a negative score.
        np.fill_diagonal(off_diag, 0.0)

        n_others = max(len(corr) - 1, 1)  # avoids 0/0 for a single column
        # Importance = average correlation with others (centrality)
        importance = pd.Series(np.nansum(off_diag, axis=0) / n_others, index=corr.columns)
        return {'importance': importance, 'correlation_matrix': corr}


# --- LENS 8: DECOMPOSITION ---
class DecompositionLens(BaseLens):
    """Importance by trend vs noise ratio."""
    name = "decomposition"

    def analyze(self, panel, period=252):
        """Return the share of each column's variance carried by its centered rolling mean.

        Columns with fewer than ``2 * period`` non-missing values, or without
        positive variance, score 0.
        """
        def trend_share(series):
            clean = series.dropna()
            if len(clean) < period * 2:
                return 0
            total_var = clean.var()
            # Written as "not > 0" so a NaN variance also yields 0.
            if not total_var > 0:
                return 0
            smooth = clean.rolling(window=period, center=True).mean()
            return smooth.var() / total_var

        shares = [trend_share(panel[col]) for col in panel.columns]
        return {'importance': pd.Series(shares, index=panel.columns)}


# --- LENS 9: WAVELET ---
class WaveletLens(BaseLens):
    """Importance by multi-scale variance (wavelet-like decomposition)."""
    name = "wavelet"

    def analyze(self, panel, scales=(5, 20, 60, 120, 252)):
        """Average, over several window lengths, the variance of (series - rolling mean).

        Args:
            panel: DataFrame with one column per series.
            scales: Iterable of rolling-window lengths. The default is a tuple
                so the default object cannot be mutated between calls.

        Returns:
            ``{'importance': Series}`` of min-max scaled scores. Scales that
            are not shorter than a column's non-missing length are skipped; a
            column with no usable scale scores 0 before scaling.
        """
        importance_vals = []
        for col in panel.columns:
            s = panel[col].dropna()
            # "Detail" at a scale = what is left after removing the rolling mean.
            scale_vars = [
                (s - s.rolling(window=scale).mean()).var()
                for scale in scales
                if len(s) > scale
            ]
            importance_vals.append(np.mean(scale_vars) if scale_vars else 0)

        importance = pd.Series(importance_vals, index=panel.columns)
        importance = (importance - importance.min()) / (importance.max() - importance.min() + 1e-10)
        return {'importance': importance}


# --- LENS 10: NETWORK ---
class NetworkLens(BaseLens):
    """Importance by network centrality in correlation graph."""
    name = "network"

    def analyze(self, panel, threshold=0.5):
        """Build a thresholded correlation graph and combine two centralities.

        Two columns are linked when their absolute correlation exceeds
        ``threshold``. Importance is the mean of max-scaled degree centrality
        and max-scaled eigenvector centrality.

        Returns:
            dict with ``importance`` and ``degree``, both Series indexed by
            column. In a graph without edges every score is 0.
        """
        corr = panel.corr().abs()
        # An explicit copy guarantees a writable array for fill_diagonal.
        adj = (corr > threshold).to_numpy(dtype=int, copy=True)
        np.fill_diagonal(adj, 0)

        # Degree centrality
        degree = adj.sum(axis=1)

        # Eigenvector centrality (dominant eigenvector)
        if not degree.any():
            # With no edges the adjacency matrix is all zeros and any vector is
            # an eigenvector; scoring one would favour an arbitrary node.
            eigen_cent = np.zeros(len(degree))
        else:
            try:
                # The adjacency matrix is symmetric, so eigh applies: real
                # output, eigenvalues ascending, dominant vector in the last column.
                _, vecs = np.linalg.eigh(adj.astype(float))
                eigen_cent = np.abs(vecs[:, -1])
            except np.linalg.LinAlgError:
                eigen_cent = degree

        # Combine
        combined = 0.5 * degree / (degree.max() + 1e-10) + 0.5 * eigen_cent / (eigen_cent.max() + 1e-10)
        importance = pd.Series(combined, index=panel.columns)
        return {'importance': importance, 'degree': pd.Series(degree, index=panel.columns)}


# --- LENS 11: REGIME SWITCHING ---
class RegimeSwitchingLens(BaseLens):
    """Importance by behavior difference across regimes."""
    name = "regime"

    def analyze(self, panel, n_regimes=2, vol_window=20):
        """Split time into low/high-volatility regimes and compare column means.

        The regime signal is the rolling standard deviation of the first
        principal component of the z-scored panel; rows above its median are
        regime 1, the rest regime 0.

        Args:
            panel: DataFrame with one column per series.
            n_regimes: Currently unused; exactly two regimes are produced.
            vol_window: Rolling window (in rows) for the volatility of the
                first principal component.

        Returns:
            dict with ``importance`` (absolute difference of regime means
            divided by the column's overall std) and ``regime_labels``
            (0/1 array, one entry per row).
        """
        # Simple regime detection via rolling volatility of first PC
        X = ((panel - panel.mean()) / panel.std()).fillna(0).values
        U, S, _ = np.linalg.svd(X, full_matrices=False)
        pc1 = U[:, 0] * S[0]

        # Regime = high vol vs low vol
        # The warm-up rows have no rolling std; they are filled with 0 and so
        # land in the low-volatility regime whenever the median is positive.
        vol = pd.Series(pc1).rolling(vol_window).std().fillna(0).values
        threshold = np.median(vol)
        regime = (vol > threshold).astype(int)

        low, high = regime == 0, regime == 1
        has_low, has_high = low.sum() > 0, high.sum() > 0

        # Importance: how different is behavior across regimes
        importance_vals = []
        for col in panel.columns:
            s = panel[col].values
            mean_r0 = s[low].mean() if has_low else 0
            mean_r1 = s[high].mean() if has_high else 0
            std_pooled = s.std() + 1e-10
            importance_vals.append(abs(mean_r1 - mean_r0) / std_pooled)

        importance = pd.Series(importance_vals, index=panel.columns)
        return {'importance': importance, 'regime_labels': regime}


# --- LENS 12: ANOMALY ---
class AnomalyLens(BaseLens):
    """Importance by frequency of anomalous behavior."""
    name = "anomaly"

    def analyze(self, panel, z_threshold=2.5):
        """Score columns by the share of rows whose |z-score| exceeds ``z_threshold``.

        Returns a dict with ``importance`` (min-max scaled rate) and
        ``anomaly_rate`` (raw rate), both Series indexed by column.
        """
        standardized = (panel - panel.mean()) / panel.std()
        flagged = standardized.abs() > z_threshold
        anomaly_rate = flagged.mean()

        lowest = anomaly_rate.min()
        spread = anomaly_rate.max() - lowest
        importance = (anomaly_rate - lowest) / (spread + 1e-10)
        return {'importance': importance, 'anomaly_rate': anomaly_rate}


# --- LENS 13: TRANSFER ENTROPY ---
class TransferEntropyLens(BaseLens):
    """Importance by information flow to other series."""
    name = "transfer_entropy"

    def analyze(self, panel, lag=1, n_bins=10):
        """Score each column by a correlation-based proxy for outgoing transfer entropy.

        TE(X->Y) ≈ I(Y_t; X_{t-lag} | Y_{t-lag}) is approximated by the
        absolute correlation between lagged X and the residual of Y_t after
        regressing it on Y_{t-lag}. No entropy is actually estimated.

        Args:
            panel: DataFrame with one column per series.
            lag: Number of rows between "past" and "current"; values below 1,
                or not shorter than the panel, leave all scores at zero.
            n_bins: Unused; kept so existing callers keep working.

        Returns:
            dict with ``importance`` (min-max scaled) and
            ``transfer_entropy_out`` (raw summed proxy), both Series indexed
            by column.
        """
        cols = panel.columns.tolist()
        te_out = pd.Series(0.0, index=cols)  # Outgoing TE

        if lag >= 1 and len(panel) > lag:
            past = {c: panel[c].values[:-lag] for c in cols}

            for target in cols:
                y_curr = panel[target].values[lag:]
                y_past = past[target]

                # Residual of Y_curr given Y_past. It depends only on the
                # target, so it is computed once per target. np.cov uses
                # ddof=1, so the variance must use ddof=1 as well.
                slope = np.cov(y_curr, y_past)[0, 1] / (np.var(y_past, ddof=1) + 1e-10)
                residual = y_curr - slope * y_past

                for source in cols:
                    if source == target:
                        continue
                    # A constant input gives a NaN correlation, which is skipped.
                    with np.errstate(divide='ignore', invalid='ignore'):
                        te = abs(np.corrcoef(residual, past[source])[0, 1])
                    if not np.isnan(te):
                        te_out[source] += te

        importance = (te_out - te_out.min()) / (te_out.max() - te_out.min() + 1e-10)
        return {'importance': importance, 'transfer_entropy_out': te_out}


# --- LENS 14: TDA (Topological Data Analysis) ---
class TDALens(BaseLens):
    """Importance by topological persistence in embedded space."""
    name = "tda"

    def analyze(self, panel, embed_dim=3, delay=5, sample_size=200, random_state=None):
        """Score columns by the spread of pairwise distances in a delay embedding.

        Each column is time-delay embedded, subsampled to at most
        ``sample_size`` points, and scored by (max - min) pairwise Euclidean
        distance, a rough proxy for H0 persistence.

        Args:
            panel: DataFrame with one column per series.
            embed_dim: Number of coordinates per embedded point.
            delay: Row offset between consecutive coordinates.
            sample_size: Maximum number of embedded points kept.
            random_state: Optional seed for the subsampling. ``None`` keeps
                drawing from numpy's global random state as before.

        Returns:
            ``{'importance': Series}`` of min-max scaled scores. Columns with
            fewer than ``embed_dim * delay + sample_size`` non-missing values
            score 0 before scaling.
        """
        from scipy.spatial.distance import pdist

        # RandomState(seed).choice mirrors np.random.choice, so both share one call path.
        rng = np.random if random_state is None else np.random.RandomState(random_state)
        offsets = delay * np.arange(embed_dim)
        importance_vals = []

        for col in panel.columns:
            s = panel[col].dropna().values
            if len(s) < embed_dim * delay + sample_size:
                importance_vals.append(0)
                continue

            # Time-delay embedding: point i is (s[i], s[i+delay], ...).
            n = len(s) - (embed_dim - 1) * delay

            # Subsample for speed: draw the start rows first so only the
            # sampled points are embedded.
            if n > sample_size:
                starts = rng.choice(n, sample_size, replace=False)
            else:
                starts = np.arange(n)
            embedded = s[starts[:, None] + offsets]

            # Simple persistence: range of pairwise distances (proxy for H0 persistence)
            dists = pdist(embedded)
            persistence = dists.max() - dists.min() if len(dists) > 0 else 0
            importance_vals.append(persistence)

        importance = pd.Series(importance_vals, index=panel.columns)
        importance = (importance - importance.min()) / (importance.max() - importance.min() + 1e-10)
        return {'importance': importance}


# ============================================================================
# LENS REGISTRY & RUNNER
# ============================================================================

# Registry of lens classes keyed by each class's ``name`` attribute, in run order.
LENSES = {
    lens_cls.name: lens_cls
    for lens_cls in (
        MagnitudeLens,
        PCALens,
        GrangerLens,
        DMDLens,
        InfluenceLens,
        MutualInfoLens,
        ClusteringLens,
        DecompositionLens,
        WaveletLens,
        NetworkLens,
        RegimeSwitchingLens,
        AnomalyLens,
        TransferEntropyLens,
        TDALens,
    )
}

def run_lens(name, panel):
    """Instantiate the lens registered under ``name`` and return its analysis of ``panel``.

    Raises KeyError when ``name`` is not a key of ``LENSES``.
    """
    lens_cls = LENSES[name]
    lens = lens_cls()
    return lens.analyze(panel)

def run_all_lenses(panel, names=None, verbose=True):
    """Run several lenses on ``panel`` and collect the results that succeed.

    Args:
        panel: DataFrame handed to every lens.
        names: Lens names to run; ``None`` or an empty list runs every
            registered lens.
        verbose: When true, print one status line per lens.

    Returns:
        dict mapping lens name to that lens's result. A lens that raises is
        left out (best effort); its error is shown only when ``verbose``.
    """
    names = names or list(LENSES)
    results = {}
    for name in names:
        if verbose:
            # Flush so the name is visible while a slow lens is still running.
            print(f"  {name}...", end=" ", flush=True)
        try:
            results[name] = run_lens(name, panel)
        except Exception as e:
            # One failing lens must not abort the remaining ones.
            if verbose:
                print(f"✗ ({e})")
        else:
            if verbose:
                print("✓")
    return results

def compute_consensus(results):
    """Combine per-lens importance scores into a consensus ranking.

    Each lens's ``importance`` Series is converted to ranks (1 = most
    important). Results without an ``importance`` Series are ignored.

    Args:
        results: dict mapping lens name to that lens's result dict.

    Returns:
        DataFrame indexed by indicator with one rank column per lens plus
        ``avg_rank``, ``std_rank`` and ``agreement`` (= 1 / (1 + std_rank)),
        sorted by ``avg_rank``. An empty DataFrame when no lens qualifies.
    """
    rankings = {}
    for name, res in results.items():
        if 'importance' in res and isinstance(res['importance'], pd.Series):
            rankings[name] = res['importance'].rank(ascending=False)
    if not rankings:
        return pd.DataFrame()

    df = pd.DataFrame(rankings)
    # Compute both statistics before adding any summary column, so the
    # spread is taken over the lens ranks only (not over ranks + their mean).
    avg_rank = df.mean(axis=1)
    # With a single usable rank the sample std is undefined; treat it as no disagreement.
    std_rank = df.std(axis=1).fillna(0.0)

    df['avg_rank'] = avg_rank
    df['std_rank'] = std_rank
    df['agreement'] = 1 / (1 + std_rank)
    return df.sort_values('avg_rank')

# Confirm the registry is populated and list the available lens names.
print(f"✓ Loaded {len(LENSES)} lenses")
print(f"  {list(LENSES.keys())}")
print("\n🔑 Ready!")

In [None]:
#@title üìä **CELL 2: LOAD DATA** { display-mode: "form" }
#@markdown Loads your data panel.

# Prefer the pre-built master panel; otherwise assemble one from per-series CSVs.
data_path = os.path.join(PRISM_PATH, 'data', 'raw', 'master_panel.csv')

if os.path.exists(data_path):
    panel_raw = pd.read_csv(data_path, index_col=0, parse_dates=True)
    print(f"✓ Loaded {data_path}")
else:
    # Build from individual files
    print("Building from individual CSVs...")
    raw_dir = os.path.join(PRISM_PATH, 'data', 'raw')
    dfs = {}
    skipped = []
    # Sorted so the column order does not depend on directory listing order.
    for f in sorted(os.listdir(raw_dir)):
        if not f.endswith('.csv'):
            continue
        try:
            df = pd.read_csv(os.path.join(raw_dir, f), index_col=0, parse_dates=True)
        except (OSError, ValueError) as err:
            # pandas parser and empty-file errors derive from ValueError.
            skipped.append(f"{f} ({err})")
            continue
        if df.shape[1] == 0:
            skipped.append(f"{f} (no data columns)")
            continue
        name = f.replace('.csv', '').upper()
        # First matching column wins; the file's first column is the fallback.
        for col in ['Close', 'Adj Close', 'VALUE', 'Value', df.columns[0]]:
            if col in df.columns:
                dfs[name] = df[col]
                break
    if skipped:
        # Report unreadable files instead of dropping them silently.
        print(f"  Skipped {len(skipped)} file(s): {skipped}")
    panel_raw = pd.DataFrame(dfs)

# Clean
# NOTE(review): bfill fills leading gaps with later observations (look-ahead);
# confirm that is acceptable for the analyses that follow.
panel = panel_raw.ffill().bfill().dropna()

if panel.empty:
    # Fail with a clear message instead of an IndexError on panel.index[0].
    raise ValueError(f"No usable data found under {os.path.join(PRISM_PATH, 'data', 'raw')}")

print(f"\n📊 Panel: {panel.shape[1]} indicators × {panel.shape[0]} days")
print(f"   {panel.index[0].strftime('%Y-%m-%d')} to {panel.index[-1].strftime('%Y-%m-%d')}")
print(f"   Columns: {list(panel.columns)[:10]}{'...' if len(panel.columns) > 10 else ''}")

In [None]:
#@title üöÄ **CELL 3: RUN ALL 14 LENSES** { display-mode: "form" }
#@markdown This may take 1-2 minutes for large panels.

# Run every registered lens; the counts come from the registry rather than a
# hard-coded 14 so the messages stay correct if lenses are added or removed.
n_lenses = len(LENSES)
print(f"Running all {n_lenses} lenses...\n")
results = run_all_lenses(panel)

print(f"\n✓ {len(results)}/{n_lenses} lenses completed")

# Consensus
consensus = compute_consensus(results)

print("\n" + "="*60)
print("🏆 TOP 10 INDICATORS BY CONSENSUS")
print("="*60)
for i, (ind, row) in enumerate(consensus.head(10).iterrows(), 1):
    # Bar length is proportional to agreement (20 blocks = full agreement).
    bar = "█" * int(row['agreement'] * 20)
    print(f"{i:2}. {ind:<15} rank={row['avg_rank']:5.1f}  agreement={row['agreement']:.2f} {bar}")

In [None]:
#@title üìà **CELL 4: VISUALIZE RESULTS** { display-mode: "form" }

# Four-panel summary of the consensus table produced in the previous cell.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# 1. Top indicators
top15 = consensus.head(15)
colors = plt.cm.RdYlGn(top15['agreement'].values)
# Values, colours and labels are all reversed so the best-ranked indicator sits on top.
axes[0,0].barh(range(len(top15)), top15['avg_rank'].values[::-1], color=colors[::-1])
axes[0,0].set_yticks(range(len(top15)))
axes[0,0].set_yticklabels(top15.index[::-1])
axes[0,0].set_xlabel('Avg Rank (lower=more important)')
axes[0,0].set_title('Top 15 Indicators')
axes[0,0].invert_xaxis()

# 2. Lens agreement heatmap
# Everything except the three summary columns is a per-lens rank column.
lens_cols = [c for c in consensus.columns if c not in ['avg_rank', 'std_rank', 'agreement']]
top10_ranks = consensus[lens_cols].head(10)
im = axes[0,1].imshow(top10_ranks.values, cmap='RdYlGn_r', aspect='auto')
axes[0,1].set_xticks(range(len(lens_cols)))
axes[0,1].set_xticklabels(lens_cols, rotation=45, ha='right', fontsize=8)
axes[0,1].set_yticks(range(len(top10_ranks)))
axes[0,1].set_yticklabels(top10_ranks.index)
axes[0,1].set_title('Rank by Each Lens')
plt.colorbar(im, ax=axes[0,1])

# 3. Lens correlation
lens_corr = consensus[lens_cols].corr(method='spearman')
im2 = axes[1,0].imshow(lens_corr, cmap='coolwarm', vmin=-1, vmax=1)
axes[1,0].set_xticks(range(len(lens_cols)))
axes[1,0].set_xticklabels(lens_cols, rotation=45, ha='right', fontsize=8)
axes[1,0].set_yticks(range(len(lens_cols)))
axes[1,0].set_yticklabels(lens_cols, fontsize=8)
axes[1,0].set_title('Lens Agreement (Spearman ρ)')
plt.colorbar(im2, ax=axes[1,0])

# 4. Agreement distribution
median_agreement = consensus['agreement'].median()
axes[1,1].hist(consensus['agreement'], bins=20, edgecolor='black', alpha=0.7)
axes[1,1].axvline(median_agreement, color='red', linestyle='--', label=f"Median={median_agreement:.2f}")
axes[1,1].set_xlabel('Agreement Score')
axes[1,1].set_ylabel('Count')
axes[1,1].set_title('Distribution of Lens Agreement')
axes[1,1].legend()

plt.tight_layout()
plt.show()

# Print lens correlations
print("\nLens Agreement Matrix (Spearman):")
print(lens_corr.round(2))

In [None]:
#@title üíæ **SAVE RESULTS** { display-mode: "form" }

# Write the consensus table and a small run summary under <PRISM_PATH>/06_output/latest.
output_dir = os.path.join(PRISM_PATH, '06_output', 'latest')
os.makedirs(output_dir, exist_ok=True)

consensus.to_csv(os.path.join(output_dir, 'consensus_14lens.csv'))

import json
# Run metadata: when it ran, panel size, which lenses succeeded, top indicators.
meta = {
    'timestamp': datetime.now().isoformat(),
    'n_indicators': len(panel.columns),
    'n_days': len(panel),
    'lenses_run': list(results.keys()),
    'top_10': list(consensus.head(10).index),
}
with open(os.path.join(output_dir, 'run_14lens.json'), 'w') as f:
    json.dump(meta, f, indent=2)

print(f"✓ Saved to {output_dir}")

---
## 🔧 Sandbox

In [None]:
# Inspect the raw output of one lens from the last run.
lens_name = 'granger'  # Change this!

result = results[lens_name]
# A single print call with a newline separator emits the header, the available
# result keys and the ten highest importance scores.
print(
    f"\n{lens_name.upper()} LENS",
    "=" * 40,
    f"Keys: {list(result.keys())}",
    "\nTop 10:",
    result['importance'].sort_values(ascending=False).head(10),
    sep="\n",
)

In [None]:
# Find disagreements - where lenses differ most
# The ten indicators with the largest spread of ranks across lenses.
high_variance = consensus.nlargest(10, 'std_rank')[['avg_rank', 'std_rank', 'agreement']]
print("\n🤔 MOST CONTESTED INDICATORS (lenses disagree)")
print("="*50)
print(high_variance)