# 🔮 PRISM Engine

**One notebook. That's it.**

Run cells top to bottom. Results at the end.

In [None]:
#@title 🔑 **RUN THIS FIRST** (click play, wait for ✓) { display-mode: "form" }
#@markdown This cell sets up paths and loads everything.

# === SETUP - WORKS IN COLAB OR LOCALLY ===
import sys
import os
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

def find_prism_root():
    """Locate the prism-engine project root, in Colab or on a local machine.

    When ``google.colab`` is importable, Google Drive is mounted at
    ``/content/drive`` and a fixed list of Drive/content locations is
    searched. Otherwise the current directory, its parent and - when this
    code runs from a file rather than a notebook - the directory holding
    that file are searched.

    Returns:
        Path: the first candidate containing ``05_engine/lenses``, or the
        resolved current working directory when no candidate matches.
    """
    # Colab is detected by whether its drive helper can be imported.
    try:
        from google.colab import drive
    except ImportError:
        drive = None

    if drive is not None:
        # Mount drive if in Colab
        drive.mount('/content/drive')
        # Search common locations
        candidates = [
            Path('/content/drive/MyDrive/prism-engine/prism-engine'),
            Path('/content/drive/MyDrive/prism-engine'),
            Path('/content/prism-engine'),
        ]
    else:
        # Local execution - search relative to cwd, then the script location
        candidates = [
            Path('.').resolve(),
            Path('..').resolve(),
        ]
        # `dir()` inside a function only lists local names, so it can never
        # see `__file__`; the module globals are where it lives (it is absent
        # when running as a notebook).
        script_file = globals().get('__file__')
        if script_file:
            candidates.append(Path(script_file).parent.resolve())

    # Find the one with 05_engine/lenses
    for path in candidates:
        if (path / '05_engine' / 'lenses').exists():
            return path

    # Fallback: current directory
    return Path('.').resolve()

# Resolve the project root once; later cells build every path from it.
PRISM_ROOT = find_prism_root()
# Re-running this cell must not stack duplicate entries on sys.path.
if str(PRISM_ROOT) not in sys.path:
    sys.path.insert(0, str(PRISM_ROOT))
print(f"✓ PRISM_ROOT = {PRISM_ROOT}")

# === BUILTIN LENSES (no imports needed) ===

class BaseLens:
    """Common lens interface: subclasses set ``name`` and implement ``analyze``."""

    name = "base"

    def analyze(self, panel):
        """Analyze ``panel``; the base class has no implementation and always raises."""
        raise NotImplementedError

    def top_indicators(self, result, n=10):
        """Return the ``n`` highest ``(label, score)`` pairs of ``result['importance']``.

        An empty list is returned when ``result`` has no ``'importance'`` entry
        or that entry is not a ``pandas.Series``.
        """
        if 'importance' not in result:
            return []
        scores = result['importance']
        if not isinstance(scores, pd.Series):
            return []
        ranked = scores.sort_values(ascending=False)
        return list(ranked.head(n).items())

class MagnitudeLens(BaseLens):
    """Scores each column by the L2 norm of its z-scored values."""

    name = "magnitude"

    def analyze(self, panel):
        """Return the per-column L2 norm under both ``'importance'`` and ``'magnitude'``.

        NOTE(review): with pandas' default sample std and no missing values,
        the squared z-scores of a non-constant column sum to ``n - 1``, so
        every such column appears to get the same score - confirm this is
        the intended measure.
        """
        centered = panel - panel.mean()
        zscores = centered / panel.std()
        l2_norm = np.sqrt(zscores.pow(2).sum())
        return {'importance': l2_norm, 'magnitude': l2_norm}

class PCALens(BaseLens):
    """Scores each column by the size of its loadings on the leading components."""

    name = "pca"

    def analyze(self, panel, n_components=5):
        """Run an SVD-based PCA on the standardized panel.

        Args:
            panel: DataFrame whose columns are the indicators to score.
            n_components: number of leading components kept for the loadings.

        Returns:
            dict with ``'importance'`` (sum of absolute loadings per column),
            ``'explained_variance_ratio'`` (for the kept components) and
            ``'loadings'`` (DataFrame indexed by column, one column per
            component).
        """
        standardized = (panel - panel.mean()) / panel.std()
        # NaNs left by standardization are replaced with 0 before the SVD.
        X = standardized.fillna(0)
        _, singular_values, components = np.linalg.svd(X.values, full_matrices=False)

        variance = (singular_values ** 2) / (len(X) - 1)
        variance_ratio = variance / variance.sum()

        # Right singular vectors scaled by their singular values.
        scaled_loadings = components[:n_components].T * singular_values[:n_components]
        column_scores = np.abs(scaled_loadings).sum(axis=1)

        return {
            'importance': pd.Series(column_scores, index=panel.columns),
            'explained_variance_ratio': variance_ratio[:n_components],
            'loadings': pd.DataFrame(scaled_loadings, index=panel.columns),
        }

class InfluenceLens(BaseLens):
    """Importance by volatility × deviation."""

    name = "influence"

    def analyze(self, panel, window=20):
        """Score columns by the mean of rolling std times rolling absolute deviation.

        Args:
            panel: DataFrame whose columns are the indicators to score.
            window: rolling window length used for both the std and the mean.

        Returns:
            dict with ``'influence'`` (raw per-column score) and
            ``'importance'`` (the score min-max scaled to [0, 1]).
        """
        rolling = panel.rolling(window=window)
        vol = rolling.std()
        dev = (panel - rolling.mean()).abs()
        influence = (vol * dev).mean()

        span = influence.max() - influence.min()
        if span > 0:
            importance = (influence - influence.min()) / span
        else:
            # All scores equal (e.g. a single column): dividing by the zero
            # range would turn every score into NaN. Report 0.0 instead;
            # scores that are already NaN stay NaN.
            importance = influence * 0.0
        return {'importance': importance, 'influence': influence}

class ClusteringLens(BaseLens):
    """Importance by centrality within correlation clusters."""

    name = "clustering"

    def analyze(self, panel, n_clusters=None):
        """Cluster columns on ``1 - |correlation|`` and score each by closeness to its cluster.

        Args:
            panel: DataFrame whose columns are the indicators to score.
            n_clusters: number of clusters; defaults to
                ``max(2, n_columns // 4)`` and is capped at the column count.

        Returns:
            dict with ``'importance'`` (``1 / (1 + mean distance to the
            members of the column's own cluster)``) and ``'labels'``
            (cluster index per column).
        """
        n_cols = len(panel.columns)
        if n_cols == 0:
            # Nothing to cluster; return empty results instead of failing in the sampler.
            return {'importance': pd.Series([], index=panel.columns, dtype=float),
                    'labels': pd.Series([], index=panel.columns, dtype=int)}

        corr = panel.corr()
        n_clusters = n_clusters or max(2, n_cols // 4)
        # Sampling without replacement cannot draw more centers than columns.
        n_clusters = min(n_clusters, n_cols)
        dist = 1 - np.abs(corr.values)

        # A private generator yields the same draw as seeding the global one
        # with 42, without resetting the notebook's global random state.
        rng = np.random.RandomState(42)
        centers = rng.choice(n_cols, n_clusters, replace=False)
        labels = np.zeros(n_cols, dtype=int)
        for _ in range(10):
            # Assignment step: each column joins its nearest center.
            labels[:] = np.argmin(dist[:, centers], axis=1)
            # Update step: each center becomes the member with the smallest
            # mean distance to the rest of its cluster.
            for k in range(n_clusters):
                members = np.where(labels == k)[0]
                if len(members) > 0:
                    centers[k] = members[np.argmin([dist[m, members].mean() for m in members])]

        importance = pd.Series([1 / (1 + dist[i, np.where(labels == labels[i])[0]].mean())
                                for i in range(n_cols)], index=panel.columns)
        return {'importance': importance, 'labels': pd.Series(labels, index=panel.columns)}

class CorrelationLens(BaseLens):
    """Importance by average absolute correlation with other indicators."""

    name = "correlation"

    def analyze(self, panel):
        """Average each column's absolute correlation with every other column.

        Returns:
            dict with ``'importance'`` (mean absolute off-diagonal
            correlation per column) and ``'correlation_matrix'`` (the
            absolute correlation matrix).
        """
        corr = panel.corr().abs()
        n = len(corr)
        # Blank out the diagonal instead of subtracting a hard-coded 1: a
        # constant column has a NaN self-correlation, so "sum - 1" would
        # give it a negative score.
        others = corr.mask(np.eye(n, dtype=bool))
        # max(..., 1) keeps a single-column panel from dividing 0 by 0.
        avg_corr = others.sum() / max(n - 1, 1)
        return {'importance': avg_corr, 'correlation_matrix': corr}

class VolatilityLens(BaseLens):
    """Importance by rolling volatility."""

    name = "volatility"

    def analyze(self, panel, window=20):
        """Score columns by their mean rolling standard deviation.

        Args:
            panel: DataFrame whose columns are the indicators to score.
            window: rolling window length for the standard deviation.

        Returns:
            dict with ``'volatility'`` (mean rolling std per column) and
            ``'importance'`` (the same values min-max scaled to [0, 1]).
        """
        vol = panel.rolling(window=window).std().mean()

        span = vol.max() - vol.min()
        if span > 0:
            importance = (vol - vol.min()) / span
        else:
            # All volatilities equal (e.g. a single column): dividing by the
            # zero range would make every score NaN. Report 0.0 instead;
            # values that are already NaN stay NaN.
            importance = vol * 0.0
        return {'importance': importance, 'volatility': vol}

class MomentumLens(BaseLens):
    """Importance by trend strength (recent vs historical)."""

    name = "momentum"

    def analyze(self, panel, lookback=60):
        """Score columns by how far recent mean returns sit from the full-sample mean.

        Args:
            panel: DataFrame whose columns are the indicators to score.
            lookback: number of most recent return rows treated as "recent".

        Returns:
            dict with ``'momentum'`` (absolute difference between recent and
            overall mean percentage change) and ``'importance'`` (the same
            values min-max scaled to [0, 1]).
        """
        returns = panel.pct_change().dropna()
        recent = returns.tail(lookback).mean()
        historical = returns.mean()
        momentum = (recent - historical).abs()

        span = momentum.max() - momentum.min()
        if span > 0:
            importance = (momentum - momentum.min()) / span
        else:
            # All momenta equal (e.g. a single column, or lookback covering
            # the whole sample): dividing by the zero range would make every
            # score NaN. Report 0.0 instead; NaN values stay NaN.
            importance = momentum * 0.0
        return {'importance': importance, 'momentum': momentum}

# === LENS REGISTRY ===
# Registry of lens classes keyed by each class's own `name` attribute,
# in the order the lenses are run.
LENSES = {
    lens_cls.name: lens_cls
    for lens_cls in (
        MagnitudeLens,
        PCALens,
        InfluenceLens,
        ClusteringLens,
        CorrelationLens,
        VolatilityLens,
        MomentumLens,
    )
}

def run_lens(name, panel, **kwargs):
    """Instantiate the lens registered under ``name`` and analyze ``panel``.

    Args:
        name: key of the lens in ``LENSES``.
        panel: DataFrame handed to the lens's ``analyze`` method.
        **kwargs: optional lens-specific settings forwarded to ``analyze``
            (for example ``window`` or ``n_components``).

    Returns:
        The result dict produced by the lens.

    Raises:
        KeyError: if ``name`` is not registered; the message lists the
            available lens names.
    """
    try:
        lens_cls = LENSES[name]
    except KeyError:
        raise KeyError(
            f"Unknown lens {name!r}; available lenses: {sorted(LENSES)}"
        ) from None
    return lens_cls().analyze(panel, **kwargs)

def run_all_lenses(panel, names=None):
    """Run the requested lenses on ``panel`` and collect the ones that succeed.

    Args:
        panel: DataFrame handed to every lens.
        names: lens names to run; every registered lens when omitted or empty.

    Returns:
        dict mapping lens name to its result. A lens that raises is reported
        on stdout and left out of the dict.
    """
    selected = names if names else list(LENSES.keys())
    outcomes = {}
    for lens_name in selected:
        try:
            outcomes[lens_name] = run_lens(lens_name, panel)
            print(f"  âœ“ {lens_name}")
        except Exception as err:
            print(f"  âœ— {lens_name}: {err}")
    return outcomes

def compute_consensus(results):
    """Combine per-lens importance scores into a consensus ranking.

    Args:
        results: dict mapping lens name to its result dict. Only results
            whose ``'importance'`` entry is a ``pandas.Series`` are used.

    Returns:
        DataFrame indexed by indicator, sorted by ``avg_rank``, with one rank
        column per lens (1 = most important) plus ``avg_rank`` (mean rank
        across lenses) and ``agreement`` (``1 / (1 + std of the ranks)``).
        When no lens contributes a ranking the frame is empty but still has
        the ``avg_rank`` and ``agreement`` columns.
    """
    rankings = {
        name: res['importance'].rank(ascending=False)
        for name, res in results.items()
        if 'importance' in res and isinstance(res['importance'], pd.Series)
    }
    if not rankings:
        # Keep the summary columns so callers can index them on an empty result.
        return pd.DataFrame(columns=['avg_rank', 'agreement'])

    lens_ranks = pd.DataFrame(rankings)
    spread = lens_ranks.std(axis=1)
    # A single available rank has an undefined (NaN) spread; treat it as zero
    # disagreement so `agreement` stays a usable number. Rows with no rank at
    # all keep NaN.
    has_rank = lens_ranks.notna().any(axis=1)
    spread[has_rank & spread.isna()] = 0.0

    df = lens_ranks.copy()
    df['avg_rank'] = lens_ranks.mean(axis=1)
    df['agreement'] = 1 / (1 + spread)
    return df.sort_values('avg_rank')

# Confirm the setup cell finished and list the registered lenses.
print(f"✓ Loaded {len(LENSES)} lenses: {list(LENSES.keys())}")
print("\n🔑 Ready to go!")

In [None]:
#@title 📊 **LOAD DATA** { display-mode: "form" }
#@markdown Loads and cleans your data.

# Find data
data_file = PRISM_ROOT / 'data' / 'raw' / 'master_panel.csv'

if data_file.exists():
    panel_raw = pd.read_csv(data_file, index_col=0, parse_dates=True)
else:
    # No master panel: build one column per CSV found in data/raw.
    print("Building panel from individual files...")
    raw_dir = PRISM_ROOT / 'data' / 'raw'
    dfs = {}
    # sorted() keeps the column order independent of directory listing order.
    for f in sorted(raw_dir.glob('*.csv')):
        try:
            df = pd.read_csv(f, index_col=0, parse_dates=True)
        except Exception as e:
            # Best-effort load: report the unreadable file instead of hiding it.
            print(f"  ✗ skipped {f.name}: {e}")
            continue
        # Use 'Close' / 'VALUE' / 'Value' when present, else the first numeric column.
        preferred = [c for c in ('Close', 'VALUE', 'Value') if c in df.columns]
        numeric = list(df.select_dtypes(include='number').columns)
        chosen = (preferred + numeric)[:1]
        if chosen:
            dfs[f.stem.upper()] = df[chosen[0]]
        else:
            print(f"  ✗ skipped {f.name}: no numeric column")
    panel_raw = pd.DataFrame(dfs)

# Clean: drop columns with no data at all (a fully empty column would make the
# final dropna() discard every row), then forward fill, backward fill and drop
# any rows that still contain NaNs.
panel = panel_raw.dropna(axis=1, how='all').ffill().bfill().dropna()

if panel.empty:
    # Fail with a clear message rather than an IndexError on panel.index[0] below.
    raise ValueError(
        f"No usable data found under {PRISM_ROOT / 'data' / 'raw'}: "
        "expected master_panel.csv or one CSV per indicator."
    )

print(f"✓ Loaded {panel.shape[1]} indicators, {panel.shape[0]} time points")
print(f"  Date range: {panel.index[0].strftime('%Y-%m-%d')} to {panel.index[-1].strftime('%Y-%m-%d')}")
print(f"  Indicators: {list(panel.columns)}")

In [None]:
#@title 🚀 **RUN ANALYSIS** { display-mode: "form" }
#@markdown Runs all lenses and computes consensus.

# Run every registered lens on the cleaned panel, then merge their rankings.
print("Running all lenses...\n")
results = run_all_lenses(panel)

print("\nComputing consensus...")
consensus = compute_consensus(results)

print("\n" + "="*50)
print("📊 TOP 10 INDICATORS (by consensus)")
print("="*50)
if consensus.empty:
    print("(no lens produced an importance ranking)")
for i, (ind, row) in enumerate(consensus.head(10).iterrows(), 1):
    agreement = row['agreement']
    # agreement can be NaN (for example when only one lens ranked the
    # indicator); int(NaN) would raise, so draw no bar in that case.
    bar = "█" * int(agreement * 20) if pd.notna(agreement) else ""
    # str() keeps the width spec valid for non-string labels.
    print(f"{i:2}. {str(ind):<15} rank={row['avg_rank']:.1f}  agreement={agreement:.2f} {bar}")

print("\n✓ Analysis complete!")

In [None]:
#@title 📈 **VISUALIZE** { display-mode: "form" }
#@markdown Creates charts showing results.

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax_bar, ax_heat = axes

# Left panel: horizontal bars of average rank for the ten best indicators,
# colored by agreement. Values, colors and labels are all reversed together
# so the best indicator ends up at the top of the chart.
top10 = consensus.head(10)
colors = plt.cm.RdYlGn(top10['agreement'].values)
bar_positions = range(len(top10))
ax_bar.barh(bar_positions, top10['avg_rank'].values[::-1], color=colors[::-1])
ax_bar.set_yticks(bar_positions)
ax_bar.set_yticklabels(top10.index[::-1])
ax_bar.set(
    xlabel='Average Rank (lower = more important)',
    title='Top 10 Indicators by Consensus',
)
ax_bar.invert_xaxis()

# Right panel: heatmap of the rank each lens gave to the same ten indicators.
lens_cols = [c for c in consensus.columns if c not in ['avg_rank', 'agreement']]
rank_matrix = consensus[lens_cols].head(10)
im = ax_heat.imshow(rank_matrix.values, cmap='RdYlGn_r', aspect='auto')
ax_heat.set_xticks(range(len(lens_cols)))
ax_heat.set_xticklabels(lens_cols, rotation=45, ha='right')
ax_heat.set_yticks(range(len(rank_matrix)))
ax_heat.set_yticklabels(rank_matrix.index)
ax_heat.set(title='Rank by Each Lens (green=high, red=low)')
fig.colorbar(im, ax=ax_heat, label='Rank')

plt.tight_layout()
plt.show()

# How similarly the lenses rank the indicators (Spearman correlation of ranks).
print("\nLens Agreement (Spearman correlation):")
lens_corr = consensus[lens_cols].corr(method='spearman')
print(lens_corr.round(2))

In [None]:
#@title 💾 **SAVE RESULTS** { display-mode: "form" }
#@markdown Saves consensus to your output folder.

output_dir = PRISM_ROOT / '06_output' / 'latest'
output_dir.mkdir(parents=True, exist_ok=True)

# Save consensus
consensus.to_csv(output_dir / 'consensus.csv')

# Save run metadata
import json
meta = {
    'timestamp': datetime.now().isoformat(),
    'data_shape': list(panel.shape),
    'date_range': [str(panel.index[0]), str(panel.index[-1])],
    'lenses_run': list(results.keys()),
    # Index labels are not guaranteed to be plain str; stringify them so
    # json.dump cannot fail on a non-serializable label.
    'top_5': [str(label) for label in consensus.head(5).index],
}
# Explicit encoding keeps the file identical across platforms.
with open(output_dir / 'run_meta.json', 'w', encoding='utf-8') as f:
    json.dump(meta, f, indent=2)

print(f"✓ Saved to {output_dir}/")
print("  - consensus.csv")
print("  - run_meta.json")

---

## 🎯 Quick Reference

**Run a specific lens:**
```python
result = run_lens('pca', panel)
print(result['importance'].sort_values(ascending=False))
```

**See what's in a result:**
```python
result = run_lens('clustering', panel)
print(result.keys())  # Shows available outputs
```

**Available lenses:**
- `magnitude` - Total movement (L2 norm)
- `pca` - Principal component contribution  
- `influence` - Volatility × deviation
- `clustering` - Correlation cluster centrality
- `correlation` - Average correlation with others
- `volatility` - Rolling volatility
- `momentum` - Trend strength

In [None]:
# 🔧 SANDBOX - Play here!

# Example: inspect what the PCA lens returns beyond the importance scores
pca_result = run_lens('pca', panel)

print("Explained variance by component:")
print(pca_result['explained_variance_ratio'])

print("\nTop loadings on PC1:")
# Column 0 of the loadings frame is the first principal component.
pc1_loadings = pca_result['loadings'][0]
print(pc1_loadings.sort_values(ascending=False).head(5))