# CARIA V13: MANUAL DATA LOADING + GGSI

**Changes:**
1.  **Manual Data Loading**: Reads CSV files from Google Drive instead of calling external APIs (avoids API rate limits and request errors).
2.  **Extended Universe**: 27 countries (trade hubs, financial centres, manufacturing and energy exporters).
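3.  **Synthetic Global Geopolitical Stress Index (GGSI)**: Computed internally from the loaded market data (equity volatility, cross-market correlation, Gold/DXY levels and oil volatility).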

**Setup:**
1.  Mount Google Drive.
2.  Ensure data is in `/content/drive/MyDrive/Caria_Data/`.
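    -   `market/`: Contains `USA.csv`, `CHN.csv`, `Gold.csv`, etc. Each file needs a `Date` column and, ideally, a `Close` column (otherwise the first remaining column is used).
    -   `macro/`: Contains `USA_gdp_growth.csv`, etc. Each file needs a `Date` column and, ideally, a `Value` column (otherwise the first remaining column is used).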

In [None]:
!pip install torch matplotlib seaborn scikit-learn networkx

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import warnings
import os
from google.colab import drive

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"üîß Device: {DEVICE}")

# Mount Google Drive; DATA_PATH is the folder the CSV loaders read from.
drive.mount('/content/drive')
DATA_PATH = '/content/drive/MyDrive/Caria_Data'

# 1. MANUAL DATA FETCHER

In [None]:
class ManualDataFetcher:
    """Load market and macro time series from CSV files on disk.

    Expected layout under ``data_path``:

    * ``market/<ISO>.csv`` for each country index, plus one file per entry
      of ``GLOBAL_SERIES`` (e.g. ``market/Gold.csv``).
    * ``macro/<ISO>_<indicator>.csv`` for each entry of ``MACRO_INDICATORS``.

    Every file must contain a ``Date`` column. Loading is best-effort: a
    missing or unreadable file is reported on stdout and skipped, it never
    aborts the run.
    """

    # Non-country series stored next to the country indices.
    GLOBAL_SERIES = ('Gold', 'Oil', 'Copper', 'VIX', 'DXY')
    # Macro indicators looked up for every country.
    MACRO_INDICATORS = ('gdp_growth', 'inflation', 'interest_rate', 'unemployment')

    def __init__(self, data_path):
        """Remember the data root and the list of ISO-3 country codes."""
        self.data_path = data_path
        self.countries = [
            'USA', 'CHN', 'JPN', 'DEU', 'GBR', 'FRA', 'IND', 'BRA',
            'CAN', 'KOR', 'AUS', 'MEX', 'IDN', 'ZAF', 'CHL',
            'SGP', 'NLD', 'HKG', 'CHE', 'ARE', 'TWN', 'VNM',
            'MYS', 'THA', 'POL', 'NOR', 'QAT'
        ]

    def _load_series(self, path, value_col, label):
        """Read one CSV into a date-indexed Series, or return None on failure.

        ``value_col`` is used when present; otherwise the first column left
        after ``Date`` becomes the index is taken. Any read/parse problem is
        printed (tagged with ``label``) instead of being silently dropped.
        """
        try:
            df = pd.read_csv(path, parse_dates=['Date'])
            df.set_index('Date', inplace=True)
            return df[value_col] if value_col in df.columns else df.iloc[:, 0]
        except Exception as e:
            # Per-file boundary: keep loading the other files, but say why
            # this one was skipped.
            print(f"    ‚ö†Ô∏è Error reading {label}: {e}")
            return None

    def fetch_all(self):
        """Return ``{name: Series}`` for all readable market files.

        Country indices are stored under ``'<ISO>_index'``; global series
        keep their own name (``'Gold'``, ``'Oil'``, ...).
        """
        print(f"\nüìÇ Loading Data from {self.data_path}...")
        market_data = {}

        # 1. Country equity indices
        print("  Loading Market Indices...")
        for iso in self.countries:
            path = f'{self.data_path}/market/{iso}.csv'
            if not os.path.exists(path):
                print(f"    ‚úó {iso} (file not found)")
                continue
            col = self._load_series(path, 'Close', iso)
            if col is not None:
                market_data[f'{iso}_index'] = col
                print(f"    ‚úì {iso} ({len(col)} pts)")

        # 2. Commodities / global series
        for name in self.GLOBAL_SERIES:
            path = f'{self.data_path}/market/{name}.csv'
            if not os.path.exists(path):
                print(f"    ‚úó {name} (missing)")
                continue
            col = self._load_series(path, 'Close', name)
            if col is not None:
                market_data[name] = col
                print(f"    ‚úì {name}")

        return market_data

    def get_macro_data(self):
        """Return ``{iso: {indicator: Series}}`` for all readable macro files.

        Countries without any readable indicator are omitted. Missing files
        are skipped quietly (most countries lack some indicators); files that
        exist but cannot be parsed are reported.
        """
        print("\n  Loading Macro Data...")
        te_data = {}

        for iso in self.countries:
            iso_data = {}
            for ind in self.MACRO_INDICATORS:
                path = f'{self.data_path}/macro/{iso}_{ind}.csv'
                if not os.path.exists(path):
                    continue
                col = self._load_series(path, 'Value', f'{iso}_{ind}')
                if col is not None:
                    iso_data[ind] = col
            if iso_data:
                te_data[iso] = iso_data
                print(f"    ‚úì {iso}: {len(iso_data)} indicators")

        return te_data

# 2. DATA ASSEMBLER & GGSI

In [None]:
class GlobalDataAssembler:
    """Assemble loaded CSV series into model-ready arrays.

    ``fetch()`` aligns every series on the union of all market dates, derives
    per-country (node) features, a synthetic global stress index (GGSI) and
    a few commodity z-scores, and returns them stacked in one array together
    with direction labels and a validity mask.
    """

    # Number of rows ahead used to build the up/down labels in fetch().
    LABEL_HORIZON = 5

    def __init__(self, data_path):
        """Create the CSV fetcher and reuse its country list as node order."""
        self.fetcher = ManualDataFetcher(data_path)
        self.countries = self.fetcher.countries

    def _to_zscore(self, series, window=60):
        """Return the rolling z-score of ``series``, clipped to [-5, 5].

        NaN and infinite results become 0. A series shorter than ``window``
        yields an all-zero series on the same index ("no signal") so callers
        can always do arithmetic on the result. ``None`` input still returns
        ``None``.
        """
        if series is None:
            return None
        if len(series) < window:
            # Previously None was returned here, which made every caller
            # that adds or concatenates z-scores crash on short histories.
            return pd.Series(0.0, index=series.index)
        roll_mean = series.rolling(window, min_periods=5).mean()
        roll_std = series.rolling(window, min_periods=5).std()
        z = (series - roll_mean) / (roll_std + 1e-8)
        return z.replace([np.inf, -np.inf], 0).fillna(0).clip(-5, 5)

    def _interpolate(self, series, target_index):
        """Align ``series`` to ``target_index``; ``None`` becomes all zeros.

        Gaps are forward-filled, then back-filled (so leading gaps take the
        first known value), and anything still missing is set to 0.
        """
        if series is None:
            return pd.Series(0.0, index=target_index)
        # reindex() requires a unique index; keep the first row per date.
        series = series[~series.index.duplicated(keep='first')]
        return series.reindex(target_index).ffill().bfill().fillna(0)

    def _compute_ggsi(self, market_data, all_dates):
        """Build the GGSI as the mean of four z-scored components.

        Components: average realised volatility of the major indices, their
        average rolling pairwise correlation, Gold/DXY level z-scores, and
        oil return volatility. Returns an all-zero series when none of the
        major indices is available.
        """
        print("\nüåç Constructing GGSI (Global Geopolitical Stress Index)...")

        # 1. Global Volatility
        majors = ['USA', 'CHN', 'DEU', 'JPN', 'GBR']
        vol_series = []
        returns_df = pd.DataFrame(index=all_dates)

        for iso in majors:
            key = f'{iso}_index'
            if key in market_data:
                s = self._interpolate(market_data[key], all_dates)
                ret = np.log(s / s.shift(1)).fillna(0)
                vol = ret.rolling(20).std() * np.sqrt(252)
                vol_series.append(self._to_zscore(vol, 252))
                returns_df[iso] = ret

        if not vol_series:
            return pd.Series(0, index=all_dates)

        global_vol_z = pd.concat(vol_series, axis=1).mean(axis=1).fillna(0)

        # 2. Global Correlation
        if len(returns_df.columns) > 1:
            # rolling().corr() yields one correlation matrix per date;
            # average each matrix down to a single number per date.
            corr_series = returns_df.rolling(60).corr().groupby(level=0).mean().mean(axis=1)
            global_corr_z = self._to_zscore(corr_series, 252)
        else:
            global_corr_z = pd.Series(0, index=all_dates)

        # 3. Safe Haven
        gold = self._interpolate(market_data.get('Gold'), all_dates)
        dxy = self._interpolate(market_data.get('DXY'), all_dates)
        safe_haven_z = (self._to_zscore(gold, 60) + self._to_zscore(dxy, 60)) / 2

        # 4. Oil Shock
        oil = self._interpolate(market_data.get('Oil'), all_dates)
        oil_ret = np.log(oil / oil.shift(1)).fillna(0)
        oil_vol_z = self._to_zscore(oil_ret.rolling(20).std(), 252)

        ggsi = (global_vol_z + global_corr_z + safe_haven_z + oil_vol_z) / 4
        return ggsi.fillna(0)

    def fetch(self):
        """Load everything and return the training arrays.

        Returns:
            ``(tensor, labels, valid_mask, countries, all_dates, all_cols)``

            * ``tensor``: ``(time, nodes, node_feats + global_feats)`` array.
            * ``labels``: ``(time, nodes)``; 1.0 where the node's ``market_z``
              is higher ``LABEL_HORIZON`` rows later, else 0.0.
            * ``valid_mask``: ``(time, nodes)``; 1.0 where a label exists
              (node has market data and the look-ahead row is in range).
            * ``all_cols``: names of the node feature columns, in order.

            When no market data could be loaded the result is
            ``(None, None, None, [], [], [])``.
        """
        market_data = self.fetcher.fetch_all()
        te_data = self.fetcher.get_macro_data()

        # Master index: union of the dates of every loaded market series.
        all_dates = pd.DatetimeIndex([])
        for s in market_data.values():
            if s is not None:
                all_dates = all_dates.union(s.index)

        if len(all_dates) == 0:
            print("‚ùå No data found! Please check your CSV files.")
            return None, None, None, [], [], []

        all_dates = all_dates.sort_values()

        # Compute GGSI
        ggsi = self._compute_ggsi(market_data, all_dates)

        # Build Node Features
        node_features = {}
        has_market = {}

        for iso in self.countries:
            feat = pd.DataFrame(index=all_dates)
            te_iso = te_data.get(iso, {})
            has_market[iso] = False

            # Market
            idx_key = f'{iso}_index'
            if idx_key in market_data:
                prices = self._interpolate(market_data[idx_key], all_dates)
                # Require positive prices on more than 30% of the dates.
                if (prices > 0).sum() > len(prices) * 0.3:
                    has_market[iso] = True
                    feat['market_z'] = self._to_zscore(prices, 20)
                    feat['mom_20'] = (prices / prices.shift(20) - 1).clip(-0.5, 0.5)
                    feat['vol_20'] = np.log(prices / prices.shift(1)).rolling(20).std() * np.sqrt(252)

            # Macro
            for ind_name, series in te_iso.items():
                s = self._interpolate(series, all_dates)
                feat[f'{ind_name}_z'] = self._to_zscore(s, 60)

            node_features[iso] = feat.fillna(0)

        # Global Features (identical for every node)
        global_feats = pd.DataFrame(index=all_dates)
        global_feats['GGSI'] = ggsi
        for c in ['Oil', 'Gold', 'Copper']:
            if c in market_data:
                s = self._interpolate(market_data[c], all_dates)
                global_feats[f'{c}_z'] = self._to_zscore(s, 20)

        global_feats = global_feats.fillna(0)

        # Tensor Construction
        all_cols = sorted(set().union(*[df.columns for df in node_features.values()]))

        n_time = len(all_dates)
        n_nodes = len(self.countries)
        n_node_feats = len(all_cols)
        n_global_feats = len(global_feats.columns)

        tensor = np.zeros((n_time, n_nodes, n_node_feats + n_global_feats))

        for i, iso in enumerate(self.countries):
            # Columns a node lacks are zero-filled without mutating its frame.
            df = node_features[iso].reindex(columns=all_cols, fill_value=0)
            tensor[:, i, :n_node_feats] = df.values
            tensor[:, i, n_node_feats:] = global_feats.values

        # Labels
        horizon = self.LABEL_HORIZON
        labels = np.zeros((n_time, n_nodes))
        valid_mask = np.zeros((n_time, n_nodes))
        mkt_idx = all_cols.index('market_z') if 'market_z' in all_cols else 0

        for i, iso in enumerate(self.countries):
            if has_market[iso]:
                mkt_z = tensor[:, i, mkt_idx]
                labels[:-horizon, i] = (mkt_z[horizon:] > mkt_z[:-horizon]).astype(float)
                # The last `horizon` rows have no look-ahead value, so their
                # label is unknown and must stay masked out.
                valid_mask[:-horizon, i] = 1.0

        return tensor, labels, valid_mask, self.countries, all_dates, all_cols

# 3. MODEL & TRAINING

In [None]:
class EconomicGraphDiscoverer(nn.Module):
    """Graph + GRU model that learns a dense adjacency between nodes.

    Per time step, node features are projected to ``d_model``, mixed across
    nodes with a learned row-stochastic adjacency, then the flattened node
    states are fed through a 2-layer GRU. The final hidden state is mapped
    to one logit per node.
    """

    def __init__(self, num_nodes, in_feats, d_model=64, n_layers=2, dropout=0.3):
        super().__init__()
        self.num_nodes = num_nodes

        # Per-node feature projection.
        self.input_proj = nn.Sequential(
            nn.Linear(in_feats, d_model), nn.GELU(), nn.Dropout(dropout)
        )

        # Learnable node embeddings; their similarities define the adjacency.
        self.node_embed = nn.Parameter(0.1 * torch.randn(num_nodes, 32))
        self.adj_temperature = 0.3

        # Message-passing blocks applied after each neighbourhood aggregation.
        self.graph_layers = nn.ModuleList()
        for _ in range(n_layers):
            self.graph_layers.append(
                nn.Sequential(
                    nn.Linear(d_model, d_model),
                    nn.GELU(),
                    nn.Dropout(dropout),
                    nn.LayerNorm(d_model),
                )
            )

        hidden_size = 2 * d_model
        self.temporal = nn.GRU(
            d_model * num_nodes,
            hidden_size,
            batch_first=True,
            num_layers=2,
            dropout=dropout,
        )
        self.output = nn.Sequential(
            nn.Linear(hidden_size, d_model), nn.GELU(), nn.Linear(d_model, num_nodes)
        )

    def get_adjacency(self):
        """Return the (num_nodes, num_nodes) adjacency; each row sums to 1."""
        scores = torch.mm(self.node_embed, self.node_embed.t())
        scaled = scores / self.adj_temperature
        return F.softmax(scaled, dim=-1)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq, nodes, feats) to (batch, nodes) logits."""
        batch, steps, nodes, _ = x.shape
        frames = batch * steps

        # Treat every (batch, step) pair as an independent graph snapshot.
        hidden = self.input_proj(x).view(frames, nodes, -1)
        mixing = self.get_adjacency().unsqueeze(0).expand(frames, nodes, nodes)

        for block in self.graph_layers:
            neighbours = torch.bmm(mixing, hidden)
            hidden = block(hidden + neighbours)

        # Flatten the node axis so the GRU sees one vector per time step.
        sequence = hidden.view(batch, steps, nodes, -1).reshape(batch, steps, -1)
        _, final_state = self.temporal(sequence)
        return self.output(final_state[-1])

class EconomicDataset(Dataset):
    """Sliding-window dataset over time-major arrays.

    Item ``idx`` is ``(x, y, m)`` where ``x = data[idx : idx + seq_len]`` and
    ``y`` / ``m`` are the label and mask rows at position ``idx + seq_len``.
    All three are returned as float32 tensors.

    Args:
        data: array indexed by time on axis 0.
        labels: per-time-step labels, same length as ``data``.
        mask: per-time-step validity mask, same length as ``data``.
        seq_len: number of time steps in each input window.
        horizon: trailing rows excluded from the dataset (the label
            look-ahead); defaults to the previously hard-coded 5.
    """

    def __init__(self, data, labels, mask, seq_len=45, horizon=5):
        self.data = data
        self.labels = labels
        self.mask = mask
        self.seq_len = seq_len
        self.horizon = horizon

    def __len__(self):
        # Clamp at 0: a split shorter than seq_len + horizon has no windows,
        # and a negative __len__ makes len() raise ValueError.
        return max(0, len(self.data) - self.seq_len - self.horizon)

    def __getitem__(self, idx):
        n_windows = len(self)
        if idx < 0:
            idx += n_windows
        if not 0 <= idx < n_windows:
            raise IndexError(f"index {idx} out of range for {n_windows} windows")
        target = idx + self.seq_len
        x = self.data[idx:target]
        y = self.labels[target]
        m = self.mask[target]
        return (
            torch.as_tensor(x, dtype=torch.float32),
            torch.as_tensor(y, dtype=torch.float32),
            torch.as_tensor(m, dtype=torch.float32),
        )

def _safe_len(dataset):
    """Return ``len(dataset)``, treating an invalid (negative) length as 0."""
    try:
        return len(dataset)
    except ValueError:
        # len() raises ValueError when __len__ returns a negative number,
        # which happens for splits shorter than one window.
        return 0


def _evaluate(model, loader):
    """Return (masked BCE loss, masked accuracy) of ``model`` over ``loader``.

    Both values are ``None`` when the loader holds no valid (mask == 1) targets.
    """
    model.eval()
    loss_sum, n_valid, n_correct = 0.0, 0.0, 0.0
    with torch.no_grad():
        for x, y, m in loader:
            x, y, m = x.to(DEVICE), y.to(DEVICE), m.to(DEVICE)
            out = model(x)
            per_item = F.binary_cross_entropy_with_logits(out, y, reduction='none')
            loss_sum += (per_item * m).sum().item()
            n_valid += m.sum().item()
            # logit > 0 is equivalent to predicted probability > 0.5.
            hits = ((out > 0).float() == y).float()
            n_correct += (hits * m).sum().item()
    if n_valid == 0:
        return None, None
    return loss_sum / n_valid, n_correct / n_valid


def main():
    """Load the data, train the graph model and plot the learned adjacency.

    The first 80% of the time axis is used for training and the remainder
    for validation. Every 10 epochs the mean training loss is printed,
    followed by the masked validation loss/accuracy when the validation
    split is long enough to form at least one window.
    """
    assembler = GlobalDataAssembler(DATA_PATH)
    tensor, labels, mask, nodes, _dates, _cols = assembler.fetch()

    if tensor is None:
        return

    print(f"\nüìä Tensor Shape: {tensor.shape} (Time, Nodes, Feats)")
    print(f"üåç Nodes: {nodes}")

    split = int(len(tensor) * 0.8)
    train_ds = EconomicDataset(tensor[:split], labels[:split], mask[:split])
    val_ds = EconomicDataset(tensor[split:], labels[split:], mask[split:])

    if _safe_len(train_ds) == 0:
        # Without this guard the shuffled DataLoader / loss average would fail.
        print("Not enough history to build a single training window; aborting.")
        return

    train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=64) if _safe_len(val_ds) > 0 else None

    model = EconomicGraphDiscoverer(len(nodes), tensor.shape[2]).to(DEVICE)
    optimizer = optim.AdamW(model.parameters(), lr=0.001)

    print("\nüöÄ Training...")
    for epoch in range(50):
        model.train()
        total_loss = 0
        for x, y, m in train_loader:
            x, y, m = x.to(DEVICE), y.to(DEVICE), m.to(DEVICE)
            optimizer.zero_grad()
            out = model(x)
            loss = F.binary_cross_entropy_with_logits(out, y, reduction='none')
            # Average only over targets that actually have a label.
            loss = (loss * m).sum() / (m.sum() + 1e-8)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        if (epoch+1) % 10 == 0:
            print(f"Epoch {epoch+1}: Loss {total_loss/len(train_loader):.4f}")
            if val_loader is not None:
                val_loss, val_acc = _evaluate(model, val_loader)
                if val_loss is not None:
                    print(f"    Validation: Loss {val_loss:.4f} | Accuracy {val_acc:.3f}")

    adj = model.get_adjacency().cpu().detach().numpy()
    plt.figure(figsize=(12, 10))
    sns.heatmap(adj, xticklabels=nodes, yticklabels=nodes, cmap='RdYlBu_r')
    plt.title("Learned Global Economic Dependencies (V13)")
    plt.show()

# Run the full pipeline only when this module is executed directly
# (i.e. when __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()