## 1. Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import warnings
# Suppress every warning for the rest of the session (no category/module filter).
warnings.filterwarnings(action='ignore')

# Apply seaborn's 'whitegrid' style to subsequent plots, then confirm setup ran.
sns.set_style(style='whitegrid')
print("✓ Setup complete")

## 2. Load Behavioral Features

In [None]:
# Load the behavioral feature matrices (train / test) from data/processed/.
# NOTE(review): the file names suggest these were standardized upstream — confirm.
X_behavioral_train = pd.read_csv('data/processed/X_train_standardized.csv')
X_behavioral_test = pd.read_csv('data/processed/X_test_standardized.csv')

# Load the targets as Series. `read_csv(..., squeeze=True)` was deprecated in
# pandas 1.4 and removed in 2.0 (it raises TypeError there); squeezing the
# loaded frame along the column axis is the documented replacement and yields
# a Series whenever the file holds a single column.
y_train = pd.read_csv('data/processed/y_train.csv').squeeze('columns')
y_test = pd.read_csv('data/processed/y_test.csv').squeeze('columns')

# Report the loaded shapes and the feature names for a quick sanity check.
print(f"Behavioral features (train): {X_behavioral_train.shape}")
print(f"Behavioral features (test):  {X_behavioral_test.shape}")
print(f"\nFeature list: {', '.join(X_behavioral_train.columns.tolist())}")

# Build and save the feature alignment report: one row per fusion strategy with
# the shape of its training matrix and a short description.
# The X_*_fusion_*_train frames are expected to exist from earlier cells.
import os

# Single source of truth for the output location (used for writing and logging).
report_path = 'fusion_models/results/01_feature_alignment_report.csv'

alignment_report = pd.DataFrame({
    'Fusion Strategy': ['Early Fusion', 'Late Fusion (Behavioral)', 'Late Fusion (NLP)', 'Intermediate Fusion'],
    'Train Shape': [
        str(X_early_fusion_train.shape),
        str(X_late_fusion_behavioral_train.shape),
        str(X_late_fusion_nlp_train.shape),
        str(X_intermediate_fusion_train.shape),
    ],
    'Description': [
        'Concatenate behavioral + NLP features',
        'Behavioral features only',
        'NLP features only',
        'PCA-reduced behavioral + NLP',
    ],
})

# to_csv does not create missing parent directories; make sure the results
# folder exists so a fresh checkout does not fail with OSError here.
os.makedirs(os.path.dirname(report_path), exist_ok=True)
alignment_report.to_csv(report_path, index=False)

print("\nFEATURE ALIGNMENT SUMMARY")
print("="*80)
print(alignment_report.to_string(index=False))
print(f"\n✓ Saved: {report_path}")