# Cold-Start Evaluation Analysis

This notebook provides a detailed analysis of model performance across different cold-start scenarios.

In [None]:
import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import pearsonr

# Apply seaborn's 'whitegrid' style to the figures created below
sns.set_style('whitegrid')
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

## Load Evaluation Results

Load the evaluation metrics for every split (the random baseline plus the cold-start scenarios) from the Davis results file.

In [None]:
# Read the evaluation results JSON into a dict
with open('../eval_results/all_splits_davis.json', 'r') as results_file:
    results = json.load(results_file)

# Flatten every entry's 'metrics' dict into columns (one row per top-level key),
# then label the rows with those keys
per_split = pd.DataFrame(results).transpose()
metrics_df = pd.json_normalize(per_split['metrics'])
metrics_df.index = results.keys()

print("Evaluation Results:", metrics_df, sep="\n")

## Performance Comparison Across Scenarios

In [None]:
# Plot comparison: one bar chart per metric, one bar per split
metrics_to_plot = ['mse', 'rmse', 'mae', 'r2', 'pearson', 'ci']

fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()

# Each grid cell is reserved for the metric at the same position in the list
for ax, metric in zip(axes, metrics_to_plot):
    if metric not in metrics_df.columns:
        # Hide the panel instead of leaving an empty, unlabeled axes in the grid
        ax.set_axis_off()
        continue

    metrics_df[metric].plot(kind='bar', ax=ax)
    ax.set_title(metric.upper())
    ax.set_xlabel('Split Type')
    ax.set_ylabel('Score')
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## Uncertainty Analysis

In [None]:
# Load predictions with uncertainty
pred_df = pd.read_csv('../eval_results/davis_random_predictions.csv')

# Absolute prediction error per sample; used by the right-hand panel and by
# the correlation computed at the end of the cell
pred_df['error'] = (pred_df['true_affinity'] - pred_df['predicted_affinity']).abs()

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax_pred, ax_err = axes

# Left panel: predicted vs. true affinity, colored by uncertainty
scatter = ax_pred.scatter(
    pred_df['true_affinity'],
    pred_df['predicted_affinity'],
    c=pred_df['uncertainty'],
    cmap='viridis',
    alpha=0.6
)

# Draw the y = x reference line over the actual data range so it always spans
# the plotted points, whatever scale the affinities are on
affinity_values = pred_df[['true_affinity', 'predicted_affinity']]
diag_min = affinity_values.min().min()
diag_max = affinity_values.max().max()
ax_pred.plot([diag_min, diag_max], [diag_min, diag_max], 'r--', label='Perfect prediction')

ax_pred.set_xlabel('True Affinity')
ax_pred.set_ylabel('Predicted Affinity')
ax_pred.set_title('Predictions Colored by Uncertainty')
# Without an explicit legend the 'Perfect prediction' label is never shown
ax_pred.legend()
fig.colorbar(scatter, ax=ax_pred, label='Uncertainty')

# Right panel: error vs. uncertainty
ax_err.scatter(pred_df['uncertainty'], pred_df['error'], alpha=0.6)
ax_err.set_xlabel('Uncertainty')
ax_err.set_ylabel('Absolute Error')
ax_err.set_title('Prediction Error vs Uncertainty')

plt.tight_layout()
plt.show()

# Compute correlation between uncertainty and error.
# Rows with a missing value are dropped first: a single NaN would otherwise
# turn the whole correlation into NaN.
valid_rows = pred_df[['uncertainty', 'error']].dropna()
corr, pval = pearsonr(valid_rows['uncertainty'], valid_rows['error'])
print(f"Correlation between uncertainty and error: {corr:.3f} (p={pval:.3e})")

## Cold-Start Challenge Analysis

Compare how much performance degrades in each cold-start scenario relative to the random-split baseline.

In [None]:
# Calculate performance drop relative to random split (the baseline row)
baseline = metrics_df.loc['random']

# Error metrics get better as they shrink, so their relative change is negated
# below; a positive value then means "worse than baseline" for every metric.
lower_is_better = ['mse', 'rmse', 'mae']

# Only the cold-start splits that are actually present in the results
cold_splits = [
    split for split in ['cold_drug', 'cold_target', 'cold_both']
    if split in metrics_df.index
]

drops = {}
for split in cold_splits:
    # Divide by |baseline| so a negative baseline (e.g. r2 < 0) does not flip
    # the sign of the reported drop
    drop = (baseline - metrics_df.loc[split]) / baseline.abs() * 100
    error_metrics = drop.index.intersection(lower_is_better)
    drop.loc[error_metrics] = -drop.loc[error_metrics]
    drops[split] = drop

# Rows are metrics, columns are splits (transposed for display and plotting)
performance_drop = pd.DataFrame(drops)

print("Performance Drop (%) compared to random split:")
print(performance_drop.T)

# Visualize, restricted to metrics that exist so a missing column cannot raise
plot_metrics = [
    metric for metric in ['rmse', 'r2', 'pearson', 'ci']
    if metric in performance_drop.index
]

if cold_splits and plot_metrics:
    performance_drop.T[plot_metrics].plot(kind='bar', figsize=(10, 6))
    plt.title('Performance Degradation in Cold-Start Scenarios')
    plt.xlabel('Split Type')
    plt.ylabel('Performance Drop (%)')
    plt.legend(title='Metric')
    plt.tight_layout()
    plt.show()
else:
    print("No cold-start splits or plottable metrics available; skipping plot.")

## Attention Visualization

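Visualize attention weights across modalities. Note: the weights plotted below are randomly generated placeholders, not weights extracted from a trained model.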

In [None]:
# Simulated attention weights (randomly generated placeholder data)
modalities = ['Drug', 'Protein', 'TDA']
scenarios = ['Random', 'Cold Drug', 'Cold Target', 'Cold Both']

# Seeded generator so the figure is identical on every Restart & Run All
rng = np.random.default_rng(42)
attention_weights = rng.random((len(scenarios), len(modalities)))
# Normalize each row so the weights of one scenario sum to 1
attention_weights = attention_weights / attention_weights.sum(axis=1, keepdims=True)

# Heatmap: one row per scenario, one column per modality
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    attention_weights,
    annot=True,
    fmt='.3f',
    cmap='YlOrRd',
    xticklabels=modalities,
    yticklabels=scenarios,
    ax=ax
)
ax.set_title('Modal Attention Weights Across Scenarios')
ax.set_xlabel('Modality')
ax.set_ylabel('Split Type')
plt.tight_layout()
plt.show()