# Final Submission - Model Composition

This notebook shows which models from `data/preds/` were used to create the final submission and their final weights.

In [None]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Plot styling: seaborn's white background with grid lines for every figure below.
sns.set_style(style="whitegrid")

# Root of the data tree, relative to the notebook's working directory.
data_dir = Path("..") / "data"

## Load Final Model Weights

Load the final weights calculated by combining all ensemble layers.

In [None]:
# Load final model weights
weights_path = data_dir / "submissions_final_reprod" / "final_model_weights.json"

# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform's locale encoding, which can mis-read non-ASCII text on some systems.
with open(weights_path, encoding='utf-8') as f:
    final_weights = json.load(f)

# `final_weights` maps task name -> entry; each entry has a 'models' collection
# (the cells below also read 'weights' and 'description' from it).
print(f"Loaded final weights for {len(final_weights)} tasks")
print("\nTasks:")
for task, entry in final_weights.items():
    print(f"  - {task}: {len(entry['models'])} models")

## Visualize Model Composition by Task

In [None]:
# Create visualization for all tasks: a 2x2 grid of horizontal bar charts,
# one panel per task, showing the highest-weighted models of each ensemble.
from matplotlib.patches import Patch  # proxy artists for the figure legend

# Task keys used to index `final_weights`, in panel order (row-major).
tasks = ['trans_340', 'trans_450', 'fluo_340_450', 'fluo_480']
# Human-readable panel titles.
# NOTE(review): the label for 'fluo_340_450' reads "340/480" while its key says
# 340_450 -- confirm which one is intended before publishing the figure.
task_labels = {
    'trans_340': 'Transmittance(340)',
    'trans_450': 'Transmittance(450)',
    'fluo_340_450': 'Fluorescence(340/480)',
    'fluo_480': 'Fluorescence(multiple)'
}

N_TOP = 10  # maximum number of models drawn per panel
OUTPUT_PATH = '../docs/final_model_composition.png'

fig, axes = plt.subplots(2, 2, figsize=(16, 12))
axes = axes.flatten()

for ax, task in zip(axes, tasks):
    models = final_weights[task]['models']
    weights = final_weights[task]['weights']
    if len(models) != len(weights):
        # zip() below would silently truncate; fail loudly on malformed input.
        raise ValueError(
            f"{task}: {len(models)} models but {len(weights)} weights"
        )

    # Rank by weight so "top N" holds even if the JSON is not pre-sorted.
    # sorted() is stable, so input that is already in descending order
    # (including ties) keeps exactly its original order.
    ranked = sorted(zip(models, weights), key=lambda pair: pair[1], reverse=True)
    top = ranked[:N_TOP]
    n_show = len(top)
    models_show = [name for name, _ in top]
    weights_show = [weight for _, weight in top]

    # Create horizontal bar chart; multi-task models are highlighted in orange.
    y_pos = np.arange(n_show)
    colors = ['orange' if m.startswith('multitask_') else 'steelblue' for m in models_show]

    ax.barh(y_pos, weights_show, color=colors, alpha=0.7)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(models_show, fontsize=9)
    ax.set_xlabel('Weight', fontsize=11)
    ax.set_title(f'{task_labels[task]}\nTop {n_show} models (total: {len(models)})',
                 fontsize=12, fontweight='bold')
    ax.invert_yaxis()  # largest weight at the top of the panel
    ax.grid(axis='x', alpha=0.3)

    # Add value labels just to the right of each bar
    for i, w in enumerate(weights_show):
        ax.text(w + 0.005, i, f'{w:.3f}', va='center', fontsize=8)

# Add legend (shared by all panels, placed above the grid)
legend_elements = [
    Patch(facecolor='steelblue', alpha=0.7, label='Single-task model'),
    Patch(facecolor='orange', alpha=0.7, label='Multi-task model')
]
fig.legend(handles=legend_elements, loc='upper center', bbox_to_anchor=(0.5, 0.98),
           ncol=2, fontsize=11, frameon=True)

# Leave the top 4% of the figure free for the legend.
plt.tight_layout(rect=[0, 0, 1, 0.96])

# savefig() does not create missing directories, so make sure the target exists.
Path(OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
plt.savefig(OUTPUT_PATH, dpi=150, bbox_inches='tight')
plt.show()

print(f"\nVisualization saved to: {OUTPUT_PATH}")

## Model Statistics

In [None]:
# Print detailed statistics: per task, the ensemble strategy, the model count,
# the highest-weighted models, and the sum of all weights.
N_SUMMARY = 5  # number of models listed per task
rule = "=" * 70

print(rule)
print("Final Model Composition Summary")
print(rule)

for task in tasks:
    entry = final_weights[task]
    models = entry['models']
    weights = entry['weights']
    # A missing description should not abort the whole summary.
    description = entry.get('description', 'N/A')

    # Rank by weight so the list really is the top N even when the JSON is not
    # pre-sorted; the sort is stable, so already-sorted input is left unchanged.
    ranked = sorted(zip(models, weights), key=lambda pair: pair[1], reverse=True)

    print(f"\n{task_labels[task]}:")
    print(f"  Strategy: {description}")
    print(f"  Total models: {len(models)}")
    print(f"  Top {N_SUMMARY} models:")
    for rank, (name, weight) in enumerate(ranked[:N_SUMMARY], start=1):
        print(f"    {rank}. {name:35s} {weight:.6f} ({weight*100:.2f}%)")
    print(f"  Total weight: {sum(weights):.6f}")

print("\n" + rule)

## Unique Models Across All Tasks

In [None]:
# Union of the model names referenced by any task's ensemble.
all_models = {
    name
    for task in tasks
    for name in final_weights[task]['models']
}

print(f"Total unique models used: {len(all_models)}")
print("\nModel list:")
# Alphabetical, numbered from 1.
for position, name in enumerate(sorted(all_models), start=1):
    print(f"  {position:2d}. {name}")

## Notes

- All models are from `data/preds/[model_name]/` directories
- Weights shown are the final effective weights after combining all ensemble layers
- **Transmittance(340)**: Uses top 40% of models based on CV scores
- **Other tasks**: Use a multi-layer ensemble that combines an Optuna-optimized ensemble with large pretrained models (Uni-Mol-2)