## 1. Setup Environment

In [None]:
# Check GPU
# Prints the driver / CUDA version table and the GPU(s) visible to this runtime.
# If the command is missing or lists no device, the runtime has no GPU attached
# and the later cells that pass `--device cuda` will not work as written.
!nvidia-smi

Tue Nov 25 03:11:21 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   51C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
# Clone repository
!git clone https://github.com/kavyavenk/multilingual-backpacks.git
%cd multilingual-backpacks

In [None]:
# Install dependencies
!pip install -q transformers datasets scipy tqdm numpy torch

In [None]:
# Verify imports
# Importing torch and numpy here is the check itself: a broken install fails
# in this cell instead of in the middle of training.
import torch
import numpy as np

cuda_ok = torch.cuda.is_available()
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {cuda_ok}")
if cuda_ok:
    # Device index 0 is the first GPU visible to this process.
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {gpu_props.total_memory / 1e9:.2f} GB")

## 2. Prepare Data

In [None]:
# Prepare Europarl dataset (10k samples for quick training)
# Runs the repo's prepare script for the en-fr language pair, capped at
# 10,000 samples via --max_samples.
# NOTE(review): the verification cell that follows looks for train.bin, val.bin
# and meta.pkl under data/europarl/ -- presumably written by this script; confirm
# against prepare.py.
!python data/europarl/prepare.py --language_pair en-fr --max_samples 10000

In [None]:
# Verify data files
import os
import pickle

# Report, for each expected artifact, either its size in MB or that it is missing.
data_files = ['data/europarl/train.bin', 'data/europarl/val.bin', 'data/europarl/meta.pkl']
for path in data_files:
    if not os.path.exists(path):
        print(f"✗ {path} not found")
        continue
    size_mb = os.path.getsize(path) / 1e6
    print(f"✓ {path} ({size_mb:.2f} MB)")

# Load metadata
# The open() below is unconditional, so a missing meta.pkl stops the notebook
# here with FileNotFoundError even though the loop above only printed a warning.
# pickle.load executes code embedded in the file: only load a meta.pkl that was
# produced locally.
with open('data/europarl/meta.pkl', 'rb') as meta_file:
    meta = pickle.load(meta_file)
print(f"\nVocab size: {meta['vocab_size']:,}")
print(f"Languages: {meta['languages']}")

## 3. Configure Model for GPU

In [None]:
# Update config for GPU training
# Reference summary of the intended settings. This string is informational only:
# it is never parsed or written to disk, and the rewrite below does not touch
# learning_rate (only the System section, batch_size and max_iters).
config_update = '''
# System
device='cuda'
dtype='float16'  # Use mixed precision on GPU
compile=False  # Disable compile for tiny model

# Training - increased for GPU
batch_size=32  # Larger batch on GPU
learning_rate=3e-4
max_iters=5000  # Full training on GPU
'''

CONFIG_PATH = 'config/train_europarl_tiny.py'


def _rewrite_config_for_gpu(lines):
    """Return a copy of the config's lines with the GPU settings applied.

    Rules, applied line by line:
      * A line containing '# System' is kept and followed by the three GPU
        settings (device, dtype, compile). Every following line is dropped
        until a line containing '# Data' is reached.
      * A line containing '# Data' ends the dropped region; it is emitted
        after a whitespace-only spacer line.
      * Outside the dropped region, any line containing 'batch_size=' or
        'max_iters=' is replaced wholesale (substring match, so a longer key
        ending in the same text would match too); all other lines are copied.

    Raises:
        ValueError: if a '# System' section is opened but no '# Data' line
            follows it. Without this check everything after '# System' would
            be silently deleted from the config.
    """
    rewritten = []
    in_system_section = False
    for line in lines:
        if '# System' in line:
            in_system_section = True
            rewritten.append(line)
            rewritten.append("    device='cuda',\n")
            rewritten.append("    dtype='float16',\n")
            rewritten.append("    compile=False,\n")
        elif '# Data' in line:
            in_system_section = False
            rewritten.append("    \n")
            rewritten.append(line)
        elif not in_system_section:
            # Update batch_size and max_iters
            if 'batch_size=' in line:
                rewritten.append("    batch_size=32,  # Larger batch on GPU\n")
            elif 'max_iters=' in line:
                rewritten.append("    max_iters=5000,  # Full training on GPU\n")
            else:
                rewritten.append(line)

    if in_system_section:
        raise ValueError(
            f"{CONFIG_PATH}: found '# System' but no following '# Data' marker; "
            "refusing to write a truncated config"
        )
    return rewritten


# Read current config
with open(CONFIG_PATH, 'r') as f:
    lines = f.readlines()

# Build the new contents fully (and validate them) before touching the file,
# so a failed rewrite leaves the original config intact.
new_lines = _rewrite_config_for_gpu(lines)

# Write updated config
with open(CONFIG_PATH, 'w') as f:
    f.writelines(new_lines)

print("✓ Config updated for GPU training")

## 4. Train Model

In [None]:
# Train using the train.py script
# --device and --dtype are passed on the command line in addition to the values
# written into config/train_europarl_tiny.py by the config cell.
# NOTE(review): which source wins if the two disagree depends on train.py -- confirm.
# Later cells read out/tiny/training_log.json and out/tiny/ckpt.pt, so --out_dir
# has to stay out/tiny unless those cells are changed as well.
!python train.py \
    --config train_europarl_tiny \
    --out_dir out/tiny \
    --data_dir europarl \
    --device cuda \
    --dtype float16 \
    --model_type backpack

## 5. Check Training Results

In [None]:
# Load and display training log
import json
import matplotlib.pyplot as plt

with open('out/tiny/training_log.json', 'r') as log_file:
    log = json.load(log_file)

# The log is read as three parallel sequences indexed by checkpoint.
iterations, train_loss, val_loss = (
    log[key] for key in ('iterations', 'train_loss', 'val_loss')
)

print(f"Training completed: {len(iterations)} checkpoints")
print(f"Final train loss: {train_loss[-1]:.4f}")
print(f"Final val loss: {val_loss[-1]:.4f}")
print(f"Loss reduction: {train_loss[0] - train_loss[-1]:.4f}")

# Plot loss curves
fig, ax = plt.subplots(figsize=(10, 5))
for series, label in ((train_loss, 'Train Loss'), (val_loss, 'Val Loss')):
    ax.plot(iterations, series, label=label, alpha=0.7)
ax.set_xlabel('Iteration')
ax.set_ylabel('Loss')
ax.set_title('Tiny Backpack Training Loss Curves')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
# Save before show(): some backends clear the figure once it has been shown.
fig.savefig('out/tiny/loss_curves.png', dpi=150)
plt.show()

print("\n✓ Loss curves saved to out/tiny/loss_curves.png")

## 6. Model Info

In [None]:
# Load model and check parameters
import torch
from model import BackpackLM

# The checkpoint holds more than tensors: its 'config' entry is read by attribute
# below (config.n_embd, ...), so it is a pickled object -- presumably the project's
# config class. Since PyTorch 2.6 torch.load defaults to weights_only=True, which
# refuses to unpickle such objects, so weights_only=False is passed explicitly.
# weights_only=False can execute arbitrary code from the file: only use it on a
# checkpoint you produced yourself (here, the one written by the training run).
checkpoint = torch.load('out/tiny/ckpt.pt', map_location='cuda', weights_only=False)
config = checkpoint['config']
model = BackpackLM(config)
model.load_state_dict(checkpoint['model'])

# Count every parameter, and separately those that receive gradients.
n_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print("Model Configuration:")
print(f"  Embedding dim: {config.n_embd}")
print(f"  Num senses: {config.n_senses}")
print(f"  Layers: {config.n_layer}")
print(f"  Heads: {config.n_head}")
print(f"  Vocab size: {config.vocab_size}")
print(f"\nTotal parameters: {n_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
# Size estimate assumes 4 bytes per parameter (float32), whatever dtype was used in training.
print(f"Model size: {n_params * 4 / 1e6:.2f} MB (float32)")

## 7. Run Evaluations

In [None]:
# Run full evaluation suite
# Invokes the repo's evaluation script on the out/tiny run directory with the GPU
# selected via --device cuda.
# NOTE(review): the download cell lists evaluation_results.json among the outputs --
# presumably written by this script into out/tiny; confirm against run_full_evaluation.py.
!python run_full_evaluation.py --out_dir out/tiny --device cuda

## 8. Download Results

In [None]:
# Package results for download
# Bundle the whole out/tiny/ run directory into a gzipped tarball in the
# current working directory.
!tar -czf tiny_model_results.tar.gz out/tiny/

# google.colab is only available on Colab runtimes; elsewhere this import fails.
from google.colab import files
# Ask the browser to download the archive created above.
files.download('tiny_model_results.tar.gz')

print("\n✓ Results packaged and ready for download")
# The list below is a fixed description, not a listing of the archive's contents.
# NOTE(review): evaluation_results.json is presumably produced by the evaluation
# cell -- confirm it is actually written under out/tiny/.
print("\nContents:")
print("  - ckpt.pt: Model checkpoint")
print("  - training_log.json: Training metrics")
print("  - evaluation_results.json: Evaluation metrics")
print("  - loss_curves.png: Training visualization")