In [None]:
!pip install git+https://github.com/YOUR_USERNAME/mpdistil.git -q

## 1. Custom Training Configuration

In [None]:
from mpdistil import MPDistil, TrainingConfig, load_superglue_dataset

# Gather every hyperparameter in one mapping, grouped by training phase,
# then build the config object from it.
config_kwargs = dict(
    # Phase 1: Teacher
    teacher_epochs=15,
    teacher_lr=1e-5,             # lower learning rate

    # Phase 2: Student PKD
    student_epochs=15,
    student_lr=5e-5,
    alpha=0.7,                   # more weight to soft targets
    beta=50.0,                   # less PKD loss
    temperature=3.0,             # lower temperature

    # Phase 3: Meta-Teacher
    meta_lr=5e-4,
    use_competitive_loss=True,   # competitive instead of collaborative

    # Phase 4: Curriculum
    num_episodes=300,
    reward_type='real',          # real-valued rewards
    gamma=0.95,                  # reward discount factor

    # System
    batch_size=16,               # larger batch size
    seed=12345,
    verbose=True,

    # Checkpointing
    output_dir='./advanced_outputs',
    save_checkpoints=True,
)
config = TrainingConfig(**config_kwargs)

# Show the resulting configuration as a plain dict
print(config.to_dict())

## 2. Multi-Task Curriculum Learning

In [None]:
# Main task: CB loaders plus the label info used to size the model
cb_loaders, cb_labels = load_superglue_dataset('CB', batch_size=16)

# Auxiliary tasks used for curriculum learning (their label info is discarded)
print("Loading auxiliary tasks...")
rte_loaders, _ = load_superglue_dataset('RTE', batch_size=16)
boolq_loaders, _ = load_superglue_dataset('BoolQ', batch_size=16)
copa_loaders, _ = load_superglue_dataset('COPA', batch_size=16)

# Validation split of each auxiliary task, keyed by task name
aux_val_loaders = {
    task: loaders['val']
    for task, loaders in (
        ('RTE', rte_loaders),
        ('BoolQ', boolq_loaders),
        ('COPA', copa_loaders),
    )
}

# Build the model for the main task
model = MPDistil(task_name='CB', num_labels=cb_labels, student_layers=6)

# Train on CB, with the auxiliary tasks supplied for curriculum learning
history = model.fit(
    train_loader=cb_loaders['train'],
    val_loader=cb_loaders['val'],
    meta_loaders=aux_val_loaders,
    config=config,
)

## 3. Analyze Curriculum Learning

In [None]:
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np

# Curriculum results are only analysed when the history contains a 'phase4' entry.
if 'phase4' in history:
    phase4 = history['phase4']
    rewards = phase4['rewards']
    trajectories = phase4['trajectories']

    # Two side-by-side panels on explicit axes (no reliance on pyplot's
    # implicit "current axes" state).
    fig, (ax_reward, ax_tasks) = plt.subplots(1, 2, figsize=(12, 4))

    # Left panel: reward per episode
    ax_reward.plot(rewards)
    ax_reward.set_title('Rewards over Episodes')
    ax_reward.set_xlabel('Episode')
    ax_reward.set_ylabel('Reward')
    ax_reward.grid(True)

    # Right panel: how often each task appears across all trajectories
    all_tasks = [task for traj in trajectories for task in traj]
    task_counts = Counter(all_tasks)

    # Materialise the dict views so bar() receives plain sequences
    ax_tasks.bar(list(task_counts.keys()), list(task_counts.values()))
    ax_tasks.set_title('Task Selection Frequency')
    ax_tasks.set_xlabel('Task')
    ax_tasks.set_ylabel('Count')
    ax_tasks.tick_params(axis='x', labelrotation=45)

    fig.tight_layout()
    plt.show()

    # With no recorded episodes, rewards[-1] would raise IndexError and
    # np.mean would return nan, so only summarise a non-empty reward list.
    if len(rewards) > 0:
        print(f"Average reward: {np.mean(rewards):.4f}")
        print(f"Final reward: {rewards[-1]:.4f}")
    else:
        print("No curriculum episodes were recorded; skipping reward summary.")

## 4. Weights & Biases Logging

In [None]:
# Authenticate with Weights & Biases (a W&B account is required)
import wandb
wandb.login()

# Arguments for a training run with W&B logging switched on
wandb_fit_kwargs = {
    'train_loader': cb_loaders['train'],
    'val_loader': cb_loaders['val'],
    'wandb_logging': True,
    'wandb_project': 'mpdistil-experiments',
    'teacher_epochs': 5,
    'student_epochs': 5,
}
history = model.fit(**wandb_fit_kwargs)

# Results can be viewed at https://wandb.ai

## 5. Different Model Architectures

In [None]:
# Constructor arguments common to both teacher/student pairings below
shared_model_kwargs = {'task_name': 'CB', 'num_labels': 3, 'student_layers': 6}

# RoBERTa teacher → DistilBERT student
model_roberta = MPDistil(
    teacher_model='roberta-base',
    student_model='distilbert-base-uncased',
    **shared_model_kwargs,
)

# Or BERT-large → BERT-base
model_large = MPDistil(
    teacher_model='bert-large-uncased',
    student_model='bert-base-uncased',
    **shared_model_kwargs,
)

## 6. Hyperparameter Tuning Tips

In [None]:
# Quick grid search over the two loss weights, alpha and beta
alphas = [0.3, 0.5, 0.7]
betas = [50, 100, 200]

# Start below any achievable score so the first completed trial is always
# recorded as the best so far, even when every trial scores 0.
best_score = float('-inf')
best_params = {}

for alpha in alphas:
    for beta in betas:
        print(f"\nTrying alpha={alpha}, beta={beta}")

        # Fresh model per trial. Trial-specific names keep the notebook-level
        # `model` and `history` from earlier cells intact, and the label info
        # comes from the CB loader instead of a hard-coded 3.
        trial_model = MPDistil(task_name='CB', num_labels=cb_labels, student_layers=6)

        trial_history = trial_model.fit(
            train_loader=cb_loaders['train'],
            val_loader=cb_loaders['val'],
            teacher_epochs=3,  # Reduce for quick search
            student_epochs=3,
            alpha=alpha,
            beta=beta,
            num_episodes=0  # presumably skips the curriculum phase — TODO confirm
        )

        # Score a trial by the last recorded phase-2 validation metrics;
        # a missing 'acc' entry counts as 0.
        final_metrics = trial_history['phase2']['val_metrics'][-1]
        score = final_metrics.get('acc', 0)

        print(f"Score: {score:.4f}")

        if score > best_score:
            best_score = score
            best_params = {'alpha': alpha, 'beta': beta}

print(f"\nBest params: {best_params}")
print(f"Best score: {best_score:.4f}")

## 7. Checkpoint Resumption

In [None]:
# Configuration that saves model state during training
config_resume = TrainingConfig(
    output_dir='./checkpoints',
    save_checkpoints=True,
    teacher_epochs=10,
    student_epochs=10
)

# Build a fresh model for this run so the cell does not silently continue
# training whatever object an earlier cell left bound to `model`.
model = MPDistil(task_name='CB', num_labels=cb_labels, student_layers=6)

# Train (will save checkpoints)
history = model.fit(
    train_loader=cb_loaders['train'],
    val_loader=cb_loaders['val'],
    config=config_resume
)

# Later, load best checkpoint
# Note: Currently loads automatically at end of each phase

## Summary

Advanced features covered:
- ✅ Custom TrainingConfig with all hyperparameters
- ✅ Multi-task curriculum learning
- ✅ Reward and trajectory analysis
- ✅ W&B integration for experiment tracking
- ✅ Different model architectures
- ✅ Hyperparameter tuning strategies
- ✅ Checkpoint management