## 1. Installation

Install MPDistil from GitHub (replace `YOUR_USERNAME` in the URL below with the account that hosts the repository):

In [None]:
!pip install git+https://github.com/YOUR_USERNAME/mpdistil.git -q

## 2. Import Libraries

In [None]:
from mpdistil import MPDistil, load_superglue_dataset
import torch

# Query CUDA availability once, then report it (plus the name of device 0
# when a GPU is present).
cuda_ready = torch.cuda.is_available()
print(f"Using GPU: {cuda_ready}")
if cuda_ready:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU Name: {gpu_name}")

## 3. Load SuperGLUE Dataset

We'll use the CB (CommitmentBank) task as an example.

In [None]:
# Settings for the CB (CommitmentBank) loaders, collected in one place.
cb_settings = {
    'task_name': 'CB',
    'tokenizer_name': 'bert-base-uncased',
    'max_length': 128,
    'batch_size': 8,
}

# The call yields the per-split loaders together with the label count.
loaders, num_labels = load_superglue_dataset(**cb_settings)

split_names = list(loaders.keys())
print(f"Number of labels: {num_labels}")
print(f"Available splits: {split_names}")

## 4. Initialize MPDistil Model

Create a 6-layer BERT student to learn from a 12-layer BERT teacher.

In [None]:
# Teacher and student both start from the same checkpoint name; the student
# is then configured with a smaller layer count via `student_layers`.
base_checkpoint = 'bert-base-uncased'

model = MPDistil(
    task_name='CB',
    num_labels=num_labels,
    teacher_model=base_checkpoint,  # 12 layers
    student_model=base_checkpoint,  # will be reduced to 6 layers
    student_layers=6,
    device='auto',  # automatically uses the GPU if available
)

## 5. Train the Model

Training runs in up to 4 phases (this quick demo sets `num_episodes=0`, so phase 4 is skipped):
1. Teacher fine-tuning
2. Student knowledge distillation
3. Meta-teacher learning
4. Curriculum learning (skipped if no meta_loaders)

In [None]:
# Quick-demo training run: the schedule knobs are grouped so they are easy
# to bump for a real run.
fit_options = dict(
    teacher_epochs=3,   # reduced for the demo (use 10 for real training)
    student_epochs=3,   # reduced for the demo (use 10 for real training)
    num_episodes=0,     # skips phase 4 for the quick demo
)

history = model.fit(
    train_loader=loaders['train'],
    val_loader=loaders['val'],
    test_loader=loaders['test'],
    **fit_options,
)

## 6. Evaluate Results

In [None]:
# Print the last recorded validation metrics for each training phase.
# Both phases go through the same guard, so a phase that is absent from
# `history` is reported instead of raising KeyError.
phase_titles = (
    ('phase1', 'Phase 1 (Teacher)'),
    ('phase2', 'Phase 2 (Student PKD)'),
)
for phase_key, phase_title in phase_titles:
    print(f"\n{phase_title} Final Metrics:")
    if phase_key in history:
        # The final list entry is the most recently appended metrics record.
        print(history[phase_key]['val_metrics'][-1])
    else:
        print(f"(no '{phase_key}' entry in history)")

## 7. Make Predictions

In [None]:
# Run model.predict over the test loader and preview the output.
test_split = loaders['test']
predictions = model.predict(test_split)

prediction_count = len(predictions)
preview = predictions[:10]
print(f"Generated {prediction_count} predictions")
print(f"First 10 predictions: {preview}")

## 8. Save the Student Model

In [None]:
# Output locations for the student checkpoint and the predictions file.
student_dir = './my_student_cb'
predictions_path = './cb_predictions.tsv'

# Class-id -> label-name mapping handed to save_predictions.
cb_label_names = {0: 'entailment', 1: 'contradiction', 2: 'neutral'}

# Persist the student first, then the predictions.
model.save_student(student_dir)
model.save_predictions(predictions, predictions_path, label_mapping=cb_label_names)

## 9. Load and Use Saved Model

In [None]:
# Create a new model instance, spelling out the same student configuration
# that was used when the student was trained.
# NOTE(review): assumes load_student needs a matching student architecture —
# confirm against the MPDistil defaults.
new_model = MPDistil(
    task_name='CB',
    num_labels=3,
    student_model='bert-base-uncased',
    student_layers=6,
)

# Load saved student
new_model.load_student('./my_student_cb')

# Use for predictions
new_predictions = new_model.predict(loaders['test'])

# Reduce the comparison to a single bool: on array-like predictions `==` is
# element-wise and would print a mask instead of True/False.
# NOTE(review): assumes predictions are a flat sequence of labels — confirm.
predictions_match = len(predictions) == len(new_predictions) and all(
    old == new for old, new in zip(predictions, new_predictions)
)
print(f"Predictions match: {predictions_match}")

## Next Steps

- Try other SuperGLUE tasks: RTE, BoolQ, COPA, WiC, WSC
- Experiment with different hyperparameters
- Use meta-learning with multiple tasks (see 03_advanced_config.ipynb)
- Train on your own custom datasets (see 02_custom_data.ipynb)