# English-Gujarati NMT: Evaluation and Report Generation

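This notebook evaluates the trained student model on the test set, generates sample translations, an evaluation report, and result visualizations, and, when a teacher checkpoint is configured and present, compares the teacher and student models.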

In [None]:
# Install dependencies
# The PyTorch packages are fetched from the PyTorch wheel index for CUDA 11.8
# (the `cu118` index URL below).
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# The remaining packages are installed without an explicit index URL.
# NOTE(review): no versions are pinned on either line, so a fresh install may
# resolve to different releases over time.
%pip install transformers tokenizers sentencepiece datasets sacrebleu pyyaml tqdm wandb requests matplotlib

In [None]:
# Setup
import sys
from pathlib import Path
import torch

# Put the current working directory at the front of the import path;
# presumably this is what lets the `src` and `scripts` packages imported by
# later cells resolve.
sys.path.insert(0, str(Path.cwd()))

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

In [None]:
# Evaluate student model
from src.utils.config import load_config
import sys
import os

# Load the project configuration and resolve the checkpoint directory from it.
config = load_config("config.yaml")
checkpoint_dir = Path(config['paths']['checkpoint_dir'])

# The student model is looked up as `best_model.pt` inside that directory.
student_checkpoint = checkpoint_dir / "best_model.pt"

if not student_checkpoint.exists():
    print(f"Student checkpoint not found at {student_checkpoint}")
else:
    print("Evaluating student model on test set...")
    # The evaluation script's main() is called with no arguments, so the
    # options are passed through a temporary sys.argv (presumably parsed there
    # as command-line flags). The real argv is restored even if main() raises.
    original_argv = list(sys.argv)
    sys.argv = ['evaluate.py', '--checkpoint', str(student_checkpoint), '--split', 'test']
    try:
        from scripts.evaluate import main
        main()
    finally:
        sys.argv = original_argv

In [None]:
# Generate translation examples
# Only run when `student_checkpoint` has been defined by an earlier cell and
# the file it points to exists; otherwise report that the step is skipped.
if 'student_checkpoint' not in locals() or not student_checkpoint.exists():
    print("Skipping translation examples (checkpoint not found)")
else:
    print("\nGenerating translation examples...")
    # Pass the script options through a temporary sys.argv and always restore
    # the original argv afterwards, even if main() raises.
    original_argv = list(sys.argv)
    sys.argv = ['translate_examples.py', '--checkpoint', str(student_checkpoint), '--num-examples', '10']
    try:
        from scripts.translate_examples import main
        main()
    finally:
        sys.argv = original_argv

In [None]:
# Generate comprehensive report
# Calls the project's generate_report() with the path to the configuration
# file and keeps whatever it returns in `report` for use in later cells.
from scripts.generate_report import generate_report

print("Generating evaluation report...")
# NOTE(review): the type and contents of `report` are not visible from this
# notebook -- check scripts/generate_report.py before relying on its structure.
report = generate_report("config.yaml")
print("\nReport generated successfully!")

In [None]:
# Visualize results
# The visualization script's main() is called with no extra options: sys.argv
# is temporarily replaced by just the script name and restored afterwards,
# even if main() raises.
print("Generating visualizations...")
original_argv = list(sys.argv)
sys.argv = ['visualize_results.py']
try:
    from scripts.visualize_results import main
    main()
finally:
    sys.argv = original_argv
print("\nVisualizations generated!")

In [None]:
# Compare teacher and student models (if both exist)
#
# The teacher checkpoint path is read from config['distillation']['teacher_checkpoint'].
# `or {}` also covers a `distillation` key that is present but empty (None),
# which would otherwise raise AttributeError on the chained .get().
teacher_ckpt = (config.get('distillation') or {}).get('teacher_checkpoint')

# Evaluate each prerequisite once so the run/skip decision and the
# diagnostics printed when skipping cannot drift apart.
teacher_ready = bool(teacher_ckpt) and Path(teacher_ckpt).exists()
student_ready = 'student_checkpoint' in locals() and student_checkpoint.exists()

if teacher_ready and student_ready:
    print("\nComparing teacher and student models...")
    # Pass the script options through a temporary sys.argv and always restore
    # the original argv afterwards, even if main() raises.
    original_argv = sys.argv.copy()
    try:
        sys.argv = [
            'model_comparison.py',
            '--teacher-checkpoint', str(teacher_ckpt),
            '--student-checkpoint', str(student_checkpoint),
            '--split', 'test'
        ]
        from scripts.model_comparison import main
        main()
    finally:
        sys.argv = original_argv
else:
    print("Skipping model comparison (need both teacher and student checkpoints)")
    # Report every missing prerequisite, not only the first one found: the
    # teacher and student checks are independent of each other.
    if not teacher_ckpt:
        print("  - Teacher checkpoint path not set in config")
    elif not teacher_ready:
        print(f"  - Teacher checkpoint not found at: {teacher_ckpt}")
    if not student_ready:
        print(f"  - Student checkpoint not found at: {student_checkpoint if 'student_checkpoint' in locals() else 'N/A'}")

## Evaluation Complete!

**Generated Files:**
- `checkpoints/evaluation_report.json` - Comprehensive evaluation report
- `checkpoints/predictions_test.txt` - Test set predictions
- `checkpoints/translation_examples.txt` - Sample translations
- `checkpoints/training_curves.png` - Training visualization
- `checkpoints/model_comparison.png` - Model comparison charts
- `checkpoints/model_comparison.json` - Detailed comparison metrics

**Resume Metrics:**
Check the evaluation report for:
- Model size reduction percentage
- BLEU score retention
- Parameter counts
- Performance metrics