# Sound2Sheet Training Pipeline

A complete training pipeline for an audio-to-sheet-music transcription model.

## Step 1: Check GPU and Install Dependencies

In [None]:
import torch

# Pick the compute device once; later cells reuse `device`.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Device: {device}")

# When a GPU is present, report the name and total memory of GPU 0.
if device == 'cuda':
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

In [None]:
# Clone the Sound2Sheet repository and switch the working directory into it.
# Later cells import from `src` and run `python -m src.dataset.cli`, which
# presumably must be resolved relative to this directory.
# NOTE(review): re-running this cell after a successful clone will make
# `git clone` fail because the target directory already exists.
!git clone https://github.com/k-Dispersik/Sound2Sheet.git
%cd Sound2Sheet

In [None]:
# Install system dependencies: the FluidSynth synthesizer and a General MIDI
# soundfont package (presumably used to render MIDI to audio — confirm against
# the dataset generator). `-qq` keeps the apt output quiet.
!apt-get update -qq
!apt-get install -y -qq fluidsynth fluid-soundfont-gm

In [None]:
# Install Python packages (`-q` keeps pip output quiet).
# NOTE(review): no versions are pinned, so the installed set can differ
# between runs.
!pip install -q torch torchaudio transformers librosa numpy pandas tqdm pyyaml \
    pretty_midi music21 matplotlib mido midi2audio accelerate scipy scikit-learn

# Printed unconditionally: a failing `!` shell command does not raise in IPython.
print("Installation complete")

In [None]:
# Smoke-test the checkout: this import fails immediately if the repository
# or one of its dependencies is not available.
from src.model import (
    Sound2SheetModel,
    Trainer,
    create_dataloaders,
)

print("All imports successful")

## Step 2: Generate Dataset

Configure the experiment (dataset size, complexity mix, and training hyperparameters) and generate the synthetic training data.

In [None]:
# Experiment settings shared by dataset generation and training.
EXPERIMENT_NAME = "Sound2Sheet_Experiment"

# Dataset
SAMPLES = 10_000  # total samples (train + val + test)
COMPLEXITY_DIST = "beginner:0.4,intermediate:0.5,advanced:0.1"  # complexity distribution

# Training
EPOCHS = 50           # training epochs
BATCH_SIZE = 64       # batch size
LEARNING_RATE = 1e-5  # learning rate

# Echo the settings so they are recorded in the notebook output.
print(
    "Configuration:\n"
    f"  Samples: {SAMPLES}\n"
    f"  Complexity: {COMPLEXITY_DIST}\n"
    f"  Epochs: {EPOCHS}\n"
    f"  Batch size: {BATCH_SIZE}\n"
    f"  Learning rate: {LEARNING_RATE}"
)

In [None]:
# Generate the dataset with the project's dataset CLI (`generate` subcommand).
# IPython expands {SAMPLES}, {COMPLEXITY_DIST} and {EXPERIMENT_NAME} from the
# configuration cell; output is written to data/<EXPERIMENT_NAME>.
!python -m src.dataset.cli generate \
    --samples {SAMPLES} \
    --complexity-dist {COMPLEXITY_DIST} \
    --name {EXPERIMENT_NAME} \
    --output-dir data/{EXPERIMENT_NAME}

In [None]:
# Set dataset path (no versioned subdirectory anymore). This is the same
# directory that was passed as --output-dir to the generate command.
dataset_path = f"data/{EXPERIMENT_NAME}"
print(f"Dataset: {dataset_path}")

# Show dataset info via the dataset CLI's `info` subcommand
!python -m src.dataset.cli info {dataset_path}

## Step 3: Configure Model

Set up model architecture and training parameters.

In [None]:
from src.model.config import ModelConfig, TrainingConfig, DataConfig
from pathlib import Path

# `dataset_path`, `device` and the hyperparameter constants come from the
# earlier cells (dataset step and GPU check).

# Model configuration
model_config = ModelConfig(
    vocab_size=128,
    hidden_size=256,
    num_decoder_layers=4,
    num_attention_heads=8,
    dropout=0.1,
    max_sequence_length=512,
    device=device,
)

# Training configuration; checkpoints and logs live under the experiment folder.
training_config = TrainingConfig(
    learning_rate=LEARNING_RATE,
    batch_size=BATCH_SIZE,
    num_epochs=EPOCHS,
    optimizer="adamw",
    scheduler="cosine",
    use_mixed_precision=True,
    max_grad_norm=4.0,
    gradient_accumulation_steps=1,
    checkpoint_dir=f"data/{EXPERIMENT_NAME}/checkpoints",
    log_dir=f"data/{EXPERIMENT_NAME}/logs",
    save_every_n_epochs=10,
    early_stopping_patience=10,
)

# Data configuration
data_config = DataConfig(
    sample_rate=16000,
    n_mels=128,
    dataset_dir=Path(dataset_path),
)

# Summarise the effective settings, one print call per section.
print(
    "Model configuration:\n"
    f"  Hidden size: {model_config.hidden_size}\n"
    f"  Decoder layers: {model_config.num_decoder_layers}\n"
    f"  Attention heads: {model_config.num_attention_heads}"
)
print(
    "\nTraining configuration:\n"
    f"  Learning rate: {training_config.learning_rate}\n"
    f"  Batch size: {training_config.batch_size}\n"
    f"  Epochs: {training_config.num_epochs}\n"
    f"  Mixed precision: {training_config.use_mixed_precision}\n"
    f"  Gradient accumulation: {training_config.gradient_accumulation_steps}"
)
print(f"\nDataset path: {dataset_path}")

## Step 4: Create Data Loaders

In [None]:
# Build the train / validation / test loaders from the three config objects.
train_loader, val_loader, test_loader = create_dataloaders(data_config, model_config, training_config)

# Report how many samples ended up in each split.
print(
    "Dataset splits:\n"
    f"  Train samples: {len(train_loader.dataset)}\n"
    f"  Val samples: {len(val_loader.dataset)}\n"
    f"  Test samples: {len(test_loader.dataset)}"
)

## Step 5: Initialize Model

In [None]:
# Instantiate the model with the encoder frozen (freeze_encoder=True).
model = Sound2SheetModel(model_config, freeze_encoder=True)

# Parameter counts as reported by the model: total, trainable and frozen.
params = model.count_parameters()
print(
    "Model parameters:\n"
    f"  Total: {params['total']:,}\n"
    f"  Trainable: {params['trainable']:,}\n"
    f"  Frozen: {params['frozen']:,}"
)

## Step 6: Train Model

Start training with progress tracking.

In [None]:
# Wire the model, the train/validation loaders and both configs into a Trainer.
trainer = Trainer(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    model_config=model_config,
    training_config=training_config,
)

# Run the full training loop; the surrounding prints bracket it in the output.
print("Starting training...")
trainer.train()
print("Training complete")

## Step 7: Visualize Training Results

In [None]:
import json
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from IPython.display import display, HTML

# Load the training history from the experiment's log directory (the same
# path passed as `log_dir` in the training configuration).
history_file = Path(f'data/{EXPERIMENT_NAME}/logs/training_history.json')

history = None
if history_file.exists():
    with open(history_file, 'r', encoding='utf-8') as f:
        history = json.load(f)

if history is None:
    # Report the path that was actually checked.
    print(f"❌ Training history not found at {history_file}")
elif not history.get('train_losses') or not history.get('val_losses'):
    # min()/index() below would raise on empty lists, so stop early.
    print(f"❌ Training history at {history_file} contains no recorded epochs")
else:
    train_losses = history['train_losses']
    val_losses = history['val_losses']
    val_accuracy = history['val_accuracy']
    epochs = range(1, len(train_losses) + 1)

    # Best epoch = first epoch with the lowest validation loss (0-based index).
    best_val_loss = min(val_losses)
    best_epoch = val_losses.index(best_val_loss)
    best_val_accuracy = max(val_accuracy)

    # Create comprehensive visualization (2 x 3 grid)
    fig = plt.figure(figsize=(18, 10))

    # 1. Loss curves (top left)
    ax1 = plt.subplot(2, 3, 1)
    ax1.plot(epochs, train_losses, 'b-', label='Train Loss', linewidth=2, marker='o', markersize=3)
    ax1.plot(epochs, val_losses, 'r-', label='Val Loss', linewidth=2, marker='s', markersize=3)
    ax1.set_xlabel('Epoch', fontsize=12)
    ax1.set_ylabel('Loss', fontsize=12)
    ax1.set_title('Training & Validation Loss', fontsize=14, fontweight='bold')
    ax1.grid(True, alpha=0.3)

    # Mark best epoch. The legend is built afterwards: artists added after
    # legend() is called are not picked up, which would hide the "Best" entry.
    ax1.axvline(x=best_epoch + 1, color='green', linestyle='--', alpha=0.7, label=f'Best: Epoch {best_epoch + 1}')
    ax1.scatter([best_epoch + 1], [val_losses[best_epoch]], color='green', s=100, zorder=5, marker='*')
    ax1.legend(fontsize=10)

    # 2. Accuracy (top middle)
    ax2 = plt.subplot(2, 3, 2)
    ax2.plot(epochs, [acc * 100 for acc in val_accuracy], 'g-', label='Val Accuracy', linewidth=2, marker='d', markersize=3)
    ax2.set_xlabel('Epoch', fontsize=12)
    ax2.set_ylabel('Accuracy (%)', fontsize=12)
    ax2.set_title('Validation Accuracy', fontsize=14, fontweight='bold')
    ax2.legend(fontsize=10)
    ax2.grid(True, alpha=0.3)
    ax2.scatter([best_epoch + 1], [val_accuracy[best_epoch] * 100], color='green', s=100, zorder=5, marker='*')

    # 3. Learning Rate (top right); the key is optional in the history file
    ax3 = plt.subplot(2, 3, 3)
    if 'learning_rate' in history:
        ax3.plot(epochs, history['learning_rate'], 'm-', linewidth=2)
        ax3.set_xlabel('Epoch', fontsize=12)
        ax3.set_ylabel('Learning Rate', fontsize=12)
        ax3.set_title('Learning Rate Schedule', fontsize=14, fontweight='bold')
        ax3.set_yscale('log')
        ax3.grid(True, alpha=0.3)
    else:
        ax3.text(0.5, 0.5, 'LR data not available', ha='center', va='center', fontsize=12)
        ax3.set_title('Learning Rate Schedule', fontsize=14, fontweight='bold')

    # 4. Loss difference (bottom left): |train - val| per epoch
    ax4 = plt.subplot(2, 3, 4)
    loss_diff = [abs(t - v) for t, v in zip(train_losses, val_losses)]
    ax4.plot(epochs, loss_diff, 'orange', linewidth=2, marker='x', markersize=4)
    ax4.set_xlabel('Epoch', fontsize=12)
    ax4.set_ylabel('|Train Loss - Val Loss|', fontsize=12)
    ax4.set_title('Overfitting Indicator', fontsize=14, fontweight='bold')
    ax4.grid(True, alpha=0.3)
    ax4.axhline(y=0.1, color='red', linestyle='--', alpha=0.5, label='Threshold: 0.1')
    ax4.legend(fontsize=10)

    # 5. Loss improvement (bottom middle): drop relative to the first epoch
    ax5 = plt.subplot(2, 3, 5)
    val_loss_improvement = [val_losses[0] - v for v in val_losses]
    ax5.plot(epochs, val_loss_improvement, 'purple', linewidth=2, marker='o', markersize=3)
    ax5.set_xlabel('Epoch', fontsize=12)
    ax5.set_ylabel('Improvement from Initial', fontsize=12)
    ax5.set_title('Val Loss Improvement', fontsize=14, fontweight='bold')
    ax5.grid(True, alpha=0.3)
    ax5.axhline(y=0, color='black', linestyle='-', alpha=0.3)

    # 6. Statistics table (bottom right)
    ax6 = plt.subplot(2, 3, 6)
    ax6.axis('off')

    stats_data = [
        ['Metric', 'Value'],
        ['Total Epochs', f"{len(epochs)}"],
        ['Best Epoch', f"{best_epoch + 1}"],
        ['Best Val Loss', f"{best_val_loss:.4f}"],
        ['Best Val Accuracy', f"{best_val_accuracy:.2%}"],
        ['Final Train Loss', f"{train_losses[-1]:.4f}"],
        ['Final Val Loss', f"{val_losses[-1]:.4f}"],
        ['Improvement', f"{(val_losses[0] - best_val_loss):.4f}"],
    ]

    table = ax6.table(cellText=stats_data, cellLoc='left', loc='center',
                      colWidths=[0.6, 0.4], bbox=[0, 0, 1, 1])
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1, 2)

    # Style header row
    for j in range(2):
        table[(0, j)].set_facecolor('#4CAF50')
        table[(0, j)].set_text_props(weight='bold', color='white')

    # Shade every even-numbered body row
    for i in range(2, len(stats_data), 2):
        for j in range(2):
            table[(i, j)].set_facecolor('#f0f0f0')

    plt.tight_layout()
    plt.show()

    # Print detailed summary
    print("\n" + "=" * 60)
    print("📊 TRAINING SUMMARY")
    print("=" * 60)
    print(f"✓ Training completed after {len(epochs)} epochs")
    print(f"✓ Best model at epoch {best_epoch + 1}")
    print("\n📈 Best Metrics:")
    print(f"  • Validation Loss: {best_val_loss:.4f}")
    print(f"  • Validation Accuracy: {best_val_accuracy:.2%}")
    print("\n📉 Final Metrics:")
    print(f"  • Train Loss: {train_losses[-1]:.4f}")
    print(f"  • Val Loss: {val_losses[-1]:.4f}")
    print(f"  • Val Accuracy: {val_accuracy[-1]:.2%}")

## Step 8: Continue Training from Best Model

In [None]:
# Hyperparameters for the continued (second-phase) run.
# NOTE(review): NEW_LR (5e-5) is higher than the initial LEARNING_RATE (1e-5)
# set earlier in the notebook — confirm this is intended for a continuation.
CONTINUE_EPOCHS = 30
NEW_LR = 5e-5

# Restore the weights stored under 'model_state_dict' in the best checkpoint.
# NOTE(review): weights_only=False allows arbitrary unpickling; only load
# checkpoint files that you produced yourself.
checkpoint = torch.load(
    f'data/{EXPERIMENT_NAME}/checkpoints/best_model.pt',
    map_location=device,
    weights_only=False,
)
model.load_state_dict(checkpoint['model_state_dict'])

# New config for continued training.
# NOTE(review): unlike the first training config, no log_dir is passed here,
# so TrainingConfig's default applies — confirm that is intended.
continued_config = TrainingConfig(
    learning_rate=NEW_LR,
    batch_size=BATCH_SIZE,
    num_epochs=CONTINUE_EPOCHS,
    optimizer="adamw",
    scheduler="cosine",
    use_mixed_precision=True,
    max_grad_norm=1.0,
    gradient_accumulation_steps=1,
    checkpoint_dir=f"data/{EXPERIMENT_NAME}/checkpoints",
    save_every_n_epochs=10,
    early_stopping_patience=20,
)

# A fresh Trainer is built around the already-loaded model and the existing loaders.
trainer = Trainer(model, train_loader, val_loader, model_config, continued_config)
print("Starting continued training...")
trainer.train()
print("Continued training complete!")

  self.scaler = GradScaler() if training_config.use_mixed_precision else None
  super().__init__(


Starting continued training...


  with autocast():
Epoch 1: 100%|██████████| 1/1 [00:04<00:00,  4.12s/it, loss=4.69]
Validation: 0it [00:00, ?it/s]


Continued training complete!
