# Sound2Sheet Training Pipeline

A complete training pipeline for an audio-to-sheet-music transcription model.

## Step 1: Check GPU and Install Dependencies

In [None]:
import torch

# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Device: {device}")

# On CUDA, report the name and total memory (in decimal GB) of GPU 0.
if device == 'cuda':
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

In [None]:
# Clone repository and switch the notebook's working directory into it.
# Later cells import from `src.*` and use relative paths (data/, checkpoints/),
# so they presumably have to run from the repository root -- confirm.
# NOTE: re-running this cell after a successful clone makes `git clone` fail
# (the target directory already exists), and `%cd` is resolved relative to
# the current working directory.
!git clone https://github.com/k-Dispersik/Sound2Sheet.git
%cd Sound2Sheet

In [None]:
# Install system dependencies (apt output silenced with -qq):
#   fluidsynth          - software synthesizer
#   fluid-soundfont-gm  - General MIDI soundfont package
# NOTE(review): presumably needed by the dataset generator to render MIDI to
# audio (midi2audio is installed in the next cell) -- confirm.
!apt-get update -qq
!apt-get install -y -qq fluidsynth fluid-soundfont-gm

In [None]:
# Install Python packages (-q keeps pip output short).
# The trailing backslash continues the shell command onto the next line, so
# both lines form a single `pip install` invocation.
!pip install -q torch torchaudio transformers librosa numpy pandas tqdm pyyaml \
    pretty_midi music21 matplotlib mido midi2audio accelerate scipy scikit-learn

# Printed unconditionally once the shell command returns; it does not check
# pip's exit status.
print("Installation complete")

In [None]:
# Verify imports: load the project entry points used by the rest of the
# notebook so that a missing dependency or a wrong working directory surfaces
# here instead of in the middle of the pipeline.
# NOTE(review): presumably `src` is the package at the root of the cloned
# repository -- confirm.
from src.dataset import DatasetGenerator, MIDIGenerator
from src.core import AudioProcessor
from src.model import Sound2SheetModel, Trainer, create_dataloaders
from src.converter import NoteBuilder
from src.evaluation import Evaluator

print("All imports successful")

## Step 2: Generate Dataset

Configure and generate synthetic training data.

In [None]:
# Experiment-wide settings. Later cells read these names both from Python and
# from `{NAME}` placeholders inside `!` shell commands.
SAMPLES = 1000          # Total samples (train + val + test)
COMPLEXITY = "medium"   # simple, medium, complex
EPOCHS = 50             # Training epochs
BATCH_SIZE = 16         # Batch size
LEARNING_RATE = 1e-4    # Learning rate
EXPERIMENT_NAME = "training_run_1"

# Echo the settings, one per line, under a heading.
print(
    "Configuration:",
    f"  Samples: {SAMPLES}",
    f"  Complexity: {COMPLEXITY}",
    f"  Epochs: {EPOCHS}",
    f"  Batch size: {BATCH_SIZE}",
    f"  Learning rate: {LEARNING_RATE}",
    sep="\n",
)

In [None]:
# Generate dataset by invoking the project's dataset CLI in a subprocess.
# IPython substitutes {SAMPLES}, {COMPLEXITY} and {EXPERIMENT_NAME} with the
# values of the Python variables from the configuration cell before the shell
# command runs. The output directory is keyed by the experiment name.
!python -m src.dataset.cli generate \
    --samples {SAMPLES} \
    --complexity {COMPLEXITY} \
    --name {EXPERIMENT_NAME} \
    --output-dir data/datasets/{EXPERIMENT_NAME}

In [None]:
# Verify dataset: run the CLI `info` subcommand on the directory the generate
# step wrote to ({EXPERIMENT_NAME} is expanded by IPython from the Python
# variable of the same name).
!python -m src.dataset.cli info \
    --dataset-dir data/datasets/{EXPERIMENT_NAME}

## Step 3: Configure Model

Set up model architecture and training parameters.

In [None]:
from src.model.config import ModelConfig, TrainingConfig, DataConfig
from pathlib import Path

# --- Model architecture -----------------------------------------------------
# `device` comes from the GPU-check cell at the top of the notebook.
model_config = ModelConfig(
    vocab_size=128,
    hidden_size=256,
    num_decoder_layers=6,
    num_attention_heads=8,
    dropout=0.1,
    max_sequence_length=512,
    device=device,
)

# --- Optimisation and checkpointing ------------------------------------------
# Learning rate, batch size and epoch count are taken from the constants in
# the dataset-configuration cell.
training_config = TrainingConfig(
    learning_rate=LEARNING_RATE,
    batch_size=BATCH_SIZE,
    num_epochs=EPOCHS,
    optimizer='adamw',
    scheduler='cosine',
    use_amp=True,
    max_grad_norm=1.0,
    gradient_accumulation_steps=4,
    checkpoint_dir='checkpoints',
    save_every_n_epochs=0,  # Only save best and final
    early_stopping_patience=10,
)

# --- Audio features and dataset location --------------------------------------
# The manifest directory is the output directory of the dataset-generation step.
data_config = DataConfig(
    sample_rate=16000,
    n_mels=128,
    max_audio_length=10.0,
    max_notes=512,
    manifest_dir=Path(f'data/datasets/{EXPERIMENT_NAME}'),
)

# Summarise the settings that matter most when comparing runs.
print(
    "Model configuration:",
    f"  Hidden size: {model_config.hidden_size}",
    f"  Decoder layers: {model_config.num_decoder_layers}",
    f"  Attention heads: {model_config.num_attention_heads}",
    sep="\n",
)
print(
    "\nTraining configuration:",
    f"  Learning rate: {training_config.learning_rate}",
    f"  Batch size: {training_config.batch_size}",
    f"  Epochs: {training_config.num_epochs}",
    f"  Mixed precision: {training_config.use_amp}",
    f"  Gradient accumulation: {training_config.gradient_accumulation_steps}",
    sep="\n",
)

## Step 4: Create Data Loaders

In [None]:
# Build the train / validation / test loaders from the three config objects.
loaders = create_dataloaders(data_config, model_config, training_config)
train_loader, val_loader, test_loader = loaders
del loaders  # keep only the three named loaders in the notebook namespace

# Report how many samples ended up in each split.
print(
    "Dataset splits:",
    f"  Train samples: {len(train_loader.dataset)}",
    f"  Val samples: {len(val_loader.dataset)}",
    f"  Test samples: {len(test_loader.dataset)}",
    sep="\n",
)

## Step 5: Initialize Model

In [None]:
# Instantiate the model with the encoder frozen (freeze_encoder=True).
model = Sound2SheetModel(model_config, freeze_encoder=True)

# count_parameters() is indexed by 'total', 'trainable' and 'frozen' below;
# the counts are printed with thousands separators.
params = model.count_parameters()
print(
    "Model parameters:",
    f"  Total: {params['total']:,}",
    f"  Trainable: {params['trainable']:,}",
    f"  Frozen: {params['frozen']:,}",
    sep="\n",
)

## Step 6: Train Model

Start training with progress tracking.

In [None]:
# Create trainer: wires the model to the train/validation loaders and to the
# model and training configurations built in the earlier cells. The test
# loader is not passed here; it is used by the evaluation cell.
trainer = Trainer(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    model_config=model_config,
    training_config=training_config
)

# Train: this call blocks until trainer.train() returns.
# NOTE(review): later cells read checkpoints/best_model.pt and
# checkpoints/training_history.json, so the trainer presumably writes them to
# training_config.checkpoint_dir -- confirm.
print("Starting training...")
trainer.train()
print("Training complete")

## Step 7: Visualize Training Results

In [None]:
import json
import matplotlib.pyplot as plt
from pathlib import Path

def find_best_epoch(val_losses):
    """Return the 0-based index of the lowest validation loss.

    Args:
        val_losses: List of per-epoch validation losses.

    Returns:
        The index of the first occurrence of the minimum loss, or ``None``
        when the list is empty (``min()`` would raise ``ValueError`` there).
    """
    if not val_losses:
        return None
    return val_losses.index(min(val_losses))


# Load training history
history_file = Path('checkpoints/training_history.json')
if history_file.exists():
    # JSON is read as UTF-8 explicitly instead of the platform default encoding.
    with open(history_file, 'r', encoding='utf-8') as f:
        history = json.load(f)

    # Plot training curves: loss on the left, validation accuracy on the right.
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Loss
    axes[0].plot(history['train_loss'], label='Train Loss', linewidth=2)
    axes[0].plot(history['val_loss'], label='Val Loss', linewidth=2)
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Loss')
    axes[0].set_title('Training and Validation Loss')
    axes[0].legend()
    axes[0].grid(alpha=0.3)

    # Accuracy
    axes[1].plot(history['val_accuracy'], label='Val Accuracy', color='green', linewidth=2)
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Accuracy')
    axes[1].set_title('Validation Accuracy')
    axes[1].legend()
    axes[1].grid(alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Print best metrics. The best epoch is the one with the lowest validation
    # loss; an empty history is reported instead of raising ValueError.
    best_epoch = find_best_epoch(history['val_loss'])
    if best_epoch is None:
        print("Training history is empty")
    else:
        print("Best results:")
        print(f"  Epoch: {best_epoch + 1}")  # epochs are displayed 1-based
        print(f"  Val Loss: {history['val_loss'][best_epoch]:.4f}")
        print(f"  Val Accuracy: {history['val_accuracy'][best_epoch]:.4f}")
else:
    print("Training history not found")

## Step 8: Evaluate Model

Run evaluation on the test set.

In [None]:
from src.evaluation import EvaluationConfig

# Restore the weights stored under 'model_state_dict' in the best checkpoint
# and switch the model to evaluation mode.
checkpoint = torch.load('checkpoints/best_model.pt', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Onset and offset matching tolerances passed to the evaluator.
eval_config = EvaluationConfig(onset_tolerance=0.05, offset_tolerance=0.05)

# Score the restored model on the held-out test loader.
evaluator = Evaluator(model, model_config, eval_config)
print("Evaluating on test set...")
metrics = evaluator.evaluate(test_loader)

# Report: ratio-style metrics as percentages, timing deviation in milliseconds.
print(
    "\nEvaluation results:",
    f"  Note Accuracy: {metrics['note_accuracy']:.2%}",
    f"  Onset F1: {metrics['onset_f1']:.2%}",
    f"  Offset F1: {metrics['offset_f1']:.2%}",
    f"  Pitch Precision: {metrics['pitch_precision']:.2%}",
    f"  Pitch Recall: {metrics['pitch_recall']:.2%}",
    f"  Timing Deviation: {metrics['timing_deviation_ms']:.1f}ms",
    sep="\n",
)

## Step 9: Test Inference

Transcribe a sample from the test set.

In [None]:
from src.model.config import InferenceConfig
from src.converter import NoteBuilder

# Get a test sample: take the first item of the test dataset and read its
# 'mel_spec' entry. unsqueeze(0) adds a leading dimension (presumably the
# batch dimension -- confirm) and .to(device) moves the tensor to the device
# selected in the first cell.
test_sample = test_loader.dataset[0]
mel_spec = test_sample['mel_spec'].unsqueeze(0).to(device)

# Inference configuration: greedy decoding, at most 512 generated steps.
inference_config = InferenceConfig(
    strategy='greedy',
    max_length=512
)

# Generate predictions with gradient tracking disabled.
# The model was put into eval mode in the evaluation cell (model.eval()).
print("Transcribing sample...")
with torch.no_grad():
    predictions = model.generate(mel_spec, inference_config)

# Show the count and the first 20 entries of the prediction sequence.
# NOTE(review): len(predictions) is the note count only if generate() returns
# a flat sequence rather than a batched structure -- confirm.
print(f"Predicted {len(predictions)} notes")
print(f"Notes: {predictions[:20]}...")

# Convert to MIDI: build a note sequence from the predictions with
# quantization enabled (resolution 16).
note_builder = NoteBuilder()
note_sequence = note_builder.build_from_predictions(
    predictions,
    quantize=True,
    quantization_resolution=16
)

# Save MIDI to the current working directory.
note_sequence.to_midi('transcribed_sample.mid')
print("\nSaved: transcribed_sample.mid")

## Step 10: Save Final Model

The trained model checkpoints have already been saved during training; this step lists the files to download.

In [None]:
# Nothing is written here: the checkpoints already live in checkpoints/.
#   best_model.pt  - best model by validation loss
#   final_model.pt - final model after all epochs
# This cell only lists the artefacts worth downloading.
print(
    "Model files:",
    "  checkpoints/best_model.pt",
    "  checkpoints/final_model.pt",
    "  checkpoints/training_history.json",
    "\nDownload these files to use the model locally",
    sep="\n",
)

## Optional: Resume Training

If training was interrupted, resume from a saved checkpoint.

In [None]:
# Resume training example.
# The code below is wrapped in a triple-quoted string, so it is never
# executed; remove the two enclosing triple-quote lines to run it.
# NOTE(review): the example loads `checkpoint` with torch.load but does not
# pass it to Trainer, which receives the checkpoint path via `resume_from`.
"""
# Load checkpoint
checkpoint = torch.load('checkpoints/best_model.pt', map_location=device)

# Create new trainer with loaded model
trainer = Trainer(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    model_config=model_config,
    training_config=training_config,
    resume_from='checkpoints/best_model.pt'
)

# Continue training
trainer.train()
"""
print("Resume training code available (commented out)")

## Optional: Custom Configuration

Adjust hyperparameters for different experiments.

In [None]:
# Example: Larger model, followed by a second training setup.
# Both snippets sit inside a triple-quoted string and are not executed;
# remove the two enclosing triple-quote lines to apply them, then re-run the
# cells that consume model_config / training_config.
# NOTE(review): the examples set only a subset of the fields used in Step 3
# (e.g. no vocab_size, device or checkpoint_dir); this presumably relies on
# the config classes providing defaults -- confirm.
"""
model_config = ModelConfig(
    hidden_size=512,
    num_decoder_layers=8,
    num_attention_heads=16,
    dropout=0.1
)

# Example: More aggressive training
training_config = TrainingConfig(
    learning_rate=5e-5,
    batch_size=8,
    gradient_accumulation_steps=16,
    num_epochs=100
)
"""
print("Custom configuration examples available (commented out)")

## Training Complete

The model is now trained and ready for use. Download the checkpoint files to run it locally.