# Initial Project Setup

In [1]:
import torch

# Query CUDA availability once and reuse the answer for both device selection and reporting.
cuda_available = torch.cuda.is_available()
device = 'cuda' if cuda_available else 'cpu'
print(f"Device: {device}")

if cuda_available:
    # Report the name and total memory (in GB) of GPU index 0.
    gpu_properties = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {gpu_properties.total_memory / 1e9:.2f} GB")

Device: cpu


### Clone Repository and Install Dependencies

In [None]:
# Clone repository
!git clone https://github.com/k-Dispersik/Sound2Sheet.git
%cd Sound2Sheet

In [None]:
# Install system dependencies
# NOTE(review): uses apt-get, so this cell assumes a Debian/Ubuntu-style environment
# in which the notebook user is allowed to install packages — confirm for other setups.
# -qq reduces apt output; -y answers prompts automatically so the cell does not block.
!apt-get update -qq
# presumably fluidsynth and the General MIDI soundfont are used to render MIDI to audio
# during synthetic data generation — TODO confirm against the data-generation code.
!apt-get install -y -qq fluidsynth fluid-soundfont-gm

In [None]:
# Install Python packages
!pip install -r requirements.txt
print("Installation complete")

In [2]:
# Smoke-test the project package: an ImportError here means the setup above is incomplete
from src.model import (
    Sound2SheetModel,
    Trainer,
    create_dataloaders,
)

print("All imports successful")

  from .autonotebook import tqdm as notebook_tqdm


All imports successful


# Training Pipeline

Pipeline for training the Sound2Sheet model on synthetic piano data.

In [3]:
# Bring the pipeline stage functions into the notebook namespace
from run_pipeline import (
    create_loader,
    create_model,
    create_model_config,
    generate_synthetic_data,
    read_config,
    run_train,
)

print("Pipeline functions imported successfully")

Pipeline functions imported successfully


## Load Configuration

Read experiment settings from `config.json`.

In [4]:
# Load configuration
config = read_config("config.json")

# Echo the settings shown below; each entry is (label, key path into config),
# resolved lazily so values are looked up in the same order they are printed.
summary_fields = (
    ("Experiment", ('experiment_name',)),
    ("Dataset samples", ('dataset', 'total_samples')),
    ("Epochs", ('training', 'num_epochs')),
    ("Batch size", ('training', 'batch_size')),
)
for label, key_path in summary_fields:
    value = config
    for key in key_path:
        value = value[key]
    print(f"{label}: {value}")

Experiment: data/sound2sheet_experiment_01
Dataset samples: 10
Epochs: 3
Batch size: 32


## Step 1: Generate Synthetic Data

Create the train, validation, and test datasets with MIDI and audio files.

In [5]:
%%time

data = generate_synthetic_data(
    total_samples=config["dataset"]["total_samples"],
    complexity_distribution=config["dataset"]["complexity_distribution"],
    output_dir=config["experiment_name"]
)

print(f"✓ Dataset generated: {config['experiment_name']}")

Generating train: 100%|██████████| 7/7 [00:02<00:00,  3.07sample/s, complexity=int, tempo=67] 
Generating train: 100%|██████████| 7/7 [00:02<00:00,  3.07sample/s, complexity=int, tempo=67]
Generating val: 100%|██████████| 1/1 [00:00<00:00,  4.68sample/s, complexity=beg, tempo=94]
Generating val: 100%|██████████| 1/1 [00:00<00:00,  4.68sample/s, complexity=beg, tempo=94]
Generating test: 100%|██████████| 2/2 [00:00<00:00,  5.55sample/s, complexity=beg, tempo=172]

✓ Dataset generated: data/sound2sheet_experiment_01
CPU times: user 214 ms, sys: 53.9 ms, total: 268 ms
Wall time: 2.86 s





## Step 2: Create DataLoaders

Prepare train/validation/test dataloaders.

In [6]:
# Build the model configuration from the experiment config and the selected device
model_config = create_model_config(config, device)

# Build the dataloaders; create_loader also returns the training configuration
loaders, training_config = create_loader(config, model_config)
train_loader, val_loader, test_loader = loaders

# Report the number of batches in each split
for split_name, split_loader in (
    ("Train", train_loader),
    ("Val", val_loader),
    ("Test", test_loader),
):
    print(f"✓ {split_name} batches: {len(split_loader)}")

✓ Train batches: 1
✓ Val batches: 1
✓ Test batches: 1


## Step 3: Create Model

Initialize Sound2Sheet model (AST encoder + Piano Roll Classifier).

In [8]:
model = create_model(config, device)

# Count parameters in a single pass: every parameter adds to the total, and
# only those with requires_grad set add to the trainable count
total_params = 0
trainable_params = 0
for param in model.parameters():
    n_elements = param.numel()
    total_params += n_elements
    if param.requires_grad:
        trainable_params += n_elements

print(f"✓ Model created on {device}")
print(f"  Total parameters: {total_params:,}")
print(f"  Trainable parameters: {trainable_params:,}")

✓ Model created on cpu
  Total parameters: 88,047,704
  Trainable parameters: 1,860,440


## Step 4: Train Model

Run training loop with mixed precision and checkpointing.

In [9]:
%%time

trainer = run_train(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    model_config=model_config,
    training_config=training_config
)

print("✓ Training complete!")
print(f"  Best model saved to: {training_config.checkpoint_dir}")

Epoch 1: 100%|██████████| 1/1 [00:38<00:00, 38.46s/it, loss=0.72]
Validation:   0%|          | 0/1 [00:00<?, ?it/s]
Validation: 100%|██████████| 1/1 [00:01<00:00,  1.98s/it]

Epoch 2: 100%|██████████| 1/1 [00:37<00:00, 37.59s/it, loss=0.72]
Validation:   0%|          | 0/1 [00:00<?, ?it/s]
Validation: 100%|██████████| 1/1 [00:02<00:00,  2.07s/it]
Validation: 100%|██████████| 1/1 [00:02<00:00,  2.07s/it]
Epoch 3: 100%|██████████| 1/1 [00:38<00:00, 38.51s/it, loss=0.72]
Epoch 3: 100%|██████████| 1/1 [00:38<00:00, 38.51s/it, loss=0.72]
Validation: 100%|██████████| 1/1 [00:02<00:00,  2.16s/it]



✓ Training complete!
  Best model saved to: data/sound2sheet_experiment_01/checkpoints
CPU times: user 9min 57s, sys: 5.8 s, total: 10min 3s
Wall time: 2min 2s


## Training Metrics

View training history and loss curves.

In [11]:
import matplotlib.pyplot as plt

# Plot training history.
# `trainer.history` is treated as a dict-like mapping from metric name to values.
# Every series and summary value is looked up defensively, so a missing key
# skips that item instead of raising a KeyError part-way through the cell.
history = getattr(trainer, 'history', None)

if history is None:
    # Say so explicitly rather than leaving the cell silently blank.
    print("No training history found on trainer; nothing to plot.")
else:
    has_accuracy = 'train_acc' in history or 'val_acc' in history

    # Allocate the accuracy panel only when accuracy data exists, so no empty
    # axes is drawn. With two panels the figure size is (15, 5).
    n_panels = 2 if has_accuracy else 1
    fig, axes_grid = plt.subplots(1, n_panels, figsize=(7.5 * n_panels, 5), squeeze=False)
    axes = axes_grid.ravel()  # flat array, so axes[0] / axes[1] index the panels

    # Each panel: (axes, y-label, title, [(history key, legend label), ...])
    panels = [
        (axes[0], 'Loss', 'Training Loss',
         [('train_loss', 'Train Loss'), ('val_loss', 'Val Loss')]),
    ]
    if has_accuracy:
        panels.append(
            (axes[1], 'Accuracy', 'Training Accuracy',
             [('train_acc', 'Train Acc'), ('val_acc', 'Val Acc')])
        )

    for ax, ylabel, title, series in panels:
        plotted_any = False
        for key, label in series:
            if key in history:
                ax.plot(history[key], label=label)
                plotted_any = True
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        if plotted_any:
            # legend() with no labelled artists only emits a warning; skip it.
            ax.legend()
        ax.grid(True)

    plt.tight_layout()
    plt.show()

    # Summary values are optional: print only the ones the history provides.
    if 'best_epoch' in history:
        print(f"Best epoch: {history['best_epoch']}")
    if 'best_val_loss' in history:
        print(f"Best val loss: {history['best_val_loss']:.4f}")