# Music Feature Extraction and Deep Learning Model

This notebook demonstrates feature extraction from sheet music using `music21` and a Siamese ResNet model for page matching.

In [65]:
# Import required libraries
import sys
sys.path.append('../src')

import numpy as np
import torch
from music21 import corpus

from features import MeasureFeatureExtractor, load_and_extract_features
from model import create_model, SiamesePageMatcher

## 1. Feature Extraction

First, let's extract features from a Bach chorale using our `MeasureFeatureExtractor`.

In [66]:
# Load a sample score from the music21 corpus
score = corpus.parse('bach/bwv65.2.xml')

# Create feature extractor
extractor = MeasureFeatureExtractor()

# Extract features from the score
features = extractor.extract_from_score(score)

print(f"Extracted features shape: {features.shape}")
print(f"Number of measures: {features.shape[0]}")
print(f"Features per measure: {features.shape[1]}")

Feature dimension: 25
Extracted features shape: (17, 25)
Number of measures: 17
Features per measure: 25


In [67]:
# Display feature names
feature_names = extractor.get_feature_names()
print("\nFeature names:")
for i, name in enumerate(feature_names):
    print(f"  {i:2d}. {name}")


Feature names:
   0. mean_pitch
   1. pitch_range
   2. pitch_std
   3. min_pitch
   4. max_pitch
   5. pc_C
   6. pc_C#
   7. pc_D
   8. pc_D#
   9. pc_E
  10. pc_F
  11. pc_F#
  12. pc_G
  13. pc_G#
  14. pc_A
  15. pc_A#
  16. pc_B
  17. note_density
  18. mean_duration
  19. duration_std
  20. min_duration
  21. max_duration
  22. key_signature
  23. time_sig_numerator
  24. time_sig_denominator


In [68]:
# Show features for the first measure
print("\nFirst measure features:")
for name, value in zip(feature_names, features[0]):
    print(f"  {name:25s}: {value:.3f}")


First measure features:
  mean_pitch               : 69.000
  pitch_range              : 0.000
  pitch_std                : 0.000
  min_pitch                : 69.000
  max_pitch                : 69.000
  pc_C                     : 0.000
  pc_C#                    : 0.000
  pc_D                     : 0.000
  pc_D#                    : 0.000
  pc_E                     : 0.000
  pc_F                     : 0.000
  pc_F#                    : 0.000
  pc_G                     : 0.000
  pc_G#                    : 0.000
  pc_A                     : 1.000
  pc_A#                    : 0.000
  pc_B                     : 0.000
  note_density             : 1.000
  mean_duration            : 1.000
  duration_std             : 0.000
  min_duration             : 1.000
  max_duration             : 1.000
  key_signature            : 0.000
  time_sig_numerator       : 3.000
  time_sig_denominator     : 4.000


## 2. Deep Learning Model

Now let's create and test the Siamese ResNet model for page matching.

In [69]:
# Create model configuration
config = {
    'feature_dim': 25,
    'embedding_dim': 128,
    'num_blocks_per_stage': (2, 2, 2, 2),  # ResNet-18 style
    'base_channels': 64,
    'similarity_metric': 'cosine'
}

# Initialize model
model = create_model(config)

# Count parameters
num_params = sum(p.numel() for p in model.parameters())
print(f"Model created with {num_params:,} parameters")

Model created with 3,920,320 parameters


### Test the model with extracted features

In [70]:
# Convert features to PyTorch tensor
# Add batch dimension and ensure we have 32 measures (pad if necessary)
target_measures = 32
current_measures = features.shape[0]

if current_measures < target_measures:
    # Pad with zeros
    padding = np.zeros((target_measures - current_measures, features.shape[1]), dtype=np.float32)
    features_padded = np.vstack([features, padding])
else:
    # Take first 32 measures
    features_padded = features[:target_measures]

# Convert to tensor and add batch dimension (ensure float32)
page_tensor = torch.from_numpy(features_padded).float().unsqueeze(0)
print(f"Page tensor shape: {page_tensor.shape}")  # Should be (1, 32, 25)
print(f"Page tensor dtype: {page_tensor.dtype}")  # Should be torch.float32

Page tensor shape: torch.Size([1, 32, 25])
Page tensor dtype: torch.float32


In [71]:
# Get embedding for the page
model.eval()  # Set to evaluation mode
with torch.no_grad():
    embedding = model.get_embeddings(page_tensor)
    print(f"Embedding shape: {embedding.shape}")  # Should be (1, 128)
    print(f"Embedding norm: {torch.norm(embedding).item():.3f}")

Embedding shape: torch.Size([1, 128])
Embedding norm: 76.741


In [72]:
# Test similarity between the page and itself (should be ~1.0)
with torch.no_grad():
    similarity = model(page_tensor, page_tensor)
    print(f"Similarity of page with itself: {similarity.item():.4f}")
    
# Test with a different random page (should be lower)
random_page = torch.randn(1, 32, 25)
with torch.no_grad():
    similarity_random = model(page_tensor, random_page)
    print(f"Similarity with random page: {similarity_random.item():.4f}")

Similarity of page with itself: 1.0000
Similarity with random page: 0.9599


## 3. Compare Multiple Bach Chorales

Let's extract features from multiple pieces and compare their embeddings.

In [73]:
# Load several Bach chorales
bach_pieces = [
    'bach/bwv65.2.xml',
    'bach/bwv66.6.xml',
    'bach/bwv69.6.xml'
]

embeddings_list = []
piece_names = []

for piece in bach_pieces:
    score = corpus.parse(piece)
    feats = extractor.extract_from_score(score, max_measures=32)
    
    # Pad if necessary
    if feats.shape[0] < 32:
        padding = np.zeros((32 - feats.shape[0], feats.shape[1]), dtype=np.float32)
        feats = np.vstack([feats, padding])
    else:
        feats = feats[:32]
    
    # Convert to tensor (ensure float32)
    page_tensor = torch.from_numpy(feats).float().unsqueeze(0)
    
    # Get embedding
    with torch.no_grad():
        emb = model.get_embeddings(page_tensor)
        embeddings_list.append(emb)
    
    piece_names.append(piece.split('/')[-1])
    print(f"Processed {piece_names[-1]}")

Processed bwv65.2.xml
Processed bwv66.6.xml
Processed bwv69.6.xml


In [74]:
# Compute pairwise similarities
print("\nPairwise similarities:")
for i in range(len(embeddings_list)):
    for j in range(i, len(embeddings_list)):
        emb1 = embeddings_list[i]
        emb2 = embeddings_list[j]
        
        similarity = torch.nn.functional.cosine_similarity(emb1, emb2, dim=1)
        similarity = (similarity + 1) / 2  # Map to [0, 1]
        
        print(f"  {piece_names[i]} <-> {piece_names[j]}: {similarity.item():.4f}")


Pairwise similarities:
  bwv65.2.xml <-> bwv65.2.xml: 1.0000
  bwv65.2.xml <-> bwv66.6.xml: 0.9684
  bwv65.2.xml <-> bwv69.6.xml: 0.9762
  bwv66.6.xml <-> bwv66.6.xml: 1.0000
  bwv66.6.xml <-> bwv69.6.xml: 0.9507
  bwv69.6.xml <-> bwv69.6.xml: 1.0000


## 4. Training the Model

To train the model, we need to create a dataset of page pairs with labels indicating whether they match or not.

In [79]:
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import random

class PagePairDataset(Dataset):
    """Dataset of page pairs for training."""
    
    def __init__(self, scores, extractor, num_pairs=1000, target_measures=16, overlap=0):
        """
        Args:
            scores: List of music21 scores
            extractor: MeasureFeatureExtractor instance
            num_pairs: Number of training pairs to generate
            target_measures: Number of measures per page
            overlap: Number of measures to overlap between pages (for sliding window)
        """
        self.num_pairs = num_pairs
        self.target_measures = target_measures
        
        # Pre-extract multiple pages from each score using sliding window
        self.pages_by_piece = []  # List of lists: each inner list contains pages from one piece
        
        for score in scores:
            # Extract all features from the score
            all_feats = extractor.extract_from_score(score)
            
            # Create multiple pages using sliding window
            pages_from_this_piece = []
            stride = target_measures - overlap  # How much to shift the window
            
            for start_idx in range(0, all_feats.shape[0], stride):
                end_idx = start_idx + target_measures
                
                # Extract page
                if end_idx <= all_feats.shape[0]:
                    page = all_feats[start_idx:end_idx]
                else:
                    # Last page - pad if necessary
                    page = all_feats[start_idx:]
                    if page.shape[0] < target_measures:
                        padding = np.zeros((target_measures - page.shape[0], all_feats.shape[1]), dtype=np.float32)
                        page = np.vstack([page, padding])
                
                pages_from_this_piece.append(page)
            
            # Only add pieces that have at least 2 pages (so we can make positive pairs)
            if len(pages_from_this_piece) >= 2:
                self.pages_by_piece.append(pages_from_this_piece)
        
        if len(self.pages_by_piece) < 2:
            raise ValueError(f"Need at least 2 pieces with multiple pages, but only found {len(self.pages_by_piece)}. "
                           f"Try using longer pieces or reducing target_measures.")
        
        print(f"Loaded {len(self.pages_by_piece)} pieces with multiple pages")
        for i, pages in enumerate(self.pages_by_piece):
            print(f"  Piece {i}: {len(pages)} pages")
    
    def __len__(self):
        return self.num_pairs
    
    def __getitem__(self, idx):
        # Generate positive pair (same piece, different pages) or negative pair (different pieces)
        is_match = random.random() > 0.5
        
        if is_match:
            # Positive pair: different pages from the same piece
            piece_idx = random.randint(0, len(self.pages_by_piece) - 1)
            pages = self.pages_by_piece[piece_idx]
            
            # Select two different pages from this piece
            page_idx1, page_idx2 = random.sample(range(len(pages)), 2)
            page1 = pages[page_idx1]
            page2 = pages[page_idx2]
            label = 1.0
        else:
            # Negative pair: pages from different pieces
            piece_idx1, piece_idx2 = random.sample(range(len(self.pages_by_piece)), 2)
            
            page1 = random.choice(self.pages_by_piece[piece_idx1])
            page2 = random.choice(self.pages_by_piece[piece_idx2])
            label = 0.0
        
        return (
            torch.from_numpy(page1).float(),
            torch.from_numpy(page2).float(),
            torch.tensor(label, dtype=torch.float32)
        )

In [80]:
# Load multiple Bach chorales for training
print("Loading training data...")
train_pieces = [
    'bach/bwv65.2.xml',
    'bach/bwv66.6.xml',
    'bach/bwv69.6.xml',
    'bach/bwv4.8.xml',
    'bach/bwv5.7.xml',
    'bach/bwv6.6.xml',
    'bach/bwv7.7.xml',
    'bach/bwv10.7.xml'
]

train_scores = []
for piece in train_pieces:
    try:
        score = corpus.parse(piece)
        train_scores.append(score)
        print(f"  Loaded {piece}")
    except:
        print(f"  Failed to load {piece}")

print(f"\nLoaded {len(train_scores)} pieces for training")

Loading training data...
  Loaded bach/bwv65.2.xml
  Loaded bach/bwv66.6.xml
  Loaded bach/bwv69.6.xml
  Loaded bach/bwv4.8.xml
  Loaded bach/bwv5.7.xml
  Loaded bach/bwv6.6.xml
  Loaded bach/bwv7.7.xml
  Loaded bach/bwv10.7.xml

Loaded 8 pieces for training


In [81]:
# Create dataset and dataloader
train_dataset = PagePairDataset(train_scores, extractor, num_pairs=200, target_measures=16)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

print(f"Training dataset size: {len(train_dataset)} pairs")
print(f"Number of batches: {len(train_loader)}")

Loaded 4 pieces with multiple pages
  Piece 0: 2 pages
  Piece 1: 2 pages
  Piece 2: 2 pages
  Piece 3: 2 pages
Training dataset size: 200 pairs
Number of batches: 25


### Training Loop

In [None]:
# Training setup
model.train()  # Set to training mode
criterion = torch.nn.BCELoss()  # Binary cross-entropy loss
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
print("Starting training...")

for epoch in range(num_epochs):
    total_loss = 0.0
    correct = 0
    total = 0
    
    for batch_idx, (page1, page2, labels) in enumerate(train_loader):
        # Forward pass
        optimizer.zero_grad()
        similarity = model(page1, page2)
        
        # Compute loss
        loss = criterion(similarity, labels)
        
        # Backward pass
        loss.backward()
        optimizer.step()
        
        # Statistics
        total_loss += loss.item()
        predictions = (similarity > 0.5).float()
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
    
    # Print epoch statistics
    avg_loss = total_loss / len(train_loader)
    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")

print("\nTraining complete!")

Starting training...
Epoch [1/10] - Loss: 0.5759, Accuracy: 69.50%
Epoch [2/10] - Loss: 0.2733, Accuracy: 91.00%
Epoch [3/10] - Loss: 0.3389, Accuracy: 86.00%
Epoch [4/10] - Loss: 0.2704, Accuracy: 92.00%
Epoch [5/10] - Loss: 0.2761, Accuracy: 90.50%
Epoch [6/10] - Loss: 0.2683, Accuracy: 90.00%
Epoch [7/10] - Loss: 0.2601, Accuracy: 91.00%
Epoch [8/10] - Loss: 0.2916, Accuracy: 92.00%
Epoch [9/10] - Loss: 0.2915, Accuracy: 93.50%
Epoch [10/10] - Loss: 0.2810, Accuracy: 91.00%

Training complete!


## 5. Testing the Trained Model

Now let's test the model on new pieces it hasn't seen during training.

In [82]:
# Load test pieces (different from training)
print("Loading test data...")
test_pieces = [
    'bach/bwv11.6.xml',
    'bach/bwv12.7.xml',
    'bach/bwv13.6.xml',
    'bach/bwv14.5.xml'
]

test_scores = []
for piece in test_pieces:
    try:
        score = corpus.parse(piece)
        test_scores.append(score)
        print(f"  Loaded {piece}")
    except:
        print(f"  Failed to load {piece}")

print(f"\nLoaded {len(test_scores)} pieces for testing")

Loading test data...
  Loaded bach/bwv11.6.xml
  Loaded bach/bwv12.7.xml
  Loaded bach/bwv13.6.xml
  Loaded bach/bwv14.5.xml

Loaded 4 pieces for testing


In [84]:
# Create test dataset
test_dataset = PagePairDataset(test_scores, extractor, num_pairs=100)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

print(f"Test dataset size: {len(test_dataset)} pairs")

ValueError: Need at least 2 pieces with multiple pages, but only found 1. Try using longer pieces or reducing target_measures.

In [None]:
# Evaluate on test set
model.eval()  # Set to evaluation mode
test_loss = 0.0
correct = 0
total = 0

true_positives = 0
false_positives = 0
true_negatives = 0
false_negatives = 0

print("Evaluating model on test set...")

with torch.no_grad():
    for page1, page2, labels in test_loader:
        # Forward pass
        similarity = model(page1, page2)
        
        # Compute loss
        loss = criterion(similarity, labels)
        test_loss += loss.item()
        
        # Predictions
        predictions = (similarity > 0.5).float()
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
        
        # Confusion matrix
        for pred, label in zip(predictions, labels):
            if pred == 1 and label == 1:
                true_positives += 1
            elif pred == 1 and label == 0:
                false_positives += 1
            elif pred == 0 and label == 0:
                true_negatives += 1
            elif pred == 0 and label == 1:
                false_negatives += 1

# Print results
avg_test_loss = test_loss / len(test_loader)
accuracy = 100 * correct / total
precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

print(f"\nTest Results:")
print(f"  Loss: {avg_test_loss:.4f}")
print(f"  Accuracy: {accuracy:.2f}%")
print(f"  Precision: {precision:.4f}")
print(f"  Recall: {recall:.4f}")
print(f"  F1 Score: {f1:.4f}")
print(f"\nConfusion Matrix:")
print(f"  True Positives: {true_positives}")
print(f"  False Positives: {false_positives}")
print(f"  True Negatives: {true_negatives}")
print(f"  False Negatives: {false_negatives}")

Evaluating model on test set...


ValueError: Sample larger than population or is negative

### Visualize Test Results

In [None]:
# Test with specific examples
print("Testing specific piece comparisons:\n")

test_features = []
for i, score in enumerate(test_scores):
    feats = extractor.extract_from_score(score, max_measures=32)
    if feats.shape[0] < 32:
        padding = np.zeros((32 - feats.shape[0], feats.shape[1]), dtype=np.float32)
        feats = np.vstack([feats, padding])
    else:
        feats = feats[:32]
    test_features.append(feats)

# Compare all pairs
with torch.no_grad():
    for i in range(len(test_features)):
        for j in range(i, len(test_features)):
            page1 = torch.from_numpy(test_features[i]).float().unsqueeze(0)
            page2 = torch.from_numpy(test_features[j]).float().unsqueeze(0)
            
            similarity = model(page1, page2).item()
            
            piece1 = test_pieces[i].split('/')[-1]
            piece2 = test_pieces[j].split('/')[-1]
            
            match_type = "SAME" if i == j else "DIFF"
            print(f"[{match_type}] {piece1} <-> {piece2}: {similarity:.4f}")

Testing specific piece comparisons:

[SAME] bwv11.6.xml <-> bwv11.6.xml: 1.0000
[DIFF] bwv11.6.xml <-> bwv12.7.xml: 0.3361
[DIFF] bwv11.6.xml <-> bwv13.6.xml: 0.3294
[DIFF] bwv11.6.xml <-> bwv14.5.xml: 0.2468
[SAME] bwv12.7.xml <-> bwv12.7.xml: 1.0000
[DIFF] bwv12.7.xml <-> bwv13.6.xml: 0.9994
[DIFF] bwv12.7.xml <-> bwv14.5.xml: 0.2937
[SAME] bwv13.6.xml <-> bwv13.6.xml: 1.0000
[DIFF] bwv13.6.xml <-> bwv14.5.xml: 0.3048
[SAME] bwv14.5.xml <-> bwv14.5.xml: 1.0000


## 6. Save and Load the Model

In [None]:
# Save the trained model
model_path = '../models/page_matcher.pth'
import os
os.makedirs('../models', exist_ok=True)

torch.save({
    'model_state_dict': model.state_dict(),
    'config': config,
    'optimizer_state_dict': optimizer.state_dict(),
}, model_path)

print(f"Model saved to {model_path}")

Model saved to ../models/page_matcher.pth


In [None]:
# Load the model (example)
checkpoint = torch.load(model_path)

# Create new model instance
loaded_model = create_model(checkpoint['config'])
loaded_model.load_state_dict(checkpoint['model_state_dict'])
loaded_model.eval()

print("Model loaded successfully!")

# Test that it works
with torch.no_grad():
    test_input = torch.randn(1, 32, 25)
    output = loaded_model.get_embeddings(test_input)
    print(f"Test output shape: {output.shape}")

Model loaded successfully!
Test output shape: torch.Size([1, 128])
