In this project, our goal is to predict the next note given the previous notes.

In [1]:
# # Install PyTorch (if needed)
# !pip install torch torchvision torchaudio --quiet

In [2]:
# !pip install music21 --quiet

In [3]:
!git clone https://github.com/jukedeck/nottingham-dataset.git

Cloning into 'nottingham-dataset'...
remote: Enumerating objects: 3119, done.[K
remote: Total 3119 (delta 0), reused 0 (delta 0), pack-reused 3119 (from 1)[K
Receiving objects: 100% (3119/3119), 879.17 KiB | 2.27 MiB/s, done.
Resolving deltas: 100% (1432/1432), done.


In [4]:
# !pip install torch scikit-learn --quiet

In [5]:
# STEP 3: Parse melody for next-note prediction
import os
from music21 import converter, note
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import Dataset, DataLoader

# Parse melodies and build note sequences
def parse_melody_sequences(folder_path, max_files=50):
    note_seqs = []
    file_list = sorted([f for f in os.listdir(folder_path) if f.endswith('.abc')])[:max_files]
    for file in file_list:
        try:
            score = converter.parse(os.path.join(folder_path, file))
            notes = [el.name for el in score.flat.notes if isinstance(el, note.Note)]
            if len(notes) > 10:
                note_seqs.append(notes)
        except:
            continue
    return note_seqs

dataset_path = "/content/nottingham-dataset/ABC_cleaned"
melody_sequences = parse_melody_sequences(dataset_path)

# Encode notes
all_notes = sorted(set(nt for seq in melody_sequences for nt in seq))
note_encoder = LabelEncoder().fit(all_notes)
encoded_sequences = [note_encoder.transform(seq) for seq in melody_sequences]

# Create (input, target) pairs for next-note prediction
seq_length = 8
input_seqs, target_seqs = [], []
for seq in encoded_sequences:
    for i in range(len(seq) - seq_length):
        input_seqs.append(seq[i:i+seq_length])
        target_seqs.append(seq[i+seq_length])

class MelodyContinuationDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets
    def __len__(self):
        return len(self.inputs)
    def __getitem__(self, idx):
        return torch.tensor(self.inputs[idx], dtype=torch.long), torch.tensor(self.targets[idx], dtype=torch.long)

dataset = MelodyContinuationDataset(input_seqs, target_seqs)
loader = DataLoader(dataset, batch_size=32, shuffle=True)


  return self.iter().getElementsByClass(classFilterList)


Because this model is already training from scratch, on a relatively small dataset. So:

	•	There’s no pretrained model to fine-tune in this case
	•	We train the entire LSTM end-to-end from the Nottingham melodies
	•	“Fine-tuning” usually refers to taking a pretrained model (like GPT, BERT, MusicGen, etc.) and adapting it to a new task

In [6]:
# STEP 4: Define LSTM model
import torch.nn as nn

class NextNoteLSTM(nn.Module):
    def __init__(self, note_vocab):
        super().__init__()
        self.embed = nn.Embedding(note_vocab, 32)
        self.lstm = nn.LSTM(32, 64, batch_first=True)
        self.fc = nn.Linear(64, note_vocab)

    def forward(self, x):
        x = self.embed(x)
        x, _ = self.lstm(x)
        return self.fc(x[:, -1, :])  # predict only the next note

model = NextNoteLSTM(len(all_notes))
loss_fn = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=0.005)


### The current features are:
	•	A sequence of 8 previous notes
	•	Encoded as integers using LabelEncoder
	•	Each note is one of the 21 unique note names (C, F#, A-, etc.)

### Why these features?
	•	The previous notes define musical context
	•	Music has strong sequential structure (e.g., scales, motifs, repetition)
	•	An LSTM can learn patterns like “C → D → E → F → probably G”

In [7]:
correct = 0
total = 0

# STEP 5: Train the model
for epoch in range(15):
    total_loss = 0
    for x, y in loader:
        opt.zero_grad()
        out = model(x)
        loss = loss_fn(out, y)
        loss.backward()
        opt.step()
        total_loss += loss.item()
        # Calculate accuracy
        preds = out.argmax(dim=1)
        correct += (preds == y).sum().item()
        total += y.size(0)
    avg_loss = total_loss / len(loader)
    accuracy = correct / total
    print(f"Epoch {epoch+1} - Avg Loss: {avg_loss:.4f} - Accuracy: {accuracy:.4f}")


Epoch 1 - Avg Loss: 1.7527 - Accuracy: 0.3490
Epoch 2 - Avg Loss: 1.6901 - Accuracy: 0.3626
Epoch 3 - Avg Loss: 1.6647 - Accuracy: 0.3706
Epoch 4 - Avg Loss: 1.6460 - Accuracy: 0.3764
Epoch 5 - Avg Loss: 1.6299 - Accuracy: 0.3812
Epoch 6 - Avg Loss: 1.6187 - Accuracy: 0.3853
Epoch 7 - Avg Loss: 1.6063 - Accuracy: 0.3889
Epoch 8 - Avg Loss: 1.5948 - Accuracy: 0.3922
Epoch 9 - Avg Loss: 1.5872 - Accuracy: 0.3950
Epoch 10 - Avg Loss: 1.5787 - Accuracy: 0.3976
Epoch 11 - Avg Loss: 1.5685 - Accuracy: 0.4001
Epoch 12 - Avg Loss: 1.5639 - Accuracy: 0.4024
Epoch 13 - Avg Loss: 1.5602 - Accuracy: 0.4044
Epoch 14 - Avg Loss: 1.5558 - Accuracy: 0.4064
Epoch 15 - Avg Loss: 1.5481 - Accuracy: 0.4082


In [8]:
print("Number of unique notes:", len(all_notes))

Number of unique notes: 21


In [9]:
all_notes

['A',
 'A#',
 'A-',
 'B',
 'B#',
 'B-',
 'C',
 'C#',
 'C-',
 'D',
 'D#',
 'D-',
 'E',
 'E#',
 'E-',
 'F',
 'F#',
 'F-',
 'G',
 'G#',
 'G-']

In [10]:
# STEP 6: Generate 3 variations of melody
from music21 import stream, note as m21note
import torch.nn.functional as F
import random

for i in range(1, 4):
    with torch.no_grad():
        start = random.choice(input_seqs)
        generated = list(start)
        for _ in range(32):
            input_tensor = torch.tensor([generated[-seq_length:]]).long()
            probs = F.softmax(model(input_tensor), dim=-1)
            next_note = torch.multinomial(probs, num_samples=1).item()
            generated.append(next_note)
        decoded = note_encoder.inverse_transform(generated)

        # Save each version
        s = stream.Stream()
        for n in decoded:
            s.append(m21note.Note(n))
        s.write('midi', fp=f'symbolic_conditioned_{i}.mid')


In [11]:
# !pip install fluidsynth

In [12]:
import fluidsynth
import os

# Convert MIDI to audio using the fluidsynth command-line tool
# Ensure the output directory exists if needed
output_dir = "/content"
os.makedirs(output_dir, exist_ok=True)

midi_file = "/content/symbolic_conditioned_1.mid"
output_wav = os.path.join(output_dir, "output.wav")
soundfont_path = "/usr/share/sounds/sf2/FluidR3_GM.sf2"  # default SoundFont

command = f"fluidsynth -ni -a alsa -m alsa_seq {soundfont_path} {midi_file} -F {output_wav}"

# Execute the command
# The '!' prefix in a Jupyter notebook executes the command in the shell
!{command}

# Play the result
import IPython.display as ipd

# Correct playback from file
ipd.Audio(filename="/content/output.wav")

ModuleNotFoundError: No module named 'fluidsynth'

In [None]:
midi_file = "/content/symbolic_conditioned_2.mid"

# Execute the command
# The '!' prefix in a Jupyter notebook executes the command in the shell
!{command}

# Play the result
import IPython.display as ipd

# Correct playback from file
ipd.Audio(filename="/content/output.wav")

In [None]:
midi_file = "/content/symbolic_conditioned_3.mid"

# Execute the command
# The '!' prefix in a Jupyter notebook executes the command in the shell
!{command}

# Play the result
import IPython.display as ipd

# Correct playback from file
ipd.Audio(filename="/content/output.wav")

### Add All Useful Features

From each note:

	•	Pitch height: note.pitch.midi
	•	Rhythmic duration: note.quarterLength
	•	Interval: difference between pitch[i] and pitch[i-1]
	•	(Optional) Key: use score.analyze('key')
	•	(Optional) Time signature: extract from score

In [None]:
# STEP 3: Parse melody for pitch, duration, and interval features
import os
from music21 import converter, note
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

# Parse melodies and extract features
def parse_feature_sequences(folder_path, max_files=50):
    pitch_seqs = []
    duration_seqs = []
    interval_seqs = []
    file_list = sorted([f for f in os.listdir(folder_path) if f.endswith('.abc')])[:max_files]
    for file in file_list:
        try:
            score = converter.parse(os.path.join(folder_path, file))
            pitches = []
            durations = []
            for el in score.flat.notes:
                if isinstance(el, note.Note):
                    # Ensure pitch is an integer
                    pitches.append(int(el.pitch.midi))
                    durations.append(el.quarterLength)

            if len(pitches) > 10:
                intervals = [0] + [pitches[i+1] - pitches[i] for i in range(len(pitches)-1)]
                pitch_seqs.append(pitches)
                duration_seqs.append(durations)
                interval_seqs.append(intervals)
        except:
            continue
    return pitch_seqs, duration_seqs, interval_seqs

dataset_path = "/content/nottingham-dataset/ABC_cleaned"
pitch_seqs, duration_seqs, interval_seqs = parse_feature_sequences(dataset_path)

# Create a new LabelEncoder specifically for the unique MIDI pitch numbers
all_pitches = sorted(list(set(p for seq in pitch_seqs for p in seq)))
pitch_class_encoder = LabelEncoder().fit(all_pitches)


# Normalize durations and intervals
scaler_dur = MinMaxScaler()
scaler_int = MinMaxScaler()
# Flatten lists of lists into a single list for fitting the scalers
flat_durations = np.array([item for sublist in duration_seqs for item in sublist]).reshape(-1, 1)
flat_intervals = np.array([item for sublist in interval_seqs for item in sublist]).reshape(-1, 1)

scaler_dur.fit(flat_durations)
scaler_int.fit(flat_intervals)

norm_duration_seqs = [scaler_dur.transform(np.array(seq).reshape(-1, 1)).flatten() for seq in duration_seqs]
norm_interval_seqs = [scaler_int.transform(np.array(seq).reshape(-1, 1)).flatten() for seq in interval_seqs]

# Create (input, target) pairs for all 3 features
seq_length = 8
input_features, target_classes = [], []
for pitch_seq, dur_seq, int_seq in zip(pitch_seqs, norm_duration_seqs, norm_interval_seqs):
    for i in range(len(pitch_seq) - seq_length):
        pitch_window = pitch_seq[i:i+seq_length]
        dur_window = dur_seq[i:i+seq_length]
        int_window = int_seq[i:i+seq_length]
        # Ensure pitch data is float32 for input tensor
        combined = np.stack([pitch_window, dur_window, int_window], axis=1).astype(np.float32)  # shape (seq_len, 3)
        input_features.append(combined)
        # Use the new pitch_class_encoder for the target
        target_class = pitch_class_encoder.transform([pitch_seq[i+seq_length]])[0]
        target_classes.append(target_class)

class MelodyFeatureDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets
    def __len__(self):
        return len(self.inputs)
    def __getitem__(self, idx):
        # Inputs are already float32 from numpy array
        return torch.tensor(self.inputs[idx], dtype=torch.float32), torch.tensor(self.targets[idx], dtype=torch.long)

# Use the new target_classes list for the dataset
loader = DataLoader(MelodyFeatureDataset(input_features, target_classes), batch_size=32, shuffle=True)

In [None]:
# STEP 4: Define LSTM model with feature input
import torch.nn as nn

class FeatureLSTM(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.lstm = nn.LSTM(input_size=3, hidden_size=64, batch_first=True)
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        return self.fc(hn[-1])  # output shape: (batch_size, num_classes)

num_classes = len(pitch_class_encoder.classes_)
model = FeatureLSTM(num_classes=num_classes) # Re-instantiate model with correct output size
# You would typically load trained weights here if you stopped and restarted the kernel
# model.load_state_dict(torch.load('model_weights.pth')) # Example loading

# Re-define loss and optimizer if you are re-running the script from here
loss_fn = torch.nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=0.005)

In [None]:
# STEP 5: Train the model
for epoch in range(15):
    total_loss = 0
    for x, y in loader:
        opt.zero_grad()
        out = model(x)
        loss = loss_fn(out, y)
        loss.backward()
        opt.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(loader)
    print(f"Epoch {epoch+1} - Avg Loss: {avg_loss:.4f}")


In [None]:
# STEP 6: Generate new melody
from music21 import stream, note as m21note

with torch.no_grad():
    # Select a random starting sequence of features
    start_idx = np.random.randint(len(input_features))
    seed_features = torch.tensor(input_features[start_idx], dtype=torch.float32).unsqueeze(0) # Add batch dimension

    # Initialize generated sequence with the starting pitch values (not encoded classes)
    generated_pitches = list(seed_features[0, :, 0].numpy().astype(int)) # Get raw pitches from seed

    # Use the seed features as the initial input to the model
    current_input_seq = seed_features.clone()

    for _ in range(32): # Generate 32 new notes
        # Model predicts the probability distribution over pitch classes for the *next* note
        # The input to the model is the sequence of (pitch, duration, interval) features
        output_logits = model(current_input_seq) # Shape: (batch_size, num_classes)
        probs = F.softmax(output_logits, dim=-1)

        # Sample the next pitch class from the predicted distribution
        predicted_pitch_class_tensor = torch.multinomial(probs, num_samples=1)
        predicted_pitch_class = predicted_pitch_class_tensor.item()

        # Decode the predicted pitch class back to a MIDI pitch number
        next_pitch = pitch_class_encoder.inverse_transform([predicted_pitch_class])[0]

        # For generation, we need to make up the duration and interval for the next note.
        # A simple approach is to reuse the duration and interval of the last note in the input sequence.
        # A more complex approach could involve a separate model or sampling strategy for duration and interval.
        last_note_features = current_input_seq[0, -1, :] # Get features of the last note in the sequence
        # The duration and interval features are already normalized in the input sequence
        next_dur_norm = last_note_features[1].item()
        next_int_norm = last_note_features[2].item() # This interval is between the last two notes, not the next one

        # A better interval estimate would be the difference between the *new* pitch
        # and the last pitch in the sequence.
        last_pitch_raw = current_input_seq[0, -1, 0].item() # Get the raw pitch of the last note
        next_interval_raw = next_pitch - last_pitch_raw
        # Normalize this calculated interval
        next_int_norm = scaler_int.transform([[next_interval_raw]])[0, 0]


        # Create the feature vector for the next note (pitch, duration, interval)
        # The pitch is the decoded pitch from the model prediction
        # The duration and interval are derived (e.g., copied from the last note or calculated)
        next_features = torch.tensor([[next_pitch, next_dur_norm, next_int_norm]], dtype=torch.float32) # Shape: (1, 3)

        # Append the new features to the input sequence and drop the oldest
        # The input sequence needs to maintain the shape (batch_size, seq_length, num_features)
        current_input_seq = torch.cat((current_input_seq[:, 1:, :], next_features.unsqueeze(0)), dim=1)

        # Add the new raw pitch to the generated list
        generated_pitches.append(next_pitch)


# STEP 7: Save melody to MIDI
s = stream.Stream()
for p in generated_pitches:
    # music21 note.Note takes a MIDI pitch number
    s.append(m21note.Note(int(round(p)))) # Round to the nearest integer just in case

# You might want to add durations to the notes when saving the MIDI file
# This would require storing generated durations as well.
# For now, the default duration will be used by music21.

s.write('midi', fp='symbolic_conditioned_with_features.mid')

In [None]:
import fluidsynth
import os

# Convert MIDI to audio using the fluidsynth command-line tool
# Ensure the output directory exists if needed
output_dir = "/content"
os.makedirs(output_dir, exist_ok=True)

midi_file = "/content/symbolic_conditioned_with_features.mid"
output_wav = os.path.join(output_dir, "output.wav")
soundfont_path = "/usr/share/sounds/sf2/FluidR3_GM.sf2"  # default SoundFont

command = f"fluidsynth -ni -a alsa -m alsa_seq {soundfont_path} {midi_file} -F {output_wav}"

# Execute the command
# The '!' prefix in a Jupyter notebook executes the command in the shell
!{command}

# Play the result
import IPython.display as ipd
ipd.Audio(output_wav)

In [None]:
# STEP 3: Parse melody for pitch, duration, and interval features
import os
import numpy as np
import torch
from music21 import converter, note
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from torch.utils.data import Dataset, DataLoader
import pandas as pd

# Parse melodies and extract features
def parse_feature_sequences(folder_path, max_files=50):
    pitch_seqs, duration_seqs, interval_seqs = [], [], []
    file_list = sorted([f for f in os.listdir(folder_path) if f.endswith('.abc')])[:max_files]
    for file in file_list:
        try:
            score = converter.parse(os.path.join(folder_path, file))
            pitches, durations = [], []
            for el in score.flat.notes:
                if isinstance(el, note.Note):
                    pitches.append(el.pitch.nameWithOctave)
                    durations.append(el.quarterLength)
            if len(pitches) > 10:
                midi_vals = [note.Note(p).pitch.midi for p in pitches]
                intervals = [0] + [midi_vals[i+1] - midi_vals[i] for i in range(len(midi_vals)-1)]
                pitch_seqs.append(pitches)
                duration_seqs.append(durations)
                interval_seqs.append(intervals)
        except:
            continue
    return pitch_seqs, duration_seqs, interval_seqs

def build_dataset(pitch_seqs, duration_seqs, interval_seqs, features, seq_length=8):
    note_encoder = LabelEncoder().fit([p for seq in pitch_seqs for p in seq])
    scaler_dur = MinMaxScaler()
    scaler_int = MinMaxScaler()
    flat_durs = np.concatenate(duration_seqs).reshape(-1, 1)
    flat_ints = np.concatenate(interval_seqs).reshape(-1, 1)
    scaler_dur.fit(flat_durs)
    scaler_int.fit(flat_ints)

    input_features, target_classes = [], []
    for p_seq, d_seq, i_seq in zip(pitch_seqs, duration_seqs, interval_seqs):
        if len(p_seq) <= seq_length:
            continue
        p_encoded = note_encoder.transform(p_seq)
        d_scaled = scaler_dur.transform(np.array(d_seq).reshape(-1, 1)).flatten()
        i_scaled = scaler_int.transform(np.array(i_seq).reshape(-1, 1)).flatten()

        for i in range(len(p_seq) - seq_length):
            window_feats = []
            if 'pitch' in features:
                window_feats.append(p_encoded[i:i+seq_length])
            if 'duration' in features:
                window_feats.append(d_scaled[i:i+seq_length])
            if 'interval' in features:
                window_feats.append(i_scaled[i:i+seq_length])

            combined = np.stack(window_feats, axis=1)
            input_features.append(combined)
            target_classes.append(p_encoded[i+seq_length])

    return input_features, target_classes, note_encoder

class MelodyDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets
    def __len__(self):
        return len(self.inputs)
    def __getitem__(self, idx):
        return torch.tensor(self.inputs[idx], dtype=torch.float32), torch.tensor(self.targets[idx], dtype=torch.long)

import torch.nn as nn
class FeatureLSTM(nn.Module):
    def __init__(self, input_size, output_size):
        super().__init__()
        self.lstm = nn.LSTM(input_size, 128, batch_first=True)
        self.fc = nn.Linear(128, output_size)
    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        return self.fc(hn[-1])

# STEP 4: Experiment loop
pitch_seqs, dur_seqs, int_seqs = parse_feature_sequences("/content/nottingham-dataset/ABC_cleaned")
feature_sets = {
    "pitch_only": ['pitch'],
    "pitch_duration": ['pitch', 'duration'],
    "pitch_interval": ['pitch', 'interval'],
    "all": ['pitch', 'duration', 'interval']
}

results = []
for name, feats in feature_sets.items():
    print(f"\nTraining with features: {feats}")
    X, y, encoder = build_dataset(pitch_seqs, dur_seqs, int_seqs, feats)
    dataset = MelodyDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, shuffle=True)

    model = FeatureLSTM(input_size=len(feats), output_size=len(encoder.classes_))
    loss_fn = nn.CrossEntropyLoss()
    opt = torch.optim.Adam(model.parameters(), lr=0.005)

    for epoch in range(5):
        total_loss = 0
        for xb, yb in loader:
            opt.zero_grad()
            out = model(xb)
            loss = loss_fn(out, yb)
            loss.backward()
            opt.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(loader)
        print(f"Epoch {epoch+1} - Avg Loss: {avg_loss:.4f}")
    results.append({"features": feats, "final_loss": avg_loss})

# STEP 5: Show results
results

Add Valdation Set

In [None]:
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import pandas as pd

# STEP 4: Experiment loop with validation
results = []
for name, feats in feature_sets.items():
    print(f"\nTraining with features: {feats}")
    X, y, encoder = build_dataset(pitch_seqs, dur_seqs, int_seqs, feats)

    # Add validation split
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    train_loader = DataLoader(MelodyDataset(X_train, y_train), batch_size=32, shuffle=True)
    val_loader = DataLoader(MelodyDataset(X_val, y_val), batch_size=32, shuffle=False)

    model = FeatureLSTM(input_size=len(feats), output_size=len(encoder.classes_))
    loss_fn = nn.CrossEntropyLoss()
    opt = torch.optim.Adam(model.parameters(), lr=0.005)

    for epoch in range(15):
        model.train()
        total_loss = 0
        for xb, yb in train_loader:
            opt.zero_grad()
            out = model(xb)
            loss = loss_fn(out, yb)
            loss.backward()
            opt.step()
            total_loss += loss.item()
        avg_train_loss = total_loss / len(train_loader)

        # Validation
        model.eval()
        val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for xb, yb in val_loader:
                out = model(xb)
                loss = loss_fn(out, yb)
                val_loss += loss.item()
                preds = torch.argmax(out, dim=1)
                correct += (preds == yb).sum().item()
                total += yb.size(0)
        avg_val_loss = val_loss / len(val_loader)
        val_acc = correct / total
        print(f"Epoch {epoch+1} - Train Loss: {avg_train_loss:.4f} - Val Loss: {avg_val_loss:.4f} - Val Acc: {val_acc:.4f}")

    results.append({"features": feats, "val_loss": avg_val_loss, "val_acc": val_acc})

# STEP 5: Show results
results_df = pd.DataFrame(results)
print("\nAblation Results with Validation:")
print(results_df)


### train with xgboost

In [None]:
# # STEP 1: Install dependencies (in Colab)
# !pip install music21 xgboost scikit-learn pandas optuna --quiet

In [None]:
# STEP 3: Extract enriched structured features for tuning
import os
import numpy as np
import pandas as pd
from music21 import converter, note, key, meter
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import optuna

# Extract enhanced features
# note_encoder is initialized outside the function so it can be accessed later for decoding
note_encoder = LabelEncoder()
def extract_enriched_features(folder_path, max_files=50, seq_length=8):
    X = []
    all_next_notes = []

    file_list = sorted([f for f in os.listdir(folder_path) if f.endswith('.abc')])[:max_files]
    for file in file_list:
        try:
            score = converter.parse(os.path.join(folder_path, file))
            key_sig = score.analyze('key').tonic.name if score.analyze('key') else 'C'
            time_sigs = list(score.recurse().getElementsByClass(meter.TimeSignature))
            time_sig = str(time_sigs[0].ratioString) if time_sigs else "4/4"

            parts = [n for n in score.flat.notes if isinstance(n, note.Note)]
            if len(parts) <= seq_length:
                continue

            for i in range(len(parts) - seq_length):
                window = parts[i:i+seq_length]
                next_note = parts[i+seq_length]
                pitches = [n.pitch.midi for n in window]
                octaves = [n.pitch.octave for n in window]
                durations = [n.quarterLength for n in window]
                intervals = [0] + [pitches[j+1] - pitches[j] for j in range(len(pitches)-1)]
                offsets = [n.offset % 4.0 for n in window]  # rough bar position

                features = {
                    "last_pitch": pitches[-1],
                    "mean_pitch": np.mean(pitches),
                    "std_pitch": np.std(pitches),
                    "last_octave": octaves[-1],
                    "mean_octave": np.mean(octaves),
                    "mean_duration": np.mean(durations),
                    "std_duration": np.std(durations),
                    "mean_interval": np.mean(intervals),
                    "std_interval": np.std(intervals),
                    "mean_offset": np.mean(offsets),
                    "key": key_sig,
                    "time_sig": time_sig
                }
                X.append(features)
                all_next_notes.append(next_note.nameWithOctave)
        except:
            continue

    # Encode the target variable (next notes) here, inside the function
    y_encoded = note_encoder.fit_transform(all_next_notes)
    df = pd.DataFrame(X)
    df = pd.get_dummies(df)  # One-hot encode categorical features like key/time_sig
    # Ensure column names are valid for XGBoost (replace problematic characters)
    df.columns = ["".join (c if c.isalnum() else "_" for c in str(x)) for x in df.columns]

    return df, y_encoded # Return the correctly encoded target

# Call the function to get the features and the correctly encoded target variable
X_df, y = extract_enriched_features("/content/nottingham-dataset/ABC_cleaned")

# y is now the correctly encoded target variable (starting from 0)
# Remove the incorrect re-encoding and shifting lines:
# y = note_encoder.fit_transform(y)  # already done - REMOVE THIS LINE
# y = y - y.min()  # shift so labels start at 0 - REMOVE THIS LINE

X_train, X_val, y_train, y_val = train_test_split(X_df, y, test_size=0.2, random_state=42)

In [None]:
!pip install xgboost --quiet

In [None]:
# STEP 3: Extract enriched structured features for tuning
import os
import numpy as np
import pandas as pd
from music21 import converter, note, key, meter
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
import optuna

# Extract enhanced features
def extract_enriched_features(folder_path, max_files=50, seq_length=8):
    X = []
    all_next_notes = []

    file_list = sorted([f for f in os.listdir(folder_path) if f.endswith('.abc')])[:max_files]
    for file in file_list:
        try:
            score = converter.parse(os.path.join(folder_path, file))
            key_sig = score.analyze('key').tonic.name if score.analyze('key') else 'C'
            time_sigs = list(score.recurse().getElementsByClass(meter.TimeSignature))
            time_sig = str(time_sigs[0].ratioString) if time_sigs else "4/4"

            parts = [n for n in score.flatten().notes if isinstance(n, note.Note)]
            if len(parts) <= seq_length:
                continue

            for i in range(len(parts) - seq_length):
                window = parts[i:i+seq_length]
                next_note = parts[i+seq_length]
                pitches = [n.pitch.midi for n in window]
                octaves = [n.pitch.octave for n in window]
                durations = [n.quarterLength for n in window]
                intervals = [0] + [pitches[j+1] - pitches[j] for j in range(len(pitches)-1)]
                offsets = [n.offset % 4.0 for n in window]  # rough bar position

                features = {
                    "last_pitch": pitches[-1],
                    "mean_pitch": np.mean(pitches),
                    "std_pitch": np.std(pitches),
                    "last_octave": octaves[-1],
                    "mean_octave": np.mean(octaves),
                    "mean_duration": np.mean(durations),
                    "std_duration": np.std(durations),
                    "mean_interval": np.mean(intervals),
                    "std_interval": np.std(intervals),
                    "mean_offset": np.mean(offsets),
                    "key": key_sig,
                    "time_sig": time_sig
                }
                X.append(features)
                all_next_notes.append(next_note.nameWithOctave)
        except:
            continue

    df = pd.DataFrame(X)
    df = pd.get_dummies(df)  # One-hot encode categorical features like key/time_sig

    note_encoder = LabelEncoder()
    encoded = note_encoder.fit_transform(all_next_notes)
    used_classes = np.unique(encoded)
    remap = {old: new for new, old in enumerate(used_classes)}
    y = np.array([remap[val] for val in encoded])

    print("✔️ Final y classes:", np.unique(y))
    return df, y

# Load data and split
X_df, y = extract_enriched_features("/content/nottingham-dataset/ABC_cleaned")
X_train, X_val, y_train, y_val = train_test_split(X_df, y, test_size=0.2, random_state=42)


In [None]:
# STEP 4: Optuna tuning with XGBoost

def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'eval_metric': 'mlogloss',
        'random_state': 42
    }

    model = XGBClassifier(**params)
    model.fit(X_train, y_train)
    preds = model.predict(X_val)
    acc = accuracy_score(y_val, preds)
    return acc

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

print("\nBest trial:")
print(study.best_trial)
print("\nBest parameters:")
print(study.best_params)


In [None]:
# STEP 1: Install dependencies (in Colab)
!pip install music21 xgboost scikit-learn pandas optuna --quiet

# STEP 2: Clone Nottingham dataset
!git clone https://github.com/jukedeck/nottingham-dataset.git

# STEP 3: Extract enriched structured features for tuning
import os
import numpy as np
import pandas as pd
from music21 import converter, note, key, meter
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
import optuna

# Extract enhanced features
def extract_enriched_features(folder_path, max_files=50, seq_length=8):
    X = []
    all_next_notes = []

    file_list = sorted([f for f in os.listdir(folder_path) if f.endswith('.abc')])[:max_files]
    for file in file_list:
        try:
            score = converter.parse(os.path.join(folder_path, file))
            key_sig = score.analyze('key').tonic.name if score.analyze('key') else 'C'
            time_sigs = list(score.recurse().getElementsByClass(meter.TimeSignature))
            time_sig = str(time_sigs[0].ratioString) if time_sigs else "4/4"

            parts = [n for n in score.flatten().notes if isinstance(n, note.Note)]
            if len(parts) <= seq_length:
                continue

            for i in range(len(parts) - seq_length):
                window = parts[i:i+seq_length]
                next_note = parts[i+seq_length]
                pitches = [n.pitch.midi for n in window]
                octaves = [n.pitch.octave for n in window]
                durations = [n.quarterLength for n in window]
                intervals = [0] + [pitches[j+1] - pitches[j] for j in range(len(pitches)-1)]
                offsets = [n.offset % 4.0 for n in window]  # rough bar position

                features = {
                    "last_pitch": pitches[-1],
                    "mean_pitch": np.mean(pitches),
                    "std_pitch": np.std(pitches),
                    "last_octave": octaves[-1],
                    "mean_octave": np.mean(octaves),
                    "mean_duration": np.mean(durations),
                    "std_duration": np.std(durations),
                    "mean_interval": np.mean(intervals),
                    "std_interval": np.std(intervals),
                    "mean_offset": np.mean(offsets),
                    "key": key_sig,
                    "time_sig": time_sig
                }
                X.append(features)
                all_next_notes.append(next_note.nameWithOctave)
        except:
            continue

    df = pd.DataFrame(X)
    df = pd.get_dummies(df)  # One-hot encode categorical features like key/time_sig

    note_encoder = LabelEncoder()
    encoded = note_encoder.fit_transform(all_next_notes)
    used_classes = np.unique(encoded)
    remap = {old: new for new, old in enumerate(used_classes)}
    y = np.array([remap[val] for val in encoded])

    print("✔️ Final y classes:", np.unique(y))
    return df, y

# Load data and split
X_df, y = extract_enriched_features("/content/nottingham-dataset/ABC_cleaned")
X_train, X_val, y_train, y_val = train_test_split(X_df, y, test_size=0.2, random_state=42)

# STEP 4: Optuna tuning with XGBoost

def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'eval_metric': 'mlogloss',
        'use_label_encoder': False,
        'random_state': 42
    }

    model = XGBClassifier(**params)
    model.fit(X_train, y_train)
    preds = model.predict(X_val)
    acc = accuracy_score(y_val, preds)
    return acc

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

print("\nBest trial:")
print(study.best_trial)
print("\nBest parameters:")
print(study.best_params)

# Final model with best params
best_model = XGBClassifier(**study.best_params, use_label_encoder=False, eval_metric='mlogloss')
best_model.fit(X_train, y_train)
preds = best_model.predict(X_val)
print("\nFinal Validation Accuracy:", accuracy_score(y_val, preds))


In [None]:
X_df

In [None]:
y

In [None]:
# Final model with best params
best_model = XGBClassifier(**study.best_params, use_label_encoder=False, eval_metric='mlogloss')
best_model.fit(X_train, y_train)
preds = best_model.predict(X_val)
print("\nFinal Validation Accuracy:", accuracy_score(y_val, preds))