In [None]:
pip install pretty_midi

Collecting pretty_midi
  Downloading pretty_midi-0.2.10.tar.gz (5.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m108.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mido>=1.1.16 (from pretty_midi)
  Downloading mido-1.3.3-py3-none-any.whl.metadata (6.4 kB)
Downloading mido-1.3.3-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.6/54.6 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: pretty_midi
  Building wheel for pretty_midi (setup.py) ... [?25l[?25hdone
  Created wheel for pretty_midi: filename=pretty_midi-0.2.10-py3-none-any.whl size=5592286 sha256=d821d66e11b0e87dbca43e316ae1699c574288306b3e3ec35ad5ea8b8391a945
  Stored in directory: /root/.cache/pip/wheels/e6/95/ac/15ceaeb2823b04d8e638fd1495357adb8d26c00ccac9d7782e
Successfully built pretty_midi
Installing collected packages: mido, pretty_midi
Success

In [None]:
import os
import numpy as np
import pretty_midi
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from google.colab import drive
import random

In [None]:
# Make the Google Drive contents available under /content/drive
drive.mount('/content/drive')

# Dataset root and its two split directories
base_path = '/content/drive/MyDrive/NEW PROJECT LIST/DL COURSE SUMMER/Group Project/Composer_Dataset/NN_midi_files_extended'
train_path, test_path = (os.path.join(base_path, split) for split in ('train', 'test'))

# Class names; a composer's integer label is its position in this list
composers = ['bach', 'bartok', 'chopin', 'mozart']
composer_to_idx = dict(zip(composers, range(len(composers))))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Custom Dataset
class MidiDataset(Dataset):
    """Dataset that turns MIDI files into fixed-size piano-roll tensors.

    Each item is a ``(1, target_length, 128)`` float32 tensor with values in
    ``[0, 1]`` paired with the file's integer class label (0-dim int64 tensor).

    Args:
        file_paths: Sequence of MIDI file paths.
        labels: Sequence of integer class labels aligned with ``file_paths``.
        transform: Optional callable applied to the ``(1, target_length, 128)``
            NumPy array before it is converted to a tensor.
        target_length: Number of time frames every sample is padded (with
            zeros) or truncated to.
        fs: Piano-roll sampling rate in frames per second, forwarded to
            ``PrettyMIDI.get_piano_roll``.
    """

    NUM_PITCHES = 128      # width of the pitch axis of every sample
    MAX_VELOCITY = 127.0   # largest velocity of a single MIDI note

    def __init__(self, file_paths, labels, transform=None, target_length=1000, fs=100):
        self.file_paths = file_paths
        self.labels = labels
        self.transform = transform
        self.target_length = target_length
        self.fs = fs

    def __len__(self):
        """Return the number of MIDI files in the dataset."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load, normalize and pad/truncate the ``idx``-th MIDI file.

        Returns:
            ``(piano_roll, label)``. If the file cannot be processed, the error
            is printed and an all-zero piano roll is returned with the real
            label so that batching keeps working (best-effort behaviour).
        """
        midi_file = self.file_paths[idx]
        label = torch.tensor(self.labels[idx], dtype=torch.long)

        try:
            midi_data = pretty_midi.PrettyMIDI(midi_file)
            # get_piano_roll yields (pitches, time_steps); transpose to (time_steps, pitches)
            piano_roll = midi_data.get_piano_roll(fs=self.fs).T

            # Velocities of overlapping notes/instruments are accumulated by
            # pretty_midi, so the scaled roll can exceed 1.0 -- clip it.
            piano_roll = np.clip(piano_roll / self.MAX_VELOCITY, 0.0, 1.0)

            # Copy into a zero buffer: pads short rolls and truncates long ones
            # on both axes in a single step.
            fixed = np.zeros((self.target_length, self.NUM_PITCHES), dtype=piano_roll.dtype)
            n_steps = min(piano_roll.shape[0], self.target_length)
            n_pitches = min(piano_roll.shape[1], self.NUM_PITCHES)
            fixed[:n_steps, :n_pitches] = piano_roll[:n_steps, :n_pitches]

            # Add channel dimension: (1, time_steps, pitches)
            sample = fixed[np.newaxis, ...]

            # Apply data augmentation
            if self.transform:
                sample = self.transform(sample)

            return torch.as_tensor(sample, dtype=torch.float32), label
        except Exception as e:
            print(f"Error processing {midi_file}: {e}")
            # Return a dummy sample if MIDI processing fails
            return torch.zeros((1, self.target_length, self.NUM_PITCHES)), label

In [None]:
# Data augmentation functions
def time_shift(piano_roll, max_shift=50):
    """Shift a piano roll along the time axis (axis 1) by a random offset.

    The offset is drawn uniformly from ``[-max_shift, max_shift]``. Frames
    pushed past either end are dropped and the vacated frames are filled with
    zeros; a circular roll would move the end of the piece to its beginning.

    Args:
        piano_roll: Array with time on axis 1, e.g. ``(1, time_steps, pitches)``.
        max_shift: Largest shift, in frames, in either direction.

    Returns:
        A new array with the same shape and dtype as the input.
    """
    shift = random.randint(-max_shift, max_shift)
    piano_roll = np.asarray(piano_roll)
    shifted = np.zeros_like(piano_roll)
    n_frames = piano_roll.shape[1]

    if shift == 0:
        shifted[...] = piano_roll
    elif 0 < shift < n_frames:
        # move content later in time; leading frames stay silent
        shifted[:, shift:] = piano_roll[:, :n_frames - shift]
    elif -n_frames < shift < 0:
        # move content earlier in time; trailing frames stay silent
        shifted[:, :shift] = piano_roll[:, -shift:]
    # |shift| >= n_frames: everything is shifted out, result stays all zeros
    return shifted

def pitch_shift(piano_roll, max_shift=5):
    """Transpose a piano roll along the pitch axis (axis 2) by a random offset.

    The offset is drawn uniformly from ``[-max_shift, max_shift]``. Pitches
    pushed beyond either end of the axis are dropped and the vacated bins are
    filled with zeros; a circular roll would wrap the highest notes into the
    lowest register and vice versa.

    Args:
        piano_roll: Array with pitch on axis 2, e.g. ``(1, time_steps, pitches)``.
        max_shift: Largest transposition, in pitch bins, in either direction.

    Returns:
        A new array with the same shape and dtype as the input.
    """
    shift = random.randint(-max_shift, max_shift)
    piano_roll = np.asarray(piano_roll)
    shifted = np.zeros_like(piano_roll)
    n_pitches = piano_roll.shape[2]

    if shift == 0:
        shifted[...] = piano_roll
    elif 0 < shift < n_pitches:
        # transpose upwards; lowest bins stay empty
        shifted[:, :, shift:] = piano_roll[:, :, :n_pitches - shift]
    elif -n_pitches < shift < 0:
        # transpose downwards; highest bins stay empty
        shifted[:, :, :shift] = piano_roll[:, :, -shift:]
    # |shift| >= n_pitches: everything is shifted out, result stays all zeros
    return shifted

def add_noise(piano_roll, noise_factor=0.05):
    """Return the piano roll with zero-mean Gaussian noise added, clipped to [0, 1].

    Args:
        piano_roll: NumPy array to perturb.
        noise_factor: Standard deviation of the noise.
    """
    perturbation = np.random.normal(loc=0, scale=noise_factor, size=piano_roll.shape)
    noisy = piano_roll + perturbation
    return np.clip(noisy, 0, 1)

def augment_data(piano_roll):
    """Randomly apply time shift, pitch shift and noise, in that order.

    Each augmentation is applied independently when a fresh
    ``random.random()`` draw is greater than 0.5.
    """
    for augmentation in (time_shift, pitch_shift, add_noise):
        if random.random() > 0.5:
            piano_roll = augmentation(piano_roll)
    return piano_roll

In [None]:
# Load MIDI files
def load_midi_files(data_path):
    """Collect MIDI file paths and integer labels for every known composer.

    Looks in ``<data_path>/<composer>/`` for each name in the module-level
    ``composers`` list and labels each file via ``composer_to_idx``.

    Args:
        data_path: Directory containing one sub-directory per composer.

    Returns:
        ``(file_paths, labels)``: two parallel lists, ordered by composer and
        then by file name.

    Raises:
        FileNotFoundError: If a composer's sub-directory does not exist.
    """
    file_paths = []
    labels = []

    for composer in composers:
        composer_path = os.path.join(data_path, composer)
        # os.listdir returns entries in arbitrary order; sort for reproducible sample order
        for file in sorted(os.listdir(composer_path)):
            # case-insensitive so that e.g. 'PIECE.MID' is not silently skipped
            if file.lower().endswith(('.mid', '.midi')):
                file_paths.append(os.path.join(composer_path, file))
                labels.append(composer_to_idx[composer])

    return file_paths, labels

In [None]:
# CNN Model
class ComposerCNN(nn.Module):
    """Three-stage 2D CNN classifying single-channel piano-roll inputs.

    Each stage is Conv2d -> ReLU -> BatchNorm2d -> MaxPool2d(2), so both
    spatial dimensions are halved three times (divided by 8, rounded down)
    before the fully connected classifier.

    Args:
        num_classes: Number of output logits.
        input_shape: ``(time_steps, pitches)`` of the inputs the model will
            receive. Defaults to ``(1000, 128)``. Both values must be at
            least 8 so that something is left after the three poolings.

    Raises:
        ValueError: If either entry of ``input_shape`` is smaller than 8.
    """

    def __init__(self, num_classes=4, input_shape=(1000, 128)):
        super().__init__()

        time_steps, pitches = input_shape
        if time_steps < 8 or pitches < 8:
            raise ValueError(
                f"input_shape must be at least (8, 8) to survive three 2x poolings, got {input_shape}"
            )

        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=(5, 5), stride=1, padding=2),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=(3, 3), stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=(3, 3), stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # Convolutions keep the spatial size (padding matches the kernel);
        # three floor-halvings equal one floor-division by 8.
        self.flatten_size = 128 * (time_steps // 8) * (pitches // 8)

        self.fc_layers = nn.Sequential(
            nn.Linear(self.flatten_size, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Map a ``(batch, 1, time_steps, pitches)`` tensor to ``(batch, num_classes)`` logits."""
        x = self.conv_layers(x)
        x = x.flatten(start_dim=1)  # keep the batch axis, merge channel and spatial axes
        x = self.fc_layers(x)
        return x

In [None]:
# Gather file paths and labels for each split
train_files, train_labels = load_midi_files(data_path=train_path)
test_files, test_labels = load_midi_files(data_path=test_path)

# Wrap the splits in datasets; only the training split is augmented
train_dataset = MidiDataset(file_paths=train_files, labels=train_labels, transform=augment_data)
test_dataset = MidiDataset(file_paths=test_files, labels=test_labels)

In [None]:
# Batch the datasets: training batches are reshuffled every epoch,
# test batches keep the dataset order
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [None]:
# Initialize model, loss, and optimizer
# Seed the RNGs this notebook draws from (Python `random` and NumPy for the
# augmentations, torch for weight init and DataLoader shuffling) so that
# repeated runs start from the same state.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ComposerCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [None]:
# Training loop: one full pass over train_loader per epoch, printing the
# mean of the per-batch losses at the end of each epoch.
num_epochs = 10 # TODO: try 20
for epoch in range(num_epochs):
    model.train()  # training mode: dropout active, batch-norm uses batch statistics
    running_loss = 0.0  # sum of batch losses for this epoch
    for inputs, labels in train_loader:
        # move the batch to the same device as the model
        inputs, labels = inputs.to(device), labels.to(device)

        # clear gradients left over from the previous step before the new backward pass
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # average over the number of batches (not the number of samples)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}')

Epoch 1/10, Loss: 65.5071
Epoch 2/10, Loss: 38.3645
Epoch 3/10, Loss: 36.5484
Epoch 4/10, Loss: 20.6337
Epoch 5/10, Loss: 11.0510
Epoch 6/10, Loss: 16.7203
Epoch 7/10, Loss: 7.8755
Epoch 8/10, Loss: 7.2693
Epoch 9/10, Loss: 9.1226
Epoch 10/10, Loss: 6.7830


In [None]:
# Evaluation: collect predicted and true class indices over the whole test set
model.eval()  # inference mode: dropout off, batch-norm uses running statistics
all_preds, all_labels = [], []

with torch.no_grad():  # no gradients needed for inference
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        # predicted class = index of the largest logit in each row
        preds = outputs.argmax(dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

In [None]:
# Calculate metrics (precision/recall/F1 are averaged over classes, weighted by support)
accuracy = accuracy_score(all_labels, all_preds)
# zero_division=0: a class that is never predicted has undefined precision;
# score it as 0 explicitly instead of emitting UndefinedMetricWarning.
precision, recall, f1, _ = precision_recall_fscore_support(
    all_labels, all_preds, average='weighted', zero_division=0
)

print(f'\nTest Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1: {f1:.4f}')


Test Accuracy: 0.3125
Precision: 0.3214
Recall: 0.3125


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
