# Final Team Project: Music Genre and Composer Classification Using Deep Learning

In [None]:
pip install pretty_midi

Collecting pretty_midi
  Downloading pretty_midi-0.2.10.tar.gz (5.6 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/5.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m5.5/5.6 MB[0m [31m166.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m96.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mido>=1.1.16 (from pretty_midi)
  Downloading mido-1.3.3-py3-none-any.whl.metadata (6.4 kB)
Downloading mido-1.3.3-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.6/54.6 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: pretty_midi
  Building wheel for pretty_midi (setup.py) ... [?25l[?25hdone
  Created wheel for pretty_midi: filename=pretty_midi-0.2.10-py3-none-any.whl size=5592286 sha256=8e4ce8f3b8a38a1896

In [None]:
import os
import shutil
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pretty_midi
from tqdm import tqdm

In [None]:
# Mount Google Drive at /content/drive (Colab-only) so the dataset paths used below resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd drive/MyDrive/Masters_AI/6-Neural Networks and Deep Learning/Project/

/content/drive/MyDrive/Masters_AI/6-Neural Networks and Deep Learning/Project


In [None]:
#Flatten the subfolders so that all .mid files are present in the respective composer folder

def flatten_midi_files(base_dir, target_composers=("Bach", "Beethoven", "Chopin", "Mozart")):
    """
    Copy every .mid/.midi file found in nested subfolders of each composer's
    directory directly into that composer's root folder.

    The operation is idempotent: files that already live in the composer root
    are left alone, and a nested file is not copied again when a byte-identical
    copy (under its own name or a ``_<n>`` suffixed name) is already present.
    When a *different* file with the same name exists, the copy is written as
    ``<name>_<n><ext>`` using the first free ``n``.

    Args:
        base_dir: Directory that contains one sub-directory per composer.
        target_composers: Names of the composer sub-directories to flatten.
            Composer directories that do not exist are silently skipped.

    Returns:
        None. A confirmation message is printed when done.
    """
    import filecmp  # local import: only needed for the duplicate-content check

    for composer in target_composers:
        composer_path = os.path.join(base_dir, composer)
        for root, dirs, files in os.walk(composer_path):
            dirs.sort()  # deterministic traversal -> deterministic suffix numbering

            # Files already in the composer root need no copy. Without this
            # guard each of them would be duplicated as "<name>_1<ext>".
            if root == composer_path:
                continue

            for file in sorted(files):
                if not file.lower().endswith(('.mid', '.midi')):
                    continue

                source = os.path.join(root, file)
                name, ext = os.path.splitext(file)
                destination = os.path.join(composer_path, file)

                # Walk the candidate names until we find either an identical
                # copy (nothing to do) or a free slot (copy there).
                counter = 1
                already_present = False
                while os.path.exists(destination):
                    if filecmp.cmp(source, destination, shallow=False):
                        already_present = True
                        break
                    destination = os.path.join(composer_path, f"{name}_{counter}{ext}")
                    counter += 1

                if not already_present:
                    shutil.copy(source, destination)

    print("✅ All MIDI files copied to root folders per composer.")

# Example usage: base_dir is the dataset root; flatten_midi_files joins it with each composer name.
base_dir = "/content/drive/MyDrive/Masters_AI/6-Neural Networks and Deep Learning/Project/data"
flatten_midi_files(base_dir)


In [None]:
#Extract note-level features from the .mid files of every composer folder under base_dir

def extract_midi_features(base_dir, output_csv="midi_note_features.csv"):
    """
    Write one CSV row per (non-drum) note for every MIDI file under ``base_dir``.

    Every immediate sub-directory of ``base_dir`` is treated as a composer and
    is walked recursively for ``.mid``/``.midi`` files (case-insensitive).
    Files that fail to parse are reported, counted and skipped; a failing file
    contributes no rows at all.

    Args:
        base_dir: Directory containing one sub-directory per composer.
        output_csv: Path of the CSV file to write.

    Returns:
        None. The CSV always contains the header row, even when no notes were
        extracted, so it can be read back with ``pd.read_csv``.
    """
    columns = ["composer", "file", "instrument", "pitch",
               "start", "end", "duration", "velocity"]
    all_data = []
    error_files = []

    # Sorted so the row order of the CSV is reproducible across runs.
    composers = sorted(
        d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))
    )

    for composer in composers:
        composer_dir = os.path.join(base_dir, composer)

        for root, dirs, files in os.walk(composer_dir):
            dirs.sort()
            for file in sorted(files):
                if not file.lower().endswith(('.mid', '.midi')):
                    continue

                file_path = os.path.join(root, file)
                try:
                    pm = pretty_midi.PrettyMIDI(file_path)
                    # Collect per file first so a mid-file failure leaves no partial rows.
                    file_rows = []
                    for instrument in pm.instruments:
                        if instrument.is_drum:
                            continue  # skip drums
                        # An empty name would come back as NaN when the CSV is re-read;
                        # use the same placeholder as extract_all_midi_features.
                        instrument_name = instrument.name or "Unknown"
                        for note in instrument.notes:
                            file_rows.append({
                                "composer": composer,
                                "file": file,
                                "instrument": instrument_name,
                                "pitch": note.pitch,
                                "start": note.start,
                                "end": note.end,
                                "duration": note.end - note.start,
                                "velocity": note.velocity
                            })
                except Exception as e:
                    # Best-effort extraction: report the bad file and keep going.
                    print(f"❌ Error parsing {file_path}: {e}")
                    error_files.append(file_path)
                else:
                    all_data.extend(file_rows)

    # Save to CSV (explicit columns keep the header even when all_data is empty)
    df = pd.DataFrame(all_data, columns=columns)
    df.to_csv(output_csv, index=False)
    print(f"✅ Extracted {len(df)} note rows")
    print(f"📁 Saved as {output_csv}")
    if error_files:
        print(f"⚠️ {len(error_files)} files had errors and were skipped.")

# 🔁 Call the function with your dataset directory
# (each immediate sub-directory of this path is used as the composer label)
extract_midi_features("/content/drive/MyDrive/Masters_AI/6-Neural Networks and Deep Learning/Project/data")


In [None]:

def extract_all_midi_features(folder_path, save_path='full_midi_features.csv'):
    """
    Write one CSV row per (non-drum) note, enriched with per-file features.

    ``folder_path`` is walked recursively for ``.mid``/``.midi`` files
    (case-insensitive). Each note row carries the note attributes, the
    per-instrument pitch-bend / control-change counts and the per-file tempo,
    time signature and piano-roll statistics.

    The composer label is the first directory level below ``folder_path``, so
    files in nested sub-folders keep their composer's name. Files that sit
    directly in ``folder_path`` are labelled with that folder's own name.

    Args:
        folder_path: Dataset root, expected to contain one folder per composer.
        save_path: Path of the CSV file to write.

    Returns:
        None. The CSV always contains the header row, even when no notes were
        extracted. Files that fail to parse are reported and skipped.
    """
    columns = [
        "composer", "file", "instrument", "pitch", "start", "end", "duration",
        "velocity", "pitch_bends", "control_changes", "note_density",
        "polyphony", "chord_density", "time_signature", "tempo",
    ]
    data = []
    error_files = []

    for root, dirs, files in os.walk(folder_path):
        dirs.sort()  # deterministic traversal -> reproducible row order

        rel_root = os.path.relpath(root, folder_path)
        if rel_root == os.curdir:
            composer = os.path.basename(os.path.normpath(root))
        else:
            # First path component under the dataset root, not the immediate parent.
            composer = rel_root.split(os.sep)[0]

        for file in tqdm(sorted(files)):
            if not file.lower().endswith(('.mid', '.midi')):
                continue

            file_path = os.path.join(root, file)
            try:
                pm = pretty_midi.PrettyMIDI(file_path)

                # Extract tempo (unweighted average if multiple)
                _, tempos = pm.get_tempo_changes()
                tempo = float(tempos.mean()) if len(tempos) > 0 else None

                # Extract time signature (first one only)
                ts_changes = pm.time_signature_changes
                time_signature = (
                    f"{ts_changes[0].numerator}/{ts_changes[0].denominator}"
                    if ts_changes else "Unknown"
                )

                # Piano-roll statistics, computed once per file.
                active = pm.get_piano_roll(fs=100) > 0
                n_slices = active.shape[1]
                if n_slices:
                    # NOTE(review): sum()/n_slices equals the mean of the per-slice
                    # sums, so note_density and polyphony are always the same
                    # number. Kept as-is to preserve the existing feature values;
                    # confirm which definition was intended.
                    note_density = active.sum() / n_slices
                    polyphony = active.sum(axis=0).mean()
                    chord_density = active.any(axis=0).sum() / n_slices
                else:
                    # Empty piano roll: avoid dividing by zero.
                    note_density = polyphony = chord_density = 0.0

                # Collect per file first so a mid-file failure leaves no partial rows.
                file_rows = []
                for instrument in pm.instruments:
                    if instrument.is_drum:
                        continue
                    instrument_name = instrument.name or "Unknown"
                    n_pitch_bends = len(instrument.pitch_bends)
                    n_control_changes = len(instrument.control_changes)
                    for note in instrument.notes:
                        file_rows.append({
                            "composer": composer,
                            "file": file,
                            "instrument": instrument_name,
                            "pitch": note.pitch,
                            "start": note.start,
                            "end": note.end,
                            "duration": note.end - note.start,
                            "velocity": note.velocity,
                            "pitch_bends": n_pitch_bends,
                            "control_changes": n_control_changes,
                            "note_density": note_density,
                            "polyphony": polyphony,
                            "chord_density": chord_density,
                            "time_signature": time_signature,
                            "tempo": tempo
                        })

            except Exception as e:
                # Best-effort extraction: report the bad file and keep going.
                print(f"❌ Error parsing {file_path}: {e}")
                error_files.append(file_path)
            else:
                data.extend(file_rows)

    # Explicit columns keep the header even when no rows were collected.
    df = pd.DataFrame(data, columns=columns)
    df.to_csv(save_path, index=False)
    print(f"✅ Features saved to {save_path}")
    if error_files:
        print(f"⚠️ {len(error_files)} files had errors and were skipped.")

# ✅ Call the function
# Dataset root to walk; the relative save_path means the CSV is written to the current working directory.
data_folder = '/content/drive/MyDrive/Masters_AI/6-Neural Networks and Deep Learning/Project/data'
extract_all_midi_features(data_folder, save_path='full_midi_features.csv')


0it [00:00, ?it/s]
 73%|███████▎  | 255/347 [01:09<00:31,  2.90it/s]

❌ Error parsing /content/drive/MyDrive/Masters_AI/6-Neural Networks and Deep Learning/Project/data/Mozart/K281 Piano Sonata n03 3mov.mid: Could not decode key with 2 flats and mode 2


100%|██████████| 347/347 [01:56<00:00,  2.98it/s]
 13%|█▎        | 46/353 [00:37<02:49,  1.81it/s]

❌ Error parsing /content/drive/MyDrive/Masters_AI/6-Neural Networks and Deep Learning/Project/data/Beethoven/Anhang 14-3.mid: Could not decode key with 3 flats and mode 255


 41%|████      | 145/353 [01:23<00:32,  6.45it/s]

❌ Error parsing /content/drive/MyDrive/Masters_AI/6-Neural Networks and Deep Learning/Project/data/Beethoven/Anhang 14-3_1.mid: Could not decode key with 3 flats and mode 255


100%|██████████| 353/353 [04:10<00:00,  1.41it/s]
100%|██████████| 272/272 [01:03<00:00,  4.25it/s]
100%|██████████| 1155/1155 [02:45<00:00,  6.96it/s]


✅ Features saved to full_midi_features.csv
