# Deep learning techniques to identify the composer of a music piece

**Objective**  
The primary objective of this project is to develop a deep learning model that can predict the composer of a given musical score accurately. The project aims to accomplish this objective by using two deep learning techniques: Long Short-Term Memory (LSTM) and Convolutional Neural Network (CNN).

**Dataset**  
The project will use a dataset consisting of musical scores from various composers. The dataset contains MIDI files and sheet music of compositions by well-known classical composers such as Bach, Beethoven, Chopin, Mozart, and Schubert, among others. Each score in the dataset should be labeled with the name of its composer.

## Data Collection

## Import required libraries

In [23]:
# Import all dependent libraries
import os
import csv
import zipfile
import shutil

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import warnings
import math
import keras.optimizers
import tensorflow as tf

from google.colab import drive

# Music related libraries
import music21

# Machine Learning Libraries
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error as mse
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix, recall_score, precision_score
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.model_selection import TimeSeriesSplit

# Deep Learning Libraries
from keras.models import Sequential,load_model
from keras.layers import Dense, Dropout, LSTM, Activation
from sklearn.preprocessing import MinMaxScaler
from keras.utils import pad_sequences # not used , should we remove it?
from keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

# Suppress warnings for cleaner output.
# Both filters use action='ignore', so every FutureWarning and
# DeprecationWarning is silenced while they are in effect -- including
# genuine deprecation notices from the libraries imported above.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)

In [6]:
# Mount Google Drive at /content/drive so the input dataset can be loaded.
# force_remount=True asks Colab to mount again even if the drive is already
# mounted in this runtime.
drive.mount('/content/drive' , force_remount=True)

Mounted at /content/drive


In [26]:
# Root path: project folder on the mounted Google Drive. Every path below is
# built from it.
root_path = '/content/drive/MyDrive/AAI-511-IN2 Neural Networks and Deep Learning/Project'

# File path in Google Drive: the zipped dataset archive.
file_path = os.path.join(root_path, 'Composer_Dataset.zip')

# Extract file path: folder the archive is unpacked into.
extract_path = os.path.join(root_path, 'Composer_Dataset')

# Dataset folders: the MIDI files are read from a nested
# 'Composer_Dataset/NN_midi_files_extended' folder inside extract_path.
dataset_path = os.path.join(extract_path, 'Composer_Dataset/NN_midi_files_extended')

# CSV index file: stored directly inside the dataset folder.
csv_file = os.path.join(dataset_path, 'composer_dataset_index.csv')

In [14]:
# Extract the dataset archive only once; later runs reuse the extracted folder.
if not os.path.exists(extract_path):
    print("Extracting dataset...")
    try:
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(extract_path)
    except BaseException:
        # A failed or interrupted extraction (including a manual cell
        # interrupt) leaves a partial folder behind, which the existence
        # check above would mistake for a complete dataset on the next run.
        # Remove it so the next run extracts again, then re-raise the error.
        shutil.rmtree(extract_path, ignore_errors=True)
        raise
else:
    print("Dataset already extracted.")

Extracting dataset...


In [21]:
def delete_hidden_folders(root_folder):
    """Recursively delete hidden and ``__MACOSX`` folders below *root_folder*.

    A folder is removed, together with everything inside it, when its name
    starts with ``'.'`` or is exactly ``'__MACOSX'``. One message is printed
    per deleted folder. *root_folder* itself is never deleted; only its
    descendants are examined.

    Args:
        root_folder: Path of the directory tree to clean up.

    Returns:
        None.
    """
    for root, dirs, _ in os.walk(root_folder):
        kept = []
        for d in dirs:
            if d.startswith('.'):
                label = 'hidden'
            elif d == '__MACOSX':
                label = '__MACOSX'
            else:
                kept.append(d)
                continue

            dir_path = os.path.join(root, d)
            print(f"Deleting {label} folder: {dir_path}")
            shutil.rmtree(dir_path)

        # Prune the list in place so os.walk (top-down) does not try to
        # descend into folders that were just removed.
        dirs[:] = kept

# Run this on the outer Composer_Dataset folder (extract_path), so the whole
# extracted tree is scanned, not just the nested dataset folder.
delete_hidden_folders(extract_path)

Deleting __MACOSX folder: /content/drive/MyDrive/AAI-511-IN2 Neural Networks and Deep Learning/Project/Composer_Dataset/__MACOSX


In [28]:
# Function to create index files
def create_midi_file_index_csv(root_dir, output_csv):
    """Index the MIDI files under *root_dir* and write the index to a CSV file.

    The expected layout is ``<root_dir>/<split>/<composer>/<file>.mid`` where
    ``<split>`` is one of ``train``, ``test`` or ``dev``. Missing split
    folders are skipped. Hidden entries (names starting with ``'.'``),
    non-directory entries at the composer level, and files that do not end
    with ``.mid`` are ignored.

    The CSV has the columns ``split``, ``composer``, ``filename`` and
    ``filepath``; ``filepath`` is relative to *root_dir*. Rows are ordered by
    split (train, test, dev), then by composer name, then by filename, so
    repeated runs over the same folder produce the same file.

    Args:
        root_dir: Directory containing the split folders.
        output_csv: Path of the CSV file to create (overwritten if present).

    Returns:
        None. A summary line with the number of entries is printed.
    """
    rows = []

    for split in ['train', 'test', 'dev']:
        split_path = os.path.join(root_dir, split)
        if not os.path.isdir(split_path):
            continue

        # os.listdir() returns entries in arbitrary order; sort them so the
        # generated index is reproducible across runs and machines.
        for composer in sorted(os.listdir(split_path)):
            composer_path = os.path.join(split_path, composer)
            if composer.startswith('.') or not os.path.isdir(composer_path):
                continue

            for filename in sorted(os.listdir(composer_path)):
                if filename.startswith('.') or not filename.endswith('.mid'):
                    continue

                rows.append({
                    'split': split,
                    'composer': composer,
                    'filename': filename,
                    # Stored relative to root_dir, not as an absolute path.
                    'filepath': os.path.join(split, composer, filename),
                })

    # Write to CSV. newline='' lets the csv module control line endings;
    # the explicit encoding keeps non-ASCII names portable across platforms.
    with open(output_csv, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=['split', 'composer', 'filename', 'filepath'])
        writer.writeheader()
        writer.writerows(rows)

    print(f"CSV created: {output_csv} with {len(rows)} entries.")

# Create the CSV index: scan the split folders under dataset_path and write
# one row per MIDI file to csv_file.
create_midi_file_index_csv(
    root_dir=dataset_path,
    output_csv=csv_file
)

CSV created: /content/drive/MyDrive/AAI-511-IN2 Neural Networks and Deep Learning/Project/Composer_Dataset/Composer_Dataset/NN_midi_files_extended/composer_dataset_index.csv with 439 entries.


## Data Pre-processing

## Feature Extraction

## Model Building

## Model Training

## Model Evaluation

## Model Optimization