<a href="https://colab.research.google.com/github/seema469/AAI_511_DeepLearning/blob/main/Chopin_EDA_(1).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Importing Libraries
import sys
import warnings
import os  # Import the os module
import numpy as np
import pandas as pd
from collections import Counter
import random
from IPython.display import Image, Audio
import music21 as m21
# from music21 import *
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation notices raised by the libraries
# imported above, so re-enable warnings when upgrading dependencies.
warnings.filterwarnings("ignore")
warnings.simplefilter("ignore")

# Seed both random number generators this notebook imports (stdlib `random`
# and NumPy's legacy global generator) so stochastic steps are reproducible
# after "Restart Kernel -> Run All".
random.seed(42)
np.random.seed(42)

# Reference: https://capital-g.github.io/musikinformatik-sose2021/01_midi_drums/01_midi_drums.html

In [None]:
def extract_and_aggregate_features(file_path, default_velocity=64):
    """Parse one music file and reduce it to a flat dict of summary features.

    Every ``Note`` contributes one row and every ``Chord`` contributes one row
    per pitch (sharing the chord's duration, offset and velocity). The rows
    are then summarised with their mean and sample standard deviation.

    Parameters
    ----------
    file_path : str
        Path handed to ``music21.converter.parse``.
    default_velocity : int, optional
        Velocity used when an element's ``volume.velocity`` is ``None``.
        Defaults to 64, the value that was previously hard-coded.

    Returns
    -------
    dict
        Keys, in order: ``pitch_mean``, ``pitch_std``, ``duration_mean``,
        ``duration_std``, ``offset_mean``, ``offset_std``, ``velocity_mean``,
        ``velocity_std``, ``key`` (``tonicPitchNameWithCase`` of the analysed
        key) and ``tempos`` (plain mean of the metronome-mark numbers, or NaN
        when no mark carries a number).

    Raises
    ------
    ValueError
        If the parsed file contains no ``Note`` or ``Chord`` elements.
        Errors raised by music21 while parsing or analysing propagate as-is.
    """
    midi_file = m21.converter.parse(file_path)

    # ``Stream.flat`` is deprecated in recent music21 releases in favour of
    # ``flatten()``; fall back to ``.flat`` for older installs without it.
    if hasattr(midi_file, 'flatten'):
        flat_stream = midi_file.flatten()
    else:
        flat_stream = midi_file.flat

    notes = []
    for element in flat_stream.notes:
        if isinstance(element, m21.note.Note):
            pitches = (element.pitch,)
        elif isinstance(element, m21.chord.Chord):
            pitches = element.pitches
        else:
            # Anything that is neither a Note nor a Chord is ignored.
            continue

        velocity = element.volume.velocity
        if velocity is None:
            velocity = default_velocity

        # quarterLength/offset may be fractions.Fraction (e.g. tuplets); cast
        # to float so the DataFrame columns get a numeric dtype, not object.
        duration = float(element.quarterLength)
        offset = float(element.offset)

        for pitch in pitches:
            notes.append({
                'pitch': pitch.midi,
                'duration': duration,
                'offset': offset,
                'velocity': velocity
            })

    if not notes:
        # An empty list would build a column-less DataFrame and fail below
        # with an opaque KeyError('pitch'); report the real cause instead.
        raise ValueError('No notes or chords found in {!r}'.format(file_path))

    notes_df = pd.DataFrame(notes)

    aggregated_features = {}
    for column in ('pitch', 'duration', 'offset', 'velocity'):
        aggregated_features[column + '_mean'] = notes_df[column].mean()
        aggregated_features[column + '_std'] = notes_df[column].std()

    key = midi_file.analyze('key')
    aggregated_features['key'] = key.tonicPitchNameWithCase

    # Each boundary is (start, end, mark); only the mark's number is used, so
    # every tempo mark counts once regardless of how long it lasts. Marks
    # without a numeric value are skipped so np.mean never sees None.
    tempo_numbers = [
        mark.number
        for _, _, mark in midi_file.metronomeMarkBoundaries()
        if mark.number is not None
    ]
    aggregated_features['tempos'] = np.mean(tempo_numbers) if tempo_numbers else np.nan

    return aggregated_features


In [None]:
# filename = 'Composer/Chopin/Ballade No.2 in F Major.mid'
# df = extract_and_aggregate_features(filename)

In [None]:
# Walk <directory>/<composer>/*.mid, extract features for every MIDI file and
# label each row with an integer composer id.
directory = 'Composer/'
all_features = []   # one feature dict per successfully processed file
failed_files = []   # (path, exception) for every file that was skipped
composer_map = {}   # composer folder name -> integer id
composer_id = 0

# os.listdir() and glob() return entries in arbitrary order; sorting keeps the
# composer ids and the row order stable across runs and machines.
for composer in sorted(os.listdir(directory)):
    composer_path = os.path.join(directory, composer)
    if not os.path.isdir(composer_path):
        continue

    composer_map[composer] = composer_id
    # The pattern has no '**', so only files directly inside the composer
    # folder are matched (recursive=True had no effect and was dropped).
    for file in sorted(glob(composer_path + '/*.mid')):
        try:
            features = extract_and_aggregate_features(file)
        except Exception as exc:
            # Best-effort: skip files that cannot be processed, but keep the
            # reason. Catching Exception (not a bare except) lets
            # KeyboardInterrupt stop this long-running loop.
            failed_files.append((file, exc))
            print('{}: {}: {}'.format(file, type(exc).__name__, exc))
            continue

        features['composer'] = composer_id
        features['file'] = file
        all_features.append(features)

    composer_id += 1

Composer/Chopin/Sonata op35 n2.mid


In [None]:
# Assemble the per-file feature dicts into one table: each dict in
# `all_features` becomes a row and its keys become the columns.
features_df = pd.DataFrame(data=all_features)

# Bare last expression so the notebook renders the frame with rich display.
features_df

Unnamed: 0,pitch_mean,pitch_std,duration_mean,duration_std,offset_mean,offset_std,velocity_mean,velocity_std,key,tempos,composer,file
0,62.174714,13.938658,0.310633,0.237050,258.676492,132.704560,63.748484,14.129585,F,67.500000,0,Composer/Chopin/Ballade No.2 in F Major.mid
1,61.909468,14.027218,0.538240,1.579656,343.861596,254.845068,121.226676,3.489613,A,100.000000,0,"Composer/Chopin/Polonaise Opus.40, No.1 (Milit..."
2,63.369607,11.081783,0.527892,0.359343,284.101516,153.052830,82.092608,22.625229,c#,75.903164,0,Composer/Chopin/2 Polonaises op26 n1.mid
3,58.700473,10.894998,0.546374,0.470162,180.897005,82.107724,50.679453,14.716931,E,58.825362,0,Composer/Chopin/Nocturne No.18 in E Major Op62...
4,66.724182,13.024930,0.694999,1.103513,259.278616,156.021411,64.309817,14.609124,E,158.821908,0,Composer/Chopin/Etude op25 n05.mid
...,...,...,...,...,...,...,...,...,...,...,...,...
130,56.889205,10.299092,0.489962,0.124378,146.563636,84.812073,50.682386,9.634463,b-,224.775554,0,Composer/Chopin/Sonata op35 n4 .mid
131,62.351478,11.894760,0.714532,0.627048,249.651142,122.324626,49.699672,24.393428,E,150.000000,0,Composer/Chopin/Etude op10 n03.mid
132,71.688712,13.730459,0.678956,0.535320,251.136893,139.286831,59.851007,12.664095,g#,180.221276,0,Composer/Chopin/Etude op25 n06.mid
133,63.058140,10.450977,0.399128,0.340597,199.489099,113.494247,107.995930,10.785361,D,150.590164,0,Composer/Chopin/Mazurka op33 n2 Drchew.mid
