In [1]:
import pandas as pd
import glob
import os
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import r2_score as r2
from sklearn.linear_model import LinearRegression

In [2]:
musicnet_path = './input/musicnet'

data = pd.read_csv(musicnet_path + '/musicnet_metadata.csv')
data.head()

Unnamed: 0,id,composer,composition,movement,ensemble,source,transcriber,catalog_name,seconds
0,1727,Schubert,Piano Quintet in A major,2. Andante,Piano Quintet,European Archive,http://tirolmusic.blogspot.com/,OP114,447
1,1728,Schubert,Piano Quintet in A major,3. Scherzo: Presto,Piano Quintet,European Archive,http://tirolmusic.blogspot.com/,OP114,251
2,1729,Schubert,Piano Quintet in A major,4. Andantino - Allegretto,Piano Quintet,European Archive,http://tirolmusic.blogspot.com/,OP114,444
3,1730,Schubert,Piano Quintet in A major,5. Allegro giusto,Piano Quintet,European Archive,http://tirolmusic.blogspot.com/,OP114,368
4,1733,Schubert,Piano Sonata in A major,2. Andantino,Solo Piano,Museopen,Segundo G. Yogore,D959,546


In [3]:
path_train = musicnet_path + "/musicnet/train_labels"
path_test = musicnet_path + "/musicnet/test_labels"

split = []

for path in (path_train, path_test):
    all_files = glob.glob(path + "/*.csv")
    li = []
    for filename in all_files:
        df = pd.read_csv(filename)
        df['id'] = os.path.basename(filename[:-4])
        li.append(df)
    res = pd.concat(li, axis=0, ignore_index=True)
    res['id'] = res['id'].astype(str).astype(int)
    split.append(res)

In [4]:
train, test = split

In [5]:
drop_list = 
data_train = data.merge(train, on = 'id')
data_train = data_train.drop(, axis=1)

data_test = data.merge(test, on = 'id')
data_test = data_test.drop(['source', 'transcriber', 'catalog_name'], axis=1)

In [6]:
entire_data = pd.concat([data_test, data_train])
entire_data.head(5)

Unnamed: 0,id,composer,composition,movement,ensemble,seconds,start_time,end_time,instrument,note,start_beat,end_beat,note_value
0,1759,Schubert,Piano Sonata in C minor,3. Menuetto and Trio,Solo Piano,194,90078,124382,1,63,0.0,1.0,Quarter
1,1759,Schubert,Piano Sonata in C minor,3. Menuetto and Trio,Solo Piano,194,90078,124382,1,75,0.0,1.0,Quarter
2,1759,Schubert,Piano Sonata in C minor,3. Menuetto and Trio,Solo Piano,194,90078,110558,1,48,0.0,0.375,Dotted Sixteenth
3,1759,Schubert,Piano Sonata in C minor,3. Menuetto and Trio,Solo Piano,194,114654,122334,1,55,0.5,0.375,Dotted Sixteenth
4,1759,Schubert,Piano Sonata in C minor,3. Menuetto and Trio,Solo Piano,194,124382,139742,1,65,1.0,1.0,Quarter


In [7]:
solo_piano = entire_data[entire_data['ensemble'] == 'Solo Piano']
solo_piano = solo_piano.drop(['id', 'composition', 'movement', 'ensemble', 'instrument', 'note_value'], axis=1)
solo_piano.head(5)

Unnamed: 0,composer,seconds,start_time,end_time,note,start_beat,end_beat
0,Schubert,194,90078,124382,63,0.0,1.0
1,Schubert,194,90078,124382,75,0.0,1.0
2,Schubert,194,90078,110558,48,0.0,0.375
3,Schubert,194,114654,122334,55,0.5,0.375
4,Schubert,194,124382,139742,65,1.0,1.0


In [8]:
solo_piano['composer'].replace({'Beethoven':0,'Bach':1, 'Schubert':2,'Mozart':3, 'Brahms':4,'Cambini':5, 'Dvorak':6,'Faure':7, 'Ravel':8,'Haydn':9},inplace=True)

In [9]:
X_piano = solo_piano.drop(['note'], axis=1) # Features
y_piano = solo_piano['note'] # Labels

In [10]:
X_train_p, X_test_p, y_train_p, y_test_p = train_test_split(X_piano, y_piano, test_size = 0.2, random_state=10)

In [11]:
# Decision tree regressor
tree_p = DecisionTreeRegressor()
tree_p.fit(X_train_p, y_train_p)
y_pred_tree_p = tree_p.predict(X_test_p)

In [12]:
test_p_ = X_test_p.copy()
test_p_['note'] = y_pred_tree_p
test_p_ = test_p_.sort_values(by=['start_time'])
test_p_.tail()

Unnamed: 0,composer,seconds,start_time,end_time,start_beat,end_beat,note
921999,0,1067,45952478,46009822,2577.0,1.958333,81.5
922000,0,1067,45981662,46009822,2578.0,0.958333,43.0
922002,0,1067,46010846,46037982,2579.0,0.958333,61.0
922009,0,1067,46039006,46103518,2580.0,1.958333,74.0
922016,0,1067,46141406,46219230,2583.0,1.958333,78.0


In [13]:
r2_tree_p = r2(y_test_p, y_pred_tree_p)
print('Tree regression r^2:', r2_tree_p)

Tree regression r^2: 0.21729437625217918


In [14]:
row = 1757  # change this to another song id to get the predicted notes for different songs
data_sample = entire_data[entire_data['id'] == row]
data_sample = data_sample.drop(['id', 'composition', 'movement', 'ensemble', 'instrument', 'note_value'], axis=1)
data_sample['composer'].replace({'Beethoven':0,'Bach':1, 'Schubert':2,'Mozart':3, 'Brahms':4,'Cambini':5, 'Dvorak':6,'Faure':7, 'Ravel':8,'Haydn':9},inplace=True)

In [15]:
X_sample = data_sample.drop(['note'], axis=1)
y_sample = data_sample['note']
y_pred_tree_sample = tree_p.predict(X_sample)
print('Tree regression r^2 for row ', row, ':', r2(y_sample, y_pred_tree_sample))

Tree regression r^2 for row  1757 : 0.6846842852833303


In [16]:
import numpy as np
y_pred_tree_sample = y_pred_tree_sample.astype(int)
notes_pred_sample = pd.Series(y_pred_tree_sample)

In [17]:
from midiutil.MidiFile import MIDIFile


# create a MIDI file
midi_file = MIDIFile(numTracks=1)
track = 0
channel = 0
time = 0

# add MIDI notes to the file

notes_sample = list(zip(notes_pred_sample, data_sample['start_time'], data_sample['end_time']))[:700]

timescale = 0.3

filename = "output.mid"

for i in range(len(notes_sample)):
    tup = notes_sample[i]
    start_new = ((tup[1])/10000)*timescale
    end_new = ((tup[2])/10000)*timescale
    notes_sample[i] = (tup[0], start_new, end_new)
velocity = 1
for note, start_time, end_time in notes_sample:
    duration = end_time - start_time
    midi_file.addNote(track, channel, note, start_time, duration, velocity)

# write the MIDI file
with open(filename, "wb") as output_file:
    midi_file.writeFile(output_file)


IndexError: pop from empty list