In [640]:
# Features that can determine the similarity between short (10s) midi files:
'''
tempo
number of notes
pitch
genre
------------------------- FEATURES FOR EVALUATING FILES SIMILARITY ---------------------------------
    
    1
    # empirical estimate of the file's global tempo
    # (the most probable candidate; based on “Automatic Extraction of Tempo and Beat from Expressive Performance”, Dixon 2001)
    midi_data.estimate_tempo()
    
    2
    estimate beat start
    midi_data.estimate_beat_start()
    
    3
    onset: beginning of a musical note or other sound
    times of all onsets (in s) of all notes from all instruments. May have duplicate entries
    midi_data.get_onsets()
    
    4
    histogram of pitches (notes)
    midi_data.get_pitch_class_histogram()
    
    5
    transition of pitches
    12x12 matrix of pitch-class transitions; a transition is counted when a note starts within time_thresh seconds of the end of another note
    midi_data.get_pitch_class_transition_matrix()
    
    6
    total velocity
    total_velocity = sum(sum(midi_data.get_chroma()))
    
    7
    proxy for key: relative amount of each semitone
    Compute the relative amount of each semitone across the entire song, a proxy for key
    print([sum(semitone)/total_velocity for semitone in midi_data.get_chroma()])
    
    print('\n')
    --------------------------------------- ANALYSIS -----------------------------------------
    
    1
    tempo()
    
    2
    beat_start()
    
    3
    get_onsets()
    
    4
    pitch
    
    5
    pitch_transition
    
    6
    total velocity
    
    7
    key: relative amount of each semitone
    
    print('\n')
    ------------------------------------ EXAMPLES & COMMENTS ---------------------------------------
    various random files selected for tuning the similarity measurement

yes
['04c4330daf5279b9b33200075fa0837a_17641.midi_9_1.midi',
 '04c4330daf5279b9b33200075fa0837a_17641.midi_9_3.midi',
 
 
yes
 '09aed5c312bff121c373e849af39f15c_11236.midi_6_1.midi',
 '09aed5c312bff121c373e849af39f15c_11236.midi_6_3.midi',
 
 
    70% quite similar
0a088b58f5924caea0b1ecff3f39a6fe_4726.midi_6_1.midi
0a088b58f5924caea0b1ecff3f39a6fe_4726.midi_6_3.midi

    
    90% same
0ac9dd1c27a0363cf2672924cc3df47d_21148.midi_15_1.midi
0ac9dd1c27a0363cf2672924cc3df47d_21148.midi_15_3.midi


    90% same
0addb8ef21c3f6779009058dd2d96c78_5040.midi_9_1.midi
0addb8ef21c3f6779009058dd2d96c78_5040.midi_9_3.midi


    10% not similar
0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_0_1.midi
0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_0_3.midi


    70% quite similar
0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_4_1.midi
0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_4_3.midi


    90% same
0bb8f4fc16294bf3094669844e33569a_1073.midi_7_1.midi same song 1
0bb8f4fc16294bf3094669844e33569a_1073.midi_7_3.midi


    90% same
0bb8f4fc16294bf3094669844e33569a_17917.midi_12_1.midi same song
0bb8f4fc16294bf3094669844e33569a_17917.midi_12_3.midi



    70% quite similar
0bb8f4fc16294bf3094669844e33569a_17917.midi_1_1.midi same song
0bb8f4fc16294bf3094669844e33569a_17917.midi_1_3.midi


    30% not similar
2de41ab084f34390266e0144397aa1c6_12062.midi_7_1.midi same song 2
2de41ab084f34390266e0144397aa1c6_12062.midi_7_3.midi


    50% resemblance
2de41ab084f34390266e0144397aa1c6_12062.midi_8_1.midi same song
2de41ab084f34390266e0144397aa1c6_12062.midi_8_3.midi


    30% not similar
2de41ab084f34390266e0144397aa1c6_12062.midi_9_1.midi same song
2de41ab084f34390266e0144397aa1c6_12062.midi_9_3.midi


    70% quite similar
2e690364b9409ee1d3f25be8266e9570_3.midi_0_1.midi same song 3
2e690364b9409ee1d3f25be8266e9570_3.midi_0_3.midi


    90% same
2e690364b9409ee1d3f25be8266e9570_3.midi_17_1.midi
2e690364b9409ee1d3f25be8266e9570_3.midi_17_3.midi


    50% resemblance
2e690364b9409ee1d3f25be8266e9570_3.midi_18_1.midi
2e690364b9409ee1d3f25be8266e9570_3.midi_18_3.midi


    90% same
2e690364b9409ee1d3f25be8266e9570_3.midi_19_1.midi
2e690364b9409ee1d3f25be8266e9570_3.midi_19_3.midi


    70% quite similar
2e690364b9409ee1d3f25be8266e9570_3.midi_1_1.midi
2e690364b9409ee1d3f25be8266e9570_3.midi_1_3.midi


    70% quite similar
2e690364b9409ee1d3f25be8266e9570_3.midi_20_1.midi
2e690364b9409ee1d3f25be8266e9570_3.midi_20_3.midi


    90% same
2e690364b9409ee1d3f25be8266e9570_3.midi_2_1.midi
2e690364b9409ee1d3f25be8266e9570_3.midi_2_3.midi


    70% quite similar
2e690364b9409ee1d3f25be8266e9570_3.midi_3_1.midi
2e690364b9409ee1d3f25be8266e9570_3.midi_3_3.midi

no
3a44ff379436cff87840aec33ab6ecf0_1490.midi_14_1.midi
3a44ff379436cff87840aec33ab6ecf0_1490.midi_14_3.midi

no
3a44ff379436cff87840aec33ab6ecf0_1490.midi_17_1.midi
3a44ff379436cff87840aec33ab6ecf0_1490.midi_17_3.midi

no
3a44ff379436cff87840aec33ab6ecf0_1490.midi_19_1.midi
3a44ff379436cff87840aec33ab6ecf0_1490.midi_19_3.midi


no
3bad77c0db7a79546b57a6b2cb14b16d_8023.midi_4_1.midi
3bad77c0db7a79546b57a6b2cb14b16d_8023.midi_4_3.midi

no
3bad77c0db7a79546b57a6b2cb14b16d_8023.midi_5_1.midi
3bad77c0db7a79546b57a6b2cb14b16d_8023.midi_5_3.midi

yes
3c347e39a6b213ee369f23715b4f4ffa_842.midi_11_1.midi
3c347e39a6b213ee369f23715b4f4ffa_842.midi_11_3.midi

no
4d71f018f3514ddeec0ae6df6e4feb6b_21007.midi_4_1.midi
4d71f018f3514ddeec0ae6df6e4feb6b_21007.midi_4_3.midi

no
4e19e3897f314d72146dbb2e5207d956_11585.midi_16_1.midi
4e19e3897f314d72146dbb2e5207d956_11585.midi_16_3.midi

yes
4e19e3897f314d72146dbb2e5207d956_11585.midi_8_1.midi
4e19e3897f314d72146dbb2e5207d956_11585.midi_8_3.midi

yes
4f58c70ca9d32429a5174081c633b31a_18384.midi_18_1.midi
4f58c70ca9d32429a5174081c633b31a_18384.midi_18_3.midi

no
5a65323fe3ba1c143a276ca06a0a1a3d_641.midi_7_1.midi
5a65323fe3ba1c143a276ca06a0a1a3d_641.midi_7_3.midi

yes
5a70b9b417abe327bb3e6da697536fd4_17551.midi_4_1.midi
5a70b9b417abe327bb3e6da697536fd4_17551.midi_4_3.midi

no
5ae731ae4008a65c0dd58c2d28bc9d74_15146.midi_9_1.midi
5ae731ae4008a65c0dd58c2d28bc9d74_15146.midi_9_3.midi

yes
5c224dbb4dffa02476c6fe3ac9d7877a_11092.midi_0_1.midi
5c224dbb4dffa02476c6fe3ac9d7877a_11092.midi_0_3.midi

no
5c224dbb4dffa02476c6fe3ac9d7877a_11092.midi_20_1.midi
5c224dbb4dffa02476c6fe3ac9d7877a_11092.midi_20_3.midi

yes
5c224dbb4dffa02476c6fe3ac9d7877a_11092.midi_3_1.midi
5c224dbb4dffa02476c6fe3ac9d7877a_11092.midi_3_3.midi

yes
5db87d8020936af2aa252dfa9c9fabca_6075.midi_2_1.midi
5db87d8020936af2aa252dfa9c9fabca_6075.midi_2_3.midi

no
5ed33ceaddeecdcf54b85dcb881f0da8_4924.midi_27_1.midi
5ed33ceaddeecdcf54b85dcb881f0da8_4924.midi_27_3.midi

no
5ef665905aee411922ed2c0b36faa7b9_15905.midi_1_1.midi
5ef665905aee411922ed2c0b36faa7b9_15905.midi_1_3.midi

yes
8f563713ae218b1b3d8c41af396f9e62_11570.midi_3_1.midi
8f563713ae218b1b3d8c41af396f9e62_11570.midi_3_3.midi

no
8fde61c9bb15fcf313880f4d50a2d53a_8542.midi_14_1.midi
8fde61c9bb15fcf313880f4d50a2d53a_8542.midi_14_3.midi

no
9bbe6eccf416db8ff9d326596e1161de_4822.midi_1_1.midi
9bbe6eccf416db8ff9d326596e1161de_4822.midi_1_3.midi

no
9ceaf57be69ee1b1633fd07e33ef17fc_11410.midi_18_1.midi
9ceaf57be69ee1b1633fd07e33ef17fc_11410.midi_18_3.midi

no
9ceaf57be69ee1b1633fd07e33ef17fc_1552.midi_12_1.midi
9ceaf57be69ee1b1633fd07e33ef17fc_1552.midi_12_3.midi
'''
import pretty_midi
import os
import numpy as np
import pandas as pd
from music21 import midi
from sklearn.model_selection import train_test_split

In [641]:
# Folder whose contents are listed below.
folder = 'lahk_midi_interpolation'

# Alphabetically sorted file names; the same listing is kept under two names.
directory = sorted(os.listdir(folder))
folder_sorted = sorted(os.listdir(folder))

# Echo every file name on its own line, then a blank separator.
for entry in folder_sorted:
    print(entry)
print('\n')

# Last expression of the cell: display the sorted listing.
directory

04c4330daf5279b9b33200075fa0837a_17641.midi_9_1.midi
04c4330daf5279b9b33200075fa0837a_17641.midi_9_3.midi
09aed5c312bff121c373e849af39f15c_11236.midi_6_1.midi
09aed5c312bff121c373e849af39f15c_11236.midi_6_3.midi
0a088b58f5924caea0b1ecff3f39a6fe_4726.midi_6_1.midi
0a088b58f5924caea0b1ecff3f39a6fe_4726.midi_6_3.midi
0ac9dd1c27a0363cf2672924cc3df47d_21148.midi_15_1.midi
0ac9dd1c27a0363cf2672924cc3df47d_21148.midi_15_3.midi
0addb8ef21c3f6779009058dd2d96c78_5040.midi_9_1.midi
0addb8ef21c3f6779009058dd2d96c78_5040.midi_9_3.midi
0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_0_1.midi
0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_0_3.midi
0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_4_1.midi
0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_4_3.midi
0bb8f4fc16294bf3094669844e33569a_1073.midi_7_1.midi
0bb8f4fc16294bf3094669844e33569a_1073.midi_7_3.midi
0bb8f4fc16294bf3094669844e33569a_17917.midi_12_1.midi
0bb8f4fc16294bf3094669844e33569a_17917.midi_12_3.midi
0bb8f4fc16294bf3094669844e33569a_17917.midi_1_1.midi

['04c4330daf5279b9b33200075fa0837a_17641.midi_9_1.midi',
 '04c4330daf5279b9b33200075fa0837a_17641.midi_9_3.midi',
 '09aed5c312bff121c373e849af39f15c_11236.midi_6_1.midi',
 '09aed5c312bff121c373e849af39f15c_11236.midi_6_3.midi',
 '0a088b58f5924caea0b1ecff3f39a6fe_4726.midi_6_1.midi',
 '0a088b58f5924caea0b1ecff3f39a6fe_4726.midi_6_3.midi',
 '0ac9dd1c27a0363cf2672924cc3df47d_21148.midi_15_1.midi',
 '0ac9dd1c27a0363cf2672924cc3df47d_21148.midi_15_3.midi',
 '0addb8ef21c3f6779009058dd2d96c78_5040.midi_9_1.midi',
 '0addb8ef21c3f6779009058dd2d96c78_5040.midi_9_3.midi',
 '0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_0_1.midi',
 '0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_0_3.midi',
 '0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_4_1.midi',
 '0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_4_3.midi',
 '0bb8f4fc16294bf3094669844e33569a_1073.midi_7_1.midi',
 '0bb8f4fc16294bf3094669844e33569a_1073.midi_7_3.midi',
 '0bb8f4fc16294bf3094669844e33569a_17917.midi_12_1.midi',
 '0bb8f4fc16294bf3094669844e33569a_179

In [642]:
# Sanity check: load every listed clip and report the shapes of its pitch features.
for clip_name in directory:
    # Song's name
    print(clip_name)
    # Load the MIDI file into a PrettyMIDI object
    midi_data = pretty_midi.PrettyMIDI('lahk_midi_interpolation_10s/' + clip_name)

    # Tempo / beat-start / onset inspection is currently switched off:
    #print('tempo:',midi_data.estimate_tempo())
    #print('estimate beat start:',(midi_data.estimate_beat_start()))
    #print('onsets:',(midi_data.get_onsets()))

    # Pitch-class histogram, weighted by note duration and velocity, normalised.
    histogram = midi_data.get_pitch_class_histogram(
        use_duration=True, use_velocity=True, normalize=True)
    print('pitch histogram:', histogram.shape)

    # Normalised pitch-class transition matrix computed with time_thresh=0.05.
    transitions = midi_data.get_pitch_class_transition_matrix(
        normalize=True, time_thresh=0.05)
    print('pitch transition:', transitions.shape)

    # Sum of every chroma entry of this clip; not printed here, but the name
    # stays bound in the notebook namespace after the loop.
    total_velocity = sum(sum(midi_data.get_chroma()))
    #print('total velocity:',total_velocity)
    #print([sum(semitone)/total_velocity for semitone in midi_data.get_chroma()])

    print('\n')

04c4330daf5279b9b33200075fa0837a_17641.midi_9_1.midi
pitch histogram: (12,)
pitch transition: (12, 12)


04c4330daf5279b9b33200075fa0837a_17641.midi_9_3.midi
pitch histogram: (12,)
pitch transition: (12, 12)


09aed5c312bff121c373e849af39f15c_11236.midi_6_1.midi
pitch histogram: (12,)
pitch transition: (12, 12)


09aed5c312bff121c373e849af39f15c_11236.midi_6_3.midi
pitch histogram: (12,)
pitch transition: (12, 12)


0a088b58f5924caea0b1ecff3f39a6fe_4726.midi_6_1.midi
pitch histogram: (12,)
pitch transition: (12, 12)


0a088b58f5924caea0b1ecff3f39a6fe_4726.midi_6_3.midi
pitch histogram: (12,)
pitch transition: (12, 12)


0ac9dd1c27a0363cf2672924cc3df47d_21148.midi_15_1.midi
pitch histogram: (12,)
pitch transition: (12, 12)


0ac9dd1c27a0363cf2672924cc3df47d_21148.midi_15_3.midi
pitch histogram: (12,)
pitch transition: (12, 12)


0addb8ef21c3f6779009058dd2d96c78_5040.midi_9_1.midi
pitch histogram: (12,)
pitch transition: (12, 12)


0addb8ef21c3f6779009058dd2d96c78_5040.midi_9_3.midi
pitc

In [643]:
print(4 % 2)

# Walk the sorted listing two entries at a time. A trailing unpaired entry
# (odd-length listing) is skipped.
for first_idx in range(0, len(directory) - 1, 2):
    first_name = directory[first_idx]
    second_name = directory[first_idx + 1]

    # Names of the two songs in this pair
    print(first_name)
    print(second_name)

    # Load both MIDI files into PrettyMIDI objects
    midi_data_1 = pretty_midi.PrettyMIDI('lahk_midi_interpolation_10s/' + first_name)
    midi_data_2 = pretty_midi.PrettyMIDI('lahk_midi_interpolation_10s/' + second_name)

    print('\n')

0
04c4330daf5279b9b33200075fa0837a_17641.midi_9_1.midi
04c4330daf5279b9b33200075fa0837a_17641.midi_9_3.midi


09aed5c312bff121c373e849af39f15c_11236.midi_6_1.midi
09aed5c312bff121c373e849af39f15c_11236.midi_6_3.midi


0a088b58f5924caea0b1ecff3f39a6fe_4726.midi_6_1.midi
0a088b58f5924caea0b1ecff3f39a6fe_4726.midi_6_3.midi


0ac9dd1c27a0363cf2672924cc3df47d_21148.midi_15_1.midi
0ac9dd1c27a0363cf2672924cc3df47d_21148.midi_15_3.midi


0addb8ef21c3f6779009058dd2d96c78_5040.midi_9_1.midi
0addb8ef21c3f6779009058dd2d96c78_5040.midi_9_3.midi


0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_0_1.midi
0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_0_3.midi


0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_4_1.midi
0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_4_3.midi


0bb8f4fc16294bf3094669844e33569a_1073.midi_7_1.midi
0bb8f4fc16294bf3094669844e33569a_1073.midi_7_3.midi


0bb8f4fc16294bf3094669844e33569a_17917.midi_12_1.midi
0bb8f4fc16294bf3094669844e33569a_17917.midi_12_3.midi


0bb8f4fc16294bf3094669844e33569a

In [644]:
# First file name of every consecutive pair in the sorted listing
# (indices 0, 2, 4, ...); an unpaired trailing entry is left out.
lst = [directory[k] for k in range(0, len(directory) - 1, 2)]
len(lst)

45

In [590]:
# Suffixes of the summary statistics stored for each feature array.
_STAT_SUFFIXES = ('cov', 'var', 'std', 'mean', 'av', 'median', 'perc', 'ptp', 'max', 'min')
# Feature arrays that get one raw entry plus one entry per summary statistic.
_FEATURE_GROUPS = ('onset', 'pitch_histogram', 'pitch_transition')


def _song_keys(prefix):
    """Return the ordered feature keys for one song of a pair.

    prefix -- '' for the first song of a pair, '_' for the second one.

    For each feature group the raw key comes first, followed by one key per
    summary statistic; 'velocity' and 'key' close the list.
    """
    keys = []
    for group in _FEATURE_GROUPS:
        keys.append(prefix + group)
        keys.extend(prefix + group + '_' + suffix for suffix in _STAT_SUFFIXES)
    keys.append(prefix + 'velocity')
    keys.append(prefix + 'key')
    return keys


# Empty dictionary: one independent empty list per feature name.
# The keys are generated instead of typed by hand so that both songs use
# exactly the same naming scheme (the hand-written version contained the
# stray key '__pitch_histogram_av' with a doubled underscore).
d = {
    key: []
    for key in ['name', 'tempo'] + _song_keys('') + _song_keys('_') + ['collectable']
}

In [677]:
# Create one fresh, independent empty list per feature name.
# Un-prefixed names belong to the first song of a pair, '_'-prefixed names
# to the second song.

# ---- first song of each pair ----
tempo = list()
(onset_cov, onset_var, onset_std, onset_mean, onset_av,
 onset_median, onset_perc, onset_ptp, onset_max, onset_min) = ([] for _ in range(10))

(pitch_histogram,
 pitch_histogram_cov, pitch_histogram_var, pitch_histogram_std,
 pitch_histogram_mean, pitch_histogram_av, pitch_histogram_median,
 pitch_histogram_perc, pitch_histogram_ptp, pitch_histogram_max,
 pitch_histogram_min) = ([] for _ in range(11))

(pitch_transition,
 pitch_transition_cov, pitch_transition_var, pitch_transition_std,
 pitch_transition_mean, pitch_transition_av, pitch_transition_median,
 pitch_transition_perc, pitch_transition_ptp, pitch_transition_max,
 pitch_transition_min) = ([] for _ in range(11))
velocity, key = [], []

# ---- second song of each pair ----
_tempo = list()
(_onset_cov, _onset_var, _onset_std, _onset_mean, _onset_av,
 _onset_median, _onset_perc, _onset_ptp, _onset_max, _onset_min) = ([] for _ in range(10))

(_pitch_histogram,
 _pitch_histogram_cov, _pitch_histogram_var, _pitch_histogram_std,
 _pitch_histogram_mean, _pitch_histogram_av, _pitch_histogram_median,
 _pitch_histogram_perc, _pitch_histogram_ptp, _pitch_histogram_max,
 _pitch_histogram_min) = ([] for _ in range(11))

(_pitch_transition,
 _pitch_transition_cov, _pitch_transition_var, _pitch_transition_std,
 _pitch_transition_mean, _pitch_transition_av, _pitch_transition_median,
 _pitch_transition_perc, _pitch_transition_ptp, _pitch_transition_max,
 _pitch_transition_min) = ([] for _ in range(11))
_velocity, _key = [], []

In [678]:
# Folder the clips are loaded from (prefix is concatenated with the file name).
_MIDI_DIR = 'lahk_midi_interpolation_10s/'


def _percentile_100(values):
    """100th percentile of ``values`` (numerically the maximum)."""
    return np.percentile(values, 100)


# (column suffix, reducer) pairs applied to every feature array, in column order.
# NOTE: 'perc' (100th percentile) duplicates 'max' and 'av' (unweighted
# np.average) duplicates 'mean'; both are kept so that the column layout the
# later cells index into stays exactly the same.
_SUMMARY_STATS = [
    ('cov', np.cov),
    ('var', np.var),
    ('std', np.std),
    ('mean', np.mean),
    ('av', np.average),
    ('median', np.median),
    ('perc', _percentile_100),
    ('ptp', np.ptp),
    ('max', np.amax),
    ('min', np.amin),
]
# Feature arrays that are summarised, in column order.
_SUMMARIZED_FEATURES = ('onset', 'pitch_histogram', 'pitch_transition')
# Per-song column names: tempo first, then every <feature>_<stat> combination.
_FEATURE_COLUMNS = ['tempo'] + [
    feature + '_' + suffix
    for feature in _SUMMARIZED_FEATURES
    for suffix, _reducer in _SUMMARY_STATS
]


def _summarize(values, feature):
    """Return ``{feature_suffix: statistic}`` for every entry of _SUMMARY_STATS.

    Scalar results are unwrapped to plain floats: np.cov of a 1-D array comes
    back as a 0-d ndarray, which pandas would otherwise store as an object
    column. Non-scalar results (np.cov of the 2-D transition matrix) are kept
    as arrays.
    """
    summary = {}
    for suffix, reducer in _SUMMARY_STATS:
        value = reducer(values)
        if np.ndim(value) == 0:
            value = float(value)
        summary[feature + '_' + suffix] = value
    return summary


def _extract_song_features(path):
    """Load one MIDI file and return its features as a dict.

    The dict holds every name in _FEATURE_COLUMNS plus:
      'velocity' -- sum of all entries of the file's chroma matrix
      'key'      -- per-row chroma sums divided by 'velocity' (list)

    'velocity' and 'key' are always derived from this file's own chroma, never
    from a value left over in the notebook namespace.
    """
    midi_data = pretty_midi.PrettyMIDI(path)

    features = {'tempo': midi_data.estimate_tempo()}
    features.update(_summarize(midi_data.get_onsets(), 'onset'))
    features.update(_summarize(
        midi_data.get_pitch_class_histogram(use_duration=True, use_velocity=True, normalize=True),
        'pitch_histogram'))
    features.update(_summarize(
        midi_data.get_pitch_class_transition_matrix(normalize=True, time_thresh=0.05),
        'pitch_transition'))

    # Compute the chroma once and reuse it for both derived values.
    chroma = midi_data.get_chroma()
    total_velocity = chroma.sum()
    features['velocity'] = total_velocity
    features['key'] = (chroma.sum(axis=1) / total_velocity).tolist()
    return features


# All accumulators are created here, so re-running this cell starts from a
# clean state instead of appending to lists filled by a previous run.
_ordered_columns = [prefix + name for prefix in ('', '_') for name in _FEATURE_COLUMNS]
_column_values = {name: [] for name in _ordered_columns}
velocity, key, _velocity, _key = [], [], [], []
_pair_names = []

# Consecutive entries of the sorted listing form a pair; an unpaired trailing
# entry is ignored.
for _start in range(0, len(directory) - 1, 2):
    _first_song, _second_song = directory[_start], directory[_start + 1]
    _pair_names.append(_first_song)

    for _prefix, song, _velocities, _keys, _done_message in (
            ('', _first_song, velocity, key, 'append of 1st song done'),
            ('_', _second_song, _velocity, _key, 'append of 2nd song done')):
        print(song)
        _features = _extract_song_features(_MIDI_DIR + song)
        for _name in _FEATURE_COLUMNS:
            _column_values[_prefix + _name].append(_features[_name])
        # Kept as side lists only: adding them to df would shift the column
        # positions that the later cells rely on.
        _velocities.append(_features['velocity'])
        _keys.append(_features['key'])
        print(_done_message)

# Column 0 holds the first file name of each pair; the feature columns follow
# in the order: first-song features, then '_'-prefixed second-song features.
df = pd.DataFrame(data=_pair_names)
for _name in _ordered_columns:
    df.insert(len(df.columns), _name, _column_values[_name], True)

df

04c4330daf5279b9b33200075fa0837a_17641.midi_9_1.midi
append of 1st song done
04c4330daf5279b9b33200075fa0837a_17641.midi_9_3.midi
append of 2nd song done
09aed5c312bff121c373e849af39f15c_11236.midi_6_1.midi
append of 1st song done
09aed5c312bff121c373e849af39f15c_11236.midi_6_3.midi
append of 2nd song done
0a088b58f5924caea0b1ecff3f39a6fe_4726.midi_6_1.midi
append of 1st song done
0a088b58f5924caea0b1ecff3f39a6fe_4726.midi_6_3.midi
append of 2nd song done
0ac9dd1c27a0363cf2672924cc3df47d_21148.midi_15_1.midi
append of 1st song done
0ac9dd1c27a0363cf2672924cc3df47d_21148.midi_15_3.midi
append of 2nd song done
0addb8ef21c3f6779009058dd2d96c78_5040.midi_9_1.midi
append of 1st song done
0addb8ef21c3f6779009058dd2d96c78_5040.midi_9_3.midi
append of 2nd song done
0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_0_1.midi
append of 1st song done
0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_0_3.midi
append of 2nd song done
0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_4_1.midi
append of 1st song done
0b79

Unnamed: 0,0,tempo,onset_cov,onset_var,onset_std,onset_mean,onset_av,onset_median,onset_perc,onset_ptp,...,_pitch_transition_cov,_pitch_transition_var,_pitch_transition_std,_pitch_transition_mean,_pitch_transition_av,_pitch_transition_median,_pitch_transition_perc,_pitch_transition_ptp,_pitch_transition_max,_pitch_transition_min
0,04c4330daf5279b9b33200075fa0837a_17641.midi_9_...,140.425532,9.800836789302236,9.681314,3.111481,4.801275,4.801275,5.040909,10.229545,10.229545,...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.000741,0.027215,0.006944,0.006944,0.0,0.24,0.24,0.24,0.0
1,09aed5c312bff121c373e849af39f15c_11236.midi_6_...,240.0,10.179933358015548,10.042367,3.168969,4.891892,4.891892,5.125,10.0,10.0,...,"[[0.00021482182940516276, 0.0, 2.6304713804713...",0.000279,0.016692,0.006944,0.006944,0.0,0.111111,0.111111,0.111111,0.0
2,0a088b58f5924caea0b1ecff3f39a6fe_4726.midi_6_1...,197.858672,10.234100978596912,10.110799,3.179748,5.119743,5.119743,5.0,10.25,10.25,...,"[[0.0005543237250554324, 0.0, 0.00018252122654...",0.000311,0.01764,0.006944,0.006944,0.0,0.085366,0.085366,0.085366,0.0
3,0ac9dd1c27a0363cf2672924cc3df47d_21148.midi_15...,240.479893,9.315801681179634,9.246281,3.04077,5.110753,5.110753,5.229545,10.229545,10.229545,...,"[[0.0005630407720696588, 0.0, -8.7975120635884...",0.000245,0.015656,0.006944,0.006944,0.0,0.072289,0.072289,0.072289,0.0
4,0addb8ef21c3f6779009058dd2d96c78_5040.midi_9_1...,160.0,6.867660984848484,6.65955,2.58061,6.378788,6.378788,6.5,10.0,10.0,...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.000509,0.022562,0.006944,0.006944,0.0,0.111111,0.111111,0.111111,0.0
5,0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_0_1...,227.671541,8.099329876979263,7.994144,2.827392,4.200443,4.200443,4.020455,9.75,9.75,...,"[[0.0006674821127946126, 0.0, 0.0, 4.274515993...",0.000289,0.01701,0.006944,0.006944,0.0,0.104167,0.104167,0.104167,0.0
6,0b79c1cbe957b992dec2aba47cc08ec9_6004.midi_4_1...,77.723258,11.996115897187966,11.680429,3.417664,4.794438,4.794438,4.104545,10.084091,10.084091,...,"[[0.0016368672878368175, 0.0, 0.0, 6.295643414...",0.000327,0.01808,0.006944,0.006944,0.0,0.105263,0.105263,0.105263,0.0
7,0bb8f4fc16294bf3094669844e33569a_1073.midi_7_1...,215.803815,9.587916754349044,9.469547,3.077263,5.238496,5.238496,5.104545,10.354545,10.354545,...,"[[0.001080586720725129, 0.0, 0.000777672923001...",0.000595,0.024397,0.006944,0.006944,0.0,0.117647,0.117647,0.117647,0.0
8,0bb8f4fc16294bf3094669844e33569a_17917.midi_12...,168.0,9.44857834240774,9.285672,3.04724,5.106661,5.106661,5.020455,10.270455,10.25,...,"[[0.0013598570741427886, 0.0, 0.00091939806225...",0.000607,0.024647,0.006944,0.006944,0.0,0.114286,0.114286,0.114286,0.0
9,0bb8f4fc16294bf3094669844e33569a_17917.midi_1_...,168.0,9.29794313369631,9.137634,3.022852,5.101332,5.101332,5.084091,10.084091,10.0,...,"[[0.0014814814814814816, 0.0, 0.00208754208754...",0.001155,0.033992,0.006944,0.006944,0.0,0.2,0.2,0.2,0.0


In [679]:
# One hand-entered 0/1 label per row of df (45 values).
# NOTE(review): presumably the manual similarity judgement for each pair -- confirm.
collectable = [1,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,0,0,0]

# Assign by column name instead of df.insert(63, ..., allow_duplicates=True):
# on a first run the column still lands after the existing ones, and re-running
# the cell overwrites it rather than silently adding a second 'collectable'
# column. A length mismatch with df still raises ValueError.
df['collectable'] = collectable

In [680]:
# Remove the row whose index label is 31.
# NOTE(review): the reason for excluding this pair is not recorded -- confirm.
df = df.drop(index=31)

In [681]:
def is_unique(s):
    """Return True when every value in ``s`` equals the first one.

    Despite the name, this does not test whether the values are all distinct:
    it reports whether the Series holds one single repeated value, i.e. whether
    the column is constant.

    s -- a pandas Series.

    An empty Series has no differing values and therefore yields True (indexing
    its first element would otherwise raise IndexError). The result is a plain
    ``bool``.
    """
    a = s.to_numpy()  # s.values (pandas<0.24)
    if a.size == 0:
        return True
    return bool((a[0] == a[1:]).all())

# Check whether the 'pitch_histogram_min' column holds one single repeated
# value across all rows.
is_unique(df['pitch_histogram_min'])
# Observed result: False

False

In [682]:
# Print the column labels currently present in df.
print(df.columns)

Index([                         0,                    'tempo',
                      'onset_cov',                'onset_var',
                      'onset_std',               'onset_mean',
                       'onset_av',             'onset_median',
                     'onset_perc',                'onset_ptp',
                      'onset_max',                'onset_min',
            'pitch_histogram_cov',      'pitch_histogram_var',
            'pitch_histogram_std',     'pitch_histogram_mean',
             'pitch_histogram_av',   'pitch_histogram_median',
           'pitch_histogram_perc',      'pitch_histogram_ptp',
            'pitch_histogram_max',      'pitch_histogram_min',
           'pitch_transition_cov',     'pitch_transition_var',
           'pitch_transition_std',    'pitch_transition_mean',
            'pitch_transition_av',  'pitch_transition_median',
          'pitch_transition_perc',     'pitch_transition_ptp',
           'pitch_transition_max',     'pitch_transitio

In [683]:
# Remove the column labelled 0 (the file-name column the frame was created
# from), so that it is not among the columns used further down.
df = df.drop(columns=[0])

In [684]:
# Remove the covariance columns of the pitch-transition matrix for both songs
# of each pair.
# The median and min transition columns of both songs were also candidates for
# removal, but they are currently kept.
df = df.drop(columns=['pitch_transition_cov', '_pitch_transition_cov'])

In [705]:
# Fixed seed so the 80/20 split (and every result derived from it) is the same
# on each run; without it every execution draws a different test set.
SPLIT_SEED = 42
train, test = train_test_split(df, test_size=0.2, random_state=SPLIT_SEED)

# Display the labels that ended up in the test partition.
test['collectable']

23    0
26    1
14    0
34    1
37    1
11    1
30    1
29    1
32    1
Name: collectable, dtype: int64

In [707]:
# Position of the label column, looked up by name instead of hard-coding 60, so
# the split keeps selecting 'collectable' even if feature columns are added or
# dropped in the cells above.
target = train.columns.get_loc('collectable')

# Features: every column to the left of the label column; labels: that column.
x_train = train.iloc[:, :target]
y_train = train.iloc[:, target]

x_test = test.iloc[:, :target]
y_test = test.iloc[:, target]
y_test.head()

23    0
26    1
14    0
34    1
37    1
Name: collectable, dtype: int64

In [708]:
# Display all held-out labels (the previous cell only showed the first five).
y_test

23    0
26    1
14    0
34    1
37    1
11    1
30    1
29    1
32    1
Name: collectable, dtype: int64

In [709]:
from keras.models import Sequential
from keras.layers import Dense

# Size the input layer from the training matrix instead of repeating a literal
# column count, so the network stays in sync with the feature columns.
n_features = x_train.shape[1]

# Small fully connected binary classifier: two ReLU hidden layers followed by a
# single sigmoid output unit.
model = Sequential()
model.add(Dense(units=60, activation='relu', input_dim=n_features))
model.add(Dense(units=30, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# x_train and y_train are passed as the pandas objects produced by the split
# above (a DataFrame slice and a Series), not as raw NumPy arrays.
model.fit(x_train, y_train, epochs=100, batch_size=5)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x7f548c262950>

In [710]:
# Run the trained model on the held-out features, in batches of 5.
# NOTE(review): despite the name, the values look like raw sigmoid outputs
# (one score per row), not thresholded class labels -- confirm before comparing
# them with y_test.
classes = model.predict(x_test, batch_size=5)

In [711]:
# Print the model outputs for the test rows.
print(classes)

[[0.19187501]
 [0.08911729]
 [0.14280123]
 [0.1977286 ]
 [0.05811958]
 [0.94754106]
 [0.0692631 ]
 [0.04360723]
 [0.42443615]]


In [712]:
# Display the true test labels for side-by-side comparison with the model
# outputs printed above.
y_test

23    0
26    1
14    0
34    1
37    1
11    1
30    1
29    1
32    1
Name: collectable, dtype: int64