In [27]:
# Useful starting lines
%matplotlib inline
import numpy as np
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Read data

In [28]:
from sqlalchemy import create_engine
import pandas as pd

path = "../Data/wjazzd.db"
engine = create_engine(f"sqlite:///{path}")

melody = pd.read_sql("melody", engine)
beats = pd.read_sql("beats", engine)

df_melody = pd.DataFrame(melody)
df_beats = pd.DataFrame(beats)

## Fill the chords column with the last known chord

In [29]:
#df_chords = df_beats[df_beats['chord'] != '']
df_chords = df_beats.replace({'chord': {'': np.nan}}).ffill()
#df_chords.to_csv('df_chords.csv')
df_chords = df_beats.copy()
df_chords.info()
df_chords.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 132329 entries, 0 to 132328
Data columns (total 10 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   beatid      132329 non-null  int64  
 1   melid       132329 non-null  int64  
 2   onset       132329 non-null  float64
 3   bar         132329 non-null  int64  
 4   beat        132329 non-null  int64  
 5   signature   132329 non-null  object 
 6   chord       132329 non-null  object 
 7   form        132329 non-null  object 
 8   bass_pitch  122540 non-null  float64
 9   chorus_id   132329 non-null  int64  
dtypes: float64(2), int64(5), object(3)
memory usage: 10.1+ MB


Unnamed: 0,beatid,melid,onset,bar,beat,signature,chord,form,bass_pitch,chorus_id
0,1,1,9.171882,-1,1,,,I1,42.0,0
1,2,1,9.488254,-1,2,,,,42.0,0
2,3,1,9.779955,-1,3,,,,40.0,0
3,4,1,10.052608,-1,4,,,,40.0,0
4,5,1,10.339796,0,1,,Bb6,,50.0,0
5,6,1,10.631542,0,2,,,,50.0,0
6,7,1,10.918163,0,3,,,,50.0,0
7,8,1,11.217007,0,4,,,,42.0,0
8,9,1,11.514127,1,1,4/4,Bb6,A1,42.0,1
9,10,1,11.82415,1,2,,,,42.0,1


## Extract corresponding row in beats using the (melid, bar, beat) features

In [30]:
# Define new index with the key (melid, bar, beat)
new_index = ['melid', 'bar', 'beat']
df_chords_new = df_chords.set_index(new_index, drop=True)
df_melody_new = df_melody.set_index(new_index, drop=True)

# Merge the dataframes using the new index and then reset the index again
df_beats_mel = df_chords_new.merge(df_melody_new, left_on=new_index, right_on=new_index, how='outer')
df_beats_mel = df_beats_mel.reset_index(drop=False)

df_beats_mel = df_beats_mel[['melid', 'bar', 'beat', 'beatid', 'onset_x', 'chord', 'bass_pitch', 'eventid', 'pitch', 'duration']]

df_beats_mel.head(15)


Unnamed: 0,melid,bar,beat,beatid,onset_x,chord,bass_pitch,eventid,pitch,duration
0,1,-1,1,1,9.171882,,42.0,,,
1,1,-1,2,2,9.488254,,42.0,,,
2,1,-1,3,3,9.779955,,40.0,,,
3,1,-1,4,4,10.052608,,40.0,,,
4,1,0,1,5,10.339796,Bb6,50.0,1.0,65.0,0.138776
5,1,0,2,6,10.631542,,50.0,2.0,63.0,0.171247
6,1,0,2,6,10.631542,,50.0,3.0,58.0,0.08127
7,1,0,3,7,10.918163,,50.0,4.0,61.0,0.235102
8,1,0,4,8,11.217007,,42.0,5.0,63.0,0.130612
9,1,1,1,9,11.514127,Bb6,42.0,6.0,58.0,0.188662


In [31]:
df_beats_mel.loc[df_beats_mel['melid']==2].head(15)

Unnamed: 0,melid,bar,beat,beatid,onset_x,chord,bass_pitch,eventid,pitch,duration
735,2,1,1,530,2.78059,Eb7,34.0,,,
736,2,1,2,531,3.105669,,36.0,531.0,70.0,0.119002
737,2,1,3,532,3.413333,,37.0,532.0,73.0,0.156735
738,2,1,3,532,3.413333,,37.0,533.0,75.0,0.110295
739,2,1,4,533,3.72542,,38.0,,,
740,2,2,1,534,4.032653,Ab7,39.0,534.0,63.0,0.08127
741,2,2,2,535,4.35102,,40.0,535.0,66.0,0.148027
742,2,2,3,536,4.669388,,41.0,536.0,68.0,0.557279
743,2,2,4,537,4.99161,,41.0,,,
744,2,3,1,538,5.297959,Eb7,42.0,,,


In [32]:
df_beats_mel.loc[df_beats_mel['melid']==3].head(15)

Unnamed: 0,melid,bar,beat,beatid,onset_x,chord,bass_pitch,eventid,pitch,duration
1590,3,0,1,1061,2.052063,C7alt,48.0,1197.0,64.0,0.103039
1591,3,0,1,1061,2.052063,C7alt,48.0,1198.0,60.0,0.062404
1592,3,0,2,1062,2.487438,,36.0,1199.0,58.0,0.111746
1593,3,0,2,1062,2.487438,,36.0,1200.0,62.0,0.12771
1594,3,0,3,1063,2.893878,,37.0,1201.0,60.0,0.119002
1595,3,0,3,1063,2.893878,,37.0,1202.0,58.0,0.091429
1596,3,0,3,1063,2.893878,,37.0,1203.0,61.0,0.095782
1597,3,0,4,1064,3.330612,,36.0,1204.0,63.0,0.111746
1598,3,0,4,1064,3.330612,,36.0,1205.0,61.0,0.081633
1599,3,0,4,1064,3.330612,,36.0,1206.0,60.0,0.077551


In [33]:
import sys
sys.path.append('../data')

from combine_melody_beats import encode_pitch
from multi_hot_encoding import preprocess_chords

## Note sequence encoding

In [43]:
beats_encoded = preprocess_chords(df_beats)
#beats_encoded.drop(['signature', 'onset', 'form', 'chorus_id'], axis=1, inplace=True)
pitch_per_chord = encode_pitch(df_melody, beats_encoded, pitch_sequence=False)
pitch_sequence_per_chord = encode_pitch(df_melody, beats_encoded, pitch_sequence=True)

  beats['chord_info'] = beats['chord_info'].str.replace('\/(.*)','')


In [44]:
pitch_per_chord.head(15)

Unnamed: 0,melid,bar,beat,beatid,chord,final_pitch,chord_info,bass_pitch,new_chord,mode,extra_note,eventid,pitch,duration,pitch_encoded,bass_pitch_encoded
0,1,-1,1,1,-1,-1,-1,42.0,-1,0,-1,-1.0,-1.0,-1.0,-1,6
1,1,-1,2,2,-1,-1,-1,42.0,-1,0,-1,-1.0,-1.0,-1.0,-1,6
2,1,-1,3,3,-1,-1,-1,40.0,-1,0,-1,-1.0,-1.0,-1.0,-1,4
3,1,-1,4,4,-1,-1,-1,40.0,-1,0,-1,-1.0,-1.0,-1.0,-1,4
4,1,0,1,5,Bb6,A#,6,50.0,A#6,0,6,1.0,65.0,0.1388,5,2
5,1,0,2,6,Bb6,A#,6,50.0,A#6,0,6,2.0,63.0,0.1712,3,2
6,1,0,2,6,Bb6,A#,6,50.0,A#6,0,6,3.0,58.0,0.0813,10,2
7,1,0,3,7,Bb6,A#,6,50.0,A#6,0,6,4.0,61.0,0.2351,1,2
8,1,0,4,8,Bb6,A#,6,42.0,A#6,0,6,5.0,63.0,0.1306,3,6
9,1,1,1,9,Bb6,A#,6,42.0,A#6,0,6,6.0,58.0,0.1887,10,6


In [75]:
pitch_sequence_per_chord.head(15)

Unnamed: 0,melid,bar,beat,beatid,chord,final_pitch,chord_info,bass_pitch,new_chord,mode,extra_note,eventid,pitch,duration,pitch_encoded,pitch_sequence,bass_pitch_sequence,duration_sequence
3,1,-1,4,4,-1,-1,-1,40.0,-1,0,-1,-1.0,-1.0,-1.0,-1,"[-1, -1, -1, -1]","[6, 6, 4, 4]","[-1.0, -1.0, -1.0, -1.0]"
10,1,1,2,10,Bb6,A#,6,42.0,A#6,0,6,7.0,58.0,0.4818,10,"[5, 3, 10, 1, 3, 10, 10]","[2, 2, 2, 2, 6, 6, 6]","[0.1388, 0.1712, 0.0813, 0.2351, 0.1306, 0.188..."
12,1,1,4,12,G7,G,7,43.0,G7,0,7,-1.0,-1.0,-1.0,-1,"[-1, -1]","[7, 7]","[-1.0, -1.0]"
14,1,2,2,14,C-7,C,-7,40.0,C-7,0,7,-1.0,-1.0,-1.0,-1,"[-1, -1]","[3, 4]","[-1.0, -1.0]"
16,1,2,4,16,F7,F,7,42.0,F7,0,7,-1.0,-1.0,-1.0,-1,"[-1, -1]","[5, 6]","[-1.0, -1.0]"
18,1,3,2,18,Bb,A#,7,39.0,A#,0,7,-1.0,-1.0,-1.0,-1,"[-1, -1]","[2, 3]","[-1.0, -1.0]"
21,1,3,4,20,G-7,G,-7,42.0,G-7,0,7,10.0,60.0,0.1103,0,"[2, 9, 0]","[5, 6, 6]","[0.1596, 0.1451, 0.1103]"
24,1,4,2,22,C-7,C,-7,38.0,C-7,0,7,13.0,58.0,0.09,10,"[10, 7, 10]","[7, 7, 2]","[0.1306, 0.0972, 0.09]"
27,1,4,4,24,F7,F,7,33.0,F7,0,7,16.0,58.0,0.0871,10,"[1, 0, 10]","[3, 9, 9]","[0.283, 0.1509, 0.0871]"
29,1,5,2,26,F-7,F,-7,32.0,F-7,0,7,18.0,58.0,0.1335,10,"[0, 10]","[10, 8]","[0.3837, 0.1335]"


In [37]:
pitch_sequence_per_chord.loc[pitch_sequence_per_chord['melid']==2].head(15)

Unnamed: 0,melid,bar,beat,beatid,chord,final_pitch,chord_info,bass_pitch,new_chord,mode,extra_note,eventid,pitch,duration,pitch_encoded,pitch_sequence,bass_pitch_sequence,duration_sequence
739,2,1,4,533,Eb7,D#,7,38.0,D#7,0,7,-1.0,-1.0,-1.0,-1,"[-1, 10, 1, 3, -1]","[10, 0, 1, 1, 2]","[-1.0, 0.11900226757369614, 0.156734693877551,..."
743,2,2,4,537,Ab7,G#,7,41.0,G#7,0,7,-1.0,-1.0,-1.0,-1,"[3, 6, 8, -1]","[3, 4, 5, 5]","[0.08126984126984127, 0.14802721088435375, 0.5..."
752,2,4,4,545,Eb7,D#,7,37.0,D#7,0,7,541.0,70.0,0.072562,10,"[-1, 3, 7, 10, -1, -1, -1, 7, 10]","[6, 7, 8, 8, 2, 1, 1, 1, 1]","[-1.0, 0.08417233560090703, 0.1770521541950113..."
762,2,6,4,553,Ab7,G#,7,43.0,G#7,0,7,548.0,73.0,0.08127,1,"[-1, 10, 3, 10, -1, 10, 1, 3, -1, 1]","[0, 0, 0, 0, 0, 10, 10, 10, 9, 7]","[-1.0, 0.1102947845804998, 0.18866213151927438..."
772,2,8,4,561,Eb7,D#,7,37.0,D#7,0,7,554.0,73.0,0.159637,1,"[5, 10, -1, -1, -1, -1, 10, 3, 10, 1]","[7, 7, 8, 10, 11, 0, 4, 4, 7, 1]","[0.18866213151927438, 0.14222222222222222, -1...."
778,2,9,4,565,F-7,F,-7,39.0,F-7,0,7,559.0,68.0,0.644354,8,"[-1, 10, 3, 1, 10, 8]","[0, 7, 5, 5, 3, 3]","[-1.0, 0.12480725623582767, 0.1654421768707483..."
782,2,10,4,569,Bb7,A#,7,43.0,A#7,0,7,561.0,68.0,0.159637,8,"[-1, -1, 10, 8]","[2, 0, 10, 7]","[-1.0, -1.0, 0.47020408163265304, 0.1596371882..."
788,2,11,4,573,Eb7,D#,7,43.0,D#7,0,7,-1.0,-1.0,-1.0,-1,"[5, 7, 3, 1, 3, -1]","[7, 7, 11, 11, 0, 7]","[0.0783673469387755, 0.23219954648526078, 0.20..."
792,2,12,4,577,Bb7,A#,7,43.0,A#7,0,7,-1.0,-1.0,-1.0,-1,"[-1, -1, -1, -1]","[5, 7, 8, 7]","[-1.0, -1.0, -1.0, -1.0]"
799,2,13,4,581,Eb7,D#,7,43.0,D#7,0,7,573.0,75.0,0.432472,3,"[3, 3, 3, 7, 10, 0, 3]","[3, 3, 3, 8, 8, 7, 7]","[0.11319727891156463, 0.11319727891156463, 0.1..."


In [38]:
pitch_sequence_per_chord.loc[pitch_sequence_per_chord['melid']==3].head(15)

Unnamed: 0,melid,bar,beat,beatid,chord,final_pitch,chord_info,bass_pitch,new_chord,mode,extra_note,eventid,pitch,duration,pitch_encoded,pitch_sequence,bass_pitch_sequence,duration_sequence
1600,3,0,4,1064,C7alt,C,7alt,36.0,C7alt,0,7,1207.0,58.0,0.058163,10,"[4, 0, 10, 2, 0, 10, 1, 3, 1, 0, 10]","[0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0]","[0.10303854875283447, 0.062403628117913834, 0...."
1616,3,2,4,1072,Fj7,F,7,38.0,F7,0,7,1222.0,72.0,0.127551,0,"[9, -1, 0, 9, 0, 4, 7, 4, 0, 4, 7, 4, 5, 9, 2, 0]","[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 9, 9, 9, 2, 2]","[0.11938775510204082, -1.0, 0.1088435374149659..."
1627,3,4,4,1080,G7alt,G,7alt,43.0,G7alt,0,7,1231.0,67.0,0.093878,7,"[-1, -1, 7, 9, 11, 3, 11, 2, 11, 2, 7]","[7, 2, 11, 0, 0, 2, 7, 7, 7, 7, 7]","[-1.0, -1.0, 0.09287981859410431, 0.1044897959..."
1634,3,5,4,1084,G-7,G,-7,43.0,G-7,0,7,1236.0,64.0,0.114286,4,"[-1, -1, 10, 9, 7, 5, 4]","[7, 7, 1, 1, 7, 7, 7]","[-1.0, -1.0, 0.08061224489795918, 0.1010204081..."
1643,3,6,4,1088,C7,C,7,34.0,C7,0,7,1245.0,60.0,0.107392,0,"[7, 5, 4, 2, 0, 0, 2, 7, 0]","[0, 0, 0, 0, 10, 10, 10, 10, 10]","[0.10102040816326531, 0.10612244897959183, 0.0..."
1648,3,7,4,1092,Am7b5,A,-7b5,33.0,A-7b5,0,7b5,-1.0,-1.0,-1.0,-1,"[-1, -1, 9, 0, -1]","[9, 9, 9, 9, 9]","[-1.0, -1.0, 0.05950113378684807, 0.1523809523..."
1660,3,8,4,1096,D79b,D,7,38.0,D7,0,7,1258.0,57.0,0.090703,9,"[-1, 3, 2, 0, 10, 8, 9, 0, 9, 10, 7, 9]","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]","[-1.0, 0.08562358276643992, 0.0798185941043083..."
1665,3,9,4,1100,G-7,G,-7,38.0,G-7,0,7,-1.0,-1.0,-1.0,-1,"[-1, 9, 0, 10, -1]","[10, 10, 2, 2, 2]","[-1.0, 0.12448979591836734, 0.2204081632653061..."
1672,3,10,4,1104,A79b,A,7,40.0,A7,0,7,1267.0,64.0,0.210204,4,"[-1, 9, 7, 10, 9, 3, 4]","[9, 9, 9, 4, 4, 4, 4]","[-1.0, 0.23265306122448978, 0.1, 0.24081632653..."
1680,3,11,4,1108,D7,D,7,38.0,D7,0,7,1274.0,62.0,0.057143,2,"[6, 9, 6, -1, 2, 4, 1, 2]","[2, 2, 2, 9, 2, 2, 2, 2]","[0.3795918367346939, 0.24081632653061225, 0.21..."


## Create tuple with pitch and duration