In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
%load_ext autoreload
%autoreload 2

## Read data

In [2]:
from sqlalchemy import create_engine
import pandas as pd

path = "../Data/wjazzd.db"
engine = create_engine(f"sqlite:///{path}")

melody = pd.read_sql("melody", engine)
beats = pd.read_sql("beats", engine)

df_melody = pd.DataFrame(melody)
df_beats = pd.DataFrame(beats)

## Fill the chords column with the last known chord

In [3]:
#df_chords = df_beats[df_beats['chord'] != '']
df_chords = df_beats.replace({'chord': {'': np.nan}}).ffill()
#df_chords.to_csv('df_chords.csv')
df_chords = df_beats.copy()
df_chords.info()
df_chords.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 132329 entries, 0 to 132328
Data columns (total 10 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   beatid      132329 non-null  int64  
 1   melid       132329 non-null  int64  
 2   onset       132329 non-null  float64
 3   bar         132329 non-null  int64  
 4   beat        132329 non-null  int64  
 5   signature   132329 non-null  object 
 6   chord       132329 non-null  object 
 7   form        132329 non-null  object 
 8   bass_pitch  122540 non-null  float64
 9   chorus_id   132329 non-null  int64  
dtypes: float64(2), int64(5), object(3)
memory usage: 10.1+ MB


Unnamed: 0,beatid,melid,onset,bar,beat,signature,chord,form,bass_pitch,chorus_id
0,1,1,9.171882,-1,1,,,I1,42.0,0
1,2,1,9.488254,-1,2,,,,42.0,0
2,3,1,9.779955,-1,3,,,,40.0,0
3,4,1,10.052608,-1,4,,,,40.0,0
4,5,1,10.339796,0,1,,Bb6,,50.0,0
5,6,1,10.631542,0,2,,,,50.0,0
6,7,1,10.918163,0,3,,,,50.0,0
7,8,1,11.217007,0,4,,,,42.0,0
8,9,1,11.514127,1,1,4/4,Bb6,A1,42.0,1
9,10,1,11.82415,1,2,,,,42.0,1


## Extract corresponding row in beats using the (melid, bar, beat) features

In [4]:
# Define new index with the key (melid, bar, beat)
new_index = ['melid', 'bar', 'beat']
df_chords_new = df_chords.set_index(new_index, drop=True)
df_melody_new = df_melody.set_index(new_index, drop=True)

# Merge the dataframes using the new index and then reset the index again
df_beats_mel = df_chords_new.merge(df_melody_new, left_on=new_index, right_on=new_index, how='outer')
df_beats_mel = df_beats_mel.reset_index(drop=False)

df_beats_mel = df_beats_mel[['melid', 'bar', 'beat', 'beatid', 'onset_x', 'chord', 'bass_pitch', 'eventid', 'pitch', 'duration']]

df_beats_mel.head(30)


Unnamed: 0,melid,bar,beat,beatid,onset_x,chord,bass_pitch,eventid,pitch,duration
0,1,-1,1,1,9.171882,,42.0,,,
1,1,-1,2,2,9.488254,,42.0,,,
2,1,-1,3,3,9.779955,,40.0,,,
3,1,-1,4,4,10.052608,,40.0,,,
4,1,0,1,5,10.339796,Bb6,50.0,1.0,65.0,0.138776
5,1,0,2,6,10.631542,,50.0,2.0,63.0,0.171247
6,1,0,2,6,10.631542,,50.0,3.0,58.0,0.08127
7,1,0,3,7,10.918163,,50.0,4.0,61.0,0.235102
8,1,0,4,8,11.217007,,42.0,5.0,63.0,0.130612
9,1,1,1,9,11.514127,Bb6,42.0,6.0,58.0,0.188662


In [16]:
import sys
sys.path.append('../data')

from combine_melody_beats import encode_pitch
from multi_hot_encoding import encode_chords

## Note sequence encoding

In [22]:
beats_encoded = encode_chords(df_beats)
beats_encoded.drop(['signature', 'onset', 'form', 'chorus_id'], axis=1, inplace=True)
pitch_per_chord = encode_pitch(df_melody, beats_encoded, pitch_sequence=False)
pitch_sequence_per_chord = encode_pitch(df_melody, beats_encoded, pitch_sequence=True)

  beats['chord_info'] = beats['chord_info'].str.replace('+','')
  beats['chord_info'] = beats['chord_info'].str.replace('\/(.*)','')


In [23]:
pitch_per_chord.head(15)

Unnamed: 0,melid,bar,beat,beatid,chord,bass_pitch,root_pitch,mod,mod2,chord_info,complete_pitch,final_pitch,new_chord,minor,eventid,pitch,duration,pitch_encoded,bass_pitch_encoded
0,1,-1,1,1,-1,42.0,-1,-1,-1,-1,-1,-1,-1,0,-1.0,-1.0,-1.0,-1,6
1,1,-1,2,2,-1,42.0,-1,-1,-1,-1,-1,-1,-1,0,-1.0,-1.0,-1.0,-1,6
2,1,-1,3,3,-1,40.0,-1,-1,-1,-1,-1,-1,-1,0,-1.0,-1.0,-1.0,-1,4
3,1,-1,4,4,-1,40.0,-1,-1,-1,-1,-1,-1,-1,0,-1.0,-1.0,-1.0,-1,4
4,1,0,1,5,Bb6,50.0,B,b,b,6,Bb,A#,A#6,0,1.0,65.0,0.138776,5,2
5,1,0,2,6,Bb6,50.0,B,b,b,6,Bb,A#,A#6,0,2.0,63.0,0.171247,3,2
6,1,0,2,6,Bb6,50.0,B,b,b,6,Bb,A#,A#6,0,3.0,58.0,0.08127,10,2
7,1,0,3,7,Bb6,50.0,B,b,b,6,Bb,A#,A#6,0,4.0,61.0,0.235102,1,2
8,1,0,4,8,Bb6,42.0,B,b,b,6,Bb,A#,A#6,0,5.0,63.0,0.130612,3,6
9,1,1,1,9,Bb6,42.0,B,b,b,6,Bb,A#,A#6,0,6.0,58.0,0.188662,10,6


In [24]:
pitch_sequence_per_chord.head(15)

Unnamed: 0,melid,bar,beat,beatid,chord,bass_pitch,root_pitch,mod,mod2,chord_info,complete_pitch,final_pitch,new_chord,minor,eventid,pitch,duration,pitch_encoded,pitch_sequence,bass_pitch_sequence
3,1,-1,4,4,-1,40.0,-1,-1,-1,-1,-1,-1,-1,0,-1.0,-1.0,-1.0,-1,"[-1, -1, -1, -1]","[6, 6, 4, 4]"
10,1,1,2,10,Bb6,42.0,B,b,b,6,Bb,A#,A#6,0,7.0,58.0,0.481814,10,"[5, 3, 10, 1, 3, 10, 10]","[2, 2, 2, 2, 6, 6, 6]"
12,1,1,4,12,G7,43.0,G,7,b,7,G,G,G7,0,-1.0,-1.0,-1.0,-1,"[-1, -1]","[7, 7]"
14,1,2,2,14,C-7,40.0,C,-,b,7,C,C,C-7,0,-1.0,-1.0,-1.0,-1,"[-1, -1]","[3, 4]"
16,1,2,4,16,F7,42.0,F,7,b,7,F,F,F7,0,-1.0,-1.0,-1.0,-1,"[-1, -1]","[5, 6]"
18,1,3,2,18,Bb,39.0,B,b,b,7,Bb,A#,A#,0,-1.0,-1.0,-1.0,-1,"[-1, -1]","[2, 3]"
21,1,3,4,20,G-7,42.0,G,-,b,7,G,G,G-7,0,10.0,60.0,0.110295,0,"[2, 9, 0]","[5, 6, 6]"
24,1,4,2,22,C-7,38.0,C,-,b,7,C,C,C-7,0,13.0,58.0,0.089977,10,"[10, 7, 10]","[7, 7, 2]"
27,1,4,4,24,F7,33.0,F,7,b,7,F,F,F7,0,16.0,58.0,0.087075,10,"[1, 0, 10]","[3, 9, 9]"
29,1,5,2,26,F-7,32.0,F,-,b,7,F,F,F-7,0,18.0,58.0,0.133515,10,"[0, 10]","[10, 8]"
