In [35]:
# Useful starting lines
%matplotlib inline
import numpy as np
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Read data

In [36]:
from sqlalchemy import create_engine
import pandas as pd

path = "../Data/wjazzd.db"
engine = create_engine(f"sqlite:///{path}")

melody = pd.read_sql("melody", engine)
beats = pd.read_sql("beats", engine)

df_melody = pd.DataFrame(melody)
df_beats = pd.DataFrame(beats)

## Fill the chords column with the last known chord

In [37]:
#df_chords = df_beats[df_beats['chord'] != '']
df_chords = df_beats.replace({'chord': {'': np.nan}}).ffill()
#df_chords.to_csv('df_chords.csv')
df_chords = df_beats.copy()
df_chords.info()
df_chords.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 132329 entries, 0 to 132328
Data columns (total 10 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   beatid      132329 non-null  int64  
 1   melid       132329 non-null  int64  
 2   onset       132329 non-null  float64
 3   bar         132329 non-null  int64  
 4   beat        132329 non-null  int64  
 5   signature   132329 non-null  object 
 6   chord       132329 non-null  object 
 7   form        132329 non-null  object 
 8   bass_pitch  122540 non-null  float64
 9   chorus_id   132329 non-null  int64  
dtypes: float64(2), int64(5), object(3)
memory usage: 10.1+ MB


Unnamed: 0,beatid,melid,onset,bar,beat,signature,chord,form,bass_pitch,chorus_id
0,1,1,9.171882,-1,1,,,I1,42.0,0
1,2,1,9.488254,-1,2,,,,42.0,0
2,3,1,9.779955,-1,3,,,,40.0,0
3,4,1,10.052608,-1,4,,,,40.0,0
4,5,1,10.339796,0,1,,Bb6,,50.0,0
5,6,1,10.631542,0,2,,,,50.0,0
6,7,1,10.918163,0,3,,,,50.0,0
7,8,1,11.217007,0,4,,,,42.0,0
8,9,1,11.514127,1,1,4/4,Bb6,A1,42.0,1
9,10,1,11.82415,1,2,,,,42.0,1


## Extract corresponding row in beats using the (melid, bar, beat) features

In [38]:
# Define new index with the key (melid, bar, beat)
new_index = ['melid', 'bar', 'beat']
df_chords_new = df_chords.set_index(new_index, drop=True)
df_melody_new = df_melody.set_index(new_index, drop=True)

# Merge the dataframes using the new index and then reset the index again
df_beats_mel = df_chords_new.merge(df_melody_new, left_on=new_index, right_on=new_index, how='outer')
df_beats_mel = df_beats_mel.reset_index(drop=False)

df_beats_mel = df_beats_mel[['melid', 'bar', 'beat', 'beatid', 'onset_x', 'chord', 'bass_pitch', 'eventid', 'pitch', 'duration']]

df_beats_mel.head(30)


Unnamed: 0,melid,bar,beat,beatid,onset_x,chord,bass_pitch,eventid,pitch,duration
0,1,-1,1,1,9.171882,,42.0,,,
1,1,-1,2,2,9.488254,,42.0,,,
2,1,-1,3,3,9.779955,,40.0,,,
3,1,-1,4,4,10.052608,,40.0,,,
4,1,0,1,5,10.339796,Bb6,50.0,1.0,65.0,0.138776
5,1,0,2,6,10.631542,,50.0,2.0,63.0,0.171247
6,1,0,2,6,10.631542,,50.0,3.0,58.0,0.08127
7,1,0,3,7,10.918163,,50.0,4.0,61.0,0.235102
8,1,0,4,8,11.217007,,42.0,5.0,63.0,0.130612
9,1,1,1,9,11.514127,Bb6,42.0,6.0,58.0,0.188662


In [39]:
# Encode pitch
df_beats_mel['pitch_encoded'] = np.mod(df_beats_mel['pitch'], 12)
df_beats_mel['bass_pitch_encoded'] = np.mod(df_beats_mel['bass_pitch'], 12)

#df_beats_mel['pitch_encoded'] = df_beats_mel['pitch_encoded'].astype(int)
#df_beats_mel['bass_pitch_encoded'] = df_beats_mel['bass_pitch_encoded'].astype(int)

## Encode sequence of pitch for every chord
# Add column that represent chord changes
df_beats_mel['chord_changed'] = (df_beats_mel['chord'].shift() != df_beats_mel["chord"]).cumsum()

# Group chord changes to get sequences
pitch_sequences = [g['pitch_encoded'].tolist() for k, g in df_beats_mel.groupby('chord_changed')]
bass_pitch_sequences = [g['bass_pitch_encoded'].tolist() for k, g in df_beats_mel.groupby('chord_changed')]

# Identify last row of current chord
df_beats_mel['pitch_sequence'] = (df_beats_mel['chord'].shift(-1) != df_beats_mel["chord"])

# Change type to type object to add list to cell
df_beats_mel['pitch_sequence'] = df_beats_mel['pitch_sequence'].astype(object)
df_beats_mel['bass_pitch_sequence'] = df_beats_mel['pitch_sequence']

# Set sequence to last chord
# TODO decrease running time if possible
# TODO discuss where the sequence should be added
for idx, _ in df_beats_mel.iterrows():
    if df_beats_mel.at[idx, 'pitch_sequence'] == True:
        df_beats_mel.at[idx, 'pitch_sequence'] = pitch_sequences.pop(0)
        df_beats_mel.at[idx, 'bass_pitch_sequence'] = bass_pitch_sequences.pop(0)

# Drop useless columns and rows
# TODO discuss if rows should be dropped or not
df_beats_mel.drop(['pitch_encoded', 'bass_pitch_encoded', 'chord_changed'], axis=1, inplace=True)
df_beats_mel.drop(df_beats_mel[df_beats_mel['pitch_sequence'] == False].index, inplace=True)

df_beats_mel.to_csv('../Data/df_beats_mel.csv')

df_beats_mel.info()
df_beats_mel.head(20)


<class 'pandas.core.frame.DataFrame'>
Int64Index: 59822 entries, 3 to 235638
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   melid                59822 non-null  int64  
 1   bar                  59822 non-null  int64  
 2   beat                 59822 non-null  int64  
 3   beatid               59822 non-null  int64  
 4   onset_x              59822 non-null  float64
 5   chord                59822 non-null  object 
 6   bass_pitch           57333 non-null  float64
 7   eventid              45639 non-null  float64
 8   pitch                45639 non-null  float64
 9   duration             45639 non-null  float64
 10  pitch_sequence       59822 non-null  object 
 11  bass_pitch_sequence  59822 non-null  object 
dtypes: float64(5), int64(4), object(3)
memory usage: 5.9+ MB


Unnamed: 0,melid,bar,beat,beatid,onset_x,chord,bass_pitch,eventid,pitch,duration,pitch_sequence,bass_pitch_sequence
3,1,-1,4,4,10.052608,,40.0,,,,"[nan, nan, nan, nan]","[6.0, 6.0, 4.0, 4.0]"
4,1,0,1,5,10.339796,Bb6,50.0,1.0,65.0,0.138776,[5.0],[2.0]
8,1,0,4,8,11.217007,,42.0,5.0,63.0,0.130612,"[3.0, 10.0, 1.0, 3.0]","[2.0, 2.0, 2.0, 6.0]"
9,1,1,1,9,11.514127,Bb6,42.0,6.0,58.0,0.188662,[10.0],[6.0]
10,1,1,2,10,11.82415,,42.0,7.0,58.0,0.481814,[10.0],[6.0]
11,1,1,3,11,12.129433,G7,43.0,,,,[nan],[7.0]
12,1,1,4,12,12.414785,,43.0,,,,[nan],[7.0]
13,1,2,1,13,12.703628,C-7,39.0,,,,[nan],[3.0]
14,1,2,2,14,12.98356,,40.0,,,,[nan],[4.0]
15,1,2,3,15,13.290816,F7,41.0,,,,[nan],[5.0]


In [40]:
import sys
sys.path.append('../')

from combine_melody_beats import encode_pitch

## Note sequence encoding

In [41]:
pitch_per_chord = encode_pitch(df_melody, df_beats, pitch_sequence=False)
pitch_sequence_per_chord = encode_pitch(df_melody, df_beats, pitch_sequence=True)

In [42]:
pitch_per_chord.head(15)

Unnamed: 0,melid,bar,beat,beatid,onset,signature,chord,form,bass_pitch,chorus_id,eventid,pitch,duration,pitch_encoded,bass_pitch_encoded
0,1,-1,1,1,9.171882,,,I1,42.0,0,,,,,6.0
1,1,-1,2,2,9.488254,,,I1,42.0,0,,,,,6.0
2,1,-1,3,3,9.779955,,,I1,40.0,0,,,,,4.0
3,1,-1,4,4,10.052608,,,I1,40.0,0,,,,,4.0
4,1,0,1,5,10.339796,,Bb6,I1,50.0,0,1.0,65.0,0.138776,5.0,2.0
5,1,0,2,6,10.631542,,Bb6,I1,50.0,0,2.0,63.0,0.171247,3.0,2.0
6,1,0,2,6,10.631542,,Bb6,I1,50.0,0,3.0,58.0,0.08127,10.0,2.0
7,1,0,3,7,10.918163,,Bb6,I1,50.0,0,4.0,61.0,0.235102,1.0,2.0
8,1,0,4,8,11.217007,,Bb6,I1,42.0,0,5.0,63.0,0.130612,3.0,6.0
9,1,1,1,9,11.514127,4/4,Bb6,A1,42.0,1,6.0,58.0,0.188662,10.0,6.0


In [43]:
pitch_sequence_per_chord.head(15)

Unnamed: 0,melid,bar,beat,beatid,onset,signature,chord,form,bass_pitch,chorus_id,eventid,pitch,duration,pitch_sequence,bass_pitch_sequence
0,1,-1,1,1,9.171882,,,I1,42.0,0,,,,[nan],[6.0]
1,1,-1,2,2,9.488254,,,I1,42.0,0,,,,[nan],[6.0]
2,1,-1,3,3,9.779955,,,I1,40.0,0,,,,[nan],[4.0]
3,1,-1,4,4,10.052608,,,I1,40.0,0,,,,[nan],[4.0]
10,1,1,2,10,11.82415,4/4,Bb6,A1,42.0,1,7.0,58.0,0.481814,"[5.0, 3.0, 10.0, 1.0, 3.0, 10.0, 10.0]","[2.0, 2.0, 2.0, 2.0, 6.0, 6.0, 6.0]"
12,1,1,4,12,12.414785,4/4,G7,A1,43.0,1,,,,"[nan, nan]","[7.0, 7.0]"
14,1,2,2,14,12.98356,4/4,C-7,A1,40.0,1,,,,"[nan, nan]","[3.0, 4.0]"
16,1,2,4,16,13.608299,4/4,F7,A1,42.0,1,,,,"[nan, nan]","[5.0, 6.0]"
18,1,3,2,18,14.192585,4/4,Bb,A1,39.0,1,,,,"[nan, nan]","[2.0, 3.0]"
21,1,3,4,20,14.747211,4/4,G-7,A1,42.0,1,10.0,60.0,0.110295,"[2.0, 9.0, 0.0]","[5.0, 6.0, 6.0]"
