# Digital Musicology (DH-401)
## Assignment 3: Similarity
Group 6
- Mickaël Achkar
- Yichen Wang
- Yinghui Jiang

In [36]:
import ms3
import pandas as pd
import numpy as np
import math
import fractions
import matplotlib.pyplot as plt
# Allow up to 500 rows to be rendered before pandas truncates a displayed frame
pd.options.display.max_rows = 500

## 0. Dataset preparation

In [39]:
%%time

# Suppress SettingWithCopy warnings
pd.options.mode.chained_assignment = None

# Import CSV
chorales_dataset = pd.read_csv('DM 2022 - Assignment 3 [Dataset].csv', converters = {'mn_onset': fractions.Fraction, 'act_dur': fractions.Fraction, 'duration': fractions.Fraction, 'nominal_duration': fractions.Fraction, 'scalar': fractions.Fraction}, index_col = 0, low_memory = False)

print(chorales_dataset[chorales_dataset['gracenote'].notna()])
# Add global onset column for easy time series manipulations
chorales_dataset['global_onset'] = chorales_dataset.groupby(['piece','staff'])['duration'].transform(pd.Series.cumsum)

# Ignore gracenotes (there is only one)
chorales_dataset = chorales_dataset[chorales_dataset['gracenote'].isna()].drop(columns = ['gracenote'])

# Remove Chorale043 because it is corrupted (it does not open in MuseScore and it has only 1 staff and no closing fermata)
chorales_dataset = chorales_dataset[chorales_dataset['piece'] != 'BachChorales/Chorale043']

# Get unique piece/staff pairs
piece_staffs = chorales_dataset[['piece', 'staff']].drop_duplicates().to_numpy()

# Consider only until the fermata in or after the 4th bar
fermata_data = []
for piece, staff in piece_staffs:
    for idx, row in chorales_dataset[(chorales_dataset['piece'] == piece) & (chorales_dataset['staff'] == staff)].iterrows():
        fermata_data.append(row)
        if row['fermata'] and row['mn'] >= 4:
            break   
fermata_df = pd.DataFrame(fermata_data)

# Sort by onset and staff for consistent alignment
fermata_df = fermata_df.sort_values(by=['piece', 'mn', 'mn_onset', 'staff']).reset_index(drop=True)

                       piece  mn mn_onset timesig act_dur  staff  voice  \
n                                                                         
197  BachChorales/Chorale209  16      1/4     3/4     3/4      1      1   

    duration nominal_duration scalar  tied  tpc  midi     gracenote  fermata  
n                                                                             
197        0              1/8      1   NaN   -3    75  acciaccatura    False  
CPU times: user 13.1 s, sys: 216 ms, total: 13.4 s
Wall time: 13.7 s


In [38]:
# Display the prepared note table (pandas truncates the rendering to its first and last rows)
fermata_df

Unnamed: 0,piece,mn,mn_onset,timesig,act_dur,staff,voice,duration,nominal_duration,scalar,tied,tpc,midi,fermata,global_onset
0,BachChorales/Chorale001,0,1/2,3/4,1/4,1,1,1/4,1/4,1,,1,67,False,1/4
1,BachChorales/Chorale001,0,1/2,3/4,1/4,2,1,1/4,1/4,1,,2,62,False,1/4
2,BachChorales/Chorale001,0,1/2,3/4,1/4,3,1,1/4,1/4,1,,5,59,False,1/4
3,BachChorales/Chorale001,0,1/2,3/4,1/4,4,1,1/4,1/4,1,,1,43,False,1/4
4,BachChorales/Chorale001,1,0,3/4,3/4,1,1,1/2,1/2,1,,1,67,False,3/4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26816,BachChorales/Chorale371,4,3/8,4/4,3/4,3,1,1/8,1/8,1,,3,57,False,15/4
26817,BachChorales/Chorale371,4,1/2,4/4,3/4,1,1,1/4,1/4,1,,4,64,True,4
26818,BachChorales/Chorale371,4,1/2,4/4,3/4,2,1,1/4,1/4,1,,5,59,True,4
26819,BachChorales/Chorale371,4,1/2,4/4,3/4,3,1,1/4,1/4,1,,1,55,True,4


## I. Similarity parameters
### a. Melodic contour

In [34]:
# TODO: compute the melodic contour as the average of pitches over time
# TODO: discretize pitch into 100 steps
# Draft (incomplete): fermata_df.groupby(['piece','mn','mn_onset', ...

In [35]:
# Preview the first 200 rows of the prepared note table
fermata_df.iloc[:200]

Unnamed: 0,piece,mn,mn_onset,timesig,act_dur,staff,voice,duration,nominal_duration,scalar,tied,tpc,midi,fermata,global_onset
0,BachChorales/Chorale001,0,1/2,3/4,1/4,1,1,1/4,1/4,1,,1,67,False,1/4
1,BachChorales/Chorale001,0,1/2,3/4,1/4,2,1,1/4,1/4,1,,2,62,False,1/4
2,BachChorales/Chorale001,0,1/2,3/4,1/4,3,1,1/4,1/4,1,,5,59,False,1/4
3,BachChorales/Chorale001,0,1/2,3/4,1/4,4,1,1/4,1/4,1,,1,43,False,1/4
4,BachChorales/Chorale001,1,0,3/4,3/4,1,1,1/2,1/2,1,,1,67,False,3/4
5,BachChorales/Chorale001,1,0,3/4,3/4,2,1,1/4,1/4,1,,2,62,False,1/2
6,BachChorales/Chorale001,1,0,3/4,3/4,3,1,1/4,1/4,1,,5,59,False,1/2
7,BachChorales/Chorale001,1,0,3/4,3/4,4,1,1/4,1/4,1,,1,55,False,1/2
8,BachChorales/Chorale001,1,1/4,3/4,3/4,2,1,1/4,1/4,1,,4,64,False,3/4
9,BachChorales/Chorale001,1,1/4,3/4,3/4,3,1,1/8,1/8,1,,0,60,False,5/8
