In [1]:
import sys, os

root_dir = os.path.join(os.getcwd(), '..')
sys.path.append(root_dir)

import pandas as pd
from tqdm.notebook import tqdm
from pretty_midi import PrettyMIDI
import time

from src.db import reference_sets as db
from src.utils import strings
from src.evaluation import mgeval



pygame 2.0.1 (SDL 2.0.14, Python 3.7.9)
Hello from the pygame community. https://www.pygame.org/contribute.html


## Prepare database
Create the necessary tables if they do not exist yet.

In [2]:
# Make sure the reference-set tables are present before anything is stored.
db.create_tables()

# Register the 'theorytab' reference set and keep the value returned by
# store_ref_set so later cells can attach their rows to it.
# NOTE(review): the meaning of the positional values 17752 and 4 is defined by
# db.store_ref_set, which is not visible here - confirm against its signature.
set_id = db.store_ref_set(
    'theorytab',
    17752,
    4,
    'https://github.com/wayne391/symbolic-musical-datasets, '
    'melodies crawled from https://www.hooktheory.com/theorytab',
)

## Batch evaluation of all reference midis + store similarity distances per pair in DB

In [3]:


set_name = 'theorytab'
source_folder = '../data/reference_data/' + set_name

# Walk every folder below source_folder, analyse each call/response MIDI pair
# and store the resulting similarity distances in the DB under set_id.
# NOTE(review): the second argument (4) is passed unchanged to
# mgeval.analyze_midi_file for both files; its meaning is defined there.
t1 = time.time()
for root, dirs, files in tqdm(os.walk(source_folder)):
    for file in files:
        # A pair is driven by its "call" file; names containing "log" are skipped.
        if "call" not in file or "log" in file:
            continue

        # The first two characters of the file name identify the pair.
        pair_number = file[0:2]
        call_file = os.path.join(root, file)
        response_file = os.path.join(root, pair_number + "_response.mid")

        # The call file is no longer parsed a second time with PrettyMIDI:
        # that result was never used.
        call_analysis = mgeval.analyze_midi_file(call_file, 4)
        response_analysis = mgeval.analyze_midi_file(response_file, 4)
        similarity_distances = mgeval.calc_distances(call_analysis, response_analysis)

        # Folder path relative to source_folder. relpath is separator-agnostic,
        # unlike stripping a hard-coded "\\" prefix, which only matched on Windows.
        # Files located directly in source_folder yield '.'.
        song_name = os.path.relpath(root, source_folder)
        db.store_ref_data(set_id, song_name, pair_number, **similarity_distances)

t2 = time.time()
print("total time: " + str(t2-t1) + " sec.")

0it [00:00, ?it/s]

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


total time: 62228.700489759445 sec.


## Read the reference data of one data set into a pandas DataFrame

In [15]:
# Load the stored reference rows for set id 5 into a dataframe.
# NOTE(review): the id is hard-coded to 5 here, while the cells that store and
# fetch the stats use id 1 - confirm which set is intended.
df = db.ref_data_table_to_dataframe(5)

# Show the first five rows as a sanity check.
df.head(5)

Unnamed: 0,id,set_id,song_name,pair_number,pitch_count,pitch_count_per_bar,pitch_class_histogram,pitch_class_histogram_per_bar,pitch_class_transition_matrix,avg_pitch_interval,pitch_range,note_count,note_count_per_bar,note_length_histogram,note_length_transition_matrix,avg_ioi,ioi_histogram,ioi_transition_matrix
0,18,5,a_a-day-to-remember_downfall-of-us-all_intro-a...,1,2.0,0.0,0.198554,0.0,5.830952,0.608466,2.0,6.0,0.0,0.139849,5.477226,0.082504,0.117692,3.741657
1,19,5,a_a-day-to-remember_downfall-of-us-all_pre-cho...,1,1.0,0.0,0.511819,0.0,4.242641,0.230769,5.0,3.0,0.0,0.317655,6.0,0.02111,0.098896,3.872983
2,20,5,a_a-day-to-remember_downfall-of-us-all_pre-cho...,2,0.0,0.0,0.460652,0.0,7.416198,0.092732,0.0,2.0,0.0,0.095979,4.123106,0.035862,0.184861,5.09902
3,21,5,a_a-g-cook_superstar_chorus,1,1.0,0.0,0.104842,0.0,2.44949,0.041739,0.0,2.0,0.0,0.09124,2.44949,0.026957,0.126917,3.162278
4,22,5,a_a-g-cook_superstar_pre-chorus-and-chorus,1,0.0,0.0,0.002156,0.0,1.414214,0.041481,0.0,2.0,0.0,0.093808,3.464102,0.012165,0.047266,2.44949


## Calculate Q1, Median, Q3, Min and Max

In [12]:
# Drop the unnecessary bookkeeping fields first so that only the distance
# columns take part in the statistics (the text column song_name would
# otherwise break quantile() on pandas versions that do not skip it).
distances = df.drop(columns=['id', 'set_id', 'pair_number', 'song_name'])
# NOTE(review): an earlier revision also dropped the two per-bar distances
# (pitch count / note count per bar) because they could not be calculated -
# confirm whether that still applies to the current column set.

# Q1, median and Q3 per distance column; the index is [0.25, 0.5, 0.75].
quantiles = distances.quantile([0.25, 0.5, 0.75])

# Column-wise extremes, named so that they become the 'min' / 'max' rows.
minimum = distances.min()
minimum.name = 'min'
maximum = distances.max()
maximum.name = 'max'

# DataFrame.append was removed in pandas 2.0; build the two extra rows as a
# frame (row labels come from the Series names) and concatenate instead.
stats = pd.concat([quantiles, pd.DataFrame([minimum, maximum])])
stats

Unnamed: 0,pitch_count,pitch_count_per_bar,pitch_class_histogram,pitch_class_histogram_per_bar,pitch_class_transition_matrix,avg_pitch_interval,pitch_range,note_count,note_count_per_bar,note_length_histogram,note_length_transition_matrix,avg_ioi,ioi_histogram,ioi_transition_matrix
0.25,,,,,,,,,,,,,,
0.5,,,,,,,,,,,,,,
0.75,,,,,,,,,,,,,,
min,,,,,,,,,,,,,,
max,,,,,,,,,,,,,,


## Store the stats in the DB as a reference for normalization

In [None]:
# Serialise the summary table to JSON with orient='index', i.e. keyed by the
# row labels of `stats`.
stats_json = stats.to_json(orient='index')
# Persist the JSON for the reference set with id 1.
# NOTE(review): the id is hard-coded to 1, while the dataframe the stats were
# computed from was read with id 5 - confirm that this is the intended set.
db.update_avg_distances_for_set(1, stats_json)

## Test fetch stats from DB

In [None]:
# Read the stored stats for set id 1 back from the DB; as the last expression
# of the cell, the result is displayed as the cell output.
db.ref_set_stats_to_dataframe(1)

Unnamed: 0,pitch_count_distance,pitch_class_histogram_distance,pitch_class_histogram_per_bar_distance,pitch_class_transition_matrix_distance,avg_pitch_interval_distance,pitch_range_distance,note_count_distance,note_length_histogram_distance,note_length_transition_matrix_distance,avg_ioi_distance
0.25,0,0.138124,0.138124,2.645751,0.125,0,1,0.088554,2.44949,0.00712
0.5,1,0.29244,0.29244,4.242641,0.4,2,2,0.201864,4.358899,0.040212
0.75,2,0.4604,0.4604,6.244998,0.9,5,5,0.353553,6.480741,0.115665
min,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0
max,38,1.414214,1.414214,274.978181,17.063492,54,74,1.414214,96.145723,10.000758
