In [1]:
from datasets import load_dataset
import pandas as pd
import numpy as np
import os
import re

In [2]:
def get_track_id_and_value(root, file_name):
    """Return ``(track_id, text)`` for a single annotation file.

    Args:
        root: Directory that contains the file.
        file_name: Name of the file inside ``root``.

    Returns:
        A tuple ``(track_id, text)`` where ``track_id`` is the part of
        ``file_name`` that precedes the first run of dots/whitespace, and
        ``text`` is the first line of the file with surrounding whitespace
        removed (an empty string when the file is empty).
    """
    # Only the leading token is needed, so stop after the first separator run.
    track_id = re.split(r'[.\s]+', file_name, maxsplit=1)[0]

    annotation_path = os.path.join(root, file_name)
    with open(annotation_path, 'r') as annotation_file:
        first_line = annotation_file.readline()

    return track_id, first_line.strip()

In [3]:
base_key_dir = '../dataset/giantsteps-key-dataset/annotations/key'
# Fail fast: os.walk() yields nothing for a missing directory, which would
# otherwise silently produce an empty DataFrame.
assert os.path.isdir(base_key_dir), f'{base_key_dir} does not exist'

# Only files with this suffix are read as reference key annotations.
file_suffix = '.LOFI.key'

# Collect one (track_id, key) pair per matching annotation file.
track_id_list = []
base_key_list = []
for root, _, files in os.walk(base_key_dir):
    for file_name in files:
        if file_name.endswith(file_suffix):
            track_id, key = get_track_id_and_value(root, file_name)
            track_id_list.append(track_id)
            base_key_list.append(key)

# track_id values stay strings, exactly as parsed from the file names.
key_df = pd.DataFrame.from_dict({'track_id' : track_id_list,
                                  'base_key' : base_key_list})
key_df

Unnamed: 0,track_id,base_key
0,1092676,F minor
1,4328388,F minor
2,2680040,A minor
3,3935095,E minor
4,3667215,Ab minor
...,...,...
599,4615188,Ab minor
600,2031094,A minor
601,667508,D minor
602,511348,E minor


In [4]:
pseudo_key_dir = '../result/giantsteps/key'
assert os.path.exists(pseudo_key_dir), f'{pseudo_key_dir} does not exist'

# Number of rows per track id, computed once so that the per-file sanity check
# below does not rescan the whole frame.
track_id_counts = key_df['track_id'].value_counts()

# Gather the estimated key of every "*.key.txt" file (case-insensitive suffix),
# keyed by track id. If several files map to the same id, the last one wins.
pseudo_key_by_track_id = {}
for root, _, files in os.walk(pseudo_key_dir):
    for file_name in files:
        if file_name.lower().endswith('.key.txt'):
            track_id, key = get_track_id_and_value(root, file_name)
            # Every estimate must belong to exactly one annotated track.
            assert track_id_counts.get(track_id, 0) == 1
            pseudo_key_by_track_id[track_id] = key

# Assign the whole column in one step. Tracks without an estimate become NaN.
# This avoids writing strings into a float (NaN-initialised) column, which
# recent pandas versions warn about as an incompatible-dtype assignment.
key_df['pseudo_key'] = key_df['track_id'].map(pseudo_key_by_track_id)
key_df

Unnamed: 0,track_id,base_key,pseudo_key
0,1092676,F minor,major g#
1,4328388,F minor,minor f
2,2680040,A minor,minor a
3,3935095,E minor,minor a
4,3667215,Ab minor,minor g#
...,...,...,...
599,4615188,Ab minor,minor g#
600,2031094,A minor,minor a
601,667508,D minor,minor d
602,511348,E minor,minor e


In [5]:
# Every row must have a pseudo_key value; a null here means no estimate was
# attached to that track.
assert not key_df['pseudo_key'].isnull().any()

In [6]:
# TODO(minigb): Create an additional file for constants
# Both label lists hold the 12 major keys followed by the 12 minor keys, with
# tonics in the same order starting at A. Position i in one list therefore
# corresponds to position i in the other; only the spelling differs
# ("<Tonic> <mode>" with flats vs. "<mode> <tonic>" with sharps).
_KEY_MODES = ('major', 'minor')
_BASE_TONICS = ('A', 'Bb', 'B', 'C', 'Db', 'D',
                'Eb', 'E', 'F', 'Gb', 'G', 'Ab')
_PSEUDO_TONICS = ('a', 'a#', 'b', 'c', 'c#', 'd',
                  'd#', 'e', 'f', 'f#', 'g', 'g#')

KEY_LABELS_FOR_BASE = [
    f'{tonic} {mode}' for mode in _KEY_MODES for tonic in _BASE_TONICS]

KEY_LABELS_FOR_PSEUDO = [
    f'{mode} {tonic}' for mode in _KEY_MODES for tonic in _PSEUDO_TONICS]

In [7]:
def _label_indices(keys, labels):
    """Return, for every entry of ``keys``, its position in ``labels``.

    Args:
        keys: Iterable of key label strings (e.g. a DataFrame column).
        labels: List of all valid labels; the position in this list is the index.

    Returns:
        A list of integer positions, one per entry of ``keys``.

    Raises:
        ValueError: If any entry of ``keys`` is not present in ``labels``.
    """
    index_by_label = {label: index for index, label in enumerate(labels)}
    unknown = [key for key in keys if key not in index_by_label]
    if unknown:
        raise ValueError(f'Unknown key labels: {unknown!r}')
    return [index_by_label[key] for key in keys]


# Map both key spellings onto the shared 0..23 index space.
base_key_index_list = _label_indices(key_df['base_key'], KEY_LABELS_FOR_BASE)
pseudo_key_index_list = _label_indices(key_df['pseudo_key'], KEY_LABELS_FOR_PSEUDO)
key_df['base_key_index'] = base_key_index_list
key_df['pseudo_key_index'] = pseudo_key_index_list

# The label lists hold all major keys first and all minor keys second, so an
# index below this boundary means "major".
major_key_count = len(KEY_LABELS_FOR_BASE) // 2

key_df['is_same'] = (key_df['base_key_index'] == key_df['pseudo_key_index'])
key_df['correct_major_minor'] = (
    (key_df['base_key_index'] < major_key_count)
    == (key_df['pseudo_key_index'] < major_key_count)
)

# Fraction of exact key matches, and fraction of matching major/minor mode.
float(key_df['is_same'].mean()), float(key_df['correct_major_minor'].mean())

(0.6771523178807947, 0.8278145695364238)

In [8]:
# Write the comparison table next to (one level above) the pseudo-key directory.
# NOTE(review): the file name spells "comparision"; it is kept unchanged here
# because other code may read this exact path. Confirm before renaming.
comparison_csv_path = f'{pseudo_key_dir}/../giantsteps_key_comparision.csv'
key_df.to_csv(comparison_csv_path)
key_df

Unnamed: 0,track_id,base_key,pseudo_key,base_key_index,pseudo_key_index,is_same,correct_major_minor
0,1092676,F minor,major g#,20,11,False,False
1,4328388,F minor,minor f,20,20,True,True
2,2680040,A minor,minor a,12,12,True,True
3,3935095,E minor,minor a,19,12,False,True
4,3667215,Ab minor,minor g#,23,23,True,True
...,...,...,...,...,...,...,...
599,4615188,Ab minor,minor g#,23,23,True,True
600,2031094,A minor,minor a,12,12,True,True
601,667508,D minor,minor d,17,17,True,True
602,511348,E minor,minor e,19,19,True,True
