In [1]:
%matplotlib inline
import os
import time
import numpy as np
import matplotlib.pyplot as plt
import IPython.display
import collections
from collections import Counter
import seaborn as sns

from pathlib import Path
import pretty_midi
import librosa.display
import glob
import datetime

from scipy.spatial import distance

from music21 import *

import tensorflow as tf

%load_ext autoreload
%autoreload 2

# Initial data folder. NOTE(review): in the code visible here, `folder` is
# reassigned in the evaluation cell before it is read, so this value looks unused.
folder = 'train/'


In [2]:
## Krumhansl-Schmuckler key estimation

# adapted from https://gist.github.com/bmcfee/1f66825cef2eb34c839b42dddbad49fd
import scipy.linalg
import scipy.stats

# Key profiles as (major, minor) pairs, indexed by scale degree in semitones
# above the tonic (index 0 = tonic, index 7 = perfect fifth, ...).
# Coefficients as reported here: http://rnhart.net/articles/key-finding/
# and https://musicinformationretrieval.wordpress.com/2017/02/24/state-of-the-art-2/
_KS_PROFILES = {
    'kessler': (
        [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88],
        [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17],
    ),
    'schmuckler': (
        [6.35, 2.33, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88],
        [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17],
    ),
    'aarden': (
        # Tritone weight is 0.291248 in the published Aarden-Essen profile
        # (it was previously mistyped here as 0.281248).
        [17.7661, 0.145624, 14.9265, 0.160186, 19.8049, 11.3587, 0.291248, 22.062, 0.145624, 8.15494, 0.232998, 4.95122],
        [18.2648, 0.737619, 14.0499, 16.8599, 0.702494, 14.4362, 0.702494, 18.6161, 4.56621, 1.93186, 7.37619, 1.75623],
    ),
    'sapp': (
        [2, 0, 1, 0, 1, 1, 0, 2, 0, 1, 0, 1],
        [2, 0, 1, 1, 0, 1, 0, 2, 1, 0, 0.5, 0.5],
    ),
    'bellman': (
        [16.8, 0.86, 12.95, 1.41, 13.49, 11.93, 1.25, 20.28, 1.8, 8.04, 0.62, 10.57],
        [18.16, 0.69, 12.99, 13.34, 1.07, 11.15, 1.38, 21.07, 7.49, 1.53, 0.92, 10.21],
    ),
    'temperley': (
        [0.748, 0.06, 0.488, 0.082, 0.67, 0.46, 0.096, 0.715, 0.104, 0.366, 0.057, 0.4],
        # Minor-second weight is 0.084 in the published Temperley profile
        # (it was previously mistyped here as 0.84, which made the minor
        # second the heaviest degree and skewed every minor-key score).
        [0.712, 0.084, 0.474, 0.618, 0.049, 0.46, 0.105, 0.747, 0.404, 0.067, 0.133, 0.33],
    ),
}


def ks_key(X, weights='kessler'):
    '''Estimate the key from a pitch class distribution

    The distribution is z-scored and correlated against all 12 rotations of
    a major and a minor key profile; the best-matching key wins.

    Parameters
    ----------
    X : array-like, 12 elements
        Pitch-class energy distribution (index 0 = C, ..., 11 = B).
        Need not be normalized.
    weights : str
        Name of the key profile to use. One of 'kessler', 'schmuckler',
        'aarden', 'sapp', 'bellman', 'temperley'.

    Returns
    -------
    key : int
        Key number of the best match: 0..11 for C:maj ... B:maj and
        12..23 for C:min ... B:min. Ties between the best major and the
        best minor score resolve to major.

    Raises
    ------
    ValueError
        If `weights` is not a known profile name, if `X` does not have
        exactly 12 elements, or if `X` is constant or contains non-finite
        values (the z-score is undefined, so no key can be estimated).
    '''
    if weights not in _KS_PROFILES:
        raise ValueError(
            "unknown weights %r; expected one of %s"
            % (weights, sorted(_KS_PROFILES))
        )

    X = np.asarray(X, dtype=float).ravel()
    if X.size != 12:
        raise ValueError("X must contain exactly 12 pitch classes, got %d" % X.size)
    # A flat or non-finite histogram (e.g. a file without notes) would z-score
    # to NaN and silently produce a meaningless key, so reject it explicitly.
    if not np.all(np.isfinite(X)) or X.max() == X.min():
        raise ValueError("X is constant or non-finite; cannot estimate a key")

    X = scipy.stats.zscore(X)

    major_profile, minor_profile = _KS_PROFILES[weights]
    major = scipy.stats.zscore(np.asarray(major_profile, dtype=float))
    minor = scipy.stats.zscore(np.asarray(minor_profile, dtype=float))

    # Generate all rotations of each profile: column j of the circulant
    # matrix is the profile shifted so that its tonic sits on pitch class j.
    major = scipy.linalg.circulant(major)
    minor = scipy.linalg.circulant(minor)

    # One correlation score per candidate tonic.
    major = major.T.dot(X)
    minor = minor.T.dot(X)

    major_max = np.argmax(major)
    minor_max = np.argmax(minor)
    best = major_max if np.max(major) >= np.max(minor) else minor_max + 12
    # Return a plain Python int so the result can be handed to APIs that
    # insist on `int` rather than a NumPy integer.
    return int(best)

## Testing

In [3]:
def key_to_name(key_number):
    """Return the key name for `key_number`, as given by
    `pretty_midi.key_number_to_key_name`."""
    name = pretty_midi.key_number_to_key_name(key_number)
    return name

def keynr(key):
    """Return the key number for the key name `key`, as given by
    `pretty_midi.key_name_to_key_number`."""
    number = pretty_midi.key_name_to_key_number(key)
    return number
       
def isFifth(realkey,predkey):
    """Tell whether the two tonics lie a perfect fifth apart.

    Only the pitch-class distance is compared (7 semitones in either
    direction); the mode of the keys is not taken into account.
    """
    interval = (realkey - predkey) % 12
    # 7 semitones up from predkey, or 7 semitones down (== 5 up).
    return interval == 7 or interval == 5
        
def isRelative(realkey,predkey):
    """Tell whether one key is the relative major/minor of the other.

    Key numbers above 11 are treated as minor keys; a minor key's relative
    major is found by `(key - 9) % 12`.
    """
    real_is_relative_minor = realkey > 11 and (realkey - 9) % 12 == predkey
    pred_is_relative_minor = predkey > 11 and (predkey - 9) % 12 == realkey
    return bool(real_is_relative_minor or pred_is_relative_minor)
        
def isParallel(realkey,predkey):
    """Tell whether both keys share the same tonic pitch class.

    Note that two identical keys also satisfy this test.
    """
    real_tonic = realkey % 12
    pred_tonic = predkey % 12
    return real_tonic == pred_tonic

In [7]:
# Evaluate the Krumhansl-Schmuckler estimator (Aarden weights) against the key
# signature stored in each MIDI file of `folder`.
# Download the Lakh MIDI Dataset at: https://colinraffel.com/projects/lmd/ 
#folder = "datasets/lmd_full/2/"
folder = "datasets/bachwtcall/" #source: https://ccrma.stanford.edu/~craig/keyscape/bach-wtc-new/ accessed: 18.07.19
t = time.time()

# find keys
# Sort the listing: os.listdir order is arbitrary, so without sorting the
# 1000-file cap below would select a different subset from run to run.
files = sorted(
    file for file in os.listdir(folder)
    if file.lower().endswith(('.mid', '.midi'))
)

files = files[:1000]
withoutC = False  # if True, skip files whose annotated key number is 0

verbose = False
keys = []      # [annotated key number, estimated key number] per scored file
skipped = []   # (file name, exception) for files that could not be scored
for file in files:
    # Parse each file exactly once; unreadable files are recorded and skipped.
    try:
        pm = pretty_midi.PrettyMIDI(os.path.join(folder, file))
    except Exception as err:
        skipped.append((file, err))
        continue

    # Only files carrying a key signature can serve as ground truth.
    if not pm.key_signature_changes:
        continue

    # The first key signature in the file is taken as the reference key.
    pmkey = pm.key_signature_changes[0].key_number
    if withoutC and pmkey == 0:
        continue

    try:
        kskey = ks_key(pm.get_pitch_class_histogram(), 'aarden')
    except Exception as err:
        skipped.append((file, err))
        continue
    keys.append([pmkey, kskey])


total = 0
correct,fifth,relative,parallel = 0,0,0,0
# Counters for a second estimator; not updated in this cell.
sacorrect,safifth,sarelative,saparallel = 0,0,0,0
for realkey, kskey in keys:
    total += 1
    # ks: each prediction falls into at most one category, checked in this order.
    if realkey == kskey:
        correct += 1
    elif isFifth(realkey,kskey):
        fifth += 1
    elif isRelative(realkey,kskey):
        relative += 1
    elif isParallel(realkey,kskey):
        parallel += 1

print("KS on",folder,"\n")
print("time", np.round(time.time()-t,2), "s")
print("Nr of files:", total)
if verbose:
    for file, err in skipped:
        print("skipped", file, repr(err))

if total == 0:
    # Avoid dividing by zero when no file provided a usable key signature.
    print()
    print("No files with a key signature could be scored.")
else:
    if verbose:
        print('ks')
        print(correct, relative, fifth, parallel, total)
        print((correct+relative)/total, correct/total, relative/total, fifth/total, parallel/total, (total-correct-fifth-relative-parallel)/total)

    print()
    print("KrumhanslSchmuckler with Aarden weights has", np.round(correct/total,2), "correct")
    print("and predicted the relative minor for", np.round(relative/total,2), "which amounts to a score of", np.round((correct+relative)/total,2))



KS on datasets/bachwtcall/ 

time 8.25 s
Nr of files: 90

KrumhanslSchmuckler with Aarden weights has 0.83 correct
and predicted the relative minor for 0.14 which amounts to a score of 0.98


## Results

Results on 18.07.19 on Bach Fugues from https://ccrma.stanford.edu/~craig/keyscape/bach-wtc-new/

time 11.75 s

Nr of files: 90

KrumhanslSchmuckler with Aarden weights has 0.83 correct
and predicted the relative minor for 0.14 which amounts to a score of  0.98

SpiralArray has 0.28 correct
and predicted the relative minor for 0.13 which amounts to a score of  0.41

Results on lmd_full/2 

time 602.91 s

Nr of files: 169

KrumhanslSchmuckler with Aarden weights has 0.62 correct
and predicted the relative minor for 0.18 which amounts to a score of  0.8

SpiralArray has 0.37 correct
and predicted the relative minor for 0.04 which amounts to a score of  0.41