In [1]:
### IMPORTS ###
import pandas as pd
import os
import yaml
import numpy as np
import json
from itertools import combinations
import pretty_midi
import librosa
import matplotlib.pyplot as plt
import sys

In [3]:
# Read the chorale dataset from its original JSON file.
json_path = './data/jsb-chorales-8th.json'
with open(json_path, "r") as f:
    chorale_data = json.load(f)
# Sizes of the train / test / valid splits as loaded.
print(*(len(chorale_data[split]) for split in ('train', 'test', 'valid')))

# No separate validation set is needed, so fold it into the training split.
chorale_data['train'] = chorale_data['train'] + chorale_data['valid']
print(*(len(chorale_data[split]) for split in ('train', 'test')))


229 77 76
305 77


In [4]:
### SOME CHORD CONSTANTS
# Pitch classes (0 = C, 1 = C#, ... 11 = B) belonging to each key signature.
# Every entry is named "<major key>/<relative minor key>" and holds the
# major-scale step pattern (0,2,4,5,7,9,11) transposed to the major tonic,
# sorted ascending. Entries run around the circle of fifths: sharp keys first,
# then flat keys. The insertion order is kept because key_estimate resolves
# ties in favour of the earliest entry.
notes_in_keys = {
    name: sorted((tonic + step) % 12 for step in (0, 2, 4, 5, 7, 9, 11))
    for name, tonic in [
        ("C/a", 0),        # no accidentals
        ("G/e", 7),        # F#
        ("D/b", 2),        # + C#
        ("A/f#", 9),       # + G#
        ("E/c#", 4),       # + D#
        ("B-Cb/g#", 11),   # + A#
        ("Gb-F#/eb", 6),   # + E#
        ("Db-C#/bb", 1),   # five flats (or seven sharps)
        ("Ab/f", 8),       # four flats
        ("Eb/c", 3),       # three flats
        ("Bb/g", 10),      # two flats
        ("F/d", 5),        # one flat (Bb)
    ]
}

# Tonic triads (as sorted pitch classes, 0 = C) for every key signature:
#   'maj' -> root, major third, fifth of the major key       (steps 0, 4, 7)
#   'min' -> root, minor third, fifth of the relative minor  (steps 0, 3, 7),
#            whose root lies 9 semitones above the major tonic.
maj_min_tonics = {
    name: {
        'maj': sorted((tonic + step) % 12 for step in (0, 4, 7)),
        'min': sorted((tonic + 9 + step) % 12 for step in (0, 3, 7)),
    }
    for name, tonic in [
        ("C/a", 0),        # C E G     / A C E
        ("G/e", 7),        # G B D     / E G B
        ("D/b", 2),        # D F# A    / B D F#
        ("A/f#", 9),       # A C# E    / F# A C#
        ("E/c#", 4),       # E G# B    / C# E G#
        ("B-Cb/g#", 11),   # B D# F#   / G# B D#
        ("Gb-F#/eb", 6),   # F# A# C#  / D# F# A#
        ("Db-C#/bb", 1),   # Db F Ab   / Bb Db F
        ("Ab/f", 8),       # Ab C Eb   / F Ab C
        ("Eb/c", 3),       # Eb G Bb   / C Eb G
        ("Bb/g", 10),      # Bb D F    / G Bb D
        ("F/d", 5),        # F A C     / D F A
    ]
}

# Pitch classes (0 = C ... 11 = B) available in each minor key: the natural
# minor scale plus the raised 7th degree, i.e. steps (0,2,3,5,7,8,10,11) above
# the minor tonic, sorted ascending.
# FIX: the "g#" row previously listed 5 (E#/F) instead of 7; the raised 7th of
# G# minor is F## (= G = 7), and E#/F is not part of that key at all.
notes_in_keys_minor = {
    #        tonic pc -> scale                      raised 7th
    "a":    [0,2,4,5,7,8,9,11],   # tonic 9,  raised 7th G#  (8)
    "e":    [0,2,3,4,6,7,9,11],   # tonic 4,  raised 7th D#  (3)
    "b":    [1,2,4,6,7,9,10,11],  # tonic 11, raised 7th A#  (10)
    "f#":   [1,2,4,5,6,8,9,11],   # tonic 6,  raised 7th E#  (5)
    "c#":   [0,1,3,4,6,8,9,11],   # tonic 1,  raised 7th B#  (0)
    "g#":   [1,3,4,6,7,8,10,11],  # tonic 8,  raised 7th F## (7)
    "eb":   [1,2,3,5,6,8,10,11],  # tonic 3,  raised 7th D   (2)
    "bb":   [0,1,3,5,6,8,9,10],   # tonic 10, raised 7th A   (9)
    "f":    [0,1,3,4,5,7,8,10],   # tonic 5,  raised 7th E   (4)
    "c":    [0,2,3,5,7,8,10,11],  # tonic 0,  raised 7th B   (11)
    "g":    [0,2,3,5,6,7,9,10],   # tonic 7,  raised 7th F#  (6)
    "d":    [0,1,2,4,5,7,9,10]    # tonic 2,  raised 7th C#  (1)
}

In [5]:
### CODE FOR ESTIMATING MODES AND KEYS
def mode_estimate(chorale, key_estimate):
    """Guess whether a chorale is in the major or the relative minor mode.

    Each chord is reduced to its set of pitch classes (pitch % 12) and compared
    for exact equality with the major and the minor tonic triad that
    `maj_min_tonics` stores for `key_estimate`.

    Args:
        chorale: iterable of chords, each an iterable of integer pitches.
        key_estimate: a key of `maj_min_tonics` (e.g. "C/a").

    Returns:
        "maj" when the major tonic triad occurs strictly more often than the
        minor one, otherwise "min" (so ties, including 0-0, yield "min").

    Raises:
        KeyError: if `key_estimate` is not a key of `maj_min_tonics`.
    """
    triads = maj_min_tonics[key_estimate]
    major_triad = set(triads['maj'])
    minor_triad = set(triads['min'])

    major_hits = 0
    minor_hits = 0
    for chord in chorale:
        pitch_classes = {pitch % 12 for pitch in chord}
        # A bool adds as 0/1, so each exact triad match bumps its counter by one.
        major_hits += pitch_classes == major_triad
        minor_hits += pitch_classes == minor_triad

    return "maj" if major_hits > minor_hits else "min"
        
def key_estimate(chorale):
    """Estimate the key signature whose scale covers the most notes of a chorale.

    Every pitch is reduced to its pitch class (pitch % 12) and tallied. Each
    entry of `notes_in_keys` is then scored by how many of the chorale's notes
    fall inside its scale, and the best-scoring key name is returned.

    The score is the raw note count rather than a proportion: all keys share
    the same denominator, so the ranking is unchanged, and this avoids both the
    division by zero on an empty chorale and the old assumption that every
    chord holds exactly 4 notes.

    Args:
        chorale: list of chords, each a list of integer pitches.

    Returns:
        The name of the best matching key in `notes_in_keys`; on a tie the
        first key in the table's iteration order wins. Returns None when the
        chorale contains no notes at all.
    """
    # Tally how often each of the 12 pitch classes occurs.
    note_counts = {pitch_class: 0 for pitch_class in range(12)}
    for chord in chorale:
        for note in chord:
            note_counts[note % 12] += 1

    best_key = None
    best_coverage = 0
    for key, scale in notes_in_keys.items():
        coverage = sum(note_counts[pitch_class] for pitch_class in scale)
        # Strict comparison: an earlier key keeps the win when scores are equal.
        if coverage > best_coverage:
            best_coverage = coverage
            best_key = key
    return best_key

In [15]:
# Estimate a key signature and a mode for every chorale of each split,
# keyed by the chorale's position in that split.
train_keys = {}
for idx, piece in enumerate(chorale_data['train']):
    signature = key_estimate(piece)
    train_keys[int(idx)] = {"key": signature, "mode": mode_estimate(piece, signature)}

test_keys = {}
for idx, piece in enumerate(chorale_data['test']):
    signature = key_estimate(piece)
    test_keys[idx] = {"key": signature, "mode": mode_estimate(piece, signature)}

print(train_keys)

keys_and_modes = dict(train=train_keys, test=test_keys)

# save dictionary with key and mode estimates for each chorale
with open('./data/jsb_chorales_keys_modes.yaml', 'w') as outfile:
    yaml.dump(keys_and_modes, outfile, default_flow_style=False)

{0: {'key': 'Bb/g', 'mode': 'maj'}, 1: {'key': 'A/f#', 'mode': 'maj'}, 2: {'key': 'G/e', 'mode': 'maj'}, 3: {'key': 'C/a', 'mode': 'maj'}, 4: {'key': 'C/a', 'mode': 'min'}, 5: {'key': 'D/b', 'mode': 'min'}, 6: {'key': 'Eb/c', 'mode': 'maj'}, 7: {'key': 'F/d', 'mode': 'min'}, 8: {'key': 'E/c#', 'mode': 'maj'}, 9: {'key': 'Bb/g', 'mode': 'min'}, 10: {'key': 'D/b', 'mode': 'maj'}, 11: {'key': 'Bb/g', 'mode': 'maj'}, 12: {'key': 'D/b', 'mode': 'min'}, 13: {'key': 'G/e', 'mode': 'maj'}, 14: {'key': 'C/a', 'mode': 'maj'}, 15: {'key': 'F/d', 'mode': 'maj'}, 16: {'key': 'F/d', 'mode': 'maj'}, 17: {'key': 'Eb/c', 'mode': 'maj'}, 18: {'key': 'Bb/g', 'mode': 'min'}, 19: {'key': 'Bb/g', 'mode': 'min'}, 20: {'key': 'A/f#', 'mode': 'maj'}, 21: {'key': 'G/e', 'mode': 'maj'}, 22: {'key': 'Eb/c', 'mode': 'maj'}, 23: {'key': 'D/b', 'mode': 'min'}, 24: {'key': 'C/a', 'mode': 'min'}, 25: {'key': 'D/b', 'mode': 'min'}, 26: {'key': 'C/a', 'mode': 'min'}, 27: {'key': 'A/f#', 'mode': 'maj'}, 28: {'key': 'Bb/g

In [20]:
# Now we want to shift all chorales into the key of C.
# Semitone offset that shift_to_c ADDS to every pitch of a chorale in the given
# key so that its major tonic lands on C. The offset is chosen to move by at
# most 6 semitones: tonics up to F# (pitch class <= 6) are shifted down by
# their pitch class, higher tonics are shifted up to the next C.
key_shift_amount = {
    name: (-tonic if tonic <= 6 else 12 - tonic)
    for name, tonic in [
        ("C/a", 0),        #  0
        ("G/e", 7),        # +5
        ("D/b", 2),        # -2
        ("A/f#", 9),       # +3
        ("E/c#", 4),       # -4
        ("B-Cb/g#", 11),   # +1
        ("Gb-F#/eb", 6),   # -6
        ("Db-C#/bb", 1),   # -1
        ("Ab/f", 8),       # +4
        ("Eb/c", 3),       # -3
        ("Bb/g", 10),      # +2
        ("F/d", 5),        # -5
    ]
}

def percentage_in_c(chorale):
    """Return the fraction of chords that use only notes of the C major scale.

    Despite the name, the result is a fraction in [0, 1], not a percentage.

    Args:
        chorale: list of chords, each an iterable of integer pitches.

    Returns:
        Number of chords whose pitch classes (pitch % 12) all lie in
        {0, 2, 4, 5, 7, 9, 11}, divided by the number of chords. An empty
        chorale yields 0.0 instead of raising ZeroDivisionError.
    """
    if len(chorale) == 0:
        return 0.0
    notes_in_c = {0, 2, 4, 5, 7, 9, 11}  # built once, not once per chord
    in_c = sum(1 for chord in chorale if {n % 12 for n in chord} <= notes_in_c)
    return in_c / len(chorale)

def shift_to_c(chorale_data, ttv):
    """Transpose every chorale of one split by its key's shift amount.

    The key of chorale `i` is read from the module-level
    `keys_and_modes[ttv][i]['key']` and translated into a semitone offset via
    `key_shift_amount`; the offset is added to every pitch. Only the 'key'
    entry is consulted (the previously fetched-but-unused 'mode' lookup was
    removed).

    Args:
        chorale_data: mapping from split name to a list of chorales, each a
            list of chords, each a list of pitches.
        ttv: split name to process (e.g. 'train' or 'test').

    Returns:
        A new list of chorales; every chord is a new list of ints sorted
        ascending (lowest voice first). The input is not modified.

    Raises:
        KeyError: if the split, the chorale index or the key name is unknown.
    """
    shifted_chorales = []
    for i, chorale in enumerate(chorale_data[ttv]):
        shift_amount = key_shift_amount[keys_and_modes[ttv][i]['key']]
        shifted_chorales.append(
            # Sorting keeps each chord ordered from the lowest to the highest pitch.
            [sorted(int(note + shift_amount) for note in chord) for chord in chorale]
        )
    return shifted_chorales

def determine_ranges(chorale_data):
    """Find the lowest and highest pitch sung by each of the four voices.

    Only chords with exactly 4 notes are considered; positions 0..3 of a chord
    are taken as bass, tenor, alto and soprano respectively.

    Args:
        chorale_data: iterable of chorales, each an iterable of chords.

    Returns:
        Tuple (soprano, alto, tenor, bass) of [min, max] lists. A voice that
        never occurs keeps the sentinel [128, 0]. The same four lists are also
        printed before returning.
    """
    # One [min, max] pair per chord position, starting from sentinel values.
    extremes = [[128, 0] for _ in range(4)]
    for chorale in chorale_data:
        for chord in chorale:
            if len(chord) != 4:
                continue
            for voice_range, pitch in zip(extremes, chord):
                voice_range[0] = min(voice_range[0], pitch)
                voice_range[1] = max(voice_range[1], pitch)

    bass_mm, ten_mm, alt_mm, sop_mm = extremes
    print(sop_mm, alt_mm, ten_mm, bass_mm)
    return sop_mm, alt_mm, ten_mm, bass_mm

# Transpose both splits, then report the per-voice pitch ranges of each.
train_in_c, test_in_c = (shift_to_c(chorale_data, split) for split in ('train', 'test'))

determine_ranges(train_in_c)
determine_ranges(test_in_c)


[55, 84] [50, 79] [43, 74] [31, 67]
[55, 84] [50, 77] [45, 71] [31, 65]


([55, 84], [50, 77], [45, 71], [31, 65])

In [21]:
# Bundle the transposed splits under their split names.
chorales_in_c = dict(train=train_in_c, test=test_in_c)

# Save the transposed chorales. NOTE: this file STILL HAS MAJOR AND MINOR
# chorales; filtering by mode happens later.
with open('./data/jsb_chorales_in_c.yaml', 'w') as outfile:
    yaml.dump(chorales_in_c, outfile, default_flow_style=False)

In [35]:
def get_voicings_durations(chorale_data, ttv):
    """Run-length encode the chords of every major-mode chorale in a split.

    Chorale `i` is processed only when the module-level
    `keys_and_modes[ttv][i]['mode']` equals 'maj'. Frames that are None or do
    not hold exactly 4 notes are skipped: they neither start a run nor add to
    a duration. Consecutive equal 4-note chords are merged into one voicing
    whose duration is the number of merged frames.

    Compared with the previous implementation this fixes three edge cases:
      * the last run is always emitted (it used to be dropped when the final
        frame was invalid);
      * a leading invalid frame no longer produces a (None, 0) entry;
      * a chorale consisting of a single valid frame yields that chord with
        duration 1 instead of nothing.

    Args:
        chorale_data: mapping from split name to a list of chorales, each a
            list of chords (lists of pitches, possibly None).
        ttv: split name to process (e.g. 'train' or 'test').

    Returns:
        (all_voicings, all_durations): two parallel lists with one entry per
        major chorale; entry k holds that chorale's distinct consecutive chords
        and, respectively, how many frames each one lasted.
    """
    all_voicings = []
    all_durations = []
    for i, chorale in enumerate(chorale_data[ttv]):
        if keys_and_modes[ttv][i]['mode'] != 'maj':  # ONLY MAJOR!!
            continue
        cur_voicing = []
        cur_durations = []
        for chord in chorale:
            if chord is None or len(chord) != 4:
                continue
            if cur_voicing and chord == cur_voicing[-1]:
                # Same chord as the running one: extend its duration.
                cur_durations[-1] += 1
            else:
                # Chord change (or first valid chord): open a new run.
                cur_voicing.append(chord)
                cur_durations.append(1)
        all_voicings.append(cur_voicing)
        all_durations.append(cur_durations)

    return all_voicings, all_durations


print(len(chorales_in_c['train']), len(chorales_in_c['test']))
training_voc, training_dur = get_voicings_durations(chorales_in_c, 'train')
testing_voc, testing_dur = get_voicings_durations(chorales_in_c, 'test')
# Number of major chorales kept per split (190 training, 50 testing in the recorded run).
print(len(training_voc), len(testing_voc))

voicing_duration_dict = {
    'train': dict(voicings=training_voc, durations=training_dur),
    'test': dict(voicings=testing_voc, durations=testing_dur),
}

# Data is STILL ON A PER-CHORALE BASIS and contains ONLY MAJOR chorales.
with open('./data/jsb_maj_voicings_durations.yaml', 'w') as outfile:
    yaml.dump(voicing_duration_dict, outfile, default_flow_style=False)

305 77
190 50


In [46]:
### TIME TO CHUNK!
# NOW GET DATA CHUNKS:
# Chord vocabulary in C major, label -> pitch classes listed root first.
# Each chord stacks thirds on a degree of the C major scale
# (0,2,4,5,7,9,11): 3 notes for a triad, 4 notes for a seventh chord.
# Labels are assigned 1..11 in the order listed; more chords may be added.
notes_in_chords = {
    label: [(0, 2, 4, 5, 7, 9, 11)[(degree - 1 + 2 * third) % 7] for third in range(size)]
    for label, (degree, size) in enumerate(
        [
            (1, 3),  # 1:  C
            (2, 3),  # 2:  d
            (3, 3),  # 3:  e
            (4, 3),  # 4:  F
            (5, 3),  # 5:  G
            (6, 3),  # 6:  a
            (7, 3),  # 7:  b dim
            (2, 4),  # 8:  2 7th
            (4, 4),  # 9:  4 7th
            (5, 4),  # 10: 5 7th
            (7, 4),  # 11: 7 7th
        ],
        start=1,
    )
}

def get_chord_idx(chord):
    """Look up the label of a chord in `notes_in_chords`.

    The chord is reduced to its set of pitch classes (pitch % 12), so octave,
    doubling and note order are ignored.

    Args:
        chord: iterable of integer pitches.

    Returns:
        The first label in `notes_in_chords` whose pitch-class set equals the
        chord's, or -1 when no entry matches.
    """
    pitch_classes = {pitch % 12 for pitch in chord}
    return next(
        (
            label
            for label, members in notes_in_chords.items()
            if set(members) == pitch_classes
        ),
        -1,
    )

def chunk(voicings, durations):
    """Split chorales into chunks of consecutive recognised chords.

    Chorales are walked chord by chord. Entries that are None or do not hold
    exactly 4 notes are ignored. A chord that `get_chord_idx` does not
    recognise (-1) ends the running chunk; the end of a chorale ends it too.
    A finished chunk is kept only when it holds more than 2 chords.

    Args:
        voicings: per-chorale lists of chords.
        durations: per-chorale lists of durations, parallel to `voicings`.

    Returns:
        (voc_chunks, chord_chunks, melody_chunks, dur_chunks), four parallel
        lists with one entry per kept chunk:
          * voc_chunks: the chords themselves;
          * chord_chunks: their labels followed by a trailing -1;
          * melody_chunks: the last note of each chord followed by a trailing -1;
          * dur_chunks: their durations (no trailing -1).
        The number of kept chunks is also printed.
    """
    voc_chunks, chord_chunks, melody_chunks, dur_chunks = [], [], [], []

    def close_run(run_voc, run_chords, run_mel, run_dur):
        # Keep the run only if it is long enough; label and melody sequences
        # receive a -1 terminator, voicings and durations do not.
        if len(run_voc) <= 2:
            return
        run_chords.append(-1)
        run_mel.append(-1)
        voc_chunks.append(run_voc)
        chord_chunks.append(run_chords)
        melody_chunks.append(run_mel)
        dur_chunks.append(run_dur)

    for chorale_voicings, chorale_durations in zip(voicings, durations):
        run = ([], [], [], [])  # voicings, labels, melody notes, durations
        for chord, dur in zip(chorale_voicings, chorale_durations):
            if chord is None or len(chord) != 4:
                continue
            label = get_chord_idx(chord)
            if label == -1:
                # Unrecognised chord: finish the current run and start afresh.
                close_run(*run)
                run = ([], [], [], [])
                continue
            run[0].append(chord)
            run[1].append(label)
            run[2].append(chord[-1])
            run[3].append(dur)
        close_run(*run)

    print("TOTAL CHUNKS:", len(voc_chunks))
    return voc_chunks, chord_chunks, melody_chunks, dur_chunks

# Chunk both splits into voicing / chord-label / melody / duration sequences.
(training_voc_chunks, training_chord_chunks,
 training_mel_chunks, training_dur_chunks) = chunk(training_voc, training_dur)
(testing_voc_chunks, testing_chord_chunks,
 testing_mel_chunks, testing_dur_chunks) = chunk(testing_voc, testing_dur)

### MELODIES HAVE TO BE LISTS OF LISTS
# Wrap every melody note in its own single-element list.
training_mel_list_chunks = [[[pitch] for pitch in melody] for melody in training_mel_chunks]
testing_mel_list_chunks = [[[pitch] for pitch in melody] for melody in testing_mel_chunks]

# The four chunk lists of a split must have the same length.
print(*map(len, (training_voc_chunks, training_chord_chunks, training_mel_chunks, training_dur_chunks)))
print(*map(len, (testing_voc_chunks, testing_chord_chunks, testing_mel_chunks, testing_dur_chunks)))
# Peek at the first test chunk.
print(testing_chord_chunks[0])
print(testing_mel_list_chunks[0])
print(testing_dur_chunks[0])

TOTAL CHUNKS: 1239
TOTAL CHUNKS: 375
1239 1239 1239 1239
375 375 375 375
[1, 5, 10, 1, 5, 6, -1]
[[60], [67], [65], [64], [62], [60], [-1]]
[2, 1, 1, 1, 1, 1]


In [48]:
### SAVE MY DATASETS!
# Each dataset maps a split name to that split's list of chunks.
orig_voicings = dict(train=training_voc_chunks, test=testing_voc_chunks)
mel_data = dict(train=training_mel_list_chunks, test=testing_mel_list_chunks)
chord_data = dict(train=training_chord_chunks, test=testing_chord_chunks)
dur_data = dict(train=training_dur_chunks, test=testing_dur_chunks)

# Write every dataset to its own YAML file.
for out_path, dataset in [
    ('./data/jsb_maj_orig_voicings.yaml', orig_voicings),
    ('./data/jsb_maj_chord_progs.yaml', chord_data),
    ('./data/jsb_maj_melodies.yaml', mel_data),
    ('./data/jsb_maj_durations.yaml', dur_data),
]:
    with open(out_path, 'w') as outfile:
        yaml.dump(dataset, outfile, default_flow_style=False)

In [3]:
start = 0.3
end = 0.1
# Linear schedule: 50 evenly spaced values, beginning at `start` and stepping
# towards `end`.
for step in range(50):
    remaining = (50 - step) / 50
    print((start - end) * remaining + end)

# Geometric schedule: multiply by 0.9 fifty times (this overwrites `start`).
for _ in range(50):
    start *= 0.9
    print(start)


0.3
0.296
0.292
0.288
0.28400000000000003
0.28
0.276
0.272
0.268
0.264
0.26
0.256
0.252
0.248
0.244
0.24
0.23600000000000002
0.232
0.228
0.22399999999999998
0.21999999999999997
0.21599999999999997
0.21200000000000002
0.20800000000000002
0.20400000000000001
0.2
0.196
0.192
0.188
0.184
0.18
0.176
0.172
0.168
0.164
0.16
0.156
0.152
0.148
0.14400000000000002
0.14
0.136
0.132
0.128
0.124
0.12000000000000001
0.116
0.112
0.10800000000000001
0.10400000000000001
0.27
0.24300000000000002
0.21870000000000003
0.19683000000000003
0.17714700000000003
0.15943230000000003
0.14348907000000002
0.12914016300000003
0.11622614670000003
0.10460353203000003
0.09414317882700003
0.08472886094430003
0.07625597484987003
0.06863037736488302
0.061767339628394716
0.055590605665555244
0.05003154509899972
0.04502839058909975
0.040525551530189774
0.036472996377170795
0.032825696739453715
0.029543127065508344
0.026588814358957512
0.02392993292306176
0.021536939630755585
0.019383245667680026
0.017444921100912024
0.01570