##### Imports:

In [1]:
from utils import separate_for_training, preprocess, calculate_emission_from_chroma, calculate_mu_from_chroma, calculate_transition_probabilites, format_indiv_chroma
import pickle
from chroma import get_chromagram
import pandas as pd
from tqdm import tqdm
import numpy as np

##### Steps:

1. Training / Testing Data Split
2. Create Chromagram from Training Data
3. Create HMM Initialization Components
    - Initial State Probabilities
    - Transition Probability Matrix
    - Mu Value
    - Emission Matrix
4. Create HMM Object
5. Fit / Train HMM

##### Training / Testing Data Split:

In [2]:
# Load data and split into training and test.
# `preprocess` is given the dataset path; its result is what gets partitioned below.
piece_name_dict = preprocess('dataset.pkl')

# Only the first and third return values are kept; the middle one is discarded.
# NOTE(review): presumably 0.8 is the training fraction and 0. the validation
# fraction, with the remainder used for test -- confirm against
# utils.separate_for_training. If that helper shuffles, it should be seeded so
# the split is reproducible on Restart & Run All.
training_piece_names, _, test_piece_names = separate_for_training(piece_name_dict, 0.8, 0.)

# Load the pickled dataset itself; it is handed to get_chromagram as `midi_data`.
# NOTE(security): pickle.load can execute arbitrary code while unpickling --
# only open a dataset.pkl that comes from a trusted source.
with open(r"dataset.pkl", 'rb') as data:
    midi_data = pickle.load(data)

100%|██████████| 5762/5762 [00:08<00:00, 702.08it/s] 


##### Create Chromagram from Training Data:

In [3]:
# Build one chromagram per training piece. The per-song frames are kept in a
# list (calculate_init_prob reads the first row of each one) and are also
# stacked into a single frame for the remaining HMM components.
song_chromagrams = []
for song_name in tqdm(list(training_piece_names)):
    indiv_chroma = get_chromagram(song_name, midi_data)
    # NOTE(review): `formatted` is never used -- the unformatted `indiv_chroma`
    # is what gets appended below. Either append `formatted` instead, or drop
    # this call if format_indiv_chroma has no required side effect. Its
    # behavior is not visible here; confirm against utils before changing.
    formatted = format_indiv_chroma(indiv_chroma)
    song_chromagrams.append(indiv_chroma)

# Stack all per-song chromagrams row-wise. ignore_index is not set, so each
# song's original row labels are kept and may repeat across songs.
chromagram = pd.concat(song_chromagrams)
chromagram.head(6)

  0%|          | 0/4609 [00:00<?, ?it/s]

100%|██████████| 4609/4609 [01:23<00:00, 54.88it/s] 


Unnamed: 0,C,C#,D,D#,E,F,F#,G,G#,A,A#,B,Chord Actual
0,0,0,0,139,0,0,0,0,121,0,134,139,G#m
1,0,0,0,139,0,0,0,0,121,0,134,139,G#m
2,0,0,0,139,0,0,0,0,121,0,134,139,G#m
3,0,0,0,139,0,0,0,0,121,0,134,139,G#m
4,0,0,0,139,0,0,0,0,121,0,134,139,G#m
5,0,0,0,139,0,0,0,0,121,0,134,139,G#m


##### Create HMM Components:

###### Initial State Probabilities:

In [4]:
def calculate_init_prob(song_chromagrams):
    """Estimate how often each chord appears as the first chord of a song.

    Parameters
    ----------
    song_chromagrams : iterable
        Per-song chromagram frames. Each must have a 'Chord Actual' column
        with at least one row; the first value is taken as the opening chord.

    Returns
    -------
    pd.Series
        Relative frequency of each opening chord, indexed by chord label in
        the sorted order produced by ``np.unique``. Chords that never open a
        song are absent from the index rather than present with probability 0.
    """
    # Opening chord of every song, with a progress bar over the songs.
    opening_chords = [
        frame['Chord Actual'].values[0] for frame in tqdm(song_chromagrams)
    ]

    # Distinct labels (sorted) and how many songs start on each of them.
    labels, counts = np.unique(opening_chords, return_counts=True)

    # Normalise the counts so they sum to 1.
    probabilities = counts / counts.sum()
    return pd.Series(probabilities, index=labels)

In [5]:
# Initial-state distribution: relative frequency of each training song's first
# 'Chord Actual' value.
# NOTE(review): the index only contains chords that open at least one song, so
# it has fewer labels than the transition table shown further down -- it may
# need reindexing to the full state set before building the HMM.
# NOTE(review): the name is misspelled ("probabilties"); renaming it means
# updating every later cell that reads it.
initial_state_probabilties = calculate_init_prob(song_chromagrams)
initial_state_probabilties

 29%|██▉       | 1349/4609 [00:00<00:00, 13443.12it/s]

100%|██████████| 4609/4609 [00:00<00:00, 9431.56it/s] 


A        0.020612
A#       0.000434
B        0.119983
Bm       0.156433
C        0.000217
C#dim    0.000217
C#m      0.056628
D        0.126492
D#m      0.020395
E        0.118030
Em       0.062920
F#       0.019527
F#m      0.021046
G        0.114992
G#m      0.162074
dtype: float64

###### Transition Matrix:

In [6]:
# Transition probabilities computed from the stacked training chromagram.
# NOTE(review): the helper lives in utils and its logic is not visible here.
# The displayed table has `initial_chords` rows and `following_chords` columns
# and includes <S>/<E> labels -- confirm how those are meant to be handled
# when this is passed to the HMM.
transition_prob_matrix = calculate_transition_probabilites(chromagram)
transition_prob_matrix

following_chords,A,A#,A#dim,B,Bm,C,C#dim,C#m,D,D#,...,Em,F,F#,F#m,G,G#,G#m,dim,<E>,<S>
initial_chords,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A,0.860427,7.2e-05,7.2e-05,0.007062,0.041505,0.000144,0.003243,0.00317,0.020464,0.0,...,0.011313,0.0,0.001009,0.011601,0.020392,7.2e-05,0.009223,7.2e-05,0.0,0
A#,0.0,0.875,0.0,0.0,0.0,0.019231,0.0,0.0,0.009615,0.076923,...,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
A#dim,0.002577,0.0,0.873711,0.020619,0.018041,0.0,0.0,0.007732,0.018041,0.0,...,0.007732,0.0,0.0,0.002577,0.005155,0.0,0.018041,0.0,0.0,0
B,0.001118,0.0,0.000186,0.861802,0.004566,9.3e-05,0.0,0.014537,0.009132,0.0,...,0.001584,0.0,0.04883,0.000186,0.003914,0.0,0.027584,9.3e-05,0.0,0
Bm,0.04332,0.0,0.0,0.001783,0.863303,0.000132,0.000198,0.00066,0.023311,0.0,...,0.011623,0.0,0.000726,0.022188,0.027339,0.0,0.00317,0.0,0.0,0
C,0.005714,0.0,0.0,0.001905,0.005714,0.860952,0.001905,0.0,0.001905,0.0,...,0.0,0.001905,0.0,0.001905,0.114286,0.0,0.001905,0.0,0.0,0
C#dim,0.0,0.0,0.0,0.013825,0.006912,0.0,0.880184,0.002304,0.011521,0.0,...,0.006912,0.0,0.002304,0.002304,0.020737,0.0,0.025346,0.0,0.0,0
C#m,0.000602,0.0,0.0,0.010081,0.003009,0.0,0.0,0.859314,0.002708,0.0,...,0.000752,0.0,0.013843,0.000752,0.002558,0.0,0.047999,0.00015,0.0,0
D,0.050984,8.7e-05,0.0,0.003498,0.02746,0.00035,0.000175,0.002711,0.86244,0.0,...,0.013992,0.005859,0.000525,0.004547,0.019152,0.0,0.004722,0.0,0.0,0
D#,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09375,0.875,...,0.015625,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0


###### Mu Value:

In [7]:
# Mu value for the HMM, derived from the stacked training chromagram.
# NOTE(review): the displayed result has one float per pitch-class column
# (C .. B); presumably a per-column mean -- confirm in
# utils.calculate_mu_from_chroma.
mu = calculate_mu_from_chroma(chromagram)
mu

C     11.534603
C#    36.820623
D     25.441283
D#    33.602348
E     29.378984
F     16.100921
F#    44.574756
G     17.020084
G#    40.027229
A     22.124592
A#    24.851141
B     49.201473
dtype: float64

###### Emission Matrix:

In [8]:
# Emission parameters for the HMM, derived from the stacked training chromagram.
# NOTE(review): the displayed result is a 3-D array of square-looking blocks;
# presumably one covariance matrix per chord state -- TODO confirm in
# utils.calculate_emission_from_chroma.
emission = calculate_emission_from_chroma(chromagram)

In [9]:
# Display the emission array (NumPy abbreviates large arrays with "..." in the repr).
emission

array([[[ 3.16443410e+02, -1.92936090e+02,  3.71339534e+01, ...,
          2.01431127e+02, -7.67794112e+01, -5.70079960e+00],
        [-1.92936090e+02,  4.51976592e+03, -3.95472248e+02, ...,
         -1.07613823e+03,  9.24392870e+02,  3.39558022e+01],
        [ 3.71339534e+01, -3.95472248e+02,  8.96855416e+02, ...,
          6.61605223e+02, -3.34263738e+02, -7.12425513e+01],
        ...,
        [ 2.01431127e+02, -1.07613823e+03,  6.61605223e+02, ...,
          7.46723773e+03, -2.50829293e+03,  3.33954561e+02],
        [-7.67794112e+01,  9.24392870e+02, -3.34263738e+02, ...,
         -2.50829293e+03,  3.45066197e+03, -3.84725850e+02],
        [-5.70079960e+00,  3.39558022e+01, -7.12425513e+01, ...,
          3.33954561e+02, -3.84725850e+02,  1.67637875e+03]],

       [[ 8.89033607e+02,  0.00000000e+00, -4.98945482e+02, ...,
         -5.21870052e+02,  2.81266617e+02, -2.79456311e+02],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e