##### Imports:

In [23]:
from utils import separate_for_training, preprocess, calculate_emission_from_chroma, calculate_mu_from_chroma, calculate_transition_probabilites, format_indiv_chroma, predict, get_unique_predicted
import pickle
from chroma import get_chromagram
import pandas as pd
from tqdm import tqdm
import numpy as np
from hmmlearn import hmm

##### Steps:

1. Training / Testing Data Split
2. Create Chromagram from Training Data
3. Create HMM Initialization Components
    - Initial State Probabilities
    - Transition Probability Matrix
    - Mu Value
    - Emission Matrix
4. Create HMM Object
5. Fit / Train HMM

##### Training / Test Data Split:

In [13]:
# Load data and split into training and test
piece_name_dict = preprocess('dataset.pkl')

training_piece_names, _, test_piece_names = separate_for_training(piece_name_dict, 0.8, 0.)

with open(r"dataset.pkl", 'rb') as data:
    midi_data = pickle.load(data)

100%|██████████| 5762/5762 [00:08<00:00, 718.09it/s] 


##### Create Chromagram from Training Data:

In [14]:
song_chromagrams = []
for song_name in tqdm(list(training_piece_names)):
    indiv_chroma = get_chromagram(song_name, midi_data)
    formatted = format_indiv_chroma(indiv_chroma)
    song_chromagrams.append(indiv_chroma)

chromagram = pd.concat(song_chromagrams)
chromagram.head(200)

100%|██████████| 4609/4609 [01:11<00:00, 64.59it/s]


Unnamed: 0,C,C#,D,D#,E,F,F#,G,G#,A,A#,B,Chord Actual
0,0,0,0,48,0,0,48,0,0,0,0,98,B
1,0,0,0,48,0,0,48,0,0,0,0,98,B
2,0,0,0,48,0,0,48,0,0,0,0,45,B
3,0,0,0,48,0,0,48,0,0,0,0,45,B
4,0,0,0,0,0,0,60,0,0,0,0,0,B
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,0,0,0,0,153,0,0,0,145,0,0,69,G
6,0,0,0,0,0,0,0,0,0,0,0,85,G
8,0,0,0,62,0,0,88,0,70,0,0,271,Bm
9,0,0,0,62,0,0,88,0,70,0,0,271,Bm


##### Create HMM Components:

###### Initial State Probabilities:

In [15]:
def calculate_init_prob(song_chromagrams):
    first_chords = []
    for song_chroma in tqdm(song_chromagrams):
        first_chord = song_chroma['Chord Actual'].values[0]
        first_chords.append(first_chord)
    
    first_chord_counts = np.unique(first_chords, return_counts=True)

    return pd.Series(first_chord_counts[1]/first_chord_counts[1].sum(), index=first_chord_counts[0])

In [16]:
initial_state_probabilties = calculate_init_prob(song_chromagrams)
initial_state_probabilties

100%|██████████| 4609/4609 [00:00<00:00, 10067.75it/s]


A        0.019744
A#       0.000217
A#dim    0.000217
B        0.119115
Bm       0.157735
C#dim    0.000217
C#m      0.061619
D        0.116728
D#m      0.019093
E        0.122152
Em       0.059666
F#       0.019527
F#m      0.020829
G        0.117813
G#m      0.165329
dtype: float64

###### Transition Matrix:

In [17]:
transition_prob_matrix = calculate_transition_probabilites(chromagram)
transition_prob_matrix

 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 1.50852316e-04 0.00000000e+00 1.18773946e-01
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 5.41931988e-04 0.00000000e+00 0.00000000e+00
 0.00000000e+00 6.45286184e-05 0.00000000e+00 7.26005518e-05
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 6.64805212e-05 0.00000000e+00]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  all_chords_matrix.update(transition_prob_matrix)
 0.00205577 0.         0.         0.         0.01036269 0.
 0.01579259 0.00165938 0.         0.         0.         0.
 0.01174556 0.00219335 0.         0.00216773 0.         0.
 0.         0.01200232 0.         0.00297662 0.         0.
 0.         0.         0.         0.01502795 0.00099721 0.        ]' has dtype incompatible with int64, please explicitly cast to a comp

Unnamed: 0,C,Cm,Cdim,C#,C#m,C#dim,D,Dm,Ddim,D#,...,G#dim,A,Am,Adim,A#,A#m,A#dim,B,Bm,Bdim
C,0.86372,0,0,0,0.0,0.0,0.0,0,0,0.0,...,0,0.0,0,0,0.0,0,0.0,0.007366,0.001842,0
Cm,0.0,0,0,0,0.0,0.0,0.0,0,0,0.0,...,0,0.0,0,0,0.0,0,0.0,0.0,0.0,0
Cdim,0.0,0,0,0,0.0,0.0,0.0,0,0,0.0,...,0,0.0,0,0,0.0,0,0.0,0.0,0.0,0
C#,0.0,0,0,0,0.0,0.0,0.0,0,0,0.0,...,0,0.0,0,0,0.0,0,0.0,0.0,0.0,0
C#m,0.0,0,0,0,0.860009,0.0,0.002489,0,0,0.0,...,0,0.001025,0,0,0.0,0,0.0,0.010543,0.002929,0
C#dim,0.0,0,0,0,0.007752,0.881137,0.005168,0,0,0.0,...,0,0.0,0,0,0.0,0,0.0,0.01292,0.033592,0
D,0.0,0,0,0,0.002056,0.000179,0.863336,0,0,0.0,...,0,0.050501,0,0,0.0,0,0.0,0.003665,0.027798,0
Dm,0.0,0,0,0,0.0,0.0,0.0,0,0,0.0,...,0,0.0,0,0,0.0,0,0.0,0.0,0.0,0
Ddim,0.0,0,0,0,0.0,0.0,0.0,0,0,0.0,...,0,0.0,0,0,0.0,0,0.0,0.0,0.0,0
D#,0.0,0,0,0,0.0,0.0,0.0625,0,0,0.875,...,0,0.020833,0,0,0.0,0,0.0,0.0,0.0,0


###### Mu Value:

In [18]:
mu = calculate_mu_from_chroma(chromagram)
mu

C     11.904212
C#    37.005563
D     24.903218
D#    34.195783
E     29.047999
F     16.589156
F#    44.361431
G     16.893884
G#    40.504631
A     21.644234
A#    24.957070
B     49.421869
dtype: float64

###### Emission Matrix:

In [19]:
emission = calculate_emission_from_chroma(chromagram)

In [20]:
emission

array([[[ 3.13367466e+02, -1.88800695e+02,  2.64736262e+01, ...,
          2.03685460e+02, -7.46557983e+01, -5.76122604e+00],
        [-1.88800695e+02,  4.47333848e+03, -3.60546766e+02, ...,
         -9.30727428e+02,  9.02402408e+02, -2.84120039e+01],
        [ 2.64736262e+01, -3.60546766e+02,  8.75890355e+02, ...,
          5.99662549e+02, -3.27588935e+02, -6.03967268e+01],
        ...,
        [ 2.03685460e+02, -9.30727428e+02,  5.99662549e+02, ...,
          7.49036906e+03, -2.48532835e+03,  3.64302030e+02],
        [-7.46557983e+01,  9.02402408e+02, -3.27588935e+02, ...,
         -2.48532835e+03,  3.45092607e+03, -3.39876346e+02],
        [-5.76122604e+00, -2.84120039e+01, -6.03967268e+01, ...,
          3.64302030e+02, -3.39876346e+02,  1.72032314e+03]],

       [[ 9.24619718e+02,  0.00000000e+00, -5.21126761e+02, ...,
         -7.07380282e+02,  8.80338028e+02, -4.37483568e+02],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e

In [25]:
model = hmm.GaussianHMM(n_components=transition_prob_matrix.shape[0], covariance_type="Full")
print(transition_prob_matrix.shape)
print(initial_state_probabilties.shape)
model.startprob_ = initial_state_probabilties
model.transmat_ = transition_prob_matrix.values
model.means_ = mu

(36, 36)
(15,)


In [26]:
chroma_with_preds = predict(transition_prob_matrix, model, mu)

ValueError: startprob_ must have length n_components