##### Imports:

In [1]:
from utils import NOTES_NAMES, FULL_CHORD_LIST, CUSTOM_ENCODING, INVERSE_ENCODING, separate_last_chord, mean_chord_distance_with_quality, predict_next_chords, calculate_covariance_from_chroma, separate_for_training, calculate_mu_from_chroma, calculate_transition_probabilites, format_indiv_chroma, get_unique_predicted, calculate_initial_probabilities
import pickle
from chroma import get_chromagram
import pandas as pd
from tqdm import tqdm
import numpy as np
from hmmlearn import hmm
from sklearn.metrics import f1_score

##### Steps:

1. Training / Testing Data Split
2. Create Chromagram from Training Data
3. Create HMM Initialization Components
    - Initial State Probabilities
    - Transition Probability Matrix
    - Mu Value
    - Covariance Matrix (Gaussian emission parameters)
4. Create HMM Object
5. Fit / Train HMM

##### Training / Test Data Split:

In [2]:
# Load the pickled dataset, then split its pieces into training and test sets.
# NOTE(review): pickle.load executes arbitrary code embedded in the file; only
# load dataset.pkl from a trusted source.
SPLIT_RATIO = 0.8  # presumably the fraction of pieces kept for training - TODO confirm in utils

with open(r"dataset.pkl", 'rb') as dataset_file:
    midi_data: dict = pickle.load(dataset_file)

training_piece_names, test_piece_names = separate_for_training(midi_data, SPLIT_RATIO)

##### Create Chromagram from Training Data:

In [3]:
# Build one chromagram per training piece and concatenate them into a single
# DataFrame that the HMM components below are estimated from.
song_chromagrams = []
for song_name in tqdm(list(training_piece_names)):
    indiv_chroma = get_chromagram(song_name, midi_data)
    # NOTE(review): `formatted` is never used; the unformatted `indiv_chroma`
    # is what gets appended. Either append `formatted` or drop this call,
    # after confirming format_indiv_chroma has no in-place side effects.
    formatted = format_indiv_chroma(indiv_chroma)
    song_chromagrams.append(indiv_chroma)

# pd.concat keeps each piece's own row index, so index values repeat across pieces.
chromagram = pd.concat(song_chromagrams)
chromagram.head(200)

100%|███████████████████████████████████████| 4609/4609 [00:56<00:00, 80.90it/s]


Unnamed: 0,C,C#,D,D#,E,F,F#,G,G#,A,A#,B,Chord Actual
0,0,0,0,0,71,0,0,0,0,167,0,113,A
1,0,0,0,0,71,0,0,0,0,167,0,113,A
2,0,100,0,0,71,0,0,0,0,167,0,0,A
3,0,100,0,0,71,0,0,0,0,167,0,0,A
4,0,100,0,0,71,0,0,0,0,167,0,0,A
...,...,...,...,...,...,...,...,...,...,...,...,...,...
28,0,59,0,0,0,0,56,0,0,0,138,0,A
29,0,59,0,0,0,0,56,0,0,0,138,0,A
30,0,59,0,0,0,0,56,0,0,0,138,0,A
31,0,59,0,0,0,0,56,0,0,0,138,0,A


##### Create HMM Components:

###### Initial State Probabilities:

In [None]:
# Initial-state distribution for the HMM, computed from the training pieces.
# The (misspelled) name is kept because the model-construction cell refers to it.
initial_state_probabilties = calculate_initial_probabilities(training_piece_names, midi_data)
initial_state_probabilties

###### Transition Matrix:

In [None]:
# Estimate the state-to-state transition probabilities from the training chromagram.
transition_prob_matrix = calculate_transition_probabilites(chromagram)
print(transition_prob_matrix)

# Sanity check: every row must be a valid probability distribution.
row_sums = transition_prob_matrix.sum(axis=1)
assert np.allclose(row_sums, 1), "Not all rows sum to 1"

###### Mu Value:

In [None]:
# Emission means for the HMM, computed from the training chromagram.
# presumably one row per chord state and one column per pitch class - TODO confirm
mu = calculate_mu_from_chroma(chromagram)

###### Covariance Matrix:

In [None]:
# Emission covariances for the HMM, computed from the training chromagram.
# The model cell takes np.diag of each entry, so each entry is presumably a
# square per-state covariance matrix - TODO confirm against the printed shape.
covars = calculate_covariance_from_chroma(chromagram)
print("Covariances shape:", covars.shape)

In [None]:
# Build the Gaussian HMM directly from the pre-computed components instead of
# fitting it: one hidden state per row of the transition matrix.
n_states = transition_prob_matrix.shape[0]

# Keep only the per-feature variances (diagonal) of each covariance entry and
# add a small floor so no variance is exactly zero. The reshape fixes the
# feature dimension at 12 columns.
diag_covars = np.array([np.diag(cov_matrix) + 1e-6 for cov_matrix in covars]).reshape(-1, 12)

model = hmm.GaussianHMM(n_components=n_states, covariance_type="diag")
model.startprob_ = initial_state_probabilties
model.transmat_ = transition_prob_matrix.values
model.means_ = mu
model.covars_ = diag_covars
# The feature count must match the emission parameters set above. The previous
# hard-coded 36 contradicted the 12-column covariances.
model.n_features = diag_covars.shape[1]

In [None]:
# Evaluate the model on the test pieces: hold out each piece's last chord,
# decode the remaining chromagram, and compare the final decoded state with
# the held-out chord.
true_labels = []
predicted_labels = []
for song_name in tqdm(list(test_piece_names)):
    last_chord, chromagram_without_last_chord = separate_last_chord(get_chromagram(song_name, midi_data))
    if chromagram_without_last_chord.empty:
        # Nothing left to decode once the last chord is removed.
        continue

    # The HMM's emissions are Gaussians over the chroma features, so the
    # observations passed to predict() must be the chroma columns. The
    # previous code passed the encoded 'Chord Actual' labels as a single
    # column, which does not match the model's feature dimension.
    observations = chromagram_without_last_chord.drop(columns=['Chord Actual']).to_numpy(dtype=float)
    preds = model.predict(observations)

    # Use the last decoded state as the prediction for the held-out chord.
    # assumes state indices line up with CUSTOM_ENCODING values - TODO confirm
    prediction = preds[-1]
    predicted_labels.append(prediction)
    # Chords missing from CUSTOM_ENCODING are scored as label -1.
    true_labels.append(CUSTOM_ENCODING.get(last_chord, -1))

f1 = f1_score(true_labels, predicted_labels, average='micro')
print(f"F1 Score: {f1}")

In [None]:
# Map the integer labels back to chord names and report the mean distance
# between predicted and true chords.
# NOTE(review): labels absent from INVERSE_ENCODING (e.g. the -1 fallback used
# for unknown chords) become None here - confirm the metric tolerates that.
true_chords = list(map(INVERSE_ENCODING.get, true_labels))
predicted_chords = list(map(INVERSE_ENCODING.get, predicted_labels))

mean_distance = mean_chord_distance_with_quality(predicted_chords, true_chords)
print(mean_distance)

In [None]:
# n_predictions: value passed as the third argument to predict_next_chords below.
# i: index into test_piece_names where the search loop below starts.
# NOTE(review): the search starts at index 1, so the piece at index 0 is never tried.
n_predictions, i = 40, 1
def are_all_items_same(items):
    """Return True when every element of ``items`` equals its first element.

    ``None`` and empty sequences count as "all the same" and return True.

    Emptiness is tested with ``len`` rather than truthiness, so array-like
    inputs whose ``bool()`` is ambiguous (for example numpy arrays) work too.

    Parameters:
        items: an indexable, sized sequence (list, tuple, 1-D array, ...) or None.

    Returns:
        bool: True if ``items`` is None, is empty, or holds only equal elements.
    """
    if items is None or len(items) == 0:
        return True
    first = items[0]
    return all(item == first for item in items)

# Walk through the test pieces, starting at index `i`, until one yields a
# predicted chord sequence that is not a single repeated chord.
# list() makes the names indexable regardless of the container type returned
# by separate_for_training (the other cells wrap it the same way).
candidate_names = list(test_piece_names)

predicted_chords = []
while are_all_items_same(predicted_chords):
    if i >= len(candidate_names):
        # Fail with an explanation rather than a bare IndexError once every
        # remaining piece has been tried.
        raise RuntimeError("No remaining test piece produced a non-uniform chord prediction")
    name = candidate_names[i]
    chroma = get_chromagram(name, midi_data)
    predicted_chords = predict_next_chords(model, chroma, n_predictions)
    i += 1

# Show the chromagram used, its index in the test list, and the predictions.
print(chroma)
print(i-1)
print(predicted_chords)