##### Imports:

In [None]:
from utils import new_predict, separate_for_training, calculate_emission_from_chroma, calculate_mu_from_chroma, calculate_transition_probabilites, format_indiv_chroma, predict, get_unique_predicted, calculate_initial_probabilities
import pickle
from chroma import get_chromagram
import pandas as pd
from tqdm import tqdm
import numpy as np
from hmmlearn import hmm

##### Steps:

1. Training / Testing Data Split
2. Create Chromagram from Training Data
3. Create HMM Initialization Components
    - Initial State Probabilities
    - Transition Probability Matrix
    - Mu Value
    - Emission Matrix
4. Create HMM Object
5. Fit / Train HMM

##### Training / Test Data Split:

In [None]:
# Load the pickled dataset and split its pieces into training and test sets.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file —
# only load a dataset.pkl that comes from a trusted source.
with open(r"dataset.pkl", 'rb') as data:
    midi_data:dict = pickle.load(data)

# The middle return value is discarded. The 0.8 and 0. arguments are presumably
# the training and validation fractions — confirm against utils.separate_for_training.
training_piece_names, _, test_piece_names = separate_for_training(midi_data, 0.8, 0.)


##### Create Chromagram from Training Data:

In [None]:
# Build one chromagram per training piece, then concatenate them with pd.concat.
song_chromagrams = []
# list(...) materialises the names before they are handed to tqdm.
for song_name in tqdm(list(training_piece_names)):
    indiv_chroma = get_chromagram(song_name, midi_data)
    # NOTE(review): `formatted` is never used — the unformatted `indiv_chroma`
    # is what gets appended below. Confirm whether format_indiv_chroma mutates
    # its argument in place or whether `formatted` should be appended instead.
    formatted = format_indiv_chroma(indiv_chroma)
    song_chromagrams.append(indiv_chroma)

chromagram = pd.concat(song_chromagrams)
# Last expression of the cell: shows the first 200 rows in the notebook output.
chromagram.head(200)

##### Create HMM Components:

###### Initial State Probabilities:

In [None]:
# Computed by the utils helper from the training piece names and the dataset;
# assigned to model.startprob_ when the HMM is assembled further down.
initial_state_probabilties = calculate_initial_probabilities(training_piece_names, midi_data)
# Bare name as the cell's last expression so the notebook displays the value.
initial_state_probabilties

###### Transition Matrix:

In [None]:
# Computed by the utils helper from the concatenated chromagram. Further down,
# its row count sets the HMM's n_components and its .values become model.transmat_.
transition_prob_matrix = calculate_transition_probabilites(chromagram)
# Bare name as the cell's last expression so the notebook displays the value.
transition_prob_matrix

###### Mu Value:

In [None]:
# At this point calculate_mu_from_chroma is the version imported from utils.
# The function is redefined later in this file and `mu` is recomputed with that
# redefinition before the model is built, so this value is overwritten.
mu = calculate_mu_from_chroma(chromagram)

###### Emission Matrix:

In [None]:
# The twelve pitch-class names, spelled with sharps.
NOTES_NAMES = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
# Every root combined with the suffixes '', 'm' and 'dim': 12 * 3 = 36 chord labels.
FULL_CHORD_LIST = [note + suffix for note in NOTES_NAMES for suffix in ['', 'm', 'dim']]


def calculate_emission_from_chroma_adjusted(chroma):
    """Build one scaled-identity matrix per chord in ``FULL_CHORD_LIST``.

    The scale of matrix ``i`` is the relative frequency of ``FULL_CHORD_LIST[i]``
    in ``chroma['Chord Actual']`` (``value_counts(normalize=True)``). Chords
    that never occur get a small fallback of 0.01, so their matrix is not all
    zeros.

    Parameters
    ----------
    chroma : pandas.DataFrame
        Must contain a ``'Chord Actual'`` column holding chord labels.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, n, n)`` with ``n = len(FULL_CHORD_LIST)``;
        ``result[i]`` is ``value_i * identity(n)``.

    Raises
    ------
    KeyError
        If ``chroma`` has no ``'Chord Actual'`` column.
    """
    # Derive the size from the chord list instead of repeating the literal 36,
    # so the array shape cannot drift from the list it is indexed by.
    n_chords = len(FULL_CHORD_LIST)

    chord_counts = chroma['Chord Actual'].value_counts(normalize=True)

    # Series.get returns the fallback for labels missing from the index.
    diagonal_values = np.array(
        [chord_counts.get(chord, 0.01) for chord in FULL_CHORD_LIST],
        dtype=float,
    )

    # Broadcast (n, 1, 1) * (n, n): scales one identity matrix per chord.
    return diagonal_values[:, np.newaxis, np.newaxis] * np.eye(n_chords)
# One 36x36 scaled-identity matrix per chord; assigned to model.covars_ when
# the HMM is assembled further down.
emission = calculate_emission_from_chroma_adjusted(chromagram)

In [None]:
# Sanity checks on the stacked matrices before they are used as covariances.
# Each emission[i] is one square matrix, so symmetry must be tested per matrix:
# swap only the last two axes. Plain `.T` on a 3-D array reverses all three
# axes and would compare entries across different chords.
equal = np.array_equal(emission, np.swapaxes(emission, 1, 2))
print(equal)
print(emission.shape)
# np.linalg.cholesky factorises every matrix in the stack and raises
# LinAlgError if any of them is not positive-definite.
print(np.linalg.cholesky(emission))

In [None]:
def calculate_mu_from_chroma(chroma):
    """Return the mean of each chord-named column of ``chroma``.

    The result has one entry per chord in ``FULL_CHORD_LIST``, in that order.
    For a chord that is a column of ``chroma``, the entry is that column's
    mean; for any other chord the entry is 0.

    Parameters
    ----------
    chroma : pandas.DataFrame
        Frame whose columns may include chord labels from ``FULL_CHORD_LIST``.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``len(FULL_CHORD_LIST)``.
    """
    # This definition replaces the calculate_mu_from_chroma imported from utils.
    known_columns = chroma.columns
    per_chord_means = [
        chroma[chord].mean() if chord in known_columns else 0
        for chord in FULL_CHORD_LIST
    ]
    return np.array(per_chord_means, dtype=float)
# Recompute mu with the calculate_mu_from_chroma defined in this file, which
# shadows the version imported from utils.
mu = calculate_mu_from_chroma(chromagram)
# One hidden state per row of the transition matrix.
model = hmm.GaussianHMM(n_components=transition_prob_matrix.shape[0], covariance_type="full")
# The parameters below are assigned directly on the model object.
model.startprob_ = initial_state_probabilties
model.transmat_ = transition_prob_matrix.values
# NOTE(review): reshape(-1, 1) makes means_ a (36, 1) array (one feature per
# state), while `emission` is (36, 36, 36) (36 features per state). hmmlearn
# documents means_ as (n_components, n_features) and "full" covars_ as
# (n_components, n_features, n_features), so the two disagree on n_features —
# confirm the intended feature dimension before using the model.
model.means_ = mu.reshape(-1, 1)
model.covars_ = emission
print(model.means_.shape)


In [None]:
# Run the utils prediction helper with the assembled model.
# NOTE(review): the first argument is the transition matrix, not the chromagram;
# the result name suggests chromagram rows with predictions — confirm against
# the signature of utils.new_predict.
chroma_with_preds = new_predict(transition_prob_matrix, model)