In [2]:
import jax.numpy as jnp
import jax.random as jr
import numpy as np
import pandas as pd
from dynamax.hidden_markov_model import CategoricalHMM
from jax import vmap
from matplotlib import pyplot as plt
from sklearn.metrics import adjusted_rand_score
from sklearn.model_selection import train_test_split

In [None]:
# Load one .npy file per composer from the cleaned-data folder.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, so only point this at files from a trusted source.
folder_path = '/content/Cleaned Data/'
Beethoven, Bach, Chopin = (
    np.load(folder_path + file_name, allow_pickle=True)
    for file_name in ('Beethoven.npy', 'Bach.npy', 'Chopin.npy')
)

# Order matters: later cells index the composers by position in this list.
comps_list = [Beethoven, Bach, Chopin]

In [None]:
# Column layout of every raw song array.
channel_names = ['note_name', 'note_num','start_time', 'duration', 'normed_duration', 'velocity', 'tempo']
bin_list = list(range(0, 65*15, 15))

# Keep only note_num and normed_duration (in that order) for each song.
# NOTE(review): this cell rebinds Beethoven/Bach/Chopin/comps_list to the
# reduced arrays, so it must be preceded by a fresh run of the loading cell.
unused_columns = ['duration','note_name','start_time','velocity','tempo']
composers = [
    [
        pd.DataFrame(song, columns=channel_names).drop(columns=unused_columns).to_numpy()
        for song in composer
    ]
    for composer in comps_list
]

Beethoven, Bach, Chopin = composers
comps_list = [Beethoven, Bach, Chopin]

In [None]:
# Label the data
# The keys mirror the integer labels attached below (Beethoven=1, Bach=2,
# Chopin=3); the previous mapping started at 0 and swapped Bach and Beethoven.
label_dict = {1:"Beethoven", 2:"Bach", 3:"Chopin"}


def _attach_label(songs, label):
    """Return a (2, n_songs) object array: row 0 holds the songs, row 1 the label.

    The array is filled element by element instead of going through
    np.vstack, because stacking a ragged list of per-song arrays relies on
    implicit object-array creation, which newer NumPy releases reject.
    """
    labelled = np.empty((2, len(songs)), dtype=object)
    for j, song in enumerate(songs):
        labelled[0, j] = song
    # Labels are stored as floats, matching the former `label*np.ones(n)`.
    labelled[1, :] = float(label)
    return labelled


Beethoven_lbl = _attach_label(Beethoven, 1)
Bach_lbl = _attach_label(Bach, 2)
Chopin_lbl = _attach_label(Chopin, 3)


In [None]:
def print_params(params):
    """Print the initial, transition and emission probabilities in `params`.

    Floats inside printed arrays are rendered with three decimals. The format
    is installed through jnp.set_printoptions and is not restored afterwards.
    """
    three_decimals = "{0:0.3f}".format
    jnp.set_printoptions(formatter={'float': three_decimals})
    print("initial probs:", params.initial.probs, sep="\n")
    print("transition matrix:", params.transitions.transition_matrix, sep="\n")
    print("emission probs:", params.emissions.probs, sep="\n")

In [None]:
# Concatenate into one big data array and train-test-split
data = np.concatenate((Beethoven_lbl, Bach_lbl, Chopin_lbl), axis=1)

# Row 0 holds the per-song arrays, row 1 the composer labels.
X, Y = data[0, :], data[1, :]
# Fix the shuffle seed so the split, and every result derived from it, is the
# same after Restart & Run All.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)

In [None]:
# CategoricalHMM fitting
# Fit one HMM per training song; the fitted parameters are collected in
# params_list, in the same order as the (filtered) training songs.
params_list = []
num_unique_notes = 96
# X is a 1-D object array holding one (T, 2) array per song, so X[:, -1] is
# not a valid index. Pool the last column (normed_duration) of every song
# before counting the distinct values.
num_unique_durations = len(np.unique(np.concatenate([np.asarray(song)[:, -1] for song in X])))
num_states = 3
num_emmisions = 2
num_classes = num_unique_notes*num_unique_durations

# The model definition does not depend on the song, so build it once.
# NOTE(review): assumes every song entry is an integer category index in
# [0, num_classes) -- TODO confirm against the cleaned data.
hmm = CategoricalHMM(num_states,num_emmisions,num_classes)
kept_train = []
for i,p in enumerate(X_train):
    if (np.isnan(p).any()) or (np.isinf(p).any()):
        print("NaN or Inf")
        print(np.isnan(p).any())
        print(np.isinf(p).any())
        print(" at i =", i)
        continue
    params, props = hmm.initialize(method="prior")
    params, log_probs = hmm.fit_em(params,props,p,num_iters=10)
    params_list.append(params)
    kept_train.append(i)

# Drop the skipped songs from the training split as well; otherwise
# params_list and Y_train have different lengths and the features built from
# params_list no longer line up with their labels. Re-running the cell is
# harmless because every remaining song passes the check.
kept_train = np.asarray(kept_train, dtype=int)
X_train, Y_train = X_train[kept_train], Y_train[kept_train]

In [None]:
# Pull the fitted transition matrix out of each training song's parameters.
t_matrix_list = [fitted.transitions.transition_matrix for fitted in params_list]

In [3]:
from sklearn.neighbors import NearestCentroid
from sklearn.cluster import KMeans

In [None]:
# Feature vector per training song: its transition matrix flattened to 1-D.
y = Y_train
X_for_training = [np.ravel(t_matrix) for t_matrix in t_matrix_list]

# Labels are stored as floats; the classifiers get plain ints.
y_ints = list(map(int, y))

In [None]:
train_features = np.array(X_for_training)

# Supervised baseline: assign each song to the nearest class centroid.
NC = NearestCentroid()
NC.fit(train_features, y_ints)

# KMeans is unsupervised: fit() ignores any labels passed to it, so none are
# given. The seed makes the cluster initialisation reproducible.
KM = KMeans(3, random_state=0)
KM.fit(train_features)

Now let's work with the test set.

In [None]:
# CategoricalHMM fitting
# Fit one HMM per test song; the fitted parameters are collected in
# params_list_test, in the same order as the (filtered) test songs.
params_list_test = []
num_unique_notes = 96
# NOTE(review): hard-coded count of distinct normed_duration values -- confirm
# it matches the value used when fitting the training songs.
num_unique_durations = 67
num_states = 3
num_emmisions = 2
num_classes = num_unique_notes*num_unique_durations

# The model definition does not depend on the song, so build it once.
hmm = CategoricalHMM(num_states,num_emmisions,num_classes)
kept_test = []
for i,p in enumerate(X_test):
    if (np.isnan(p).any()) or (np.isinf(p).any()):
        print("NaN or Inf")
        print(np.isnan(p).any())
        print(np.isinf(p).any())
        print(" at i =", i)
        continue
    params, props = hmm.initialize(method="prior")
    params, log_probs = hmm.fit_em(params,props,p,num_iters=10)
    params_list_test.append(params)
    kept_test.append(i)

# Drop the skipped songs from the test split as well; otherwise
# params_list_test and Y_test have different lengths and the test features no
# longer line up with their labels. Re-running the cell is harmless because
# every remaining song passes the check.
kept_test = np.asarray(kept_test, dtype=int)
X_test, Y_test = X_test[kept_test], Y_test[kept_test]

In [None]:
# Pull the fitted transition matrix out of each test song's parameters.
t_matrix_list_test = [fitted.transitions.transition_matrix for fitted in params_list_test]

In [None]:
# Feature vector per test song: its transition matrix flattened to 1-D.
# These are stored under the names the scoring cells read (X_for_test,
# y_ints_test). The previous version wrote to X_for_training / y_ints, which
# overwrote the training data and left the scoring names undefined.
X_for_test = [np.ravel(matrix) for matrix in t_matrix_list_test]

# Labels are stored as floats; the classifiers get plain ints.
y_ints_test = [int(label) for label in Y_test]

In [None]:
# Mean accuracy of the nearest-centroid classifier on the test-song features.
nc_test_features = np.array(X_for_test)
NC.score(nc_test_features, y_ints_test)

In [None]:
# KMeans.score returns the negative K-means objective and ignores the labels,
# so it is not an accuracy and cannot be read alongside a classifier score.
# Cluster ids are also arbitrary, so compare the predicted clusters with the
# true composers using the adjusted Rand index, which is invariant to how the
# clusters are numbered (1.0 = identical grouping, about 0 = chance level).
km_test_features = np.array(X_for_test)
km_test_clusters = KM.predict(km_test_features)
adjusted_rand_score(y_ints_test, km_test_clusters)