## Libraries Import

In [1]:
import os
# import pydub
#import sox
import pandas as pd
import numpy as np
# import datetime
# import librosa
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

import faiss
from sklearn import datasets

from sklearn.metrics import accuracy_score
from sklearn.cluster import KMeans
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [2]:
# Root directory of the project checkout; data files are read from "<REPO_DIR>/data/".
# Set the UWR_AUDIOSEG_REPO_DIR environment variable to run the notebook on another
# machine; when it is unset the original hard-coded location is used.
REPO_DIR = os.environ.get(
    'UWR_AUDIOSEG_REPO_DIR',
    '/Users/JakubMichalowski/Documents/UWR/UWrMLProjectAudioSeg/',
)

## Pre-processed data loading

In [105]:
# Read the three pre-processed feature files and stack them into one frame,
# then discard the 'Unnamed: 0' and 'timestamp' columns (neither is used as a feature).
csv_paths = [REPO_DIR + "/data/data_{}.csv".format(i) for i in [1,2,3]]
data = pd.concat([pd.read_csv(path, index_col=False) for path in csv_paths])
data = data.drop(['Unnamed: 0', 'timestamp'], axis=1)

In [106]:
# Preview the first rows of the combined frame (label plus feature columns).
data.head(n=7)

Unnamed: 0,label,chroma_stft,rmse,spec_cent,spec_bw,rolloff,zcr,mfcc_0,mfcc_1,mfcc_2,...,mfcc_10,mfcc_11,mfcc_12,mfcc_13,mfcc_14,mfcc_15,mfcc_16,mfcc_17,mfcc_18,mfcc_19
0,music,0.0,0.0,0.0,0.0,0.0,0.0,-1131.370972,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,music,0.0,0.0,0.0,0.0,0.0,0.0,-1131.370972,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,music,0.0,0.0,0.0,0.0,0.0,0.0,-1131.370972,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,music,0.012444,0.081449,432.529692,424.989395,949.418501,0.01466,-632.946411,12.773681,-1.715082,...,-2.038454,-0.88455,-1.802255,0.243648,-0.727257,0.287844,0.141726,-0.364878,-0.763656,-0.56674
4,music,0.172535,0.435161,2309.434221,2392.904163,4963.40332,0.10235,-33.924923,96.108818,-15.746825,...,-10.182379,-11.484722,-13.53147,-5.707917,-9.890487,-1.513985,-8.121307,-4.673933,-4.060278,-2.206214
5,music,0.157657,0.336784,2567.839539,2600.708755,5628.485662,0.119984,-45.499687,84.936684,-8.681884,...,-11.334825,-8.786905,-12.080547,-5.115549,-6.204822,-0.849673,-5.121214,0.046853,-1.955851,-2.410228
6,music,0.163854,0.356565,2282.280482,2313.616205,4751.007635,0.108554,-50.080624,97.172562,-16.078642,...,-12.691456,-9.263289,-13.510224,-5.936949,-7.081617,-0.348157,-5.19101,-2.328905,-1.093816,-1.405749


In [110]:
# Every column except the target 'label' is a feature.
X = data.drop('label', axis=1)
y = data['label']

# shuffle=False keeps the rows in their original order, so the final 30% of the rows
# become the test split. random_state is kept for reference only: it has no effect
# when shuffling is disabled.
trainX, testX, trainY, testY = train_test_split(X, y, test_size = 0.3, random_state=42, shuffle=False)

trainX, testX, trainY, testY = map(np.array, [trainX, testX, trainY, testY])

# Independent, C-contiguous float32 copies of the feature matrices.
# (The previous np.reshape(...) calls returned new arrays that were thrown away and
# therefore did nothing; the layout and dtype are now requested explicitly.)
train_X = np.array(trainX, dtype='float32', order='C')
test_X = np.array(testX, dtype='float32', order='C')

# Independent copies of the label arrays.
train_y = trainY.copy()
test_y = testY.copy()

In [111]:
# le = preprocessing.LabelEncoder()
# le.fit(train_y)
# test_y_trans = le.transform(test_y)
# train_y_trans = le.transform(train_y)

In [112]:
# train_y_trans, train_y 
# test_y_trans, test_y

In [228]:
class FaissKMeans:
    """K-means clustering backed by ``faiss.Kmeans`` with a scikit-learn-like interface.

    Parameters
    ----------
    n_clusters : int
        Number of centroids (passed to faiss as ``k``).
    n_redo : int
        Number of repetitions with new starting points (passed to faiss as ``nredo``).
    max_iter : int
        Number of iterations (passed to faiss as ``niter``).
    label_map : dict, optional
        Mapping from cluster index to the label returned by ``predict``.
        Defaults to ``{1: 'music', 0: 'speech'}``. Cluster indices without an entry
        are returned as NaN, so supply a complete mapping when ``n_clusters > 2``.
        NOTE(review): k-means numbers its clusters arbitrarily, so the mapping should
        be checked against labelled data after every fit.

    Attributes
    ----------
    kmeans : faiss.Kmeans or None
        The underlying faiss object; ``None`` until ``fit`` is called.
    cluster_centers_ : the ``centroids`` of the fitted faiss object.
    inertia_ : the last entry of the fitted faiss object's ``obj`` sequence.
    """

    # Cluster-index -> label mapping used when no ``label_map`` is supplied.
    DEFAULT_LABEL_MAP = {1: 'music', 0: 'speech'}

    def __init__(self, n_clusters=8, n_redo=10, max_iter=300, label_map=None):
        self.n_clusters = n_clusters
        # redo means repeating with new starting points
        self.n_redo = n_redo
        self.max_iter = max_iter
        # Copy so that later edits to the caller's dict do not change predictions.
        self.label_map = dict(self.DEFAULT_LABEL_MAP if label_map is None else label_map)

        self.kmeans = None
        self.cluster_centers_ = None
        self.inertia_ = None

    def fit(self, X):
        """Train the faiss k-means on ``X`` (2-D, samples x features) and return ``self``."""
        # faiss works on C-contiguous float32 matrices.
        features = np.ascontiguousarray(X, dtype=np.float32)
        self.kmeans = faiss.Kmeans(d=features.shape[1],
                                   k=self.n_clusters,
                                   niter=self.max_iter,
                                   nredo=self.n_redo)
        self.kmeans.train(features)
        self.cluster_centers_ = self.kmeans.centroids
        self.inertia_ = self.kmeans.obj[-1]
        return self

    def predict(self, X):
        """Return, for each row of ``X``, the label mapped to its nearest centroid.

        The result is a 1-D numpy array; rows whose cluster index is missing from
        ``label_map`` come back as NaN.
        """
        features = np.ascontiguousarray(X, dtype=np.float32)
        # search(..., 1) returns (distances, indices); only the index of the single
        # nearest centroid is needed.
        _, nearest = self.kmeans.index.search(features, 1)
        cluster_ids = nearest.reshape(-1)
        return np.array(pd.Series(cluster_ids).map(self.label_map))

In [257]:
class SklearnKMeans:
    """K-means clustering backed by ``sklearn.cluster.KMeans`` that predicts labels.

    Parameters
    ----------
    n_clusters : int
        Number of clusters.
    n_redo : int
        Number of repetitions with new starting points (passed as ``n_init``).
    max_iter : int
        Maximum number of iterations per run.
    initialization : str
        Initialisation method (passed as ``init``), e.g. ``'random'`` or ``'k-means++'``.
    label_map : dict, optional
        Mapping from cluster index to the label returned by ``predict``.
        Defaults to ``{0: 'music', 1: 'speech'}``. Cluster indices without an entry
        are returned as NaN, so supply a complete mapping when ``n_clusters > 2``.
        NOTE(review): k-means numbers its clusters arbitrarily, so the mapping should
        be checked against labelled data after every fit.

    Attributes
    ----------
    kmeans : sklearn.cluster.KMeans or None
        The underlying estimator; ``None`` until ``fit`` is called.
    cluster_centers_ : ``cluster_centers_`` of the fitted estimator.
    inertia_ : ``inertia_`` of the fitted estimator.
    """

    # Cluster-index -> label mapping used when no ``label_map`` is supplied.
    DEFAULT_LABEL_MAP = {0: 'music', 1: 'speech'}

    def __init__(self, n_clusters=8, n_redo=10, max_iter=300, initialization='random',
                 label_map=None):
        self.n_clusters = n_clusters
        # redo means repeating with new starting points
        self.n_redo = n_redo
        self.max_iter = max_iter
        self.initialization = initialization
        # Copy so that later edits to the caller's dict do not change predictions.
        self.label_map = dict(self.DEFAULT_LABEL_MAP if label_map is None else label_map)

        self.kmeans = None
        self.cluster_centers_ = None
        self.inertia_ = None

    def fit(self, X):
        """Fit the scikit-learn k-means on ``X`` and return ``self``."""
        # random_state is fixed so that repeated fits give the same clustering.
        self.kmeans = KMeans(n_clusters=self.n_clusters,
                             random_state=0,
                             init=self.initialization,
                             n_init=self.n_redo, max_iter=self.max_iter)

        self.kmeans.fit(X)
        self.cluster_centers_ = self.kmeans.cluster_centers_
        self.inertia_ = self.kmeans.inertia_
        return self

    def predict(self, X):
        """Return, for each row of ``X``, the label mapped to its assigned cluster.

        The result is a 1-D numpy array; rows whose cluster index is missing from
        ``label_map`` come back as NaN.
        """
        cluster_ids = np.asarray(self.kmeans.predict(X)).reshape(-1)
        return np.array(pd.Series(cluster_ids).map(self.label_map))

## Fitting classifiers

In [258]:
# Number of clusters requested from both k-means implementations.
K  = 2

# Faiss-backed k-means: fit on the training features, then label both splits.
kmeansFaiss = FaissKMeans(n_clusters=K, n_redo=50, max_iter=300)
kmeansFaiss.fit(train_X)
y_pred_faiss_train, y_pred_faiss_test = (
    kmeansFaiss.predict(split) for split in (train_X, test_X)
)

# scikit-learn k-means with random initialisation: same procedure.
kmeansSklearn = SklearnKMeans(n_clusters=K, n_redo=50, initialization='random')
kmeansSklearn.fit(train_X)
y_pred_sklearn_train, y_pred_sklearn_test = (
    kmeansSklearn.predict(split) for split in (train_X, test_X)
)

## Mapping prediction results of the sklearn classifiers

In [259]:
# y_pred_sklearn_train[0:50], data.head(50)

# y_pred_sklearn_train = np.array(pd.Series(y_pred_sklearn_train).map({0: 'music', 1: 'speech'}))
# y_pred_sklearn_test = np.array(pd.Series(y_pred_sklearn_test).map({0: 'music', 1: 'speech'}))

In [260]:
# Accuracy of each k-means classifier on the training split, keyed by library name.
trainAccuracies = {
    library: accuracy_score(y_true=train_y, y_pred=predicted)
    for library, predicted in (
        ('sklearn', y_pred_sklearn_train),
        ('faiss', y_pred_faiss_train),
    )
}
trainAccuracies

{'sklearn': 0.7138749101365924, 'faiss': 0.7233165588305775}

In [261]:
# Accuracy of each k-means classifier on the test split, keyed by library name.
testAccuracies = {
    library: accuracy_score(y_true=test_y, y_pred=predicted)
    for library, predicted in (
        ('sklearn', y_pred_sklearn_test),
        ('faiss', y_pred_faiss_test),
    )
}
testAccuracies

{'sklearn': 0.724029967572403, 'faiss': 0.7354355361735435}

In [262]:
# kmeansFaiss.cluster_centers_, kmeansClassifier.cluster_centers_

## Smoothing with an HMM

In [263]:
class smoothHMM:
    """Smooth the output of a row-wise classifier with a hidden Markov model.

    The wrapped ``predictor`` labels every row of ``X``. Those labels are
    integer-encoded and used as the observation sequence of a discrete HMM, whose
    hidden states are trained from the true labels.

    ``HiddenMarkovModel`` and ``DiscreteDistribution`` (pomegranate) and
    ``LabelEncoder`` (scikit-learn) must be imported before an instance is created.

    Parameters
    ----------
    predictor : object
        Fitted classifier exposing ``predict(X)`` that returns one label per row.
    state_names : list
        Names of the hidden states; its length sets the number of HMM components.
    """

    def __init__(self, predictor, state_names):

        self.predictor = predictor
        self.modelHMM = HiddenMarkovModel()
        self.le=LabelEncoder()
        self.state_names = state_names

    def fit(self, X, y):
        """Train the HMM from the predictor's output on ``X`` and the true labels ``y``.

        ``X`` and ``y`` may be any array-likes (e.g. DataFrame / Series); all rows are
        treated as one continuous sequence. Returns ``self``.
        """
        # Independent C-contiguous float32 copy of the features for the predictor.
        features = np.array(X, dtype='float32', order='C')
        labels = np.array(y)

        # Both sequences are reshaped to (1, n_rows): a single sample sequence.
        observations = self.le.fit_transform(self.predictor.predict(features)).reshape(1, -1)
        state_path = labels.reshape(1, -1)

        self.modelHMM = self.modelHMM.from_samples(
            DiscreteDistribution, len(self.state_names), observations, labels = state_path,
            algorithm='labeled', state_names=self.state_names , verbose = True)
        self.modelHMM.bake()
        return self

    def predict(self, X):
        """Return the HMM's prediction for the sequence of predictor outputs on ``X``."""
        features = np.array(X, dtype='float32', order='C')

        # Encode the predictor's labels with the encoder fitted in ``fit``.
        observations = self.le.transform(self.predictor.predict(features)).tolist()
        return self.modelHMM.predict(observations)

    def score(self, X, y):
        """Return the fraction of rows whose predicted state equals the encoded label.

        ``y`` may be any array-like of labels (list, numpy array, pandas Series).
        NOTE(review): this assumes the HMM's state indices coincide with the label
        encoder's integer codes -- confirm for the state names in use.
        """
        predicted_states = np.asarray(self.predict(X))
        true_states = self.le.transform(np.asarray(y))

        return np.sum(predicted_states == true_states) / true_states.shape[0]

In [264]:
from sklearn.preprocessing import LabelEncoder
from pomegranate import HiddenMarkovModel
from pomegranate.distributions import DiscreteDistribution
import pomegranate as pg

In [265]:
def _load_feature_files(file_ids):
    """Read and stack the ``data_<id>.csv`` files, dropping 'Unnamed: 0' and 'timestamp'."""
    frames = [
        pd.read_csv(REPO_DIR + "/data/data_{}.csv".format(i), index_col=False)
        for i in file_ids
    ]
    return pd.concat(frames).drop(['Unnamed: 0', 'timestamp'], axis=1)


# Files 1 and 2 form the HMM training data; file 3 forms the HMM test data.
dataHTrain = _load_feature_files([1,2])
dataHTest = _load_feature_files([3])

In [266]:
# Separate the feature columns from the 'label' target for the HMM train and test frames.
trainHX, trainHY = dataHTrain.drop(columns='label'), dataHTrain['label']
testHX, testHY = dataHTest.drop(columns='label'), dataHTest['label']

In [270]:
# Wrap the faiss k-means classifier in the HMM smoother, train it on the HMM
# training data and report its score on the HMM test data.
hmm_faiss = smoothHMM(predictor=kmeansFaiss, state_names=['music', 'speech'])
hmm_faiss.fit(X=trainHX, y=trainHY)
faiss_hmm_score = hmm_faiss.score(X=testHX, y=testHY)
print('Faiss library classifier score:', faiss_hmm_score)

[1] Improvement: 4304.927336080702	Time (s): 0.05253
[2] Improvement: 0.0	Time (s): 0.03472
Total Training Improvement: 4304.927336080702
Total Training Time (s): 0.1400
Faiss library classifier score: 0.7624750499001997


In [271]:
# Wrap the scikit-learn k-means classifier in the HMM smoother, train it on the HMM
# training data and report its score on the HMM test data.
hmm_sklearn = smoothHMM(predictor=kmeansSklearn, state_names=['music', 'speech'])
hmm_sklearn.fit(X=trainHX, y=trainHY)
sklearn_hmm_score = hmm_sklearn.score(X=testHX, y=testHY)
print('Sklearn library classifier score:', sklearn_hmm_score)

[1] Improvement: 4282.8583965135895	Time (s): 0.05066
[2] Improvement: 0.0	Time (s): 0.03193
Total Training Improvement: 4282.8583965135895
Total Training Time (s): 0.1342
Sklearn library classifier score: 0.7572854291417166
