In [1]:
import sys
# add path for processing module
sys.path.append('..')

import numpy as np
from sklearn import svm

from processing import convert_to_mfcc
from classification import gmm_js, fit_gmm, init_gmm

# build one MFCC sample per training song (same files, same order)
samples = [
    convert_to_mfcc(wav_path, frames=3000)
    for wav_path in (
        '../data/songs/Akagami no Shirayuki-hime.wav',
        '../data/songs/Dragon Ball.wav',
    )
]

# sanity check: shape of the first sample
print(samples[0].shape)

(12, 3000)


In [2]:
# precompute one GMM per training sample
gmm_samples = [init_gmm(fit_gmm(mfcc)) for mfcc in samples]

# create gram matrix, sized from the data instead of a hard-coded (2, 2)
# so the cell keeps working when more songs are added to `samples`
n_samples = len(gmm_samples)
gram_matrix = np.zeros((n_samples, n_samples))

# precompute d_js for every ordered pair (i, j)
# NOTE(review): the recorded output is not exactly symmetric, so
# gmm_js(a, b) and gmm_js(b, a) can differ slightly — presumably gmm_js
# is a sampling-based estimate; confirm in `classification`.
for i, gmm_i in enumerate(gmm_samples):
    for j, gmm_j in enumerate(gmm_samples):
        gram_matrix[i, j] = gmm_js(gmm_i, gmm_j)

gram_matrix

array([[-8.88178420e-16,  5.87205180e-01],
       [ 5.72627014e-01,  1.33226763e-15]])

In [3]:
# apply custom rbf kernel
def kernel(js: float, gamma: float = 0.1) -> float:
    """Map a divergence value to an RBF-style similarity.

    Args:
        js: divergence value (here: an entry of the pairwise JS matrix).
        gamma: scale factor applied to `js` before exponentiation.

    Returns:
        exp(-gamma * js); equals 1 when `js` is 0 and decreases as `js` grows.
    """
    return np.exp(-gamma * js)

# element-wise version of `kernel`, usable on arrays and lists
kernel_func = np.vectorize(kernel)

# apply kernel: np.vectorize returns a NEW array and leaves its argument
# untouched, so the result has to be stored to be used for training
kernel_matrix = kernel_func(gram_matrix)

# fit svm on the kernel matrix (not on the raw divergences), so training
# uses the same kernelised similarities that are passed to predict()
svc = svm.SVC(kernel='precomputed')
svc.fit(kernel_matrix, [1, 2])

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale',
    kernel='precomputed', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

## Predict the class of a song using the trained model

In [4]:
# full pipeline: wav file -> MFCC -> GMM -> kernelised similarities -> SVM

song = '../data/songs/Akagami no Shirayuki-hime.wav'
gmm = init_gmm(fit_gmm(convert_to_mfcc(song, frames=3000)))

# divergence between every training GMM and the query GMM, in training order
sims = [gmm_js(train_gmm, gmm) for train_gmm in gmm_samples]
# a precomputed-kernel SVC expects one row per query: shape (1, n_train)
sims = kernel_func(sims).reshape(1, -1)

f'class={svc.predict(sims)}'

'class=[1]'