# K-Nearest Neighbours Model

## Imports

In [655]:
# Import the required libraries
import librosa
import matplotlib.pyplot as plt
import numpy as np
import librosa.display
import scipy.fftpack
import math
import soundfile as sf
import sklearn.preprocessing
import pandas as pd

from scipy.io import wavfile
from scipy.signal import find_peaks

from sklearn.neural_network import MLPClassifier

# Required to train and split the data
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Required to import the data
from os import listdir

# KNN Imports
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KNeighborsClassifier
from sklearn import preprocessing
from scipy import stats

## Import Features

In [668]:
# Read the three pre-extracted feature files (parsed as doubles) and the label file.
onset_strength_peak_rate = np.loadtxt(
    'extracted_features/onset_strength_peak_rate_1000.csv',
    delimiter=',', dtype=np.double)
var_zc = np.loadtxt(
    'extracted_features/var_zc_1000.csv',
    delimiter=',', dtype=np.double)
low_energy_frame_percentages = np.loadtxt(
    'extracted_features/low_energy_frame_percentages_1000.csv',
    delimiter=',', dtype=np.double)
all_labels = np.loadtxt('extracted_features/labels_zc1_1000.csv', delimiter=',')

# Sanity check: report the length of every array so a mismatch is visible at a glance.
for name, values in (
    ("onset_strength_peak_rate", onset_strength_peak_rate),
    ("var_zc", var_zc),
    ("low_energy_frame_percentages", low_energy_frame_percentages),
    ("all_labels", all_labels),
):
    print("Length of {} : {}".format(name, len(values)))

Length of onset_strength_peak_rate : 1000
Length of var_zc : 1000
Length of low_energy_frame_percentages : 1000
Length of all_labels : 1000


## Remove Outliers, Normalize, and Combine Data

In [669]:
def removeOutliers(x, outlierConstant):
    """Filter a 1-D sample with the interquartile-range (IQR) fence rule.

    A value is kept when it lies inside the inclusive interval
    ``[Q1 - outlierConstant * IQR, Q3 + outlierConstant * IQR]``, where
    Q1 and Q3 are the 25th and 75th percentiles of ``x``.

    Parameters
    ----------
    x : array-like
        One-dimensional numeric data.
    outlierConstant : float
        Multiplier applied to the IQR to set the fence width.

    Returns
    -------
    removed : numpy.ndarray of int
        Positions in ``x`` of the rejected values. The dtype is always an
        integer type, even when nothing is rejected, so the array can be
        handed straight to ``np.delete`` to drop the same rows elsewhere.
    result : numpy.ndarray
        The values of ``x`` that were kept, in their original order.
    """
    a = np.asarray(x)

    # Percentiles are undefined for an empty sample; nothing to remove.
    if a.size == 0:
        return np.array([], dtype=np.intp), a.copy()

    upper_quartile = np.percentile(a, 75)
    lower_quartile = np.percentile(a, 25)
    fence = (upper_quartile - lower_quartile) * outlierConstant

    # Boolean mask of in-range values; NaNs compare False and are rejected.
    keep = (a >= lower_quartile - fence) & (a <= upper_quartile + fence)

    # flatnonzero yields integer indices (empty or not), unlike np.array([])
    # which defaults to float64 and is not a valid index array.
    return np.flatnonzero(~keep), a[keep]

In [672]:
# Outlier passes, in this fixed order: var_zc, onset_strength_peak_rate,
# low_energy_frame_percentages. Each pass filters one feature with
# removeOutliers (constant 1.5) and deletes the positions it dropped from
# the other two features and from the labels, so all four arrays stay
# aligned. Later passes operate on the already-filtered arrays.
features = {
    'var_zc': var_zc,
    'onset_strength_peak_rate': onset_strength_peak_rate,
    'low_energy_frame_percentages': low_energy_frame_percentages,
}
for current in ('var_zc', 'onset_strength_peak_rate', 'low_energy_frame_percentages'):
    removed, features[current] = removeOutliers(features[current], 1.5)
    for other in features:
        if other != current:
            features[other] = np.delete(features[other], removed)
    all_labels = np.delete(all_labels, removed)

var_zc = features['var_zc']
onset_strength_peak_rate = features['onset_strength_peak_rate']
low_energy_frame_percentages = features['low_energy_frame_percentages']

# Normalise the data: divide each feature by its own maximum
var_zc = var_zc / np.max(var_zc)
onset_strength_peak_rate = onset_strength_peak_rate / np.max(onset_strength_peak_rate)
low_energy_frame_percentages = low_energy_frame_percentages / np.max(low_energy_frame_percentages)

# Report the surviving sample count for every array
for name, values in (
    ("onset_strength_peak_rate", onset_strength_peak_rate),
    ("var_zc", var_zc),
    ("low_energy_frame_percentages", low_energy_frame_percentages),
    ("all_labels", all_labels),
):
    print("Length of {} : {}".format(name, len(values)))

Length of onset_strength_peak_rate : 959
Length of var_zc : 959
Length of low_energy_frame_percentages : 959
Length of all_labels : 959
  from ipykernel import kernelapp as app
  app.launch_new_instance()


In [673]:
# Assemble the feature matrix: one row per sample, columns ordered as
# (onset_strength_peak_rate, var_zc, low_energy_frame_percentages).
# column_stack raises if the three arrays differ in length instead of
# silently following the length of var_zc, and keeps a (0, 3) shape when
# there are no samples.
X = np.column_stack((onset_strength_peak_rate, var_zc, low_energy_frame_percentages))
print("Length of combined data : {}".format(len(X)))

Length of combined data : 959


## Split the Data

In [705]:
# Hold out 20% of the samples for testing. The split is seeded so that
# re-running the notebook reproduces the same partition, and therefore the
# same accuracy figure, instead of drawing a fresh random split each time.
RANDOM_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, all_labels, test_size=0.2, random_state=RANDOM_SEED)

print("Length of X_train : {}".format(len(X_train)))
print("Length of X_test : {}".format(len(X_test)))

print()

print("Length of y_train : {}".format(len(y_train)))
print("Length of y_test : {}".format(len(y_test)))

Length of X_train : 767
Length of X_test : 192

Length of y_train : 767
Length of y_test : 192


## KNN Algorithm

In [706]:
# Hyper-parameters: number of neighbours consulted and how their votes are weighted
n_neighbors = 5
weights = 'uniform'

# Fit the classifier on the training split
clf = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights)
clf.fit(X_train, y_train)

# Predict the held-out samples and count the element-wise matches with the true labels
z = clf.predict(X_test)
correct_pred = int(np.sum(z == y_test))

print("Correct predictions : {:3.2f} %".format(correct_pred/len(y_test)*100))

Correct predictions : 81.77 %
