# K-Nearest Neighbours Model

## Imports

In [1]:
# Import the required libraries
import librosa
import matplotlib.pyplot as plt
import numpy as np
import librosa.display
import scipy.fftpack
import math
import soundfile as sf
import sklearn.preprocessing
import pandas as pd

from scipy.io import wavfile
from scipy.signal import find_peaks

from sklearn.neural_network import MLPClassifier

# Required to train and split the data
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Required to import the data
from os import listdir

# KNN Imports
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KNeighborsClassifier

# Data Processing
from sklearn import preprocessing
from scipy import stats

# Kfold 
from sklearn.model_selection import KFold

# Time keeping
import time

## Import Files

In [83]:
speech_files = np.loadtxt('extracted_features/speech_files.csv', delimiter=',',dtype=str)
music_files = np.loadtxt('extracted_features/music_files.csv', delimiter=',',dtype=str)

speech_files=speech_files[0:4000]
music_files=music_files[0:4000]

all_files = np.append(speech_files,music_files)

print("Number of Speech files : {}".format(len(speech_files)))
print("Number of Music files : {}".format(len(music_files)))
print("Number of total files : {}".format(len(all_files)))

Number of Speech files : 4000
Number of Music files : 4000
Number of total files : 8000


## Import Features

In [2]:
# onset_strength_peak_rate = np.loadtxt('extracted_features/onset_strength_peak_rate_8000.csv', delimiter=',',dtype=np.double)
flux = np.loadtxt('extracted_features/norm_2_8000.csv', delimiter=',',dtype=np.double)
var_zc = np.loadtxt('extracted_features/var_zc_8000.csv', delimiter=',',dtype=np.double)
low_energy_frame_percentages =  np.loadtxt('extracted_features/low_energy_frame_percentages_8000.csv', delimiter=',',dtype=np.double)
mfccs = np.loadtxt('extracted_features/mfccs_8000.csv', delimiter=',',dtype=np.double)
all_labels = np.loadtxt('extracted_features/labels_zc_8000.csv', delimiter=',')

# print("Length of onset_strength_peak_rate : {}".format(len(onset_strength_peak_rate)))
print("Length of flux : {}".format(len(flux)))
print("Length of var_zc : {}".format(len(var_zc)))
print("Length of low_energy_frame_percentages : {}".format(len(low_energy_frame_percentages)))
print("Length of mfccs : {}".format(len(mfccs)))
print("Length of all_labels : {}".format(len(all_labels)))

Length of flux : 8000
Length of var_zc : 8000
Length of low_energy_frame_percentages : 8000
Length of mfccs : 8000
Length of all_labels : 8000


## Remove Outliers, Normalize, and Combine Data

In [725]:
def removeOutliers(x, outlierConstant):
    a = np.array(x)
    # print(a.shape)
    upper_quartile = np.percentile(a, 75)
    lower_quartile = np.percentile(a, 25)
    IQR = (upper_quartile - lower_quartile) * outlierConstant
    quartileSet = (lower_quartile - IQR, upper_quartile + IQR)
    
    result = []
    removed = []
    
    for i,value in enumerate(a):
        if ((value >= quartileSet[0]) and (value <= quartileSet[1])): result.append(value)
        else: removed.append(i)
    
    return np.array(removed), np.array(result)

In [726]:
# Remove outliers caused by var_vc
removed, var_zc = removeOutliers(var_zc,1.5)
onset_strength_peak_rate = np.delete(onset_strength_peak_rate,removed)
low_energy_frame_percentages = np.delete(low_energy_frame_percentages,removed)
all_labels = np.delete(all_labels,removed)

# Remove outliers caused by onset_strength_peak_rate
removed, onset_strength_peak_rate = removeOutliers(onset_strength_peak_rate,1.5)
var_zc = np.delete(var_zc,removed)
low_energy_frame_percentages = np.delete(low_energy_frame_percentages,removed)
all_labels = np.delete(all_labels,removed)

# Remove outliers caused by low_energy_frame_percentages
removed, low_energy_frame_percentages = removeOutliers(low_energy_frame_percentages,1.5)
var_zc = np.delete(var_zc,removed)
onset_strength_peak_rate = np.delete(onset_strength_peak_rate,removed)
all_labels = np.delete(all_labels,removed)

# Normalise the Data
flux = flux / flux.max()
var_zc = var_zc / var_zc.max()
onset_strength_peak_rate = onset_strength_peak_rate / onset_strength_peak_rate.max()
low_energy_frame_percentages = low_energy_frame_percentages / low_energy_frame_percentages.max()

print("Length of onset_strength_peak_rate : {}".format(len(onset_strength_peak_rate)))
print("Length of var_zc : {}".format(len(var_zc)))
print("Length of low_energy_frame_percentages : {}".format(len(low_energy_frame_percentages)))
print("Length of all_labels : {}".format(len(all_labels)))

Length of onset_strength_peak_rate : 962
Length of var_zc : 962
Length of low_energy_frame_percentages : 962
Length of all_labels : 962


## Knn for Flux Only

In [16]:
# Get data
X = np.array([ [flux[i]] for i,x in enumerate(var_zc) ])
print("Length of combined data : {}".format(len(X)))

# Knn
n_neighbors  = 5
weights = 'distance'

clf = KNeighborsClassifier(n_neighbors, weights=weights)

splits = 5
kf = KFold(n_splits=splits,shuffle=True)

# Track Results
result_list=[]

# Track Timing
training_times=[]
predicting_time=[]

sum = 0
for train_indices, test_indices in kf.split(X):

    # Normalise the Data independently
    X_train = X[train_indices] 
    X_train = X_train / X_train.max() 
    X_test = X[test_indices] 
    X_test = X_test / X_test.max()

    start_time = time.time()                    # Keep Time
    clf.fit(X_train,all_labels[train_indices])  # Train the model
    training_times.append(time.time()-start_time)

    start_time = time.time()                    # Keep Time
    z = clf.predict(X_test)                     # Perform predictions
    predicting_time.append( (time.time()-start_time)/z.size )

    correct_pred = 0
    for x,value in enumerate(z):
        if value == all_labels[test_indices][x]: correct_pred+=1
    result_list.append(correct_pred/len(test_indices)*100)
    sum += correct_pred/len(test_indices)*100
    # print(correct_pred/len(test_indices)*100)

print("Average Accuracy Score : {:3.3f} ± {:3.3f} %".format(np.sum(result_list)/splits,np.var(result_list)))
print("Average Training Time : {:3.3} ± {:3.3f} ms".format(np.sum(training_times)/splits*1000,np.var(training_times)*1000))
print("Average Single Prediction Time : {:3.3} ± {:3.3f} ms".format(np.sum(predicting_time)/splits*1000,np.var(predicting_time)*1000))

Length of combined data : 8000
Average Accuracy Score : 92.237 ± 0.437 %
Average Training Time : 3.0 ± 0.001 ms
Average Single Prediction Time : 0.00212 ± 0.000 ms


## KNN For ZCR Only

In [15]:
# Get data
X = np.array([ [var_zc[i]] for i,x in enumerate(var_zc) ])
print("Length of combined data : {}".format(len(X)))

# Knn
n_neighbors  = 5
weights = 'distance'

clf = KNeighborsClassifier(n_neighbors, weights=weights)

splits = 10
kf = KFold(n_splits=splits,shuffle=True)


# Track Results
result_list=[]

# Track Timing
training_times=[]
predicting_time=[]

sum = 0
for train_indices, test_indices in kf.split(X):

    # Normalise the Data independently
    X_train = X[train_indices] 
    X_train = X_train / X_train.max() 
    X_test = X[test_indices] 
    X_test = X_test / X_test.max()

    start_time = time.time()                    # Keep Time
    clf.fit(X_train,all_labels[train_indices])  # Train the model
    training_times.append(time.time()-start_time)

    start_time = time.time()                    # Keep Time
    z = clf.predict(X_test)                     # Perform predictions
    predicting_time.append( (time.time()-start_time)/z.size )

    correct_pred = 0
    for x,value in enumerate(z):
        if value == all_labels[test_indices][x]: correct_pred+=1
    result_list.append(correct_pred/len(test_indices)*100)
    sum += correct_pred/len(test_indices)*100
    # print(correct_pred/len(test_indices)*100)

print("Average Accuracy Score : {:3.3f} ± {:3.3f} %".format(np.sum(result_list)/splits,np.var(result_list)))
print("Average Training Time : {:3.3} ± {:3.3f} ms".format(np.sum(training_times)/splits*1000,np.var(training_times)*1000))
print("Average Single Prediction Time : {:3.3} ± {:3.3f} ms".format(np.sum(predicting_time)/splits*1000,np.var(predicting_time)*1000))

Length of combined data : 8000
Average Accuracy Score : 76.412 ± 1.988 %
Average Training Time : 3.79 ± 0.000 ms
Average Single Prediction Time : 0.00237 ± 0.000 ms


## KNN For Low Energy Frames Only

In [14]:
# Get data
X = np.array([ [low_energy_frame_percentages[i]] for i,x in enumerate(var_zc) ])
print("Length of combined data : {}".format(len(X)))

# Knn
n_neighbors  = 5
weights = 'distance'

clf = KNeighborsClassifier(n_neighbors, weights=weights)

splits = 10
kf = KFold(n_splits=splits,shuffle=True)

# Track Results
result_list=[]

# Track Timing
training_times=[]
predicting_time=[]


sum = 0
for train_indices, test_indices in kf.split(X):

    # Normalise the Data independently
    X_train = X[train_indices] 
    X_train = X_train / X_train.max() 
    X_test = X[test_indices] 
    X_test = X_test / X_test.max()

    start_time = time.time()                    # Keep Time
    clf.fit(X_train,all_labels[train_indices])  # Train the model
    training_times.append(time.time()-start_time)

    start_time = time.time()                    # Keep Time
    z = clf.predict(X_test)                     # Perform predictions
    predicting_time.append( (time.time()-start_time)/z.size )

    correct_pred = 0
    for x,value in enumerate(z):
        if value == all_labels[test_indices][x]: correct_pred+=1
    sum += correct_pred/len(test_indices)*100
    result_list.append(correct_pred/len(test_indices)*100)
    # print(correct_pred/len(test_indices)*100)

print("Average Accuracy Score : {:3.3f} ± {:3.3f} %".format(np.sum(result_list)/splits,np.var(result_list)))
print("Average Training Time : {:3.3} ± {:3.3f} ms".format(np.sum(training_times)/splits*1000,np.var(training_times)*1000))
print("Average Single Prediction Time : {:3.3} ± {:3.3f} ms".format(np.sum(predicting_time)/splits*1000,np.var(predicting_time)*1000))

Length of combined data : 8000
Average Accuracy Score : 89.475 ± 2.240 %
Average Training Time : 3.2 ± 0.000 ms
Average Single Prediction Time : 0.00224 ± 0.000 ms


## KNN For All Features

In [57]:
X = np.array([ [flux[i],var_zc[i],low_energy_frame_percentages[i]] for i,x in enumerate(var_zc) ])

# Knn
n_neighbors  = 25
weights = 'distance'

clf = KNeighborsClassifier(n_neighbors, weights=weights)

splits = 10
kf = KFold(n_splits=splits,shuffle=True)

sum = 0

# Track Results
result_list=[]
speech_results=[]
music_results=[]

# Track Timing
training_times=[]
predicting_time=[]

for train_indices, test_indices in kf.split(X):

    # Get Data
    X_train_flux = flux[train_indices]
    X_train_low_e = low_energy_frame_percentages[train_indices]
    X_train_var_zc = var_zc[train_indices]

    X_test_flux = flux[test_indices]
    X_test_low_e = low_energy_frame_percentages[test_indices]
    X_test_var_zc = var_zc[test_indices]

    # normalise data
    X_train_flux = X_train_flux / X_train_flux.max()
    X_train_low_e = X_train_low_e / X_train_low_e.max()
    X_train_var_zc = X_train_var_zc / X_train_var_zc.max()

    X_test_flux = X_test_flux / X_test_flux.max()
    X_test_low_e = X_test_low_e / X_test_low_e.max()
    X_test_var_zc = X_test_var_zc / X_test_var_zc.max()

    # Normalise the Data independently
    X_train = np.array([ [X_train_flux[i],X_train_var_zc[i],X_train_low_e[i]] for i,x in enumerate(train_indices) ])
    # X_train = X_train / X_train.max() 
    X_test = np.array([ [X_test_flux[i],X_test_var_zc[i],X_test_low_e[i]] for i,x in enumerate(test_indices) ])
    # X_test = X_test / X_test.max()

    start_time = time.time()                    # Keep Time
    clf.fit(X_train,all_labels[train_indices])  # Train the model
    training_times.append(time.time()-start_time)

    start_time = time.time()                    # Keep Time
    z = clf.predict(X_test)                     # Perform predictions
    predicting_time.append( (time.time()-start_time)/z.size )    
    
    correct_pred = 0
    for x,value in enumerate(z):
        # Track overall accuracy
        if value == all_labels[test_indices][x]: correct_pred+=1
        # Track predictions of speech
        if all_labels[test_indices][x] == 0 : 
            if value == all_labels[test_indices][x]: speech_results.append(1)
            else : speech_results.append(0)
        # Track predictions of music
        if all_labels[test_indices][x] == 1 : 
            if value == all_labels[test_indices][x]: music_results.append(1)
            else : music_results.append(0)

    result_list.append(correct_pred/len(test_indices)*100)
    sum += correct_pred/len(test_indices)*100
    # print(correct_pred/len(test_indices)*100)

print("Average Speech Accuracy Score : {:3.3f} ± {:3.3f} %".format(np.sum(speech_results)/len(speech_results)*100,np.var(speech_results)*100))
print("Average Music Accuracy Score : {:3.3f} ± {:3.3f} %".format(np.sum(music_results)/len(music_results)*100,np.var(music_results)*100))
print("Average Accuracy Score : {:3.3f} ± {:3.3f} %".format(np.sum(result_list)/splits,np.var(result_list)))
print("Average Training Time : {:3.3} ± {:3.3f} ms".format(np.sum(training_times)/splits*1000,np.var(training_times)*1000))
print("Average Single Prediction Time : {:3.3} ± {:3.3f} ms".format(np.sum(predicting_time)/splits*1000,np.var(predicting_time)*1000))

4000
Average Speech Accuracy Score : 98.850 ± 1.137 %
Average Music Accuracy Score : 95.775 ± 4.046 %
Average Accuracy Score : 97.312 ± 0.276 %
Average Training Time : 4.69 ± 0.000 ms
Average Single Prediction Time : 0.0086 ± 0.000 ms


## KNN for MFCCs

In [86]:
# Get data
X = np.array([ mfccs[i] for i,x in enumerate(mfccs) ])
print("Length of combined data : {}".format(len(X)))

# Knn
n_neighbors  = 25
weights = 'distance'

clf = KNeighborsClassifier(n_neighbors, weights=weights,algorithm='kd_tree')

splits = 10
kf = KFold(n_splits=splits,shuffle=True)

sum = 0
 
# Track Results
result_list=[]
speech_results=[]
music_results=[]

# Track Timing
training_times=[]
predicting_time=[]

for train_indices, test_indices in kf.split(X):

    # Normalise the Data independently
    X_train = X[train_indices] 
    # X_train = X_train / X_train.max() 
    X_test = X[test_indices] 
    # X_test = X_test / X_test.max()

    start_time = time.time()                    # Keep Time
    clf.fit(X_train,all_labels[train_indices])  # Train the model
    training_times.append(time.time()-start_time)

    start_time = time.time()                    # Keep Time
    z = clf.predict(X_test)                     # Perform predictions
    predicting_time.append( (time.time()-start_time)/z.size )


    correct_pred = 0
    for x,value in enumerate(z):
        # Track overall accuracy
        if value == all_labels[test_indices][x]: correct_pred+=1
        # Track predictions of speech
        if all_labels[test_indices][x] == 0 : 
            if value == all_labels[test_indices][x]: speech_results.append(1)
            else : 
                speech_results.append(0)
                # print(all_files[test_indices][x])
        # Track predictions of music
        if all_labels[test_indices][x] == 1 : 
            if value == all_labels[test_indices][x]: music_results.append(1)
            else : 
                music_results.append(0)
                # print(all_files[test_indices][x])

    sum += correct_pred/len(test_indices)*100
    result_list.append(correct_pred/len(test_indices)*100)
    # print(correct_pred/len(test_indices)*100)

print("Average Speech Accuracy Score : {:3.3f} ± {:3.3f} %".format(np.sum(speech_results)/len(speech_results)*100,np.var(speech_results)*100))
print("Average Music Accuracy Score : {:3.3f} ± {:3.3f} %".format(np.sum(music_results)/len(music_results)*100,np.var(music_results)*100))
print("Average Accuracy Score : {:3.3f} ± {:3.3f} %".format(np.sum(result_list)/splits,np.var(result_list)))
print("Average Training Time : {:3.3} ± {:3.3f} ms".format(np.sum(training_times)/splits*1000,np.var(training_times)*1000))
print("Average Single Prediction Time : {:3.3} ± {:3.3f} ms".format(np.sum(predicting_time)/splits*1000,np.var(predicting_time)*1000))

Length of combined data : 8000
Average Speech Accuracy Score : 99.775 ± 0.224 %
Average Music Accuracy Score : 92.575 ± 6.874 %
Average Accuracy Score : 96.175 ± 0.201 %
Average Training Time : 92.9 ± 0.012 ms
Average Single Prediction Time : 0.209 ± 0.000 ms
