In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 

from sklearn.utils import shuffle
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import cross_val_score, GridSearchCV, train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [7]:
metadata = pd.read_csv("processed_metadata.csv")
d1 = pd.read_csv("mfcc_deltamfcc_entire.csv", index_col = 'Unnamed: 0')
d2 = pd.read_csv("mfcc_deltamfcc_stat.csv", index_col = 'Unnamed: 0')

In [8]:
d1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,120,121,122,123,124,125,126,127,128,129
0,-498.372785,70.558003,-17.394419,22.370149,-2.773349,-8.818175,-3.375472,-3.880317,0.060479,-5.638018,...,-9.810652,-15.77145,-9.511748,-4.777353,-3.628776,-8.05793,-7.059937,-5.427528,-9.378248,-2.965233
1,-461.939744,94.49037,-30.162219,19.7726,-8.361755,0.029626,-1.49666,-4.535218,-1.276991,-2.467174,...,-12.70898,-9.712587,-9.239233,-6.313464,-6.577796,-4.884297,-5.886631,-5.312574,-3.758418,-6.700661
2,-529.540707,80.44615,-29.131353,20.216969,0.571056,-8.416232,-5.676588,-8.62122,-1.333912,-5.050858,...,-10.555406,-11.804663,-6.711576,-3.423953,-6.255485,-4.667065,-7.142622,-6.389498,-5.132346,-4.865369
3,-616.144775,50.042843,-27.797401,10.828676,-0.034238,-3.344975,-5.781679,-5.397288,-0.332126,-4.46605,...,-9.258957,-8.813666,-5.116341,-2.900284,-2.844657,-2.890104,-4.251687,-2.681928,-4.668478,-2.775859
4,-471.954266,115.457746,-22.104456,24.454849,-5.56485,2.889188,6.143604,0.236881,-3.250076,-0.004432,...,-10.568315,-10.561276,-8.527216,-5.587518,-7.701231,-6.702233,-6.928992,-5.485289,-7.765387,-6.183199


In [6]:
d2.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,2434,2435,2436,2437,2438,2439,2440,2441,2442,2443
0,0,-759.643734,-759.643734,-759.643734,-759.643734,-759.643734,-759.643734,-759.643734,-759.643734,-759.643734,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,-794.749525,-794.749525,-794.749525,-794.749525,-794.749525,-794.749525,-396.305007,-235.767194,-185.439379,...,-1.738996,1.351106,3.759922,5.072241,5.020127,4.745581,4.346004,3.247134,1.812866,0.0
2,2,-812.413496,-812.413496,-812.413496,-812.413496,-812.413496,-812.413496,-812.413496,-812.413496,-812.413496,...,0.832108,1.057161,0.826852,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,-811.385119,-811.385119,-811.385119,-811.385119,-811.385119,-811.385119,-811.385119,-811.385119,-811.385119,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,-295.943017,-266.674291,-253.891257,-261.825848,-292.782937,-331.729997,-371.42353,-400.653534,-426.523563,...,0.068072,1.409403,2.6162,3.196503,3.179404,2.306567,1.614574,1.782035,1.783818,1.325165


In [5]:
metadata.head()

Unnamed: 0,path,segment,label,samplingrate,ms
0,./Labelled/Bleatings/evt_000_000_000681_210415...,0,Bleatings,16000,3000.0
1,./Labelled/Bleatings/evt_000_000_000681_210415...,1,Bleatings,16000,3000.0
2,./Labelled/Bleatings/evt_000_000_000681_210415...,2,Bleatings,16000,3000.0
3,./Labelled/Bleatings/evt_000_000_000681_210415...,3,Bleatings,16000,3000.0
4,./Labelled/Bleatings/evt_000_000_000681_210415...,4,Bleatings,16000,3000.0


In [6]:
# Transforming non numerical labels into numerical labels
labels = metadata["label"]
encoder = LabelEncoder()

# encoding train labels
y = encoder.fit_transform(labels)

In [10]:
# scale the features
scaler = StandardScaler()
x1 = scaler.fit_transform(d1)
x2 = scaler.fit_transform(d2)

In [12]:
# get train and test data
X1_train, X1_test, y1_train, y1_test = train_test_split(x1, y, test_size=0.2)
X2_train, X2_test, y2_train, y2_test = train_test_split(x2, y, test_size=0.2)

In [13]:
m1 = SVC()
m2 = SVC()
m1.fit(X1_train, y1_train)
m2.fit(X2_train, y2_train)

SVC()

In [14]:
Y_pred1 = m1.predict(X1_test)
Y_pred2 = m2.predict(X2_test)

In [15]:
Y_pred1

array([1, 1, 2, ..., 2, 0, 2])

In [16]:
list(encoder.inverse_transform(Y_pred1))

['ContactCalls',
 'ContactCalls',
 'Others',
 'Bleatings',
 'Bleatings',
 'Others',
 'Others',
 'ContactCalls',
 'Others',
 'Others',
 'Bleatings',
 'ContactCalls',
 'Others',
 'Bleatings',
 'ContactCalls',
 'Others',
 'Bleatings',
 'Others',
 'Others',
 'Bleatings',
 'Bleatings',
 'Bleatings',
 'Bleatings',
 'Others',
 'ContactCalls',
 'Others',
 'Bleatings',
 'Bleatings',
 'Others',
 'Bleatings',
 'ContactCalls',
 'Others',
 'Bleatings',
 'ContactCalls',
 'Bleatings',
 'Bleatings',
 'Others',
 'Bleatings',
 'Others',
 'Others',
 'Bleatings',
 'Bleatings',
 'Others',
 'Bleatings',
 'Bleatings',
 'Others',
 'Bleatings',
 'Others',
 'Others',
 'Others',
 'Others',
 'Bleatings',
 'Others',
 'Others',
 'ContactCalls',
 'Bleatings',
 'Bleatings',
 'Others',
 'Bleatings',
 'Bleatings',
 'ContactCalls',
 'Others',
 'Bleatings',
 'Bleatings',
 'Bleatings',
 'ContactCalls',
 'Others',
 'Bleatings',
 'ContactCalls',
 'ContactCalls',
 'Others',
 'Bleatings',
 'Others',
 'Bleatings',
 'Others',
 

In [19]:
print(confusion_matrix(y1_test,Y_pred1))
print("\n")
print(classification_report(y1_test,Y_pred1))

print("Training set score for SVM: %f" % m1.score(X1_train , y1_train))
print("Testing  set score for SVM: %f" % m1.score(X1_test  , y1_test ))

m1.score

[[541  15   0]
 [ 40 161   0]
 [  0   0 571]]


              precision    recall  f1-score   support

           0       0.93      0.97      0.95       556
           1       0.91      0.80      0.85       201
           2       1.00      1.00      1.00       571

    accuracy                           0.96      1328
   macro avg       0.95      0.92      0.94      1328
weighted avg       0.96      0.96      0.96      1328

Training set score for SVM: 0.990586
Testing  set score for SVM: 0.958584


<bound method ClassifierMixin.score of SVC()>

In [20]:
print(confusion_matrix(y2_test,Y_pred2))
print("\n")
print(classification_report(y2_test,Y_pred2))

print("Training set score for SVM: %f" % m2.score(X2_train , y2_train))
print("Testing  set score for SVM: %f" % m2.score(X2_test  , y2_test ))

m2.score

[[540  15   6]
 [139  75   3]
 [  4   0 546]]


              precision    recall  f1-score   support

           0       0.79      0.96      0.87       561
           1       0.83      0.35      0.49       217
           2       0.98      0.99      0.99       550

    accuracy                           0.87      1328
   macro avg       0.87      0.77      0.78      1328
weighted avg       0.88      0.87      0.86      1328

Training set score for SVM: 0.982113
Testing  set score for SVM: 0.874247


<bound method ClassifierMixin.score of SVC()>

In [24]:
params_grid = [{'kernel': ['rbf'],'C': [0.1, 1, 10, 100, 1000]},
               {'kernel': ['linear'], 'C': [0.1, 1, 10, 100, 1000]}]

In [26]:
svm_model = GridSearchCV(SVC(), params_grid, cv=5)
svm_model.fit(X1_train, y1_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid=[{'C': [0.1, 1, 10, 100, 1000], 'kernel': ['rbf']},
                         {'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear']}])

In [27]:
# View the accuracy score
print('Best score for training data:', svm_model.best_score_,"\n") 

# View the best parameters for the model found using grid search
print('Best C:',svm_model.best_estimator_.C,"\n") 
print('Best Kernel:',svm_model.best_estimator_.kernel,"\n")
print('Best Gamma:',svm_model.best_estimator_.gamma,"\n")

final_model = svm_model.best_estimator_
Y_pred = final_model.predict(X1_test)
Y_pred_label = list(encoder.inverse_transform(Y_pred))

Best score for training data: 0.9939743433022767 

Best C: 1 

Best Kernel: linear 

Best Gamma: scale 



In [29]:
print(confusion_matrix(y1_test,Y_pred))
print("\n")
print(classification_report(y1_test,Y_pred))

print("Training set score for SVM: %f" % final_model.score(X1_train, y1_train))
print("Testing  set score for SVM: %f" % final_model.score(X1_test, y1_test ))

svm_model.score

[[546  10   0]
 [  8 193   0]
 [  0   0 571]]


              precision    recall  f1-score   support

           0       0.99      0.98      0.98       556
           1       0.95      0.96      0.96       201
           2       1.00      1.00      1.00       571

    accuracy                           0.99      1328
   macro avg       0.98      0.98      0.98      1328
weighted avg       0.99      0.99      0.99      1328

Training set score for SVM: 0.999059
Testing  set score for SVM: 0.986446


<bound method BaseSearchCV.score of GridSearchCV(cv=5, estimator=SVC(),
             param_grid=[{'C': [0.1, 1, 10, 100, 1000], 'kernel': ['rbf']},
                         {'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear']}])>

In [31]:
svm_model2 = GridSearchCV(SVC(), params_grid, cv=5)
svm_model2.fit(X2_train, y2_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid=[{'C': [0.1, 1, 10, 100, 1000], 'kernel': ['rbf']},
                         {'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear']}])

In [34]:
# View the accuracy score
print('Best score for training data:', svm_model2.best_score_,"\n") 

# View the best parameters for the model found using grid search
print('Best C:',svm_model2.best_estimator_.C,"\n") 
print('Best Kernel:',svm_model2.best_estimator_.kernel,"\n")
print('Best Gamma:',svm_model2.best_estimator_.gamma,"\n")

final_model = svm_model2.best_estimator_
Y_pred = final_model.predict(X2_test)
Y_pred_label = list(encoder.inverse_transform(Y_pred))

Best score for training data: 0.9081158218664797 

Best C: 1 

Best Kernel: linear 

Best Gamma: scale 



In [36]:
print(confusion_matrix(y2_test,Y_pred))
print("\n")
print(classification_report(y2_test,Y_pred))

print("Training set score for SVM: %f" % final_model.score(X2_train, y2_train))
print("Testing  set score for SVM: %f" % final_model.score(X2_test, y2_test ))

svm_model.score

[[515  41   5]
 [ 62 147   8]
 [  0   2 548]]


              precision    recall  f1-score   support

           0       0.89      0.92      0.91       561
           1       0.77      0.68      0.72       217
           2       0.98      1.00      0.99       550

    accuracy                           0.91      1328
   macro avg       0.88      0.86      0.87      1328
weighted avg       0.91      0.91      0.91      1328

Training set score for SVM: 1.000000
Testing  set score for SVM: 0.911145


<bound method BaseSearchCV.score of GridSearchCV(cv=5, estimator=SVC(),
             param_grid=[{'C': [0.1, 1, 10, 100, 1000], 'kernel': ['rbf']},
                         {'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear']}])>