In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 

from sklearn.utils import shuffle
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import cross_val_score, GridSearchCV, train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [13]:
metadata = pd.read_csv("processed_metadata.csv")
d1 = pd.read_csv("mfcc_deltamfcc_entire.csv", index_col = 'Unnamed: 0')
d2 = pd.read_csv("mfcc_deltamfcc_stat.csv", index_col = 'Unnamed: 0')

In [14]:
np.shape(d1)

(7005, 2444)

In [15]:
np.shape(d2)

(7005, 130)

In [16]:
metadata.head()

Unnamed: 0,path,segment,mod,label,samplingrate,ms
0,./Labelled/Bleatings/evt_000_000_000681_210415...,0,0,Bleatings,16000,3000.0
1,./Labelled/Bleatings/evt_000_000_000681_210415...,1,0,Bleatings,16000,3000.0
2,./Labelled/Bleatings/evt_000_000_000681_210415...,2,0,Bleatings,16000,3000.0
3,./Labelled/Bleatings/evt_000_000_000681_210415...,3,0,Bleatings,16000,3000.0
4,./Labelled/Bleatings/evt_000_000_000682_210415...,0,0,Bleatings,16000,3000.0


In [17]:
# Transforming non numerical labels into numerical labels
labels = metadata["label"]
encoder = LabelEncoder()

# encoding train labels
y = encoder.fit_transform(labels)

In [18]:
# scale the features
scaler = StandardScaler()
x1 = scaler.fit_transform(d1)
x2 = scaler.fit_transform(d2)

In [19]:
# get train and test data
X1_train, X1_test, y1_train, y1_test = train_test_split(x1, y, test_size=0.2)
X2_train, X2_test, y2_train, y2_test = train_test_split(x2, y, test_size=0.2)

In [20]:
m1 = SVC()
m2 = SVC()
m1.fit(X1_train, y1_train)
m2.fit(X2_train, y2_train)

SVC()

In [21]:
Y_pred1 = m1.predict(X1_test)
Y_pred2 = m2.predict(X2_test)

In [22]:
Y_pred1

array([0, 2, 0, ..., 2, 2, 1])

In [23]:
list(encoder.inverse_transform(Y_pred1))

['Bleatings',
 'Others',
 'Bleatings',
 'Others',
 'Others',
 'Bleatings',
 'ContactCalls',
 'Bleatings',
 'Others',
 'Others',
 'Others',
 'ContactCalls',
 'Bleatings',
 'ContactCalls',
 'Others',
 'Bleatings',
 'ContactCalls',
 'Bleatings',
 'Others',
 'ContactCalls',
 'Bleatings',
 'Others',
 'ContactCalls',
 'Others',
 'Bleatings',
 'Bleatings',
 'Others',
 'Bleatings',
 'Bleatings',
 'ContactCalls',
 'Bleatings',
 'Bleatings',
 'Others',
 'Others',
 'ContactCalls',
 'ContactCalls',
 'Others',
 'Bleatings',
 'Others',
 'Others',
 'Others',
 'Others',
 'Bleatings',
 'ContactCalls',
 'ContactCalls',
 'Others',
 'Bleatings',
 'ContactCalls',
 'ContactCalls',
 'Others',
 'Others',
 'ContactCalls',
 'ContactCalls',
 'ContactCalls',
 'ContactCalls',
 'ContactCalls',
 'Others',
 'ContactCalls',
 'Bleatings',
 'Others',
 'ContactCalls',
 'Others',
 'Bleatings',
 'ContactCalls',
 'Others',
 'Others',
 'Others',
 'ContactCalls',
 'Others',
 'ContactCalls',
 'Bleatings',
 'ContactCalls',
 'Ot

# Base classifier

In [26]:
print(confusion_matrix(y1_test,Y_pred1))
print("\n")
print(classification_report(y1_test,Y_pred1))

print("Training set score for SVM: %f" % m1.score(X1_train , y1_train))
print("Testing  set score for SVM: %f" % m1.score(X1_test  , y1_test ))

m1.score

[[332 138  11]
 [ 71 344   6]
 [ 11   6 482]]


              precision    recall  f1-score   support

           0       0.80      0.69      0.74       481
           1       0.70      0.82      0.76       421
           2       0.97      0.97      0.97       499

    accuracy                           0.83      1401
   macro avg       0.82      0.82      0.82      1401
weighted avg       0.83      0.83      0.83      1401

Training set score for SVM: 0.956817
Testing  set score for SVM: 0.826552


<bound method ClassifierMixin.score of SVC()>

In [27]:
m1.score

<bound method ClassifierMixin.score of SVC()>

In [25]:
print(confusion_matrix(y2_test,Y_pred2))
print("\n")
print(classification_report(y2_test,Y_pred2))

print("Training set score for SVM: %f" % m2.score(X2_train , y2_train))
print("Testing  set score for SVM: %f" % m2.score(X2_test  , y2_test ))

m2.score

[[386  81  12]
 [ 77 361   6]
 [  5   0 473]]


              precision    recall  f1-score   support

           0       0.82      0.81      0.82       479
           1       0.82      0.81      0.81       444
           2       0.96      0.99      0.98       478

    accuracy                           0.87      1401
   macro avg       0.87      0.87      0.87      1401
weighted avg       0.87      0.87      0.87      1401

Training set score for SVM: 0.918630
Testing  set score for SVM: 0.870807


<bound method ClassifierMixin.score of SVC()>

# Grid Search

In [24]:
params_grid = [{'kernel': ['rbf'],'C': [0.1, 1, 10, 100, 1000]},
               {'kernel': ['linear'], 'C': [0.1, 1, 10, 100, 1000]}]

In [26]:
svm_model = GridSearchCV(SVC(), params_grid, cv=5)
svm_model.fit(X1_train, y1_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid=[{'C': [0.1, 1, 10, 100, 1000], 'kernel': ['rbf']},
                         {'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear']}])

In [27]:
# View the accuracy score
print('Best score for training data:', svm_model.best_score_,"\n") 

# View the best parameters for the model found using grid search
print('Best C:',svm_model.best_estimator_.C,"\n") 
print('Best Kernel:',svm_model.best_estimator_.kernel,"\n")
print('Best Gamma:',svm_model.best_estimator_.gamma,"\n")

final_model = svm_model.best_estimator_
Y_pred = final_model.predict(X1_test)
Y_pred_label = list(encoder.inverse_transform(Y_pred))

Best score for training data: 0.9939743433022767 

Best C: 1 

Best Kernel: linear 

Best Gamma: scale 



In [29]:
print(confusion_matrix(y1_test,Y_pred))
print("\n")
print(classification_report(y1_test,Y_pred))

print("Training set score for SVM: %f" % final_model.score(X1_train, y1_train))
print("Testing  set score for SVM: %f" % final_model.score(X1_test, y1_test ))

svm_model.score

[[546  10   0]
 [  8 193   0]
 [  0   0 571]]


              precision    recall  f1-score   support

           0       0.99      0.98      0.98       556
           1       0.95      0.96      0.96       201
           2       1.00      1.00      1.00       571

    accuracy                           0.99      1328
   macro avg       0.98      0.98      0.98      1328
weighted avg       0.99      0.99      0.99      1328

Training set score for SVM: 0.999059
Testing  set score for SVM: 0.986446


<bound method BaseSearchCV.score of GridSearchCV(cv=5, estimator=SVC(),
             param_grid=[{'C': [0.1, 1, 10, 100, 1000], 'kernel': ['rbf']},
                         {'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear']}])>

In [31]:
svm_model2 = GridSearchCV(SVC(), params_grid, cv=5)
svm_model2.fit(X2_train, y2_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid=[{'C': [0.1, 1, 10, 100, 1000], 'kernel': ['rbf']},
                         {'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear']}])

In [34]:
# View the accuracy score
print('Best score for training data:', svm_model2.best_score_,"\n") 

# View the best parameters for the model found using grid search
print('Best C:',svm_model2.best_estimator_.C,"\n") 
print('Best Kernel:',svm_model2.best_estimator_.kernel,"\n")
print('Best Gamma:',svm_model2.best_estimator_.gamma,"\n")

final_model = svm_model2.best_estimator_
Y_pred = final_model.predict(X2_test)
Y_pred_label = list(encoder.inverse_transform(Y_pred))

Best score for training data: 0.9081158218664797 

Best C: 1 

Best Kernel: linear 

Best Gamma: scale 



In [36]:
print(confusion_matrix(y2_test,Y_pred))
print("\n")
print(classification_report(y2_test,Y_pred))

print("Training set score for SVM: %f" % final_model.score(X2_train, y2_train))
print("Testing  set score for SVM: %f" % final_model.score(X2_test, y2_test ))

svm_model.score

[[515  41   5]
 [ 62 147   8]
 [  0   2 548]]


              precision    recall  f1-score   support

           0       0.89      0.92      0.91       561
           1       0.77      0.68      0.72       217
           2       0.98      1.00      0.99       550

    accuracy                           0.91      1328
   macro avg       0.88      0.86      0.87      1328
weighted avg       0.91      0.91      0.91      1328

Training set score for SVM: 1.000000
Testing  set score for SVM: 0.911145


<bound method BaseSearchCV.score of GridSearchCV(cv=5, estimator=SVC(),
             param_grid=[{'C': [0.1, 1, 10, 100, 1000], 'kernel': ['rbf']},
                         {'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear']}])>