In [1]:
import librosa
import os
import pandas as pd
import glob
import numpy as np
import soundfile
from sklearn.model_selection import train_test_split
import sys
from sklearn.model_selection import GridSearchCV
from sklearn import svm

In [2]:
import warnings

# Unless the interpreter was launched with explicit -W options
# (sys.warnoptions non-empty), mute every warning category.
if len(sys.warnoptions) == 0:
    warnings.simplefilter(action="ignore")

# DeprecationWarning is muted unconditionally, even when -W options were given.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

In [3]:
def extract_feature(file_name,mfcc,chroma,mel,zcr,rmse):
    """Build one flat feature vector for an audio file.

    Each enabled feature group is averaged over the time axis and the groups
    are concatenated in a fixed order: MFCC, chroma, mel spectrogram.

    Parameters
    ----------
    file_name : str or path-like
        Audio file, opened with ``soundfile.SoundFile``.
    mfcc : bool
        Include the time-averaged MFCCs (40 coefficients).
    chroma : bool
        Include the time-averaged chromagram computed from the STFT magnitude.
    mel : bool
        Include the time-averaged mel spectrogram.
    zcr, rmse : bool
        Accepted for interface compatibility but currently ignored: no
        zero-crossing-rate or RMS-energy features are produced, so the
        layout of the returned vector does not depend on them.

    Returns
    -------
    numpy.ndarray
        1-D float array holding the enabled groups back to back; empty when
        no group is enabled.
    """
    # Only reading needs the file handle; release it before the heavy DSP work.
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # Start from an empty float array so the result keeps the same dtype and
    # shape as before, including the "nothing enabled" case.
    parts = [np.array([])]
    if mfcc:
        mfcc_mean = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        parts.append(mfcc_mean)
    if chroma:
        # The STFT magnitude is only needed for chroma, so compute it lazily.
        stft = np.abs(librosa.stft(y=X))
        chroma_mean = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        parts.append(chroma_mean)
    if mel:
        # `y` must be passed by keyword: recent librosa releases reject a
        # positional signal argument for melspectrogram.
        mel_mean = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        parts.append(mel_mean)

    return np.hstack(parts)

In [4]:
#Load the data and extract features for each sound file

# Default location of the dataset: one sub-folder level, then the audio files.
DATA_GLOB = "C:\\Users\\ACER\\Documents\\Jupyter Notebook\\TESS Toronto emotional speech set data/*//*"

# Third "_"-separated token of a file name -> emotion label.
EMOTION_LABELS = {
    'angry.wav': 'angry',
    'disgust.wav': 'disgust',
    'fear.wav': 'fear',
    'happy.wav': 'happy',
    'neutral.wav': 'neutral',
    'ps.wav': 'pleasant surprised',
    'sad.wav': 'sad',
}


def load_data(test_size=0.5, data_glob=DATA_GLOB, random_state=None):
    """Extract features for every labelled audio file and split them.

    Parameters
    ----------
    test_size : float or int
        Forwarded to ``train_test_split``.
    data_glob : str
        Glob pattern selecting the audio files. Defaults to the original
        hard-coded dataset location, so existing calls behave as before.
    random_state : int or None
        Forwarded to ``train_test_split``; pass an integer to get the same
        split on every run. ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    list
        ``[x_train, x_test, y_train, y_test]`` as returned by
        ``train_test_split``.

    Raises
    ------
    ValueError
        If no file matching ``data_glob`` carries a recognised emotion label.
    """
    x, y = [], []
    # glob order is OS-dependent; sort so a fixed random_state gives a fixed split.
    for file in sorted(glob.glob(data_glob)):
        tokens = os.path.basename(file).split('_')
        emotion = EMOTION_LABELS.get(tokens[2]) if len(tokens) > 2 else None
        if emotion is None:
            # Skip files whose name does not encode a known emotion instead of
            # crashing or silently reusing the previous file's label.
            continue
        feature = extract_feature(file, mfcc=True, chroma=True, mel=True, zcr=True, rmse=True)
        x.append(feature)
        y.append(emotion)

    if not x:
        raise ValueError("No labelled audio files matched {!r}".format(data_glob))

    return train_test_split(np.array(x), y, test_size=test_size, random_state=random_state)

In [6]:
# Hold out 10% of the samples for evaluation; the rest is used for training.
x_train, x_test, y_train, y_test = load_data(test_size=0.1)

In [7]:
# Candidate hyper-parameters: every (C, gamma) pair below is evaluated.
grid = {
    'C': [1e2, 1e3, 5e3, 1e4, 5e4, 1e5],
    'gamma': [1e-3, 5e-4, 1e-4, 5e-3],
}

# Base estimator; its remaining settings stay at the SVC defaults.
clf = svm.SVC()

# Cross-validated exhaustive search over `grid` on the training split.
abc = GridSearchCV(estimator=clf, param_grid=grid)
abc.fit(x_train, y_train)

GridSearchCV(estimator=SVC(),
             param_grid={'C': [100.0, 1000.0, 5000.0, 10000.0, 50000.0,
                               100000.0],
                         'gamma': [0.001, 0.0005, 0.0001, 0.005]})

In [8]:
# Display the estimator (with its hyper-parameters) selected by the grid search.
abc.best_estimator_

SVC(C=100.0, gamma=0.0001)

### Accuracy of our model

In [16]:
from sklearn.metrics import accuracy_score

# Predict the held-out samples with the tuned model and report the share
# of correct predictions as a percentage.
y_pred = abc.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(100 * accuracy))

Accuracy: 99.29%


### Classification Report

In [17]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the held-out samples.
print(classification_report(y_true=y_test, y_pred=y_pred))

                    precision    recall  f1-score   support

             angry       1.00      1.00      1.00        35
           disgust       1.00      1.00      1.00        46
              fear       1.00      0.97      0.99        39
             happy       1.00      0.98      0.99        45
           neutral       1.00      1.00      1.00        36
pleasant surprised       0.98      1.00      0.99        46
               sad       0.97      1.00      0.99        33

          accuracy                           0.99       280
         macro avg       0.99      0.99      0.99       280
      weighted avg       0.99      0.99      0.99       280

