In [20]:
import glob #to retrieve files/pathnames
import os #provides functions for creating and removing a directory(folder)
import pickle # to save model after training
import warnings # to silence library warnings raised while fitting models

import librosa # to extract speech features
import numpy as np #multi-dimensional arrays
import soundfile # to read audio file
from sklearn.metrics import accuracy_score # to measure how good we are
from sklearn.model_selection import train_test_split # for splitting training and testing
from sklearn.preprocessing import LabelEncoder # to map string labels to integer class ids

In [21]:
from sklearn.tree import DecisionTreeClassifier #Decision Tree Classifier
from sklearn.neighbors import KNeighborsClassifier #K Nearest Neighbor Classifier
import lightgbm as lgb #LGBM Classifier
from sklearn.linear_model import LogisticRegression #Logistic Regression
from sklearn.neural_network import MLPClassifier #Multi Layer Perceptron Classifier
from sklearn.naive_bayes import GaussianNB #Gaussian Naive Bayes
from sklearn.ensemble import RandomForestClassifier #Random Forest Classifier
from sklearn.linear_model import SGDClassifier #Stochastic Gradient Descent Classifier
from sklearn.svm import SVC #Support Vector Classifier
import xgboost as xgb #eXtreme Gradient Boosting

In [22]:
#extract features from soundfile
def extract_feature(file_name, **kwargs):
    """Build one flat feature vector describing an audio file.

    Every requested feature is computed frame by frame with librosa, averaged
    over the time axis, and appended to the result in this fixed order:
    mfcc, chroma, mel, contrast, tonnetz. Features that are not requested
    contribute nothing to the result.

    Args:
        file_name: Path of an audio file readable by ``soundfile``.
        **kwargs: Truthy flags selecting the features to include:
            ``mfcc`` (40 coefficients), ``chroma``, ``mel``, ``contrast``
            and ``tonnetz``. A missing flag is treated as disabled.

    Returns:
        1-D ``numpy`` array with the selected features concatenated, or an
        empty array when no flag is set.
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")

    # Only the read needs the file handle; close it before the heavy work.
    with soundfile.SoundFile(file_name) as sound_file:
        A = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # The magnitude spectrogram feeds both chroma and spectral contrast,
    # so compute it once whenever either of them is requested.
    stft = np.abs(librosa.stft(A)) if (chroma or contrast) else None

    # Seeding with an empty float64 array keeps the result dtype and the
    # "no features requested" return value the same as before.
    pieces = [np.array([])]
    if mfcc:
        pieces.append(np.mean(librosa.feature.mfcc(y=A, sr=sample_rate, n_mfcc=40).T, axis=0))
    if chroma:
        pieces.append(np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0))
    if mel:
        # Audio is passed by keyword: newer librosa releases no longer
        # accept it as a positional argument.
        pieces.append(np.mean(librosa.feature.melspectrogram(y=A, sr=sample_rate).T, axis=0))
    if contrast:
        pieces.append(np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0))
    if tonnetz:
        harmonic = librosa.effects.harmonic(A)
        pieces.append(np.mean(librosa.feature.tonnetz(y=harmonic, sr=sample_rate).T, axis=0))
    return np.hstack(pieces)

In [36]:
#Emotion token found in a TESS file name -> label used throughout the notebook
emotions=dict(
    neutral='NEUTRAL',
    happy='HAPPY',
    sad='SAD',
    angry='ANGRY',
    fear='FEARFUL',
    disgust='DISGUST',
    ps='SURPRISE',
)
#Labels kept for training and evaluation; files with any other label are skipped
observed_emotions=[
    'NEUTRAL',
    'HAPPY',
    'FEARFUL',
    'DISGUST',
]

In [37]:
#load the data
def load_data(test_size=0.25, data_glob=r"E:\Kaggle\Toronto emotion speech set\TESS\Actor_*\*.wav"):
    """Extract features for every matching recording and split them.

    The emotion is read from the file name: the third underscore-separated
    field, with the ``.wav`` suffix removed, is looked up in ``emotions``.
    Files whose name has fewer than three fields, whose token is not in
    ``emotions``, or whose label is not in ``observed_emotions`` are skipped.

    Args:
        test_size: Passed to ``train_test_split`` as the held-out share.
        data_glob: Glob pattern selecting the audio files. Defaults to the
            location this notebook was originally run against.

    Returns:
        The four-element result of ``train_test_split``: training features,
        testing features, training labels, testing labels.

    Raises:
        ValueError: If no file yields a usable sample, which usually means
            the pattern does not point at the dataset.
    """
    a,b=[],[]
    for file in glob.glob(data_glob):
        fields=os.path.basename(file).split("_")
        if len(fields) < 3:
            # name does not follow the expected three-field layout
            continue
        # .get() yields None for unknown tokens, which the membership test rejects
        emotion=emotions.get(fields[2].split(".wav")[0])
        if emotion not in observed_emotions:
            continue
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        a.append(feature)
        b.append(emotion)
    if not a:
        raise ValueError(f"No usable audio files matched {data_glob!r}; check the dataset path.")
    # fixed random_state keeps the partition identical between runs
    return train_test_split(np.array(a), b, test_size=test_size, random_state=9)

In [38]:
#Split the dataset
# load_data() extracts features for every matching file and returns the
# train_test_split result in the order unpacked here: a_* hold the feature
# vectors, b_* hold the matching emotion labels; 25% is held out for testing.
a_train,a_test,b_train,b_test=load_data(test_size=0.25)

In [39]:
#Sample counts of the two partitions, printed as a (train, test) tuple
print((len(a_train), len(a_test)))
#Length of one feature vector, i.e. the column count of the feature matrix
print(f'Features extracted: {a_train.shape[1]}')

(1200, 400)
Features extracted: 180


In [40]:
#MLPClassifier
# Settings for a perceptron with a single hidden layer of 600 units.
# No random_state is given, so the fitted weights can differ between runs.
model_params = dict(
    alpha=0.01,
    batch_size=200,
    epsilon=1e-08,
    hidden_layer_sizes=(600,),
    learning_rate='adaptive',
    max_iter=500,
)
model = MLPClassifier(**model_params)
# fit on the training partition
model.fit(a_train, b_train)
# labels predicted for the held-out partition
b_pred = model.predict(a_test)
# share of held-out samples that were labelled correctly
accuracy = accuracy_score(b_test, b_pred)
print("Accuracy using MLPClassifier: {:.2f}%".format(100 * accuracy))

Accuracy using MLPClassifier: 100.00%


In [9]:
#LGBMClassifier
# Settings forwarded unchanged to the LightGBM scikit-learn wrapper.
lgb_params = dict(
    num_leaves=22,
    max_depth=37,
    n_estimators=12310,
    subsample_for_bin=491645,
    min_data_in_leaf=27,
    reg_alpha=1.744123586157066,
    colsample_bytree=0.6495503686746514,
    learning_rate=0.8581745963346554,
    boosting_type='dart',
)
model = lgb.LGBMClassifier(**lgb_params)
# fit on the training partition
model.fit(a_train, b_train)
# labels predicted for the held-out partition
b_pred = model.predict(a_test)
# share of held-out samples that were labelled correctly
accuracy = accuracy_score(b_test, b_pred)
print("Accuracy using LGBMClassifier: {:.2f}%".format(100 * accuracy))

Accuracy using LGBMClassifier: 99.75%


In [10]:
#Random Forest Classifier
# Entropy-based forest; random_state pins the bootstrap and feature sampling.
rf = RandomForestClassifier(
    n_estimators=22984,
    criterion='entropy',
    max_depth=15,
    max_leaf_nodes=239,
    min_samples_split=9,
    min_samples_leaf=3,
    random_state=22,
)
rf.fit(a_train, b_train)
# labels predicted for the held-out partition
b_pred = rf.predict(a_test)
# share of held-out samples that were labelled correctly
accuracy = accuracy_score(b_test, b_pred)
print("Accuracy using Random Forest: {:.2f}%".format(100 * accuracy))

Accuracy using Random Forest: 100.00%


In [11]:
#Support Vector Classifier
# Linear-kernel SVM trained on the raw (unscaled) feature vectors.
svclassifier = SVC(kernel='linear')
svclassifier.fit(a_train, b_train)
# labels predicted for the held-out partition
y_preds = svclassifier.predict(a_test)
# share of held-out samples that were labelled correctly
accuracy = accuracy_score(b_test, y_preds)
print("Accuracy using SVClassifier: {:.2f}%".format(100 * accuracy))

Accuracy using SVClassifier: 100.00%


In [12]:
#LogisticRegression
# The multi_class argument is deprecated in recent scikit-learn releases and
# is therefore not passed. With the saga solver and more than two classes the
# default already fits a single multinomial model, so the result is unchanged.
# No random_state is given, so saga's data shuffling can differ between runs.
lr=LogisticRegression(class_weight = None,
             solver = 'saga',
             max_iter = 10000)
lr.fit(a_train,b_train)
# labels predicted for the held-out partition
b_pred=lr.predict(a_test)
# share of held-out samples that were labelled correctly
accuracy = accuracy_score(y_true=b_test, y_pred=b_pred)
print("Accuracy using Logistic Regression: {:.2f}%".format(accuracy*100))

Accuracy using Logistic Regression: 100.00%


In [13]:
#K Nearest Neighbors
# 32 neighbours vote on each sample, closer neighbours weighing more.
knn = KNeighborsClassifier(n_neighbors=32, weights='distance')
knn.fit(a_train, b_train)
# labels predicted for the held-out partition
b_pred = knn.predict(a_test)
# share of held-out samples that were labelled correctly
accuracy = accuracy_score(b_test, b_pred)
print("Accuracy using K Nearest Neighbor: {:.2f}%".format(100 * accuracy))

Accuracy using K Nearest Neighbor: 99.00%


In [19]:
#XGradientBoost
xgb_params = {'booster': 'gbtree',
              'lambda': 7.201651687969849e-08,
              'alpha': 2.2495125443474775e-05,
              'max_depth': 7,
              'eta': 9.307925211476325e-06,
              'gamma': 1.7948741419263195e-05,
              'grow_policy': 'lossguide'}
# Newer XGBoost releases only accept integer class ids (0..n_classes-1) as
# targets, so the string labels are encoded for training and decoded again
# after prediction. This also works on older releases.
label_encoder = LabelEncoder()
b_train_encoded = label_encoder.fit_transform(b_train)
model=xgb.XGBClassifier(**xgb_params)
# The filter has to be active while XGBoost runs; it is restored on exit,
# so warnings raised by later cells are still shown.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    model.fit(a_train, b_train_encoded)
    # map the predicted class ids back to the original emotion strings
    b_pred=label_encoder.inverse_transform(model.predict(a_test))
# share of held-out samples that were labelled correctly
accuracy = accuracy_score(y_true=b_test, y_pred=b_pred)
print("Accuracy using XGBClassifier: {:.2f}%".format(accuracy*100))

Accuracy using XGBClassifier: 97.50%


In [15]:
#Decision Tree
# Single entropy-based tree, bounded in depth, leaf count and split size.
# No random_state is given, so tie-breaking between splits can vary per run.
dtree = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=35,
    max_leaf_nodes=169,
    min_samples_split=23,
    min_samples_leaf=4,
)
dtree.fit(a_train, b_train)
# labels predicted for the held-out partition
predictions = dtree.predict(a_test)
# share of held-out samples that were labelled correctly
accuracy = accuracy_score(b_test, predictions)
print("Accuracy using Decision Tree Classifier: {:.2f}%".format(100 * accuracy))

Accuracy using Decision Tree Classifier: 95.50%


In [16]:
#Gaussian Naive Bayes
# Uses the estimator's default settings; nothing is tuned here.
nb = GaussianNB()
nb.fit(a_train, b_train)
# labels predicted for the held-out partition
b_pred = nb.predict(a_test)
# share of held-out samples that were labelled correctly
accuracy = accuracy_score(b_test, b_pred)
print("Accuracy using Naive Bayes: {:.2f}%".format(100 * accuracy))

Accuracy using Naive Bayes: 86.25%


In [17]:
#Stochastic Gradient Descent
# Linear model trained with the modified Huber loss; the data is reshuffled
# between epochs and random_state pins that shuffling.
sgd = SGDClassifier(
    loss='modified_huber',
    shuffle=True,
    random_state=101,
)
sgd.fit(a_train, b_train)
# labels predicted for the held-out partition
b_pred = sgd.predict(a_test)
# share of held-out samples that were labelled correctly
accuracy = accuracy_score(b_test, b_pred)
print("Accuracy using Stochastic Gradient Descent: {:.2f}%".format(100 * accuracy))

Accuracy using Stochastic Gradient Descent: 100.00%
