In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install soundfile



Importing necessary libraries

In [None]:
import soundfile
import numpy as np
import librosa
import glob
import os
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


# Two-digit emotion code -> emotion label. load_data looks up the third
# dash-separated field of each file's basename in this table.
int2emotion = dict(
    zip(
        ("01", "02", "03", "04", "05", "06", "07", "08"),
        ("neutral", "calm", "happy", "sad", "angry", "fearful", "disgust", "surprised"),
    )
)

# Labels that load_data keeps; files with any other emotion are skipped.
AVAILABLE_EMOTIONS = {"angry", "calm", "happy", "sad"}

Feature Extraction


In [None]:
def extract_feature(file_name, **kwargs):
    """Build a 1-D feature vector for a single audio file.

    The file is read with ``soundfile`` as float32 samples at its own sample
    rate. Every enabled feature is computed with librosa, averaged over its
    frames, and concatenated in this fixed order: mfcc, chroma, mel,
    contrast, tonnetz.

    Args:
        file_name: Path of the audio file to read.
        **kwargs: Truthy switches selecting the features to compute:
            ``mfcc``, ``chroma``, ``mel``, ``contrast``, ``tonnetz``.
            A missing or falsy switch skips that feature.

    Returns:
        numpy.ndarray: The concatenated per-feature means. An empty array is
        returned when no feature is enabled.
    """
    use_mfcc = kwargs.get("mfcc")
    use_chroma = kwargs.get("chroma")
    use_mel = kwargs.get("mel")
    use_contrast = kwargs.get("contrast")
    use_tonnetz = kwargs.get("tonnetz")

    # Only the read needs the file handle; release it before the heavy work.
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile returns multi-channel audio as (frames, channels), while
    # librosa treats the last axis as time. Downmix to mono so stereo files
    # yield the same feature layout as mono ones.
    if X.ndim > 1:
        X = X.mean(axis=1)

    # The STFT magnitude is shared by the chroma and contrast features.
    if use_chroma or use_contrast:
        stft = np.abs(librosa.stft(X))

    # Seeding with an empty float64 array keeps the output dtype unchanged
    # and makes the "no feature enabled" case return an empty vector.
    parts = [np.array([])]
    if use_mfcc:
        parts.append(np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0))
    if use_chroma:
        parts.append(np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0))
    if use_mel:
        # The audio must be passed as y=...; newer librosa releases reject it
        # as a positional argument.
        parts.append(np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0))
    if use_contrast:
        parts.append(np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0))
    if use_tonnetz:
        # Tonnetz is computed on the harmonic component of the signal.
        harmonic = librosa.effects.harmonic(X)
        parts.append(np.mean(librosa.feature.tonnetz(y=harmonic, sr=sample_rate).T, axis=0))
    return np.hstack(parts)

Loading Data

In [None]:
def load_data(test_size=0.2,
              data_glob="C:\\Users\\Dell\\Downloads\\ML lab\\speech emotion\\Actor_*\\*.wav",
              verbose=True):
    """Extract features for every matching recording and split train/test.

    For each file matched by ``data_glob`` the emotion is looked up in
    ``int2emotion`` using the third dash-separated field of the file's
    basename. Files whose emotion is not in ``AVAILABLE_EMOTIONS`` are
    skipped; the rest are passed to ``extract_feature`` with all five
    feature groups enabled.

    Args:
        test_size: Forwarded to ``train_test_split``.
        data_glob: Glob pattern selecting the ``.wav`` files. Defaults to the
            location this notebook was originally written against.
        verbose: When true, print each file's basename as it is visited.

    Returns:
        The ``train_test_split`` result ``X_train, X_test, y_train, y_test``,
        produced with ``random_state=7``.

    Raises:
        ValueError: If the pattern matches no files, or no matched file has
            an emotion listed in ``AVAILABLE_EMOTIONS``.
        KeyError: If a filename carries an emotion code missing from
            ``int2emotion``.
    """
    # glob returns paths in filesystem order; sorting makes the seeded split
    # pick the same samples on every machine.
    wav_paths = sorted(glob.glob(data_glob))
    if not wav_paths:
        raise ValueError("No audio files matched pattern: {!r}".format(data_glob))

    X, y = [], []
    for file in wav_paths:
        basename = os.path.basename(file)
        if verbose:
            print(basename)

        emotion = int2emotion[basename.split("-")[2]]
        if emotion not in AVAILABLE_EMOTIONS:
            continue

        X.append(extract_feature(file, mfcc=True, chroma=True, mel=True, tonnetz=True, contrast=True))
        y.append(emotion)

    if not X:
        raise ValueError("No files with an emotion in AVAILABLE_EMOTIONS matched pattern: {!r}".format(data_glob))

    return train_test_split(np.array(X), y, test_size=test_size, random_state=7)

In [None]:
# Build the dataset, holding out 25% of the samples for testing.
X_train, X_test, y_train, y_test = load_data(test_size=0.25)

# Report the size of each split and the length of one feature vector.
for label, count in (
    ("[+] Number of training samples:", X_train.shape[0]),
    ("[+] Number of testing samples:", X_test.shape[0]),
    ("[+] Number of features:", X_train.shape[1]),
):
    print(label, count)





03-01-01-01-01-01-01.wav
03-01-01-01-01-02-01.wav
03-01-01-01-02-01-01.wav
03-01-01-01-02-02-01.wav
03-01-02-01-01-01-01.wav
03-01-02-01-01-02-01.wav
03-01-02-01-02-01-01.wav
03-01-02-01-02-02-01.wav
03-01-02-02-01-01-01.wav
03-01-02-02-01-02-01.wav
03-01-02-02-02-01-01.wav
03-01-02-02-02-02-01.wav
03-01-03-01-01-01-01.wav
03-01-03-01-01-02-01.wav
03-01-03-01-02-01-01.wav
03-01-03-01-02-02-01.wav
03-01-03-02-01-01-01.wav
03-01-03-02-01-02-01.wav
03-01-03-02-02-01-01.wav
03-01-03-02-02-02-01.wav
03-01-04-01-01-01-01.wav
03-01-04-01-01-02-01.wav
03-01-04-01-02-01-01.wav
03-01-04-01-02-02-01.wav
03-01-04-02-01-01-01.wav
03-01-04-02-01-02-01.wav
03-01-04-02-02-01-01.wav
03-01-04-02-02-02-01.wav
03-01-05-01-01-01-01.wav
03-01-05-01-01-02-01.wav
03-01-05-01-02-01-01.wav
03-01-05-01-02-02-01.wav
03-01-05-02-01-01-01.wav
03-01-05-02-01-02-01.wav
03-01-05-02-02-01-01.wav
03-01-05-02-02-02-01.wav
03-01-06-01-01-01-01.wav
03-01-06-01-01-02-01.wav
03-01-06-01-02-01-01.wav
03-01-06-01-02-02-01.wav


03-01-05-01-01-01-06.wav
03-01-05-01-01-02-06.wav
03-01-05-01-02-01-06.wav
03-01-05-01-02-02-06.wav
03-01-05-02-01-01-06.wav
03-01-05-02-01-02-06.wav
03-01-05-02-02-01-06.wav
03-01-05-02-02-02-06.wav
03-01-06-01-01-01-06.wav
03-01-06-01-01-02-06.wav
03-01-06-01-02-01-06.wav
03-01-06-01-02-02-06.wav
03-01-06-02-01-01-06.wav
03-01-06-02-01-02-06.wav
03-01-06-02-02-01-06.wav
03-01-06-02-02-02-06.wav
03-01-07-01-01-01-06.wav
03-01-07-01-01-02-06.wav
03-01-07-01-02-01-06.wav
03-01-07-01-02-02-06.wav
03-01-07-02-01-01-06.wav
03-01-07-02-01-02-06.wav
03-01-07-02-02-01-06.wav
03-01-07-02-02-02-06.wav
03-01-08-01-01-01-06.wav
03-01-08-01-01-02-06.wav
03-01-08-01-02-01-06.wav
03-01-08-01-02-02-06.wav
03-01-08-02-01-01-06.wav
03-01-08-02-01-02-06.wav
03-01-08-02-02-01-06.wav
03-01-08-02-02-02-06.wav
03-01-01-01-01-01-07.wav
03-01-01-01-01-02-07.wav
03-01-01-01-02-01-07.wav
03-01-01-01-02-02-07.wav
03-01-02-01-01-01-07.wav
03-01-02-01-01-02-07.wav
03-01-02-01-02-01-07.wav
03-01-02-01-02-02-07.wav


03-01-02-01-01-02-12.wav
03-01-02-01-02-01-12.wav
03-01-02-01-02-02-12.wav
03-01-02-02-01-01-12.wav
03-01-02-02-01-02-12.wav
03-01-02-02-02-01-12.wav
03-01-02-02-02-02-12.wav
03-01-03-01-01-01-12.wav
03-01-03-01-01-02-12.wav
03-01-03-01-02-01-12.wav
03-01-03-01-02-02-12.wav
03-01-03-02-01-01-12.wav
03-01-03-02-01-02-12.wav
03-01-03-02-02-01-12.wav
03-01-03-02-02-02-12.wav
03-01-04-01-01-01-12.wav
03-01-04-01-01-02-12.wav
03-01-04-01-02-01-12.wav
03-01-04-01-02-02-12.wav
03-01-04-02-01-01-12.wav
03-01-04-02-01-02-12.wav
03-01-04-02-02-01-12.wav
03-01-04-02-02-02-12.wav
03-01-05-01-01-01-12.wav
03-01-05-01-01-02-12.wav
03-01-05-01-02-01-12.wav
03-01-05-01-02-02-12.wav
03-01-05-02-01-01-12.wav
03-01-05-02-01-02-12.wav
03-01-05-02-02-01-12.wav
03-01-05-02-02-02-12.wav
03-01-06-01-01-01-12.wav
03-01-06-01-01-02-12.wav
03-01-06-01-02-01-12.wav
03-01-06-01-02-02-12.wav
03-01-06-02-01-01-12.wav
03-01-06-02-01-02-12.wav
03-01-06-02-02-01-12.wav
03-01-06-02-02-02-12.wav
03-01-07-01-01-01-12.wav


03-01-05-02-01-02-17.wav
03-01-05-02-02-01-17.wav
03-01-05-02-02-02-17.wav
03-01-06-01-01-01-17.wav
03-01-06-01-01-02-17.wav
03-01-06-01-02-01-17.wav
03-01-06-01-02-02-17.wav
03-01-06-02-01-01-17.wav
03-01-06-02-01-02-17.wav
03-01-06-02-02-01-17.wav
03-01-06-02-02-02-17.wav
03-01-07-01-01-01-17.wav
03-01-07-01-01-02-17.wav
03-01-07-01-02-01-17.wav
03-01-07-01-02-02-17.wav
03-01-07-02-01-01-17.wav
03-01-07-02-01-02-17.wav
03-01-07-02-02-01-17.wav
03-01-07-02-02-02-17.wav
03-01-08-01-01-01-17.wav
03-01-08-01-01-02-17.wav
03-01-08-01-02-01-17.wav
03-01-08-01-02-02-17.wav
03-01-08-02-01-01-17.wav
03-01-08-02-01-02-17.wav
03-01-08-02-02-01-17.wav
03-01-08-02-02-02-17.wav
03-01-01-01-01-01-18.wav
03-01-01-01-01-02-18.wav
03-01-01-01-02-01-18.wav
03-01-01-01-02-02-18.wav
03-01-02-01-01-01-18.wav
03-01-02-01-01-02-18.wav
03-01-02-01-02-01-18.wav
03-01-02-01-02-02-18.wav
03-01-02-02-01-01-18.wav
03-01-02-02-01-02-18.wav
03-01-02-02-02-01-18.wav
03-01-02-02-02-02-18.wav
03-01-03-01-01-01-18.wav


03-01-02-01-01-02-23.wav
03-01-02-01-02-01-23.wav
03-01-02-01-02-02-23.wav
03-01-02-02-01-01-23.wav
03-01-02-02-01-02-23.wav
03-01-02-02-02-01-23.wav
03-01-02-02-02-02-23.wav
03-01-03-01-01-01-23.wav
03-01-03-01-01-02-23.wav
03-01-03-01-02-01-23.wav
03-01-03-01-02-02-23.wav
03-01-03-02-01-01-23.wav
03-01-03-02-01-02-23.wav
03-01-03-02-02-01-23.wav
03-01-03-02-02-02-23.wav
03-01-04-01-01-01-23.wav
03-01-04-01-01-02-23.wav
03-01-04-01-02-01-23.wav
03-01-04-01-02-02-23.wav
03-01-04-02-01-01-23.wav
03-01-04-02-01-02-23.wav
03-01-04-02-02-01-23.wav
03-01-04-02-02-02-23.wav
03-01-05-01-01-01-23.wav
03-01-05-01-01-02-23.wav
03-01-05-01-02-01-23.wav
03-01-05-01-02-02-23.wav
03-01-05-02-01-01-23.wav
03-01-05-02-01-02-23.wav
03-01-05-02-02-01-23.wav
03-01-05-02-02-02-23.wav
03-01-06-01-01-01-23.wav
03-01-06-01-01-02-23.wav
03-01-06-01-02-01-23.wav
03-01-06-01-02-02-23.wav
03-01-06-02-01-01-23.wav
03-01-06-02-01-02-23.wav
03-01-06-02-02-01-23.wav
03-01-06-02-02-02-23.wav
03-01-07-01-01-01-23.wav


Building Decision Tree Model

In [None]:
from sklearn.tree import DecisionTreeClassifier 
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix


# Decision tree limited to depth 6. random_state is pinned (as in the other
# model cells) so that re-running the notebook reproduces the same tree.
dtree_model = DecisionTreeClassifier(max_depth=6, random_state=0).fit(X_train, y_train)
dtree_predictions = dtree_model.predict(X_test)

# Score once and reuse the value for both the raw and the formatted print.
accuracy = accuracy_score(y_true=y_test, y_pred=dtree_predictions)
print(accuracy)
print(classification_report(y_test, dtree_predictions))

print(confusion_matrix(y_test, dtree_predictions))

print("Accuracy: {:.2f}%".format(accuracy*100))

0.5989583333333334
              precision    recall  f1-score   support

       angry       0.73      0.84      0.78        49
        calm       0.67      0.63      0.65        46
       happy       0.59      0.38      0.47        52
         sad       0.42      0.56      0.48        45

    accuracy                           0.60       192
   macro avg       0.60      0.60      0.59       192
weighted avg       0.61      0.60      0.59       192

[[41  1  6  1]
 [ 1 29  1 15]
 [13  1 20 18]
 [ 1 12  7 25]]
Accuracy: 59.90%


In [None]:
from sklearn.tree import DecisionTreeClassifier 
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix


# Deeper decision tree (depth 9, fixed seed). This rebinds dtree_model and
# dtree_predictions, which the single-file prediction cell reads later.
dtree_model = DecisionTreeClassifier(max_depth=9, random_state=0)
dtree_model.fit(X_train, y_train)
dtree_predictions = dtree_model.predict(X_test)

# Test-set metrics: raw accuracy, per-class report, confusion matrix.
accuracy = accuracy_score(y_true=y_test, y_pred=dtree_predictions)
print(accuracy)
print(classification_report(y_test, dtree_predictions))

print(confusion_matrix(y_test, dtree_predictions))

print("Accuracy: {:.2f}%".format(accuracy * 100))

0.6197916666666666
              precision    recall  f1-score   support

       angry       0.68      0.80      0.74        49
        calm       0.73      0.70      0.71        46
       happy       0.59      0.42      0.49        52
         sad       0.48      0.58      0.53        45

    accuracy                           0.62       192
   macro avg       0.62      0.62      0.62       192
weighted avg       0.62      0.62      0.62       192

[[39  1  8  1]
 [ 2 32  3  9]
 [12  0 22 18]
 [ 4 11  4 26]]
Accuracy: 61.98%


In [None]:
from sklearn.ensemble import RandomForestClassifier
  
# create the classifier object
classifier = RandomForestClassifier(n_estimators=90, random_state=0)

# Fit the classifier on the training split, then label the held-out samples.
classifier.fit(X_train, y_train)
c_p = classifier.predict(X_test)

# Test-set metrics: raw accuracy, per-class report, confusion matrix.
accuracy = accuracy_score(y_true=y_test, y_pred=c_p)
print(accuracy)
print(classification_report(y_test, c_p))

print(confusion_matrix(y_test, c_p))

print("Accuracy: {:.2f}%".format(accuracy * 100))

0.75
              precision    recall  f1-score   support

       angry       0.87      0.82      0.84        49
        calm       0.67      0.93      0.78        46
       happy       0.74      0.77      0.75        52
         sad       0.75      0.47      0.58        45

    accuracy                           0.75       192
   macro avg       0.76      0.75      0.74       192
weighted avg       0.76      0.75      0.74       192

[[40  2  6  1]
 [ 0 43  1  2]
 [ 6  2 40  4]
 [ 0 17  7 21]]
Accuracy: 75.00%


In [None]:
from sklearn.svm import SVC 
# Support-vector classifier with a linear kernel and C = 1.
svm_model_linear = SVC(kernel="linear", C=1)
svm_model_linear.fit(X_train, y_train)
svm_predictions = svm_model_linear.predict(X_test)

# Test-set metrics: raw accuracy, per-class report, confusion matrix.
accuracy = accuracy_score(y_true=y_test, y_pred=svm_predictions)
print(accuracy)
print(classification_report(y_test, svm_predictions))
print(confusion_matrix(y_test, svm_predictions))
print("Accuracy: {:.2f}%".format(accuracy * 100))

0.6614583333333334
              precision    recall  f1-score   support

       angry       0.68      0.73      0.71        49
        calm       0.75      0.83      0.78        46
       happy       0.61      0.54      0.57        52
         sad       0.60      0.56      0.57        45

    accuracy                           0.66       192
   macro avg       0.66      0.66      0.66       192
weighted avg       0.66      0.66      0.66       192

[[36  0  9  4]
 [ 1 38  2  5]
 [14  2 28  8]
 [ 2 11  7 25]]
Accuracy: 66.15%


In [None]:
from sklearn.svm import SVC 
# SVC with default arguments. NOTE: no kernel is passed here, so this is not
# a linear-kernel model (scikit-learn's default kernel is 'rbf'). The name
# svm_model_linear is kept because the single-file prediction cell reads it;
# this assignment replaces the linear model fitted in the previous cell.
svm_model_linear = SVC()
svm_model_linear.fit(X_train, y_train)
svm_predictions = svm_model_linear.predict(X_test)

# Test-set metrics: raw accuracy, per-class report, confusion matrix.
accuracy = accuracy_score(y_true=y_test, y_pred=svm_predictions)
print(accuracy)
print(classification_report(y_test, svm_predictions))

print(confusion_matrix(y_test, svm_predictions))

print("Accuracy: {:.2f}%".format(accuracy * 100))

0.4895833333333333
              precision    recall  f1-score   support

       angry       0.57      0.59      0.58        49
        calm       0.52      0.87      0.65        46
       happy       0.35      0.27      0.30        52
         sad       0.46      0.24      0.32        45

    accuracy                           0.49       192
   macro avg       0.47      0.49      0.46       192
weighted avg       0.47      0.49      0.46       192

[[29  2 17  1]
 [ 0 40  3  3]
 [20  9 14  9]
 [ 2 26  6 11]]
Accuracy: 48.96%


In [None]:
# Classify one standalone recording with the current decision tree.
filename = "C:\\Users\\Dell\\Downloads\\ML lab\\speech emotion\\audio2.wav"

# Same five feature groups that load_data enables for the training data.
feature = extract_feature(
    filename,
    mfcc=True,
    chroma=True,
    mel=True,
    contrast=True,
    tonnetz=True,
)
prediction = dtree_model.predict([feature])
print(prediction)

# Test-set accuracy of the stored dtree_predictions; it is not derived from
# the single recording classified above.
accuracy = accuracy_score(y_true=y_test, y_pred=dtree_predictions)
print("Accuracy: {:.2f}%".format(accuracy * 100))


['sad']
Accuracy: 61.98%


In [None]:
# Classify the same standalone recording with the random forest.
filename = "C:\\Users\\Dell\\Downloads\\ML lab\\speech emotion\\audio2.wav"

# Same five feature groups that load_data enables for the training data.
feature = extract_feature(
    filename,
    mfcc=True,
    chroma=True,
    mel=True,
    contrast=True,
    tonnetz=True,
)
vimal = classifier.predict([feature])
print(vimal)

# Test-set accuracy of the stored random-forest predictions (c_p); it is not
# derived from the single recording classified above.
accuracy = accuracy_score(y_true=y_test, y_pred=c_p)
print("Accuracy: {:.2f}%".format(accuracy * 100))



['happy']
Accuracy: 75.00%


In [None]:
# Classify the same standalone recording with whichever model is currently
# bound to svm_model_linear.
filename = "C:\\Users\\Dell\\Downloads\\ML lab\\speech emotion\\audio2.wav"

# Same five feature groups that load_data enables for the training data.
feature = extract_feature(
    filename,
    mfcc=True,
    chroma=True,
    mel=True,
    contrast=True,
    tonnetz=True,
)
predictions = svm_model_linear.predict([feature])
print(predictions)

# Test-set accuracy of the stored svm_predictions; it is not derived from the
# single recording classified above.
accuracy = accuracy_score(y_true=y_test, y_pred=svm_predictions)
print("Accuracy: {:.2f}%".format(accuracy * 100))

['angry']
Accuracy: 48.96%
