In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive; the audio
# files are read from a path under this mount point later in the notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import librosa
import soundfile
import os, glob, pickle
import random
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [None]:
def extract_feature(file_name, mfcc, chroma, mel):
    """Build one feature vector for an audio file.

    Every enabled feature family is averaged over time and the families are
    concatenated in a fixed order: MFCC (40 coefficients), chroma, then mel
    spectrogram.

    Args:
        file_name: Path of the audio file, opened with ``soundfile.SoundFile``.
        mfcc: If truthy, include the time-averaged MFCCs.
        chroma: If truthy, include the time-averaged chromagram computed
            from the STFT magnitude.
        mel: If truthy, include the time-averaged mel spectrogram.

    Returns:
        1-D ``numpy`` array holding the selected features; empty when every
        flag is falsy.
    """
    # Only the read needs the file handle; release it before the heavy work.
    with soundfile.SoundFile(file_name) as sound_file:
        signal = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile returns (frames, channels) for multi-channel files, whereas
    # librosa expects the sample axis last; average the channels to mono so
    # stereo recordings produce the same kind of vector as mono ones.
    if signal.ndim > 1:
        signal = signal.mean(axis=1)

    # Collect the pieces under their own names instead of overwriting the
    # boolean `chroma` / `mel` arguments with arrays.
    parts = []
    if mfcc:
        mfcc_frames = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40)
        parts.append(np.mean(mfcc_frames.T, axis=0))
    if chroma:
        stft = np.abs(librosa.stft(signal))
        chroma_frames = librosa.feature.chroma_stft(S=stft, sr=sample_rate)
        parts.append(np.mean(chroma_frames.T, axis=0))
    if mel:
        mel_frames = librosa.feature.melspectrogram(y=signal, sr=sample_rate)
        parts.append(np.mean(mel_frames.T, axis=0))

    # The leading empty float64 array keeps the result dtype and the
    # "no features selected" result identical to incremental hstack-ing.
    return np.hstack([np.array([])] + parts)

In [None]:
# Emotion label for each two-digit code found in the third dash-separated
# field of a recording's file name.
emotions = {
    f"{code:02d}": label
    for code, label in enumerate(
        (
            "neutral",
            "calm",
            "happy",
            "sad",
            "angry",
            "fearful",
            "disgust",
            "surprised",
        ),
        start=1,
    )
}

# Only recordings carrying one of these labels are kept when loading data.
observed_emotions = ["calm", "happy", "fearful", "disgust"]

In [None]:
#DataFlair - Load the data and extract features for each sound file
# Default dataset location: every .wav inside the Actor_* folders on the mounted Drive.
_DEFAULT_DATA_GLOB = "/content/drive/MyDrive/Deep Learning/Emotion Detection using Audio/code/Actor_*/*.wav"


def load_data(test_size=0.2, data_glob=_DEFAULT_DATA_GLOB):
    """Extract features for the selected recordings and split them.

    The emotion of a recording is looked up in ``emotions`` using the third
    dash-separated field of its file name; recordings whose emotion is not in
    ``observed_emotions`` are skipped.

    Args:
        test_size: Forwarded to ``train_test_split``.
        data_glob: Glob pattern selecting the audio files. Defaults to the
            dataset folder on the mounted Drive.

    Returns:
        The ``train_test_split`` result ``(x_train, x_test, y_train, y_test)``
        obtained with ``random_state=9``.

    Raises:
        ValueError: If no recording with an observed emotion matches
            ``data_glob``.
        KeyError: If a file name carries an emotion code missing from
            ``emotions``.
    """
    # glob yields files in filesystem-dependent order; sorting makes the
    # seeded split pick the same samples on every machine and run.
    paths = sorted(glob.glob(data_glob))

    x, y = [], []
    for path in paths:
        emotion = emotions[os.path.basename(path).split("-")[2]]
        if emotion not in observed_emotions:
            continue
        x.append(extract_feature(path, mfcc=True, chroma=True, mel=True))
        y.append(emotion)

    # Fail with an actionable message (e.g. Drive not mounted, wrong folder)
    # rather than an opaque error from train_test_split on empty input.
    if not x:
        raise ValueError(
            "No audio files with an observed emotion matched {!r}".format(data_glob)
        )

    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)

In [None]:
# Extract features for the selected recordings and hold out 19% of them as the test split.
x_train,x_test,y_train,y_test=load_data(test_size=0.19)

In [None]:
# Number of samples in the training split and in the test split.
print((x_train.shape[0], x_test.shape[0]))

(622, 146)


In [None]:
# Perceptron with one hidden layer of 300 units. random_state fixes the
# weight initialisation and batch shuffling so retraining reproduces the
# same model and metrics.
# NOTE(review): per the scikit-learn docs, learning_rate='adaptive' is only
# used with solver='sgd'; with the default solver it has no effect - confirm intent.
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500, random_state=9)


In [None]:
# Train the classifier on the training split.
model.fit(x_train,y_train)

In [None]:
# Predict an emotion label for every sample of the test split.
y_pred=model.predict(x_test)

In [None]:
# Score the test-split predictions against the true test labels.
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Evaluate the test-split predictions: compute both summaries first,
# then print each one under its heading.
result = confusion_matrix(y_test, y_pred)
result1 = classification_report(y_test, y_pred)

print("Confusion Matrix:", result, sep="\n")
print("Classification Report:", result1, sep="\n")

Confusion Matrix:
[[32  9  1  1]
 [ 1 32  1  6]
 [ 2  5 18  5]
 [ 1  6  1 25]]
Classification Report:
              precision    recall  f1-score   support

        calm       0.89      0.74      0.81        43
     disgust       0.62      0.80      0.70        40
     fearful       0.86      0.60      0.71        30
       happy       0.68      0.76      0.71        33

    accuracy                           0.73       146
   macro avg       0.76      0.73      0.73       146
weighted avg       0.76      0.73      0.74       146



In [None]:
# `accuracy` is a fraction; multiply by exactly 100 to report it as a percentage.
print("Accuracy: {:.4f}%".format(accuracy*100))

Accuracy: 85.0137%


In [None]:
# Chance-level baseline: guess a label uniformly at random for every test
# sample and score the guesses against the true test labels.

# Only the emotions kept in the dataset can ever be a correct answer, so the
# guesses are drawn from those labels rather than from all eight emotions.
emotionslist = list(observed_emotions)

# samples the baseline "predicts" for
test_data = x_test

# Dedicated seeded generator so the baseline figure is the same on every run.
baseline_rng = random.Random(9)
random_predictions = [baseline_rng.choice(emotionslist) for _ in range(len(test_data))]

# Compare with y_test: these are the labels of the samples in test_data.
correct_predictions = [1 if pred == true_label else 0 for pred, true_label in zip(random_predictions, y_test)]

# Kept under its own name so the classifier's `accuracy` is not overwritten.
baseline_accuracy = sum(correct_predictions) / len(correct_predictions)
print("Random baseline accuracy: {:.2f}%".format(baseline_accuracy * 100))


Random baseline accuracy: 16.44%
