In [1]:
#Importing Libraries
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import os
import librosa
import random

In [2]:
# Define function to extract audio features
def extract_features(audio_data, sr=22050, n_mfcc=40):
    """Summarise an audio clip as a fixed-length vector of MFCC statistics.

    Args:
        audio_data: Audio time series as returned by ``librosa.load``.
        sr: Sampling rate of ``audio_data`` in Hz. Defaults to 22050, the
            rate ``librosa.load`` resamples to when no ``sr`` is given, so
            callers that load with librosa's defaults may omit it.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        1-D array of length ``2 * n_mfcc``: the per-coefficient mean over
        time followed by the per-coefficient standard deviation over time.
    """
    # `sr` is an explicit argument rather than a module-level variable, so the
    # function no longer depends on which cell last happened to call
    # librosa.load.
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sr, n_mfcc=n_mfcc)
    # mfccs has one row per coefficient and one column per frame; reducing
    # along axis 1 collapses the time dimension.
    mfccs_mean = np.mean(mfccs, axis=1)
    mfccs_std = np.std(mfccs, axis=1)
    return np.concatenate((mfccs_mean, mfccs_std), axis=0)

In [3]:
# Define paths to audio files
data_dir = 'data'
# os.listdir returns entries in arbitrary order, which would make the
# class-index assignment differ between machines and runs. Sort the names so
# index i always maps to the same class, and keep only real, non-hidden
# sub-directories so stray files (e.g. .DS_Store) or checkpoint folders are
# not treated as classes.
classes = sorted(
    entry
    for entry in os.listdir(data_dir)
    if not entry.startswith('.') and os.path.isdir(os.path.join(data_dir, entry))
)

In [4]:
# Build the dataset: one feature vector per audio file (X) and the integer
# index of the class folder it came from (y).
index_to_label = dict(enumerate(classes))
label_to_index = {label: i for i, label in enumerate(classes)}

X, y = [], []
for i, label in enumerate(classes):
    class_path = os.path.join(data_dir, label)
    for file_name in os.listdir(class_path):
        file_path = os.path.join(class_path, file_name)
        # librosa.load returns (samples, sampling_rate).
        audio_data, sr = librosa.load(file_path, res_type='kaiser_fast')
        features = extract_features(audio_data)
        X.append(features)
        y.append(i)

In [5]:
# Turn the accumulated Python lists into NumPy arrays so they can be
# index-shuffled, sliced and reshaped below.
X, y = (np.array(sequence) for sequence in (X, y))

In [6]:
# Shuffle data
# A fixed seed makes the permutation, and therefore the train/test split taken
# from it, identical on every Restart & Run All, so reported metrics are
# reproducible.
SHUFFLE_SEED = 42
assert len(X) == len(y), "features and labels must have the same length"
indices = np.random.default_rng(SHUFFLE_SEED).permutation(len(X))
# The same permutation is applied to both arrays so rows stay paired with
# their labels.
X = X[indices]
y = y[indices]

In [7]:
# Hold out the tail of the (already shuffled) data for testing:
# first 80% of rows -> training, remaining 20% -> testing.
train_fraction = 0.8
split_index = int(len(X) * train_fraction)
X_train = X[:split_index]
X_test = X[split_index:]
y_train = y[:split_index]
y_test = y[split_index:]

In [8]:
# Conv1D expects (samples, steps, channels); append a length-1 channel axis
# after the feature axis.
X_train = X_train[:, :, np.newaxis]
X_test = X_test[:, :, np.newaxis]

In [9]:
# Assemble the 1-D CNN layer by layer: two conv/pool stages, then a dense
# classifier head with dropout and one softmax unit per class.
n_steps, n_channels = X_train.shape[1], X_train.shape[2]

model = tf.keras.Sequential()
model.add(layers.Conv1D(32, 3, activation='relu', input_shape=(n_steps, n_channels)))
model.add(layers.MaxPooling1D(2))
model.add(layers.Conv1D(64, 3, activation='relu'))
model.add(layers.MaxPooling1D(2))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(len(classes), activation='softmax'))

In [10]:
# Configure training: Adam optimiser, categorical cross-entropy (targets are
# supplied one-hot encoded), and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [11]:
# One-hot encode the integer labels of both splits; the vector width is the
# number of classes.
y_train_one_hot, y_test_one_hot = (
    tf.keras.utils.to_categorical(labels, num_classes=len(classes))
    for labels in (y_train, y_test)
)

In [13]:
# Fit the network. The test split doubles as the validation set here, so the
# per-epoch validation metrics are computed on the same rows that are
# evaluated afterwards.
fit_options = dict(
    batch_size=32,
    epochs=15,
    validation_data=(X_test, y_test_one_hot),
)
model.fit(X_train, y_train_one_hot, **fit_options)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x228ea6ae670>

In [14]:
# Score the trained model on the held-out split. evaluate() returns the loss
# followed by the metrics passed to compile() (accuracy only).
score = model.evaluate(X_test, y_test_one_hot, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

Test loss: 0.01797403022646904
Test accuracy: 0.9927536249160767


In [15]:
# Persist the trained model; the path is relative, so the file is written to
# the current working directory.
model_path = 'emotion_audio_classifier.h5'
model.save(model_path)

In [29]:
# Read one clip for a manual inference check, using the same loader settings
# as the training data.
file_path = 'test_data/neutral/OAF_choice_neutral.wav'
loaded_clip = librosa.load(file_path, res_type='kaiser_fast')
audio_data, sr = loaded_clip  # (samples, sampling rate)

In [30]:
# Turn the loaded waveform into the fixed-length vector produced by
# extract_features (per-coefficient MFCC mean followed by standard deviation),
# the same representation the training rows in X were built from.
features = extract_features(audio_data)

In [31]:
# Give the single feature vector the layout the model was trained on:
# a leading batch axis of size 1 and a trailing channel axis of size 1.
features = features[np.newaxis, :, np.newaxis]

In [32]:
# Use the trained model to make a prediction.
# The final layer is a softmax, so `prediction` holds one row of per-class
# scores for each input row; `features` carries a single clip here, so the
# row of interest is prediction[0].
prediction = model.predict(features)



In [33]:
# Print the per-class softmax probabilities for the clip. This is the raw
# score vector, not a class label; the winning index is taken from it with
# argmax in a later cell.
print('Predicted class probabilities:', prediction[0])

Predicted class: [1.5699037e-10 5.4681482e-08 2.5704635e-11 5.7200127e-11 1.0000000e+00
 5.7933669e-08 6.6398630e-11]


In [34]:
# Human-readable emotion names keyed by class index (0-6).
# NOTE(review): this hard-coded order is assumed to match the index order
# assigned from enumerate(classes) at training time; verify against
# index_to_label before trusting the printed name.
label_to_class = dict(enumerate((
    "Angry",
    "Disgust",
    "Fear",
    "Happy",
    "Neutral",
    "Pleasant_Surprised",
    "Sad",
)))

In [35]:
# Index of the largest score. With no axis given the array is flattened,
# which equals the class index because `prediction` has a single row.
predicted_class = prediction.argmax()

In [36]:
# Translate the winning class index into its emotion name.
predicted_class_name = label_to_class[predicted_class]

In [37]:
# Report the emotion name looked up for the highest-scoring class.
print("Predicted class label:", predicted_class_name)

Predicted class label: Neutral
