In [None]:
## Importing libraries

!pip install librosa
!pip install tqdm
!pip install plotly
import os
import random
import sys
import glob
import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import warnings
from sklearn.model_selection import StratifiedShuffleSplit
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
from tqdm import tqdm
import plotly.offline as py

# Suppress every Python warning for the rest of the session (this also hides deprecation notices).
warnings.simplefilter("ignore")
# Do not truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
# Initialise plotly's offline notebook mode.
py.init_notebook_mode(connected=True)



In [None]:
# Mount Google Drive into the Colab VM; a later cell copies the dataset archive from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Copy the dataset archive from the mounted Drive into the current working directory,
# then unpack it quietly (-q suppresses the per-file listing).
!cp '/content/drive/MyDrive/ravdess.zip' .
!unzip  -q ravdess.zip

In [None]:
import os
import glob

def metadata(basepath):
    """Build a metadata table for the ``.wav`` files found under actor folders.

    Parameters
    ----------
    basepath : str
        Glob pattern matching the actor folders (e.g. ``'/content/Actor_*'``).
        Only directories are scanned; the actor number is the integer after
        the last ``_`` in the folder name. Folders whose suffix is not an
        integer are reported with a message and skipped.

    Returns
    -------
    pandas.DataFrame
        One row per ``.wav`` file whose stem has exactly seven ``-``-separated
        fields, with the columns ``path``, ``source``, ``actor``, ``gender``,
        ``intensity``, ``statement``, ``repetition``, ``emotion`` and
        ``label``. The frame is empty (but has all columns) if nothing matches.

    Notes
    -----
    * ``source`` and ``emotion`` are the integer values of fields 1 and 2.
    * ``intensity``, ``statement`` and ``repetition`` are 0 when the matching
      field is ``'01'`` and 1 otherwise.
    * ``gender`` is ``"female"`` for even actor numbers and ``"male"`` for odd.
    * ``label`` is the emotion name prefixed with ``_``; codes outside 1-8
      map to ``"_none"``.
    * Folders and files are visited in sorted order so that the row index is
      reproducible between runs (``glob`` itself guarantees no order).
    """
    columns = ['path', 'source', 'actor', 'gender', 'intensity', 'statement', 'repetition', 'emotion']
    emotion_labels = {
        1: "_neutral",
        2: "_calm",
        3: "_happy",
        4: "_sad",
        5: "_angry",
        6: "_fearful",
        7: "_disgust",
        8: "_surprised",
    }

    # Collect plain rows first and build the frame once; appending with
    # df.loc[count] copies the whole frame on every insertion.
    records = []
    for actor_folder in sorted(glob.glob(basepath)):
        if not os.path.isdir(actor_folder):
            continue
        try:
            actor = int(os.path.basename(actor_folder).split('_')[-1])
        except ValueError:
            print("Invalid actor folder:", actor_folder)
            continue

        gender = "female" if actor % 2 == 0 else "male"

        for file_path in sorted(glob.glob(os.path.join(actor_folder, '*.wav'))):
            fields = os.path.basename(file_path).split('.')[0].split('-')
            if len(fields) != 7:
                # Not a seven-field file name; ignore it.
                continue

            records.append([
                file_path,
                int(fields[1]),                   # source
                actor,
                gender,
                0 if fields[3] == '01' else 1,    # intensity
                0 if fields[4] == '01' else 1,    # statement
                0 if fields[5] == '01' else 1,    # repetition
                int(fields[2]),                   # emotion code
            ])

    df = pd.DataFrame(records, columns=columns)
    df['label'] = [emotion_labels.get(code, "_none") for code in df['emotion']]

    return df

# Glob pattern for the per-actor folders.
# NOTE(review): presumably these folders come from unzipping ravdess.zip — confirm the archive layout.
basepath = '/content/Actor_*'
df = metadata(basepath)


In [None]:
# Emotion codes of every row of the metadata table, as a plain Python list.
y = df["emotion"].to_numpy().tolist()


In [None]:
class Spectrograms():
    """Render the audio files listed in a DataFrame as spectrogram images.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose *index* holds the audio file paths and which has at least
        the columns ``label`` (used as the output sub-folder name) and
        ``emotion`` (used in the mel image file name).
    datasettype : str
        Appended directly to ``outputpath`` (no separator is inserted).
    outputpath : str
        Prefix of the output location. Images are written to
        ``outputpath + datasettype + "/" + label + "/"``.
    sample : bool
        If true, only the first file is processed and the figures are shown
        instead of being saved.
    augmentation : bool
        Stored on the instance; not used by any method of this class.
    mel, mfcc, spectral : bool
        Select which feature images are produced.
    mfccbanks : int
        Number of MFCC coefficients (``n_mfcc``).
    n_mels : int
        Number of mel bands (``n_mels``).
    """

    def __init__(self, df, datasettype, outputpath, sample=False, augmentation=False, mel=True, mfcc=False, spectral=False, mfccbanks=20, n_mels=128):
        self.df = df
        self.augmentation = augmentation
        self.mel = mel
        self.mfcc = mfcc
        self.spectral = spectral
        self.mfccbanks = mfccbanks
        self.n_mels = n_mels
        self.outputpath = outputpath
        self.datasettype = datasettype
        self.sample = sample

    def get_spectrograms(self):
        """Generate images for the first file (sample mode) or for every row.

        In sample mode nothing is written to disk. Otherwise one folder per
        label is created on demand and each row's images are saved there.
        """
        if self.sample:
            x, sample_rate = librosa.load(self.df.index[0])
            # No file name is needed because sample mode only shows the figure.
            self.generate(x, sample_rate, '', 0)
            return

        # Read from self.df (not a module-level frame) so the instance works
        # with whatever table it was constructed with.
        label_col = self.df.columns.get_loc('label')
        emotion_col = self.df.columns.get_loc('emotion')

        for row in tqdm(range(self.df.shape[0])):
            label = self.df.iloc[row, label_col]
            # Plain concatenation is kept on purpose: later cells read the
            # images back from the resulting "<outputpath><datasettype>" folder.
            path = self.outputpath + self.datasettype + "/" + str(label) + "/"
            os.makedirs(path, exist_ok=True)

            ## Reading signal from .wav file
            x, sample_rate = librosa.load(self.df.index[row])
            emo = self.df.iloc[row, emotion_col]
            self.generate(x, sample_rate, path, row, emo)

    def generate(self, x, sample_rate, path, count, emo=None):
        """Create the enabled feature images for one audio signal.

        Parameters
        ----------
        x : audio samples as returned by ``librosa.load``.
        sample_rate : sampling rate returned by ``librosa.load``.
        path : str
            Output folder prefix (must already end with a separator).
        count : int
            Row number, used in the file names.
        emo : optional
            Emotion code, used in the mel image file name. It is optional so
            that sample mode (which saves nothing) can omit it.
        """
        if self.mel:
            mel_features = librosa.feature.melspectrogram(y=x, sr=sample_rate, n_mels=self.n_mels)
            log_mel_features = librosa.power_to_db(mel_features, ref=np.max)
            self._render(log_mel_features, sample_rate, path + str(emo) + "-" + str(count) + ".jpg")

        if self.mfcc:
            # The audio must be passed by keyword (keyword-only in recent librosa).
            mfcc_features = librosa.feature.mfcc(y=x, sr=sample_rate, n_mfcc=self.mfccbanks)
            self._render(mfcc_features, sample_rate, path + "mfccspectrogram_" + str(count) + ".jpg")

        if self.spectral:
            spectral_features = librosa.feature.spectral_contrast(y=x, sr=sample_rate)
            self._render(spectral_features, sample_rate, path + "spectralspectrogram_" + str(count) + ".jpg")

    def _render(self, features, sample_rate, target):
        """Draw ``features`` on a borderless 12x4 figure, then show or save it."""
        fig = plt.figure(figsize=(12, 4))
        # Axes spanning the whole canvas with ticks/frame hidden, so the saved
        # image contains only the spectrogram itself.
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        # NOTE(review): y_axis='mel' is applied to MFCC and spectral-contrast
        # images as well, exactly as before; confirm that is intended.
        librosa.display.specshow(features, sr=sample_rate, x_axis='time', y_axis='mel', ax=ax)
        if self.sample:
            plt.show()
        else:
            fig.savefig(target)
        # Always release the figure; otherwise figures pile up in memory.
        plt.close(fig)

In [None]:
# Build a second, independent metadata table; the next cell re-indexes it by file path.
df_new = metadata('/content/Actor_*')


In [None]:
# Promote the 'path' column to the index (Spectrograms reads file paths from the index)
# and drop it from the columns in the same step.
df_new = df_new.set_index("path")

In [None]:
# Save one mel-spectrogram image per audio file (mel=True is the class default).
# The class joins outputpath and datasettype without a separator, so the images
# end up under /content/spectrogramsimages_new/<label>/.
spectrograms = Spectrograms(df_new, 'images_new', '/content/spectrograms', sample=False)
spectrograms.get_spectrograms()

In [None]:
!pip install opencv-python
import os
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# One sub-folder per emotion label. os.listdir returns entries in arbitrary
# order, and the position in this list becomes the numeric class label, so
# sort it to keep the class-index -> emotion mapping stable between runs.
categories = sorted(os.listdir("/content/spectrogramsimages_new"))
len(categories)

In [None]:
def load_images_and_labels(categories, base_path=None):
    """Load every image of every category folder as a 224x224 RGB array.

    Parameters
    ----------
    categories : sequence of str
        Sub-folder names; the position of a name in this sequence is used as
        the integer label of all images in that folder.
    base_path : str, optional
        Folder containing the category sub-folders. When omitted, the
        module-level ``fpath`` is used (the original behaviour).

    Returns
    -------
    (list of numpy.ndarray, list of int)
        The resized images and their labels, in matching order. Files that
        OpenCV cannot decode are reported and skipped.
    """
    root = fpath if base_path is None else base_path

    img_lst = []
    labels = []
    for index, category in enumerate(categories):
        category_dir = os.path.join(root, category)
        # Sorted so that the load order (and any seeded shuffle applied
        # afterwards) is reproducible.
        for image_name in sorted(os.listdir(category_dir)):
            image_path = os.path.join(category_dir, image_name)
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals an unreadable/non-image file with None
                # instead of raising; cvtColor would crash on it.
                print("Skipping unreadable image:", image_path)
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            img_array = Image.fromarray(img, 'RGB')

            #resize image to 224 x 224 because the input image resolution for GoogleNet is 224 x 224
            resized_img = img_array.resize((224, 224))

            img_lst.append(np.array(resized_img))
            labels.append(index)
    return img_lst, labels

# Root folder of the saved spectrogram images; load_images_and_labels reads this module-level name.
fpath = "/content/spectrogramsimages_new"
# images is a list of arrays, labels a list of category indices (position in `categories`).
images, labels = load_images_and_labels(categories)
print("No. of images loaded = ",len(images),"\nNo. of labels loaded = ",len(labels))
print(type(images),type(labels))


In [None]:
# Stack the Python lists into NumPy arrays (the names are rebound, so the lists are no longer available).
images = np.array(images)
labels = np.array(labels)

print("Images shape = ",images.shape,"\nLabels shape = ",labels.shape)
print(type(images),type(labels))

In [None]:
#1-step in data shuffling
random_seed=42
#get equally spaced numbers in a given range
n = np.arange(images.shape[0])
print("'n' values before shuffling = ",n)

#shuffle all the equally spaced values in list 'n'
np.random.seed(random_seed)
np.random.shuffle(n)
print("\n'n' values after shuffling = ",n)

In [None]:
#2-step in data shuffling

#shuffle images and corresponding labels data in both the lists
images = images[n]
labels = labels[n]

print("Images shape after shuffling = ",images.shape,"\nLabels shape after shuffling = ",labels.shape)

In [None]:
# Convert to the dtypes used for training and divide pixel values by 255.
# Not idempotent: running this cell twice divides by 255 twice.
images = images.astype(np.float32)
labels = labels.astype(np.int32)
images = images/255
print("Images shape after normalization = ",images.shape)

In [None]:
# Hold out 20% of the samples for testing; random_state makes the split repeatable.
# NOTE(review): no `stratify` argument is passed, so class proportions may differ between the two splits.
x_train, x_test, y_train, y_test = train_test_split(images, labels, test_size = 0.2, random_state = random_seed)

print("x_train shape = ",x_train.shape)
print("y_train shape = ",y_train.shape)
print("\nx_test shape = ",x_test.shape)
print("y_test shape = ",y_test.shape)

In [None]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization


In [None]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Dropout, Dense, Flatten, BatchNormalization, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input

def InceptionModule(layer, filters):
    """Apply one four-branch inception block to ``layer``.

    ``filters`` supplies the six filter counts, in this order:
    1x1 branch, 3x3-branch reduction (1x1), 3x3 convolution,
    5x5-branch reduction (1x1), 5x5 convolution, and the 1x1 projection
    applied after 3x3 max-pooling. All convolutions use 'same' padding and
    ReLU; the branch outputs are concatenated along the last axis.
    """
    def conv_relu(n_filters, kernel, tensor):
        # Every convolution in the block shares padding and activation.
        return Conv2D(n_filters, kernel, padding='same', activation='relu')(tensor)

    # Branch 1: plain 1x1 convolution.
    tower_1x1 = conv_relu(filters[0], (1, 1), layer)

    # Branch 2: 1x1 reduction followed by a 3x3 convolution.
    reduce_3x3 = conv_relu(filters[1], (1, 1), layer)
    tower_3x3 = conv_relu(filters[2], (3, 3), reduce_3x3)

    # Branch 3: 1x1 reduction followed by a 5x5 convolution.
    reduce_5x5 = conv_relu(filters[3], (1, 1), layer)
    tower_5x5 = conv_relu(filters[4], (5, 5), reduce_5x5)

    # Branch 4: stride-1 max-pooling followed by a 1x1 projection.
    pooled = MaxPooling2D(pool_size=(3, 3), strides=(1, 1), padding='same')(layer)
    tower_pool = conv_relu(filters[5], (1, 1), pooled)

    return concatenate([tower_1x1, tower_3x3, tower_5x5, tower_pool], axis=-1)

# GoogLeNet-style classifier for 224x224 RGB spectrogram images.
input_layer = Input(shape=(224, 224, 3))

# The labels are positions in `categories`, so the softmax head needs exactly
# one unit per category (a fixed 20 would add outputs no label can ever hit).
num_classes = len(categories)

# Stage 1: 7x7/2 convolution and 3x3/2 max-pooling (224 -> 112 -> 56).
x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', activation='relu')(input_layer)
x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)
x = BatchNormalization()(x)

# Stage 2: 1x1 reduction, 3x3 convolution, then 3x3/2 max-pooling (56 -> 28).
x = Conv2D(64, (1, 1), activation='relu')(x)
x = Conv2D(192, (3, 3), padding='same', activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

# Two inception modules, then down-sampling (28 -> 14).
x = InceptionModule(x, [64, 96, 128, 16, 32, 32])
x = InceptionModule(x, [128, 128, 192, 32, 96, 64])
x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

# Five inception modules, then down-sampling (14 -> 7).
x = InceptionModule(x, [192, 96, 208, 16, 48, 64])
x = InceptionModule(x, [160, 112, 224, 24, 64, 64])
x = InceptionModule(x, [128, 128, 256, 24, 64, 64])
x = InceptionModule(x, [112, 144, 288, 32, 64, 64])
x = InceptionModule(x, [256, 160, 320, 32, 128, 128])
x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

# Two final inception modules; the 7x7 average pool collapses the 7x7 map to 1x1.
x = InceptionModule(x, [256, 160, 320, 32, 128, 128])
x = InceptionModule(x, [384, 192, 384, 48, 128, 128])
x = AveragePooling2D(pool_size=(7, 7), strides=(1, 1), padding='valid')(x)

# Classification head.
x = Flatten()(x)
x = Dense(1000, activation='relu')(x)
x = Dropout(0.4)(x)
output = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=input_layer, outputs=output)
model.summary()


In [None]:
# The sparse categorical cross-entropy loss is used because the labels are integer class indices, not one-hot vectors.
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

In [None]:
%%time
# Train for 100 epochs on the training split. No validation data is passed and the
# returned History object is not kept, so no learning curves are available from this run.
model.fit(x_train, y_train, epochs=100)

# New Section

In [None]:
# Per-class softmax outputs for the test split; argmax over axis 1 picks the predicted class index.
predictions = model.predict(x_test)
predicted_classes = np.argmax(predictions, axis=1)

In [None]:
# Only the last expression of a cell is displayed, so the two bare names below produce no output.
predictions
y_test
# Integer copy of the test labels, used by the report cells that follow.
new_Ytest = y_test.astype(int)
new_Ytest

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the test split (first argument: true labels, second: predictions).
report = classification_report(new_Ytest, predicted_classes)
print(report)


In [None]:
from sklearn.metrics import confusion_matrix

# new_Ytest was already cast to int in an earlier cell; this cast is repeated so the cell also works on its own.
new_Ytest = new_Ytest.astype(int)
# Called as confusion_matrix(true labels, predicted labels).
matrix = confusion_matrix(new_Ytest, predicted_classes)
print(matrix)

In [None]:
# pred = model.predict(x_test)
# plt.figure(1 , figsize = (19 , 10))
# n = 0

# for i in range(9):
#     n += 1
#     r = np.random.randint( 0, x_test.shape[0], 1)

#     plt.subplot(3, 3, n)
#     plt.subplots_adjust(hspace = 0.3, wspace = 0.3)

#     plt.imshow(x_test[r[0]])
#     plt.title('Actual = {}, Predicted = {}'.format(y_test[r[0]] , y_test[r[0]]*pred[r[0]][y_test[r[0]]]) )
#     plt.xticks([]) , plt.yticks([])

# plt.show()

In [None]:
# from keras.layers import Dense, Flatten, Reshape, Input, InputLayer
# from keras.models import Sequential, Model

# def build_autoencoder(img_shape, code_size):
#     # The encoder
#     encoder = Sequential()
#     encoder.add(InputLayer(img_shape))
#     encoder.add(Flatten())
#     encoder.add(Dense(code_size))

#     # The decoder
#     decoder = Sequential()
#     decoder.add(InputLayer((code_size,)))
#     decoder.add(Dense(np.prod(img_shape))) # np.prod(img_shape) is the same as 32*32*3, it's more generic than saying 3072
#     decoder.add(Reshape(img_shape))

#     return encoder, decoder

In [None]:
# IMG_SHAPE = images.shape[1:]
# encoder, decoder = build_autoencoder(IMG_SHAPE, 32)

# inp = Input(IMG_SHAPE)
# code = encoder(inp)
# reconstruction = decoder(code)

# autoencoder = Model(inp,reconstruction)
# autoencoder.compile(optimizer='adamax', loss='mse')

# print(autoencoder.summary())

In [None]:
# history = autoencoder.fit(x=x_train, y=x_train, epochs=20,
#                 validation_data=[x_test, x_test])

In [None]:
# decoded_imgs = autoencoder.predict(images)

# n = 1
# plt.figure(figsize=(20, 4))
# for i in range(n):
#     # display original
#     ax = plt.subplot(2, n, i+1)
#     plt.imshow(x_test[i].reshape(224, 224,3))
#     plt.gray()
#     ax.get_xaxis().set_visible(False)
#     ax.get_yaxis().set_visible(False)

#     # display reconstruction
#     ax = plt.subplot(2, n, i + n+1)
#     plt.imshow(decoded_imgs[i].reshape(224, 224,3))
#     plt.gray()
#     ax.get_xaxis().set_visible(False)
#     ax.get_yaxis().set_visible(False)
# plt.show()

# print("Accuracy=",1-np.mean(abs(images-decoded_imgs)),'\n')

In [None]:
# decoded_imgs = autoencoder.predict(images)
# n = 1
# plt.figure(figsize=(20, 4))
# for i in range(n):
#     # display original
#     ax = plt.subplot(2, n, i+1)
#     plt.imshow(images[i].reshape(224, 224,3))
#     plt.gray()
#     ax.get_xaxis().set_visible(False)
#     ax.get_yaxis().set_visible(False)

#     # display reconstruction
#     ax = plt.subplot(2, n, i + n+1)
#     plt.imshow(decoded_imgs[i].reshape(224, 224,3))
#     plt.gray()
#     ax.get_xaxis().set_visible(False)
#     ax.get_yaxis().set_visible(False)
# plt.show()

# print("Accuracy=",1-np.mean(abs(images-decoded_imgs)),'\n')

In [None]:
# X_train, X_test, Y_train, Y_test = train_test_split(decoded_imgs, labels, test_size = 0.2, random_state = random_seed)

# print("x_train shape = ",X_train.shape)
# print("y_train shape = ",Y_train.shape)
# print("\nx_test shape = ",X_test.shape)
# print("y_test shape = ",Y_test.shape)

In [None]:
# %%time
# model.fit(X_train, Y_train, epochs=100, batch_size=32)

In [None]:
# evaluate returns [loss, accuracy] because the model was compiled with metrics=["accuracy"].
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

In [None]:
# Predicted class index per test sample (argmax over the softmax outputs).
predictions_new = np.argmax(model.predict(x_test), axis=-1)
# Only the last expression of a cell is displayed, so the two bare names below produce no output.
predictions_new
y_test
# Integer copy of the test labels for the metric cells that follow.
new_Ytest_new = y_test.astype(int)
new_Ytest_new

In [None]:
from sklearn.metrics import classification_report
# Same report as the earlier classification_report cell, computed from predictions_new / new_Ytest_new.
report = classification_report(new_Ytest_new, predictions_new)
print(report)

In [None]:
from sklearn.metrics import classification_report, matthews_corrcoef

report = classification_report(new_Ytest_new, predictions_new)
print(report)

# Calculate MCC for each emotion, one-vs-rest: "is this sample class k?" for
# the true labels against "was class k predicted?" for the predictions.
# Restricting the data to the samples whose true label is k (as was done
# before) leaves a constant y_true, for which MCC carries no information.
emotions = [0, 1, 2, 3, 4, 5, 6, 7]  # Class indices; replace with your actual emotion labels
mcc_scores = {
    emotion: matthews_corrcoef(new_Ytest_new == emotion, predictions_new == emotion)
    for emotion in emotions
}

# Print MCC scores for each emotion
for emotion, mcc in mcc_scores.items():
    print(f"MCC for emotion {emotion}: {mcc}")



In [None]:
import matplotlib.pyplot as plt

# Train the model and obtain the history object
# NOTE(review): `model` has already been fitted for 100 epochs in an earlier cell, so this
# continues training rather than starting fresh, and the curves below cover only these
# 50 extra epochs. The test split is also used as validation data here — confirm that
# both are intended.
history = model.fit(x_train, y_train, epochs=50, validation_data=(x_test, y_test))

# Plot the accuracy curve
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

# Plot the loss curve
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.show()
