In [None]:
from google.colab import drive
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torchaudio
import torch
from transformers import pipeline
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio

# Mount Google Drive at /content/drive so files stored there can be read
# through the regular filesystem API used by the cells below.
drive.mount('/content/drive')

# Directory holding the dataset's audio files. Later cells list this folder,
# keep only the entries ending in .wav, and read the emotion code from the
# third underscore-separated field of each filename.
dataset_path = '/content/drive/MyDrive/AudioWAV/'



In [None]:
# Collect every WAV file under dataset_path together with the emotion label
# encoded in its filename, then integer-encode the labels.
#
# Produces:
#   file_paths   - full path of every .wav file found
#   file_emotion - readable emotion label for each path (same order)
#   y_encoded    - integer class id per file, from label_encoder

# Emotion code (third '_'-separated field of the filename) -> readable label.
EMOTION_CODES = {
    'SAD': 'sad',
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fear',
    'HAP': 'happy',
    'NEU': 'neutral',
}

file_emotion = []
file_paths = []

# os.listdir returns entries in arbitrary order; sorting makes the sample order
# (and therefore the seeded train/test split further down) reproducible.
for file_name in sorted(os.listdir(dataset_path)):
    # Construct the full file path
    file_path_full = os.path.join(dataset_path, file_name)

    # Only regular files with a .wav extension are audio samples.
    if not (file_name.lower().endswith('.wav') and os.path.isfile(file_path_full)):
        continue

    file_paths.append(file_path_full)

    # A filename with fewer than three fields has no emotion code; label it
    # 'Unknown' like any unrecognised code instead of raising IndexError.
    part = file_name.split('_')
    emotion_code = part[2] if len(part) > 2 else None
    file_emotion.append(EMOTION_CODES.get(emotion_code, 'Unknown'))

# Encode labels as integer class ids (one id per distinct label string,
# including 'Unknown' if any file produced it).
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(file_emotion)

In [None]:
# Audio-loading settings. The names mirror librosa.load keyword arguments
# (sr, offset, duration, res_type).
# NOTE(review): no visible cell reads these values - confirm where they are
# consumed (the loading cell below hard-codes sr=16000 instead).
sample_rate_s = 22050        # target sampling rate
offset_s = 0.5               # presumably seconds skipped at the start of a clip - TODO confirm
duration_s = None            # None -> no explicit duration limit
res_type_s = 'kaiser_best'   # resampling method name

In [None]:
# Preview: compute and plot 40 MFCCs for a single sample clip.
import librosa.display

# Pick the sample from file_paths rather than from the loop variable left over
# by the directory-listing cell: that variable is just the last directory entry
# and is not guaranteed to be a .wav file (or a file at all).
if not file_paths:
    raise FileNotFoundError(f'No .wav files were found in {dataset_path}')
sample_path = file_paths[-1]

# No sr argument here, so the clip is loaded at librosa's default rate, unlike
# the sr=16000 used when loading the training clips.
librosa_audio, librosa_sample_rate = librosa.load(sample_path)

mfccs = librosa.feature.mfcc(y=librosa_audio, sr=librosa_sample_rate, n_mfcc=40)
print(mfccs.shape)

librosa.display.specshow(mfccs, sr=librosa_sample_rate, x_axis='time')

In [None]:
 #Get audio features

# Decode every collected WAV file at 16 kHz. Each element of audio_inputs is
# whatever librosa.load returns for that path; the next cell unpacks it as a
# (waveform, sample_rate) pair.
audio_inputs = []
for wav_path in file_paths:
    audio_inputs.append(librosa.load(wav_path, sr=16000))

#Get MFCC features
#mfccs = librosa.feature.mfcc(y=X)
#Get MFCCs average features
#mfccs_mean = np.mean(    mfccs,
#                          axis = 0) ################check axis 0/1

In [None]:
# Keep only the waveform from each (waveform, sample_rate) pair; the sample
# rate is the same for every clip and is not needed as a model input.
X = []
for array, sr in audio_inputs:
    X += [array]

# Peek at the first waveform as a quick sanity check.
print(X[0])

In [None]:
# Data splitting, in two seeded stages:
#   1. hold out 20% of all clips as the test set;
#   2. hold out 10% of the remaining clips as the validation set.
# Net result: roughly 72% train / 8% validation / 20% test.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval,
    y_trainval,
    test_size=0.1,
    random_state=42,
)

# Number of training clips.
print(len(X_train))

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Flatten, Dropout, Reshape

# Size the network from the data instead of hard-coding it.
# - input_length: longest training clip, i.e. the width pad_sequences gives
#   X_train when it infers maxlen. len(clip) works whether X_train is still a
#   list of waveforms or has already been padded into an array.
# - num_classes: one output unit per label known to the encoder.
input_length = max(len(clip) for clip in X_train)
num_classes = len(label_encoder.classes_)

#Build sequential CNN
CNN_model = Sequential()

# Conv1D consumes one (steps, channels) array per sample. The former
# input_shape=(5357, 80080) declared 5357 steps of 80080 channels, which looks
# like (number of training clips, padded length) rather than a per-sample
# shape. Each padded clip is a 1-D vector of input_length samples, so accept
# that and add the single channel axis inside the model.
CNN_model.add(Reshape((input_length, 1), input_shape=(input_length,)))

#Build first layer
CNN_model.add(Conv1D(16, 5, padding='same', activation='relu'))

#Build second layer
CNN_model.add(Conv1D(32, 5, padding='same', activation='relu'))

#Build third layer
CNN_model.add(Conv1D(64, 5, padding='same', activation='relu'))

#Build fourth layer
CNN_model.add(Conv1D(128, 5, padding='same', activation='relu'))

#Add dropout
CNN_model.add(Dropout(0.1))

#Flatten
# NOTE(review): padding='same' keeps all input_length steps, so Flatten emits
# input_length * 128 features. At the originally hard-coded length of 80080
# that is 10,250,240 features, making the Dense(128) below hold ~1.3e9 weights.
# Consider pooling or strided convolutions before this point - the layer stack
# itself is left unchanged here.
CNN_model.add(Flatten())

CNN_model.add(Dense(128, activation='relu'))
CNN_model.add(Dropout(0.1))
CNN_model.add(Dense(64, activation='relu'))

# One softmax probability per emotion class.
CNN_model.add(Dense(num_classes, activation='softmax'))

In [None]:
# Compile the model with the desired loss function, optimizer, and metric to optimize.
# The targets come straight from LabelEncoder, i.e. one integer class id per
# clip rather than one-hot vectors, so the sparse variant of categorical
# cross-entropy is the matching loss ('categorical_crossentropy' expects
# one-hot targets).
CNN_model.compile(loss = 'sparse_categorical_crossentropy',
                  optimizer = 'Adam',
                  metrics = ['accuracy'])

In [None]:
# Summarise the training clips without stacking them: before padding the clips
# are presumably of different lengths, and np.array() on such a ragged list
# raises ValueError on NumPy >= 1.24 (older versions built an object array).
clip_lengths = [len(clip) for clip in X_train]
print('clips:', len(clip_lengths),
      '| shortest:', min(clip_lengths, default=0),
      '| longest:', max(clip_lengths, default=0))

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Pad every split to one common length so each becomes a rectangular array.
# The length is taken from the training clips (what pad_sequences would infer
# for X_train on its own); longer validation/test clips are truncated to it
# using pad_sequences' default truncation side.
max_len = max(len(clip) for clip in X_train)

# dtype must be given explicitly: pad_sequences defaults to int32, which would
# truncate floating-point waveform samples to integers.
pad_options = {'maxlen': max_len, 'dtype': 'float32'}

X_train = pad_sequences(X_train, **pad_options)
# The validation and test clips need the same treatment, otherwise they stay
# ragged lists that fit()/evaluate() cannot consume.
X_val = pad_sequences(X_val, **pad_options)
X_test = pad_sequences(X_test, **pad_options)

print(X_train.shape)


In [None]:
# Shape of the training labels, for comparison with the X_train shape printed above.
print(np.shape(y_train))

In [None]:
# Train the CNN on the training split and evaluate on the validation split
# after every epoch. cnn_results keeps the value returned by fit().
fit_options = {
    'batch_size': 64,
    'epochs': 25,
    'verbose': 1,
    'validation_data': (X_val, y_val),
}
cnn_results = CNN_model.fit(X_train, y_train, **fit_options)