# NLP Challenge
The dataset is a collection of audio clips with expressions of the following emotions: *angry, fear, happy, neutral, sad*.  
  
Your task is to train a model to perform *speech emotion recognition*.  
  
You will be provided a training set for your use. You are allowed to include additional data to train your model.  
  
The evaluation data will consist of audio clips spoken in multiple languages.  
*Majority* of the evaluation data are in the intonation of *Singapore English*.  
  
Take inspo from [training notebook](https://github.com/AbishekSankar/Audio-Classification-Deep-Learning/blob/main/Demo%20Jupyter%20Notebook/Final_Project.ipynb)  
and [Speech Emotion Recognition with CNN](https://www.kaggle.com/code/ritzing/speech-emotion-recognition-with-cnn/notebook)

## Possible Extra Datasets
https://www.kaggle.com/datasets/dmitrybabko/speech-emotion-recognition-en (specifically Crema)  
https://www.kaggle.com/datasets/piyushagni5/berlin-database-of-emotional-speech-emodb  

In [1]:
import librosa as lb
from librosa.display import specshow
import glob
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

from tensorflow.keras.utils import to_categorical
import tensorflow as tf
import tensorflow_io as tfio
import tensorflow_addons as tfa
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model, load_model

import sklearn as sk

%matplotlib inline

## Audio Data Processing

In [2]:
max_ms = 4000

ind_to_label = {
    0 : 'angry',
    1 : 'fear',
    2 : 'happy',
    3 : 'neutral',
    4 : 'sad'
}

label_to_ind = { 
    lab: ind for ind, lab in ind_to_label.items()
    }

### Preprocessing Utils

In [3]:
class aud_util:
    
    @staticmethod
    def loadaud(audio_file_path, sr=None, mono=False):                                 # load audio file, *mono argument (bool) can auto convert to mono, while default sr is converted to 22050*
        return lb.load(audio_file_path, sr=sr, mono=mono)                              # returns (data, sr)       
    
    # @staticmethod
    # def mono_channel_withsr(audio_data_with_sr):
    #     return lb.to_mono(audio_data_with_sr[0]), audio_data_with_sr[1]

    # @staticmethod
    # def resample_withsr(data, in_sr, new_sr=22050):
    #     if in_sr == new_sr:
    #         return data
    #     else:
    #         return lb.resample(data, orig_sr=sr, new_sr=new_sr)
    
    @staticmethod
    def pad_trunc(aud, sr, target_ms):                                                 # padding places shorter audio randomly within the time frame of the padded length
        maxlen = (target_ms//1000)*sr
        
        if len(aud) == maxlen:
            return aud, sr

        elif len(aud) > maxlen:
            return aud[:maxlen], sr

        elif len(aud) < maxlen:
            
            #     random padding positions
            pad = maxlen - len(aud)
            pad = np.zeros((pad))

            # pad_begin_len = rng.randint(0, maxlen - len(aud))
            # pad_end_len = maxlen - len(aud) - pad_begin_len

            #     actaual padding
            # pad_begin = np.zeros((pad_begin_len))
            # pad_end = np.zeros((pad_end_len))

            return np.concatenate((aud, pad), 0), sr



class aud_img:
    @staticmethod
    def melspec(data, sr):
        spec = lb.feature.melspectrogram(data, sr=sr, power=1)                         # power = 1/2 changes amplitude_to_db or power_to_db
        spec = lb.amplitude_to_db(spec, ref=np.min)
        spec = np.expand_dims(spec, axis=2)
        return np.stack((spec,)*3, axis=2).squeeze()

    @staticmethod
    def mfcc(data, sr):
        mfcc_ = lb.feature.mfcc(data, sr)
        #mfcc_ = sk.preprocessing.scale(mfcc_, axis=1)
        mfcc_ = np.expand_dims(mfcc_, axis=2)
        return np.stack((mfcc_,)*3, axis=2).squeeze()

    # @staticmethod
    # def display_audio_img(spec, sr , mfcc=False):
    #     fig, ax = plt.subplots()
        
    #     if mfcc:
    #         specshow(spec, sr=sr, x_axis='time')
    #     else:
    #         img = specshow(spec, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
    #         fig.colorbar(img, ax=ax, format='%+2.0f dB')



class ds_create:
    
    # @staticmethod    
    # def label_from_bpath(bpath):                                                       # probably will not be used
    #     return bpath.decode('utf-8').split('\\')[-2]

    # @staticmethod
    # #depreciated
    # def one_label_dataset(path, label):                                                # path taken in must be raw
    #     return tf.data.Dataset.zip((
    #         tf.data.Dataset.list_files(path),
    #         tf.data.Dataset.from_tensor_slices(tf.constant(value=label_to_ind[label], dtype=tf.dtypes.int32 ,shape=len(tf.data.Dataset.list_files(path))))
    #     ))

    @staticmethod
    def slices_for_onelabel(path, label):                                              #for zhihao's local pc
        paths = glob.glob(path + label + '/*.wav')
        labels = [label_to_ind[label]]*len(paths)
        return paths, labels
    
    @staticmethod
    def preprocess_mel_eachlabel(file_path, label):                          
        
        data, sr = aud_util.loadaud(file_path, sr=16000, mono=True)
        data, sr = aud_util.pad_trunc(data, sr, max_ms)                                
        mel = aud_img.melspec(data, sr)
        mel = tf.expand_dims(mel, axis=2)
        
        return mel, label

    # @staticmethod
    # def path_to_mel(path):                                                              # temporary work around
        
    #     data, sr = aud_util.loadaud(path, sr=16000, mono=True)
    #     data, sr = aud_util.pad_trunc(data, sr, max_ms)                                
    #     mel = aud_img.melspec(data, sr)
    #     mel = tf.expand_dims(mel, axis=2)

    #     return mel

    @staticmethod
    def dfpremel(path):
        data, sr = aud_util.loadaud(path, sr=16000, mono=True)
        data, sr = aud_util.pad_trunc(data, sr, max_ms)                                
        mel = aud_img.melspec(data, sr)

        return mel
    
    @staticmethod
    def dfpremfcc(path):
        data, sr = aud_util.loadaud(path, sr=16000, mono=True)
        data, sr = aud_util.pad_trunc(data, sr, max_ms)                                
        mfcc = aud_img.mfcc(data, sr)

        return mfcc

In [4]:
angry_o, _0= ds_create.slices_for_onelabel('Data/NLP Training Dataset/', 'angry')
fear_o, _1 = ds_create.slices_for_onelabel('Data/NLP Training Dataset/', 'fear')
happy_o, _2 = ds_create.slices_for_onelabel('Data/NLP Training Dataset/', 'happy')
neutral_o, _3 = ds_create.slices_for_onelabel('Data/NLP Training Dataset/', 'neutral')
sad_o, _4 = ds_create.slices_for_onelabel('Data/NLP Training Dataset/', 'sad')

angry_r, r_0= ds_create.slices_for_onelabel('Data/RAVDESS Dataset Sorted/', 'angry')
fear_r, r_1 = ds_create.slices_for_onelabel('Data/RAVDESS Dataset Sorted/', 'fear')
happy_r, r_2 = ds_create.slices_for_onelabel('Data/RAVDESS Dataset Sorted/', 'happy')
neutral_r, r_3 = ds_create.slices_for_onelabel('Data/RAVDESS Dataset Sorted/', 'neutral')
sad_r, r_4 = ds_create.slices_for_onelabel('Data/RAVDESS Dataset Sorted/', 'sad')

slices = angry_o + fear_o + happy_o + neutral_o + sad_o + angry_r + fear_r + happy_r + neutral_r + sad_r
labels = _0 + _1 + _2 + _3 + _4 + r_0 + r_1 + r_2 + r_3 + r_4

In [5]:
df = pd.DataFrame()

df['relative_audio_paths'] = slices
df['int_labels'] = labels
df['1hot_labels'] = list(to_categorical(labels))

df['imgs_3c'] = list(map(ds_create.dfpremel, slices))


df = sk.utils.shuffle(df)
df.reset_index(inplace=True, drop=True)

  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  spec = lb.feature.melspectrogram(data, sr=sr, power=1)                         # power = 1/2 changes amplitude_to_db or power_to_db
  spec = lb.feature.melspectrogram(data, sr=sr, power=1)                         # power = 1/2 changes amplitude_to_db or power_to_db
  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  spec = lb.feature.melspectrogram(data, sr=sr, power=1)                         # power = 1/2 changes amplitude_to_db or power_to_db
  spec = lb.feature.melspectrogram(data, sr=sr, power=1)                         # power = 1/2 changes amplitude_to_db or power_to_db
  spec = lb.feature.melspectrogram(data, sr=sr, power=1)                         # power = 1/2 changes amplitude_to_db or power_to_db
  spec = lb.feature.melspectrogram(data, sr=sr, power=1)                         # power = 1/2 changes ampli

In [6]:
df.head()

Unnamed: 0,relative_audio_paths,int_labels,1hot_labels,imgs_3c
0,Data/RAVDESS Dataset Sorted/sad\03-01-04-02-01...,4,"[0.0, 0.0, 0.0, 0.0, 1.0]","[[[43.858283984794255, 43.858283984794255, 43...."
1,Data/NLP Training Dataset/sad\c73b7dd221.wav,4,"[0.0, 0.0, 0.0, 0.0, 1.0]","[[[78.2314953322689, 78.2314953322689, 78.2314..."
2,Data/NLP Training Dataset/happy\3ec232c819.wav,2,"[0.0, 0.0, 1.0, 0.0, 0.0]","[[[75.24989060450864, 75.24989060450864, 75.24..."
3,Data/NLP Training Dataset/fear\70e43673a5.wav,1,"[0.0, 1.0, 0.0, 0.0, 0.0]","[[[75.28514040319726, 75.28514040319726, 75.28..."
4,Data/NLP Training Dataset/fear\4e2022eef3.wav,1,"[0.0, 1.0, 0.0, 0.0, 0.0]","[[[86.63499887802784, 86.63499887802784, 86.63..."


In [7]:
input_shape = df.iloc[0,3].shape
print(input_shape)

(128, 126, 3)


## Build Model

In [8]:
xIn = Input(input_shape)
net = tf.keras.applications.efficientnet_v2.EfficientNetV2S(weights='imagenet', include_top=False)
x = net(xIn)
x = Flatten()(x)
x = Dense(256, activation='swish')(x)
x = Dropout(0.5)(x)
x = Dense(128, activation='swish')(x)
x = Dropout(0.5)(x)
xOut = Dense(5, activation='softmax')(x)

model = Model(xIn, xOut)
model.compile(optimizer=tf.keras.optimizers.Adam(), loss='categorical_crossentropy', metrics=['acc', tfa.metrics.F1Score(num_classes=5, average='weighted', threshold=0.5)])
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 128, 126, 3)]     0         
                                                                 
 efficientnetv2-s (Functiona  (None, None, None, 1280)  20331360 
 l)                                                              
                                                                 
 flatten (Flatten)           (None, 20480)             0         
                                                                 
 dense (Dense)               (None, 256)               5243136   
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                             

In [9]:
callbacks = [
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.1, verbose=1),
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    tf.keras.callbacks.ModelCheckpoint('Model_weights', monitor='val_loss', verbose=1, save_best_only=True)
]

batch_size = 32
epochs = 50

for count in range(1,6):
    history = model.fit(
        x=tf.stack(df['imgs_3c']),
        y=tf.stack(df['1hot_labels']),
        batch_size=batch_size,
        epochs=epochs,
        callbacks=callbacks,
        validation_split=0.2
    )
    model.save('Saved Models/model_' + str(count))

Epoch 1/50


NotFoundError: Graph execution error:

Detected at node 'gradient_tape/model/efficientnetv2-s/block6o_project_conv/Conv2D/Conv2DBackpropInput' defined at (most recent call last):
    File "c:\Users\mandy\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Users\mandy\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\ipykernel\kernelapp.py", line 677, in start
      self.io_loop.start()
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\mandy\AppData\Local\Programs\Python\Python39\lib\asyncio\base_events.py", line 601, in run_forever
      self._run_once()
    File "c:\Users\mandy\AppData\Local\Programs\Python\Python39\lib\asyncio\base_events.py", line 1905, in _run_once
      handle._run()
    File "c:\Users\mandy\AppData\Local\Programs\Python\Python39\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\ipykernel\kernelbase.py", line 473, in dispatch_queue
      await self.process_one()
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\ipykernel\kernelbase.py", line 462, in process_one
      await dispatch(*args)
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\ipykernel\kernelbase.py", line 369, in dispatch_shell
      await result
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\ipykernel\kernelbase.py", line 664, in execute_request
      reply_content = await reply_content
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\ipykernel\ipkernel.py", line 355, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\ipykernel\zmqshell.py", line 532, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\IPython\core\interactiveshell.py", line 2863, in run_cell
      result = self._run_cell(
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\IPython\core\interactiveshell.py", line 2909, in _run_cell
      return runner(coro)
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\IPython\core\interactiveshell.py", line 3106, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\IPython\core\interactiveshell.py", line 3309, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\mandy\AppData\Roaming\Python\Python39\site-packages\IPython\core\interactiveshell.py", line 3369, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\mandy\AppData\Local\Temp\ipykernel_1468\1921741812.py", line 11, in <cell line: 10>
      history = model.fit(
    File "c:\Users\mandy\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\mandy\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "c:\Users\mandy\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "c:\Users\mandy\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\mandy\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "c:\Users\mandy\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\training.py", line 863, in train_step
      self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "c:\Users\mandy\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\optimizer_v2\optimizer_v2.py", line 530, in minimize
      grads_and_vars = self._compute_gradients(
    File "c:\Users\mandy\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\optimizer_v2\optimizer_v2.py", line 583, in _compute_gradients
      grads_and_vars = self._get_gradients(tape, loss, var_list, grad_loss)
    File "c:\Users\mandy\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\optimizer_v2\optimizer_v2.py", line 464, in _get_gradients
      grads = tape.gradient(loss, var_list, grad_loss)
Node: 'gradient_tape/model/efficientnetv2-s/block6o_project_conv/Conv2D/Conv2DBackpropInput'
No algorithm worked!  Error messages:
  Profiling failure on CUDNN engine 1#TC: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 21498128 bytes.
  Profiling failure on CUDNN engine 1: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 21498128 bytes.
  Profiling failure on CUDNN engine 2#TC: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 943194112 bytes.
  Profiling failure on CUDNN engine 2: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 943194112 bytes.
  Profiling failure on CUDNN engine 3#TC: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 101449728 bytes.
  Profiling failure on CUDNN engine 3: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 101449728 bytes.
  Profiling failure on CUDNN engine 0#TC: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 16777216 bytes.
  Profiling failure on CUDNN engine 0: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 16777216 bytes.
	 [[{{node gradient_tape/model/efficientnetv2-s/block6o_project_conv/Conv2D/Conv2DBackpropInput}}]] [Op:__inference_train_function_48769]

In [None]:
model_1 = load_model('Saved Models/model_1')
model_2 = load_model('Saved Models/model_2')
model_3 = load_model('Saved Models/model_3')
model_4 = load_model('Saved Models/model_4')
model_5 = load_model('Saved Models/model_5')

## Predict on Evaluation or Test Data

In [None]:
class test_gen:

    @staticmethod
    def path_to_mel(path):
        c = ds_create.dfpremel(path)
        return c
    
    @staticmethod
    def path_to_mfcc(path):
        c = ds_create.dfpremfcc(path)
        return c

    @staticmethod
    def int_to_label(int):
        return ind_to_label[int]

In [None]:
q_df = pd.DataFrame()
paths = sorted(glob.glob('Data/NLP Interim Dataset/*.wav'))
q_data = list(map(test_gen.path_to_mel, paths))

q_data = tf.stack(q_data)


pred_1 = model_1.predict(q_data)
pred_2 = model_2.predict(q_data)
pred_3 = model_3.predict(q_data)
pred_4 = model_4.predict(q_data)
pred_5 = model_5.predict(q_data)

In [None]:
pred_comb = pred_1 * 0.2 + pred_2 * 0.2 + pred_3 * 0.2 + pred_4 * 0.2 + pred_5 * 0.2
pred_comb = tf.argmax(pred_comb, axis=1)

In [None]:
q_df['paths'] = sorted(glob.glob('Data/NLP Interim Dataset/*.wav'))
q_df['labels'] = list(map(
    test_gen.int_to_label,
    list(pred_comb)
))

In [None]:
q_df.head()

In [None]:
q_df.to_csv('Submissions/qualifiers1.csv', header=False, index=False)