In [None]:
!python --version

In [2]:
import librosa as lb
from librosa.display import specshow
import glob
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

from tensorflow.keras.utils import to_categorical
import tensorflow as tf
import tensorflow_io as tfio
import tensorflow_addons as tfa
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model, load_model

import sklearn as sk

## Audio Data Processing

In [3]:
max_ms = 4000

ind_to_label = {
    0: 'angry',
    1: 'fear',
    2: 'happy',
    3: 'neutral',
    4: 'sad'
}

label_to_ind = {
    lab: ind for ind, lab in ind_to_label.items()
}

In [4]:
class aud_util:

    # load audio file, *mono argument (bool) can auto convert to mono, while default sr is converted to 22050*
    @staticmethod
    def loadaud(audio_file_path, sr=None, mono=False):
        return lb.load(audio_file_path, sr=sr, mono=mono)  # returns (data, sr)

    # @staticmethod
    # def mono_channel_withsr(audio_data_with_sr):
    #     return lb.to_mono(audio_data_with_sr[0]), audio_data_with_sr[1]

    # @staticmethod
    # def resample_withsr(data, in_sr, new_sr=22050):
    #     if in_sr == new_sr:
    #         return data
    #     else:
    #         return lb.resample(data, orig_sr=sr, new_sr=new_sr)

    # padding places shorter audio randomly within the time frame of the padded length
    @staticmethod
    def pad_trunc(aud, sr, target_ms):
        maxlen = (target_ms // 1000) * sr

        if len(aud) == maxlen:
            return aud, sr

        elif len(aud) > maxlen:
            return aud[:maxlen], sr

        elif len(aud) < maxlen:

            #     random padding positions
            pad = maxlen - len(aud)
            pad = np.zeros((pad))

            # pad_begin_len = rng.randint(0, maxlen - len(aud))
            # pad_end_len = maxlen - len(aud) - pad_begin_len

            #     actaual padding
            # pad_begin = np.zeros((pad_begin_len))
            # pad_end = np.zeros((pad_end_len))

            return np.concatenate((aud, pad), 0), sr

In [5]:
class aud_img:
    @staticmethod
    def melspec(data, sr):
        spec = lb.feature.melspectrogram(data, sr=sr, power=1)  # power = 1/2 changes amplitude_to_db or power_to_db
        spec = lb.amplitude_to_db(spec, ref=np.min)
        spec = np.expand_dims(spec, axis=2)
        return np.stack((spec,) * 3, axis=2).squeeze()

    @staticmethod
    def mfcc(data, sr):
        mfcc_ = lb.feature.mfcc(data, sr)
        #mfcc_ = sk.preprocessing.scale(mfcc_, axis=1)
        mfcc_ = np.expand_dims(mfcc_, axis=2)
        return np.stack((mfcc_,) * 3, axis=2).squeeze()

    # @staticmethod
    # def display_audio_img(spec, sr , mfcc=False):
    #     fig, ax = plt.subplots()

    #     if mfcc:
    #         specshow(spec, sr=sr, x_axis='time')
    #     else:
    #         img = specshow(spec, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
    #         fig.colorbar(img, ax=ax, format='%+2.0f dB')

In [6]:
class ds_create:

    # @staticmethod
    # def label_from_bpath(bpath):                                                       # probably will not be used
    #     return bpath.decode('utf-8').split('\\')[-2]

    # @staticmethod
    # #depreciated
    # def one_label_dataset(path, label):                                                # path taken in must be raw
    #     return tf.data.Dataset.zip((
    #         tf.data.Dataset.list_files(path),
    #         tf.data.Dataset.from_tensor_slices(tf.constant(value=label_to_ind[label], dtype=tf.dtypes.int32 ,shape=len(tf.data.Dataset.list_files(path))))
    #     ))

    @staticmethod
    def slices_for_onelabel(path, label):  #for zhihao's local pc
        paths = glob.glob(path + "/" + label + '/*.wav')
        labels = [label_to_ind[label]] * len(paths)
        return paths, labels


    @staticmethod
    def preprocess_mel_eachlabel(file_path, label):
        data, sr = aud_util.loadaud(file_path, sr=16000, mono=True)
        data, sr = aud_util.pad_trunc(data, sr, max_ms)
        mel = aud_img.melspec(data, sr)
        mel = tf.expand_dims(mel, axis=2)

        return mel, label


    # @staticmethod
    # def path_to_mel(path):                                                              # temporary work around

    #     data, sr = aud_util.loadaud(path, sr=16000, mono=True)
    #     data, sr = aud_util.pad_trunc(data, sr, max_ms)
    #     mel = aud_img.melspec(data, sr)
    #     mel = tf.expand_dims(mel, axis=2)

    #     return mel

    @staticmethod
    def dfpremel(path):
        data, sr = aud_util.loadaud(path, sr=16000, mono=True)
        data, sr = aud_util.pad_trunc(data, sr, max_ms)
        mel = aud_img.melspec(data, sr)

        return mel


    @staticmethod
    def dfpremfcc(path):
        data, sr = aud_util.loadaud(path, sr=16000, mono=True)
        data, sr = aud_util.pad_trunc(data, sr, max_ms)
        mfcc = aud_img.mfcc(data, sr)

        return mfcc

In [7]:
project_temp_path = '/tmp/pycharm_project_261/NLP/'

In [8]:
angry, _0 = ds_create.slices_for_onelabel(project_temp_path+'datasets/NLP Training Dataset/ASR Training Dataset', 'angry')
fear, _1 = ds_create.slices_for_onelabel(project_temp_path+'datasets/NLP Training Dataset/ASR Training Dataset', 'fear')
happy, _2 = ds_create.slices_for_onelabel(project_temp_path+'datasets/NLP Training Dataset/ASR Training Dataset', 'happy')
neutral, _3 = ds_create.slices_for_onelabel(project_temp_path+'datasets/NLP Training Dataset/ASR Training Dataset', 'neutral')
sad, _4 = ds_create.slices_for_onelabel(project_temp_path+'datasets/NLP Training Dataset/ASR Training Dataset', 'sad')

slices = angry + fear + happy + neutral + sad
labels = _0 + _1 + _2 + _3 + _4

In [9]:
df = pd.DataFrame()

df['audio_paths'] = slices
df['int_labels'] = labels
df['1hot_labels'] = list(to_categorical(labels))

df['imgs_3c'] = list(map(ds_create.dfpremel, slices))

df = sk.utils.shuffle(df)
df.reset_index(inplace=True, drop=True)

  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  after removing the cwd from sys.path.
 0.00000000e+00 0.00000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  after removing the cwd from sys.path.
  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  after removing the cwd from sys.path.
  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  after removing the cwd from sys.pa

In [10]:
df.head()

Unnamed: 0,audio_paths,int_labels,1hot_labels,imgs_3c
0,/tmp/pycharm_project_261/NLP/datasets/NLP Trai...,3,"[0.0, 0.0, 0.0, 1.0, 0.0]","[[[65.78410325470202, 65.78410325470202, 65.78..."
1,/tmp/pycharm_project_261/NLP/datasets/NLP Trai...,1,"[0.0, 1.0, 0.0, 0.0, 0.0]","[[[68.00196919158077, 68.00196919158077, 68.00..."
2,/tmp/pycharm_project_261/NLP/datasets/NLP Trai...,2,"[0.0, 0.0, 1.0, 0.0, 0.0]","[[[82.21379265670487, 82.21379265670487, 82.21..."
3,/tmp/pycharm_project_261/NLP/datasets/NLP Trai...,4,"[0.0, 0.0, 0.0, 0.0, 1.0]","[[[83.68194478200978, 83.68194478200978, 83.68..."
4,/tmp/pycharm_project_261/NLP/datasets/NLP Trai...,3,"[0.0, 0.0, 0.0, 1.0, 0.0]","[[[87.26147394310544, 87.26147394310544, 87.26..."


In [11]:
input_shape = df.iloc[0, 3].shape
print(input_shape)

(128, 126, 3)


## Build Model

In [34]:
xIn = Input(input_shape)

x = Conv2D(32, (3, 3), activation='relu', padding='same')(xIn)
# x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
# x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
# x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
# x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
# x = MaxPooling2D((2, 2), padding='same')(x)

x = Reshape((-1, 32))(x)

x = Bidirectional(LSTM(128, input_shape = (None, 64, 63, 32), return_sequences=True))(x)

x = Flatten()(x)

x = Dense(256, activation='swish')(x)
x = Dropout(0.5)(x)
x = Dense(128, activation='swish')(x)
x = Dropout(0.5)(x)
xOut = Dense(5, activation='softmax')(x)

model = Model(xIn, xOut)
model.compile(optimizer=tf.keras.optimizers.Adam(), loss='categorical_crossentropy',
              metrics=['acc', tfa.metrics.F1Score(num_classes=5, average='weighted', threshold=0.5)])
model.summary()

ResourceExhaustedError: failed to allocate memory [Op:AddV2]

In [21]:
callbacks = [
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.1, verbose=1),
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    tf.keras.callbacks.ModelCheckpoint('Model_weights', monitor='val_loss', verbose=1, save_best_only=True)
]

In [23]:
batch_size = 16
epochs = 50

for count in range(1, 10):
    history = model.fit(
        x=tf.stack(df['imgs_3c']),
        y=tf.stack(df['1hot_labels']),
        batch_size=batch_size,
        epochs=epochs,
        callbacks=callbacks,
        validation_split=0.2
    )
    model.save('Saved Models/model_' + str(count))

Epoch 1/50

Epoch 00001: val_loss improved from 1.60950 to 1.60838, saving model to Model_weights
INFO:tensorflow:Assets written to: Model_weights/assets


INFO:tensorflow:Assets written to: Model_weights/assets


Epoch 2/50

Epoch 00002: val_loss did not improve from 1.60838
Epoch 3/50

Epoch 00003: val_loss improved from 1.60838 to 1.60557, saving model to Model_weights
INFO:tensorflow:Assets written to: Model_weights/assets


INFO:tensorflow:Assets written to: Model_weights/assets


Epoch 4/50

Epoch 00004: val_loss did not improve from 1.60557
Epoch 5/50

Epoch 00005: val_loss improved from 1.60557 to 1.60472, saving model to Model_weights
INFO:tensorflow:Assets written to: Model_weights/assets


INFO:tensorflow:Assets written to: Model_weights/assets


Epoch 6/50

Epoch 00006: val_loss improved from 1.60472 to 1.60260, saving model to Model_weights
INFO:tensorflow:Assets written to: Model_weights/assets


INFO:tensorflow:Assets written to: Model_weights/assets


Epoch 7/50

KeyboardInterrupt: 

In [15]:
model_1 = load_model('Saved Models/model_1')
model_2 = load_model('Saved Models/model_2')
model_3 = load_model('Saved Models/model_3')
model_4 = load_model('Saved Models/model_4')
model_5 = load_model('Saved Models/model_5')



## Predict on Evaluation or Test Data

In [16]:
class test_gen:

    @staticmethod
    def path_to_mel(path):
        c = ds_create.dfpremel(path)
        return c

    @staticmethod
    def path_to_mfcc(path):
        c = ds_create.dfpremfcc(path)
        return c

    @staticmethod
    def int_to_label(int):
        return ind_to_label[int]

In [17]:
q_df = pd.DataFrame()
paths = sorted(glob.glob('Data/NLP Interim Dataset/*.wav'))
q_data = list(map(test_gen.path_to_mel, paths))

q_data = tf.stack(q_data)

pred_1 = model_1.predict(q_data)
pred_2 = model_2.predict(q_data)
pred_3 = model_3.predict(q_data)
pred_4 = model_4.predict(q_data)
pred_5 = model_5.predict(q_data)

ValueError: Expect x to be a non-empty array or dataset.

In [None]:
pred_comb = pred_1 * 0.2 + pred_2 * 0.2 + pred_3 * 0.2 + pred_4 * 0.2 + pred_5 * 0.2
pred_comb = tf.argmax(pred_comb, axis=1)

In [None]:
q_df['paths'] = sorted(glob.glob('Data/NLP Interim Dataset/*.wav'))
q_df['labels'] = list(map(
    test_gen.int_to_label,
    list(pred_comb)
))

In [None]:
q_df.head()

In [None]:
q_df.to_csv('Submissions/qualifiers1.csv', header=False, index=False)