In [76]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import random as rng
# import itertools

import librosa as lb
from librosa.display import specshow
import IPython.display as ipd


import sklearn as sk
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical

%matplotlib inline

In [108]:
#  --  Defining Variables  --  #

max_ms = 4000

batchs = 32
epochs = 100

ind_to_label = {
    0 : 'angry',
    1 : 'fear',
    2 : 'happy',
    3 : 'neutral',
    4 : 'sad'
}

label_to_ind = { 
    lab: ind for ind, lab in ind_to_label.items()
    }

In [78]:
'''
Data insights

'''

class aud_stats:
    @staticmethod
    def average_sr():
        pass


In [79]:
'''

PREPROCESSING UTILS


'''


class aud_util:
    
    @staticmethod
    def loadaud(audio_file_path, sr=None, mono=False):                                 # load audio file, *mono argument (bool) can auto convert to mono, while default sr is converted to 22050*
        return lb.load(audio_file_path, sr=sr, mono=mono)                              # returns (data, sr)       

    @staticmethod
    def tf_loadaud(audio_file_path, sr=-1, mono=False):                                # only works with 16-bit audio
        file = tf.io.read_file(audio_file_path)
        
        if mono:
            dc = 1
        elif not mono:
            dc = -1
        
        return tf.audio.decode_wav(file, desired_channels=dc, desired_samples=sr)
    
    @staticmethod
    def mono_channel_withsr(audio_data_with_sr):
        return lb.to_mono(audio_data_with_sr[0]), audio_data_with_sr[1]

    @staticmethod
    def resample_withsr(data, in_sr, new_sr=22050):
        if in_sr == new_sr:
            return data
        else:
            return lb.resample(data, orig_sr=sr, new_sr=new_sr)
    
    @staticmethod
    def pad_trunc(aud, sr, target_ms):                                                 # padding places shorter audio randomly within the time frame of the padded length
        maxlen = (target_ms//1000)*sr
        
        if len(aud) == maxlen:
            return aud, sr

        elif len(aud) > maxlen:
            return aud[:maxlen], sr

        elif len(aud) < maxlen:
            
            # random padding positions
            pad_begin_len = rng.randint(0, maxlen - len(aud))
            pad_end_len = maxlen - len(aud) - pad_begin_len

            #actaual padding
            pad_begin = np.zeros((pad_begin_len))
            pad_end = np.zeros((pad_end_len))

            return np.concatenate((pad_begin, aud, pad_end), 0), sr



class aud_img:
    @staticmethod
    def melspec(data, sr):
        spec = lb.feature.melspectrogram(data, sr=sr, power=1)                         # power = 1/2 changes amplitude_to_db or power_to_db
        spec = lb.amplitude_to_db(spec, ref=np.min)
        return spec

    @staticmethod
    def mfcc(data, sr):
        mfcc_ = lb.feature.mfcc(data, sr)
        #mfcc_ = sk.preprocessing.scale(mfcc_, axis=1)
        return mfcc_

    @staticmethod
    def display_audio_img(spec, sr , mfcc=False):
        fig, ax = plt.subplots()
        
        if mfcc:
            specshow(spec, sr=sr, x_axis='time')
        else:
            img = specshow(spec, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
            fig.colorbar(img, ax=ax, format='%+2.0f dB')



class ds_create:
    
    @staticmethod    
    def label_from_bpath(bpath):                                                       # probably will not be used
        return bpath.decode('utf-8').split('\\')[-2]

    @staticmethod
    #depreciated
    def one_label_dataset(path, label):                                                # path taken in must be raw
        return tf.data.Dataset.zip((
            tf.data.Dataset.list_files(path),
            tf.data.Dataset.from_tensor_slices(tf.constant(value=label_to_ind[label], dtype=tf.dtypes.int32 ,shape=len(tf.data.Dataset.list_files(path))))
        ))

    @staticmethod
    def slices_for_onelabel(path, label):
        paths = os.listdir(path)
        paths = list(map(lambda x : 'DATA_NLP_TIL\\'+label+'\\'+x , paths))

        labels = [label_to_ind[label]]*len(paths)

        return paths , labels
    
    @staticmethod
    def preprocess_mel_eachlabel(file_path, label):                          
        
        data, sr = aud_util.loadaud(file_path, sr=16000, mono=True)
        data, sr = aud_util.pad_trunc(data, sr, max_ms)                                
        mel = aud_img.melspec(data, sr)
        mel = tf.expand_dims(mel, axis=2)
        
        return mel, label

    @staticmethod
    def path_to_mel(path):                                                              # temporary work around
        
        data, sr = aud_util.loadaud(path, sr=16000, mono=True)
        data, sr = aud_util.pad_trunc(data, sr, max_ms)                                
        mel = aud_img.melspec(data, sr)
        mel = tf.expand_dims(mel, axis=2)

        return mel


In [80]:
'''
BUILDING DATASET PIPELINE

'''


# my god...
angry, _0= ds_create.slices_for_onelabel(r'DATA_NLP_TIL\angry', 'angry')
fear, _1 = ds_create.slices_for_onelabel(r'DATA_NLP_TIL\fear', 'fear')
happy, _2 = ds_create.slices_for_onelabel(r'DATA_NLP_TIL\happy', 'happy')
neutral, _3 = ds_create.slices_for_onelabel(r'DATA_NLP_TIL\neutral', 'neutral')
sad, _4 = ds_create.slices_for_onelabel(r'DATA_NLP_TIL\sad', 'sad')

slices = angry + fear + happy + neutral + sad
labels = _0 + _1 + _2 + _3 + _4

ds = tf.data.Dataset.zip((
    tf.data.Dataset.list_files(slices, shuffle=False),
    tf.data.Dataset.from_tensor_slices(labels)
))

ds = ds.shuffle(len(ds))

In [96]:
'''
Temporary work around for below error

'''
# requires shuffling in tf.keras.Model.fit

X = list(map(ds_create.path_to_mel, slices))

Y = np.array(to_categorical(labels))
X = np.array(X)




'''
TASKS: MAYBE TRY LOADING THINGS INTO A DATAFRAME FOR THIS CRUDE METHOD....?

'''











  spec = lb.feature.melspectrogram(data, sr=sr, power=1)                         # power = 1/2 changes amplitude_to_db or power_to_db
 -0.00076151] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  spec = lb.feature.melspectrogram(data, sr=sr, power=1)                         # power = 1/2 changes amplitude_to_db or power_to_db
  1.46116614e-02  1.36162043e-02] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  spec = lb.feature.melspectrogram(data, sr=sr, power=1)                         # power = 1/2 changes amplitude_to_db or power_to_db
  0.00544107] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  spec = lb.feature.melspectrogram(data, sr=sr, power=1)                         # power = 1/2 changes amplitude_to_db or power_to_db
 -2.7675033e-03  1.1518776e-02] as keyword args. From version 0.10 passing these as positional arguments will r

In [None]:
'''
Supposedly proper mapping that keeps throwing errors

'''
#ds = ds.map(ds_create.preprocess_mel_eachlabel)
ds = ds.map(ds_create.preprocess_mel_eachlabel)
# ds = ds.cache()
# ds = ds.batch(batchs)
# ds = ds.prefetch(16)

In [None]:
xin = Input(X[34].shape)
net = tf.keras.applications.efficientnet.EfficientNetB0(weights='imagenet', include_top=False)
# x = Conv2D(256, (3, 3), padding='valid', activation='swish' )(net)
# x = MaxPool2D((2, 2))(x)

# x = Conv2D(128, (3, 3), padding='valid', activation='swish' )(x)
# x = MaxPool2D((2, 2))(x)

# x = Conv2D(128, (3, 3), padding='valid', activation='swish' )(x)
# x = MaxPool2D((2, 2))(x)

x = Flatten()(net)
x = Dense(128, activation='swish')(x)
x = Dropout(0.5)(x)

xout = Dense(5, activation='softmax')(x)

own = Model(xin, xout)
own.compile(optimizer=tf.keras.optimizers.Adam(), loss='categorical_crossentropy', metrics=['acc'])
own.summary()


In [103]:
callbacks = [
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.1, verbose=1),
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    tf.keras.callbacks.ModelCheckpoint(r'Model_weights', monitor='val_loss', verbose=1, save_best_only=True)
]

In [109]:
history = own.fit(
    x=X,
    y=Y,
    batch_size=batchs,
    epochs=epochs,
    callbacks=callbacks,
    validation_split=0.2,
    shuffle=True
)

Epoch 1/100
Epoch 1: val_loss improved from inf to 4.58305, saving model to Model_weights




INFO:tensorflow:Assets written to: Model_weights\assets


INFO:tensorflow:Assets written to: Model_weights\assets


Epoch 2/100
Epoch 2: val_loss did not improve from 4.58305
Epoch 3/100
Epoch 3: val_loss did not improve from 4.58305
Epoch 4/100
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.

Epoch 4: val_loss did not improve from 4.58305
Epoch 5/100
Epoch 5: val_loss did not improve from 4.58305
Epoch 6/100
Epoch 6: val_loss did not improve from 4.58305
