In [None]:
pip install tensorflow_addons

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow_addons
  Downloading tensorflow_addons-0.17.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 9.9 MB/s 
Installing collected packages: tensorflow-addons
Successfully installed tensorflow-addons-0.17.0


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import random as rng
import glob
# import itertools

import librosa as lb
from librosa.display import specshow
import IPython.display as ipd


import sklearn as sk
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical

%matplotlib inline

In [None]:
# Colab-only step: mount Google Drive at /content/drive. The dataset and output
# paths used further down in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#  --  Defining Variables  --  #

# Clip length in milliseconds; passed as target_ms to aud_util.pad_trunc.
max_ms = 4000

# Training hyper-parameters handed to Model.fit (batch_size / epochs).
batchs, epochs = 256, 100

# Class index -> emotion name, in the order used for the one-hot targets.
ind_to_label = dict(enumerate(('angry', 'fear', 'happy', 'neutral', 'sad')))

# Inverse mapping: emotion name -> class index.
label_to_ind = dict(zip(ind_to_label.values(), ind_to_label.keys()))

In [None]:
'''
Data insights

'''

class aud_stats:
    """Placeholder namespace for dataset-level audio statistics."""

    @staticmethod
    def average_sr():
        """Stub: not implemented yet, always returns None."""
        return None


In [None]:
'''

PREPROCESSING UTILS


'''


class aud_util:
    """Static helpers for loading audio and forcing clips to a fixed length."""

    @staticmethod
    def loadaud(audio_file_path, sr=None, mono=False):
        """Load an audio file with librosa.

        Args:
            audio_file_path: path handed straight to ``librosa.load``.
            sr: target sampling rate forwarded to librosa. With the default
                ``None`` librosa keeps the file's native rate; pass a number
                to have librosa resample while loading.
            mono: forwarded to librosa; True down-mixes to a single channel.

        Returns:
            The ``(data, sr)`` tuple returned by ``librosa.load``.
        """
        return lb.load(audio_file_path, sr=sr, mono=mono)

    @staticmethod
    def tf_loadaud(audio_file_path, sr=-1, mono=False):
        """Read and decode a WAV file with TensorFlow ops.

        Original author's note: only works with 16-bit audio.

        Args:
            audio_file_path: path given to ``tf.io.read_file``.
            sr: forwarded as ``desired_samples`` to ``tf.audio.decode_wav``.
                NOTE(review): despite the name this is used as a sample count,
                not a sampling rate -- confirm against callers.
            mono: True requests one channel (``desired_channels=1``); False
                passes ``-1`` for ``desired_channels``.

        Returns:
            Whatever ``tf.audio.decode_wav`` returns for the file contents.
        """
        raw = tf.io.read_file(audio_file_path)
        channels = 1 if mono else -1
        return tf.audio.decode_wav(raw, desired_channels=channels, desired_samples=sr)

    @staticmethod
    def mono_channel_withsr(audio_data_with_sr):
        """Down-mix the data of a ``(data, sr)`` pair to mono; sr is passed through."""
        return lb.to_mono(audio_data_with_sr[0]), audio_data_with_sr[1]

    @staticmethod
    def resample_withsr(data, in_sr, new_sr=22050):
        """Resample ``data`` from ``in_sr`` to ``new_sr``.

        Returns ``data`` unchanged when both rates are already equal.
        """
        if in_sr == new_sr:
            return data
        # Fixed: the original passed an undefined name ``sr`` here (NameError).
        return lb.resample(data, orig_sr=in_sr, new_sr=new_sr)

    @staticmethod
    def pad_trunc(aud, sr, target_ms):
        """Force a clip to exactly ``target_ms`` milliseconds.

        Longer clips are cut at the end; shorter clips are zero-padded at the
        end (the random-offset padding once sketched here is not in use).

        Args:
            aud: sample sequence; its ``len`` is compared with the target
                length (assumes a 1-D mono signal -- TODO confirm for callers
                that load with ``mono=False``).
            sr: sampling rate in samples per second.
            target_ms: desired duration in milliseconds.

        Returns:
            ``(samples, sr)`` where ``samples`` has ``target_ms * sr // 1000``
            entries.
        """
        # Multiply before dividing so sub-second parts of target_ms are kept
        # (the original floored target_ms to whole seconds first).
        maxlen = int(target_ms * sr // 1000)
        current = len(aud)

        if current == maxlen:
            return aud, sr
        if current > maxlen:
            return aud[:maxlen], sr

        # Shorter than the target: append trailing silence.
        pad = np.zeros((maxlen - current))
        return np.concatenate((aud, pad), 0), sr



class aud_img:
    """Static helpers that turn a waveform into a 2-D feature image and plot it."""

    @staticmethod
    def melspec(data, sr):
        """Return a mel spectrogram of ``data`` in decibels.

        ``power=1`` asks librosa for a magnitude (amplitude) spectrogram, which
        is why ``amplitude_to_db`` rather than ``power_to_db`` is used. The dB
        reference is ``np.min`` applied to the spectrogram.
        """
        # The audio is passed by keyword: newer librosa releases no longer
        # accept it positionally, and ``y=`` is valid on older releases too.
        spec = lb.feature.melspectrogram(y=data, sr=sr, power=1)
        return lb.amplitude_to_db(spec, ref=np.min)

    @staticmethod
    def mfcc(data, sr):
        """Return librosa's MFCC matrix for ``data`` with default settings."""
        # Keyword arguments for the same librosa-compatibility reason as melspec.
        return lb.feature.mfcc(y=data, sr=sr)

    @staticmethod
    def display_audio_img(spec, sr, mfcc=False):
        """Plot a feature image on a fresh figure.

        Args:
            spec: 2-D feature matrix handed to ``librosa.display.specshow``.
            sr: sampling rate forwarded to ``specshow`` for the time axis.
            mfcc: True draws a plain time-axis plot; False draws a mel-scaled
                plot (``fmax=8000``) with a dB colour bar.
        """
        fig, ax = plt.subplots()

        if mfcc:
            # Draw on the axes created above, consistent with the mel branch.
            specshow(spec, sr=sr, x_axis='time', ax=ax)
        else:
            img = specshow(spec, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
            fig.colorbar(img, ax=ax, format='%+2.0f dB')



class ds_create:
    """Static helpers that build (path, label) lists and per-clip feature images."""

    @staticmethod
    def label_from_bpath(bpath):
        """Return the parent-directory name of a bytes path (probably unused).

        Both ``\\`` and ``/`` are accepted as separators so the helper works
        for Windows-style and POSIX-style paths alike.
        """
        parts = bpath.decode('utf-8').replace('\\', '/').split('/')
        return parts[-2]

    @staticmethod
    def one_label_dataset(path, label):
        """Deprecated: zip a file-listing dataset with a constant label dataset.

        Args:
            path: file pattern for ``tf.data.Dataset.list_files`` (the original
                note says it must be a raw string).
            label: emotion name, looked up in ``label_to_ind``.
        """
        # List the files once and reuse the result for the label count.
        files = tf.data.Dataset.list_files(path)
        label_values = tf.constant(
            value=label_to_ind[label],
            dtype=tf.dtypes.int32,
            shape=[len(files)],
        )
        return tf.data.Dataset.zip((
            files,
            tf.data.Dataset.from_tensor_slices(label_values),
        ))

    @staticmethod
    def _labelled_paths(path, label):
        """List the entries of directory ``path`` and pair them with ``label``'s index."""
        # Paths are built from the directory that was actually listed instead
        # of a hard-coded prefix; sorting makes the order reproducible.
        paths = [os.path.join(path, name) for name in sorted(os.listdir(path))]
        return paths, [label_to_ind[label]] * len(paths)

    @staticmethod
    def slices_for_onelabel(path, label):
        """Return ``(paths, labels)`` for one emotion directory (local-machine variant).

        Args:
            path: directory holding the audio files of one emotion.
            label: emotion name, a key of ``label_to_ind``.

        Returns:
            A list of file paths inside ``path`` and an equally long list
            filled with the integer class index of ``label``.
        """
        return ds_create._labelled_paths(path, label)

    @staticmethod
    def slices_for_onelabel_colab(path, label):
        """Return ``(paths, labels)`` for one emotion directory (Colab / Drive variant).

        ``path`` is the emotion directory itself, e.g.
        ``/content/drive/MyDrive/NLP/NLP Training Dataset/ASR Training Dataset/sad``.
        Same contract as ``slices_for_onelabel``.
        """
        return ds_create._labelled_paths(path, label)

    @staticmethod
    def _load_padded(path):
        """Load ``path`` as 16 kHz mono and pad/truncate it to ``max_ms`` milliseconds."""
        data, sr = aud_util.loadaud(path, sr=16000, mono=True)
        return aud_util.pad_trunc(data, sr, max_ms)

    @staticmethod
    def preprocess_mel_eachlabel(file_path, label):
        """Return ``(mel, label)`` where ``mel`` is a tensor with a trailing channel axis."""
        data, sr = ds_create._load_padded(file_path)
        mel = tf.expand_dims(aud_img.melspec(data, sr), axis=2)
        return mel, label

    @staticmethod
    def path_to_mel(path):
        """Temporary workaround: same as ``preprocess_mel_eachlabel`` without the label."""
        data, sr = ds_create._load_padded(path)
        return tf.expand_dims(aud_img.melspec(data, sr), axis=2)

    @staticmethod
    def dfpremel(path):
        """Return the mel-spectrogram image of ``path`` as a NumPy array with a channel axis."""
        data, sr = ds_create._load_padded(path)
        return np.expand_dims(aud_img.melspec(data, sr), axis=2)

    @staticmethod
    def dfpremfcc(path):
        """Return the MFCC image of ``path`` as a NumPy array with a channel axis."""
        data, sr = ds_create._load_padded(path)
        return np.expand_dims(aud_img.mfcc(data, sr), axis=2)

    @staticmethod
    def dup_channel(img):
        """Replicate a single-channel image three times along axis 2.

        Accepts ``(H, W)`` or ``(H, W, 1)`` input and returns ``(H, W, 3)``.
        """
        stacked = np.stack((img,) * 3, axis=2)
        # Only drop the trailing singleton channel axis. A bare squeeze() would
        # also remove a height or width axis that happens to have size 1.
        if stacked.ndim == 4 and stacked.shape[-1] == 1:
            stacked = stacked[..., 0]
        return stacked


In [None]:
'''
BUILDING DATASET PIPELINE (zhihao local machine)

'''


# for zhihaos local machine
# One (paths, labels) pair per emotion directory, built with the local-machine
# helper variant. The colab cell further down assigns the same variable names.
(angry, _0), (fear, _1), (happy, _2), (neutral, _3), (sad, _4) = [
    ds_create.slices_for_onelabel(
        r'/content/drive/MyDrive/NLP/NLP Training Dataset/ASR Training Dataset/' + emotion,
        emotion,
    )
    for emotion in ('angry', 'fear', 'happy', 'neutral', 'sad')
]

# Flat, index-aligned lists covering every class.
slices = [*angry, *fear, *happy, *neutral, *sad]
labels = [*_0, *_1, *_2, *_3, *_4]

# ds = tf.data.Dataset.zip((
#     tf.data.Dataset.list_files(slices, shuffle=False),
#     tf.data.Dataset.from_tensor_slices(labels)
# ))

# ds = ds.shuffle(len(ds))

In [None]:
'''
BUILDING DATASET PIPELINE (colab)

'''


# for Google Colab (dataset read from the mounted Google Drive)

# One (paths, labels) pair per emotion directory on the mounted Drive.
(angry, _0), (fear, _1), (happy, _2), (neutral, _3), (sad, _4) = [
    ds_create.slices_for_onelabel_colab(
        r'/content/drive/MyDrive/NLP/NLP Training Dataset/ASR Training Dataset/' + emotion,
        emotion,
    )
    for emotion in ('angry', 'fear', 'happy', 'neutral', 'sad')
]

# Flat, index-aligned lists covering every class.
slices = [*angry, *fear, *happy, *neutral, *sad]
labels = [*_0, *_1, *_2, *_3, *_4]

In [None]:
'''
Dataframe style

using tf.stack later lol
'''


df = pd.DataFrame()

# One row per audio clip: its path plus integer and one-hot class labels.
df['relative_audio_paths'] = slices
df['int_labels'] = labels
df['1hot_labels'] = list(to_categorical(labels))

# Single-channel feature image per clip, then a 3-channel copy of it
# (the model below is fed the 3-channel column).
df['imgs_1c'] = [ds_create.dfpremel(clip_path) for clip_path in slices]
df['imgs_3c'] = df['imgs_1c'].map(ds_create.dup_channel)

# Shuffle the rows and renumber them 0..n-1.
df = sk.utils.shuffle(df).reset_index(drop=True)

In [None]:
# Preview the first rows of the shuffled frame.
df.head()

Unnamed: 0,relative_audio_paths,int_labels,1hot_labels,imgs_1c,imgs_3c
0,/content/drive/MyDrive/NLP/NLP Training Datase...,0,"[1.0, 0.0, 0.0, 0.0, 0.0]","[[[-584.6054259957538], [-496.12201677296343],...","[[[-584.6054259957538, -584.6054259957538, -58..."
1,/content/drive/MyDrive/NLP/NLP Training Datase...,2,"[0.0, 0.0, 1.0, 0.0, 0.0]","[[[-419.7661999396332], [-358.3028907341822], ...","[[[-419.7661999396332, -419.7661999396332, -41..."
2,/content/drive/MyDrive/NLP/NLP Training Datase...,4,"[0.0, 0.0, 0.0, 0.0, 1.0]","[[[-396.3697971229703], [-394.82210978193507],...","[[[-396.3697971229703, -396.3697971229703, -39..."
3,/content/drive/MyDrive/NLP/NLP Training Datase...,0,"[1.0, 0.0, 0.0, 0.0, 0.0]","[[[-356.7272377958902], [-333.5591633403735], ...","[[[-356.7272377958902, -356.7272377958902, -35..."
4,/content/drive/MyDrive/NLP/NLP Training Datase...,2,"[0.0, 0.0, 1.0, 0.0, 0.0]","[[[-532.5982656356001], [-513.5887220733171], ...","[[[-532.5982656356001, -532.5982656356001, -53..."


In [None]:
# Shape of the first 3-channel feature image (column 'imgs_3c' is column 4).
df['imgs_3c'].iloc[0].shape

(20, 126, 3)

In [None]:
# '''
# Temporary work around for below error

# '''
# # requires shuffling in tf.keras.Model.fit

# X = list(map(ds_create.path_to_mel, slices))

# Y = np.array(to_categorical(labels))
# X = np.array(X)




# '''
# TASKS: MAYBE TRY LOADING THINGS INTO A DATAFRAME FOR THIS CRUDE METHOD to make things clearer....?

# '''











In [None]:
# '''
# Supposedly proper mapping that keeps throwing errors

# '''
# #ds = ds.map(ds_create.preprocess_mel_eachlabel)
# ds = ds.map(ds_create.preprocess_mel_eachlabel)
# # ds = ds.cache()
# # ds = ds.batch(batchs)
# # ds = ds.prefetch(16)

In [None]:
# The model input shape is the shape of one 3-channel feature image.
input_shape = df['imgs_3c'].iloc[0].shape
input_shape

(20, 126, 3)

In [None]:
# Transfer-learning classifier: ImageNet-pretrained EfficientNetV2-S backbone
# followed by a small dense head with a 5-way softmax (one unit per emotion).
xin = Input(input_shape)

# Fixed: the Keras class is `EfficientNetV2S` (capital S); the lowercase
# spelling raised AttributeError.
# input_shape is deliberately not passed to the backbone, as in the original.
# NOTE(review): presumably because Keras enforces a minimum spatial size for
# this model when an explicit input_shape is given -- confirm.
prenet = tf.keras.applications.efficientnet_v2.EfficientNetV2S(weights='imagenet', include_top=False)
x = prenet(xin)

x = Flatten()(x)
x = Dense(128, activation='swish')(x)
x = Dropout(0.5)(x)
# x = Dense(128, activation='swish')(x)
# x = Dropout(0.75)(x)


xout = Dense(5, activation='softmax')(x)

own = Model(xin, xout)
own.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['acc', tfa.metrics.F1Score(num_classes=5, average='weighted', threshold=0.5)],
)
own.summary()


AttributeError: ignored

In [None]:
# Cut the learning rate by 10x after 3 epochs without val_loss improvement.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', patience=3, factor=0.1, verbose=1
)

# Stop after 5 stagnant epochs and roll back to the best weights seen.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True
)

# Keep only the best model (by val_loss) on Drive.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    r'/content/drive/MyDrive/NLP/Zhihaos stuff/effinet v2s',
    monitor='val_loss',
    verbose=0,
    save_best_only=True,
)

callbacks = [reduce_lr, early_stop, checkpoint]

In [None]:
# Stack the per-row arrays into single batch tensors for Keras.
train_images = tf.stack(df['imgs_3c'])
train_targets = tf.stack(df['1hot_labels'])

# 20% of the samples are held out for validation via validation_split.
history = own.fit(
    train_images,
    train_targets,
    batch_size=batchs,
    epochs=epochs,
    callbacks=callbacks,
    validation_split=0.2,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 5: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 6/100
Epoch 7/100


In [None]:
# Sanity check on one training clip: build its 3-channel image, add the
# leading batch axis that predict() expects, and take the arg-max class.
tst = ds_create.dup_channel(
    ds_create.dfpremel(r'/content/drive/MyDrive/NLP/NLP Training Dataset/ASR Training Dataset/fear/00530e07e3.wav')
)
tst = tst[np.newaxis, ...]                                                    # batch axis goes first
pred = np.argmax(own.predict(tst))
pred

In [None]:
'''
Generating the qualifying csv file

'''


class test_gen:
    """Static helpers for building the qualifying-round prediction file."""

    @staticmethod
    def _list_paths(path):
        """Return the sorted entries of directory ``path``, each joined onto ``path``."""
        # Paths are built from the directory that was actually listed instead
        # of a hard-coded prefix, so the result always points at real files.
        return [os.path.join(path, name) for name in sorted(os.listdir(path))]

    @staticmethod
    def path_gen(path):
        """List the files in ``path`` (local-machine variant)."""
        return test_gen._list_paths(path)

    @staticmethod
    def path_gen_colab(path):
        """List the files in ``path`` (Colab / Drive variant)."""
        return test_gen._list_paths(path)

    @staticmethod
    def path_to_mel(path):
        """Return the 3-channel mel-spectrogram image for the audio file at ``path``."""
        return ds_create.dup_channel(ds_create.dfpremel(path))

    @staticmethod
    def path_to_mfcc(path):
        """Return the 3-channel MFCC image for the audio file at ``path``."""
        return ds_create.dup_channel(ds_create.dfpremfcc(path))

    @staticmethod
    def int_to_label(int):
        """Map a class index to its emotion name via ``ind_to_label``.

        The parameter keeps its original name (it shadows the ``int`` builtin
        inside this function only) so existing keyword callers keep working.
        """
        return ind_to_label[int]
        

In [None]:
q_df = pd.DataFrame()

# Every .wav in the interim dataset folder, in sorted order.
paths = sorted(glob.glob(r'/content/drive/MyDrive/NLP/NLP Interim Dataset/NLP/*.wav'))

# One 3-channel mel image per file, stacked into a single batch tensor.
q_data = [test_gen.path_to_mel(wav_path) for wav_path in paths]
q_data = tf.stack(q_data)

# Predicted class index per file (arg-max over the softmax outputs).
preds = np.argmax(own.predict(q_data), axis=1)

In [None]:
# File names are derived from the same `paths` list the predictions were
# computed from, so every row is guaranteed to line up with its prediction.
# (Listing the directory again would also pick up any non-.wav entries.)
q_df['paths'] = [os.path.basename(wav_path) for wav_path in paths]

# Translate class indices back to emotion names.
q_df['labels'] = [test_gen.int_to_label(class_ind) for class_ind in preds]

In [None]:
# Preview the first file-name / predicted-label rows before exporting.
q_df.head()

In [None]:
# Write the predictions to Drive as a CSV with no header row and no index column.
q_df.to_csv(r'/content/drive/MyDrive/NLP/Zhihao nlp preds/qualifiers3.csv', header=False, index=False)