# Deep Learning

In [1]:
# Neural Network
import keras
from keras.utils.generic_utils import get_custom_objects
from keras import backend as K
from keras import regularizers, callbacks
from keras.constraints import max_norm
from keras.models import Model, Sequential, load_model
from keras.layers import Input, Lambda, Dense, Dropout, Flatten, Embedding, Activation, GRUCell, LSTMCell,SimpleRNNCell
from keras.layers import Convolution2D, MaxPooling2D, Convolution1D, Conv1D, SimpleRNN, GRU, LSTM, CuDNNLSTM, CuDNNGRU, Conv2D
from keras.layers import LeakyReLU, PReLU, ThresholdedReLU, ELU
from keras.layers import BatchNormalization, TimeDistributed, Bidirectional
from keras.layers import Wrapper
from keras.regularizers import l2
from keras.optimizers import Adam, SGD, RMSprop, Adagrad, Adadelta, Adamax, Nadam
from keras.callbacks import ModelCheckpoint 
from keras.utils import np_utils
from keras import constraints, initializers, regularizers
import keras.losses
import tensorflow as tf

2022-06-07 18:05:43.080531: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-06-07 18:05:43.080563: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# RNN

In [2]:
def regular_rnn_model(input_dim, output_dim=29):
    # Input
    input_data = Input(name='the_input', shape=(None, input_dim))
    # Recurrent layer
    simp_rnn = GRU(output_dim, return_sequences=True, 
                 implementation=2, name='rnn')(input_data)
    # Softmax Activation Layer
    y_pred = Activation('softmax', name='softmax')(simp_rnn)
    # Specifying the model
    model = Model(inputs=input_data, outputs=y_pred)
    model.output_length = lambda x: x
    print(model.summary())
    return model

In [3]:
model_0 = regular_rnn_model(input_dim=161)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 the_input (InputLayer)      [(None, None, 161)]       0         
                                                                 
 rnn (GRU)                   (None, None, 29)          16704     
                                                                 
 softmax (Activation)        (None, None, 29)          0         
                                                                 
Total params: 16,704
Trainable params: 16,704
Non-trainable params: 0
_________________________________________________________________
None


2022-06-07 18:06:51.132541: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-06-07 18:06:51.132583: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ip-172-31-60-46.ec2.internal): /proc/driver/nvidia/version does not exist
2022-06-07 18:06:51.132991: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
import os, sys
sys.path.insert(0, '../scripts/')
from prep import prep
from AudioGenerator import AudioGenerator
audio_gen = AudioGenerator(spectrogram=True)

In [8]:
audio_gen.train_model(input_to_softmax=model_0, 
            pickle_path='model_0.pickle', 
            save_model_path='model_0.h5',
            spectrogram=True,
           ) 



  hist = model.fit_generator(


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 6: early stopping


# Bidirectional RNN

In [9]:
def brnn_tdd_model(input_dim, units, activation, output_dim=29):
    # Input
    input_data = Input(name='the_input', shape=(None, input_dim))
    # Bidirectional recurrent layer
    brnn = Bidirectional(LSTM(units, activation=activation, 
        return_sequences=True, implementation=2, name='brnn'))(input_data)
    # TimeDistributed Dense layer
    time_dense = TimeDistributed(Dense(output_dim))(brnn)
    # Softmax activation layer
    y_pred = Activation('softmax', name='softmax')(time_dense)
    # Specifying the model
    model = Model(inputs=input_data, outputs=y_pred)
    model.output_length = lambda x: x
    print(model.summary())
    return model

In [10]:
model_2 = brnn_tdd_model(input_dim=161, units=200, activation='relu') # 161 for Spectrogram/13 for MFCC

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 the_input (InputLayer)      [(None, None, 161)]       0         
                                                                 
 bidirectional (Bidirectiona  (None, None, 400)        579200    
 l)                                                              
                                                                 
 time_distributed (TimeDistr  (None, None, 29)         11629     
 ibuted)                                                         
                                                                 
 softmax (Activation)        (None, None, 29)          0         
                                                                 
Total params: 590,829
Trainable params: 590,829
Non-trainable params: 0
_________________________________________________________________
None


In [11]:
audio_gen.train_model(input_to_softmax=model_2, 
            pickle_path='model_2.pickle', 
            save_model_path='model_2.h5', 
            spectrogram=True)

Epoch 1/30


  hist = model.fit_generator(


  1/583 [..............................] - ETA: 23:58 - loss: 6514.1245Batch 1: Invalid loss, terminating training


# CNN + Deeper Bidirectional RNN

In [12]:
def cnn_deep_brnn_tdd_model(input_dim, filters, activation, kernel_size, conv_stride,
    conv_border_mode, recur_layers, units, output_dim=29):
    # Input
    input_data = Input(name='the_input', shape=(None, input_dim))
    # Convolutional layer
    conv_1d = Conv1D(filters, kernel_size, 
                     strides=conv_stride, 
                     padding=conv_border_mode,
                     activation=activation,
                     name='conv1d')(input_data)
    # Batch normalization
    bn_cnn = BatchNormalization()(conv_1d)
    # Bidirectional recurrent layer
    brnn = Bidirectional(GRU(units, activation=activation, 
        return_sequences=True, name='brnn'))(bn_cnn)
    # Batch normalization 
    bn_rnn = BatchNormalization()(brnn)
    # Loop for additional layers
    for i in range(recur_layers - 1):
        name = 'brnn_' + str(i + 1)
        brnn = Bidirectional(GRU(units, activation=activation, 
        return_sequences=True, implementation=2, name=name))(bn_rnn)
        bn_rnn = BatchNormalization()(brnn)
    # TimeDistributed Dense layer
    time_dense = TimeDistributed(Dense(output_dim))(bn_rnn)
    # Softmax activation layer
    y_pred = Activation('softmax', name='softmax')(time_dense)
    # Specifying the model
    model = Model(inputs=input_data, outputs=y_pred)
    model.output_length = lambda x: audio_gen.cnn_output_length(
        x, kernel_size, conv_border_mode, conv_stride)
    print(model.summary())
    return model

In [13]:
model_3 = cnn_deep_brnn_tdd_model(input_dim=161, # 161 for Spectrogram/13 for MFCC
                                  filters=200,
                                  activation='relu',
                                  kernel_size=11, 
                                  conv_stride=2,
                                  conv_border_mode='valid',
                                  recur_layers=2,
                                  units=200)

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 the_input (InputLayer)      [(None, None, 161)]       0         
                                                                 
 conv1d (Conv1D)             (None, None, 200)         354400    
                                                                 
 batch_normalization (BatchN  (None, None, 200)        800       
 ormalization)                                                   
                                                                 
 bidirectional_1 (Bidirectio  (None, None, 400)        482400    
 nal)                                                            
                                                                 
 batch_normalization_1 (Batc  (None, None, 400)        1600      
 hNormalization)                                                 
                                                           

In [14]:
audio_gen.train_model(input_to_softmax=model_3, 
            pickle_path='model_3.pickle', 
            save_model_path='model_3.h5', 
            spectrogram=True) # True for Spectrogram/False for MFCC

Epoch 1/30


  hist = model.fit_generator(


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
 67/583 [==>...........................] - ETA: 7:37 - loss: -0.6924

KeyboardInterrupt: 