# Deep Convnets

In [1]:
import keras
from keras import models, layers
import numpy as np
import matplotlib.pyplot as plt
from kapre.time_frequency import Melspectrogram
from kapre.utils import Normalization2D
import sys
sys.path.append('../')
from utils import DataFeed
from keras import regularizers
from keras.optimizers import RMSprop, Nadam
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


### Load Data

In [2]:
# Root directory (relative path) handed to the DataFeed loaders in the cells below.
data_path = '../preprocessing/preprocessed_data'

In [None]:
# Build the (data, labels) pair for training from the voxforge and youtube
# training folders.
# NOTE(review): `num=50000` presumably caps the number of samples and
# `use_premade=True` presumably reuses a previously built dataset -- confirm
# against utils/DataFeed.
train_data, train_labels = DataFeed.Dataset.create(
    data_path,
    ['train/voxforge', 'train/youtube'],
    num=50000,
    use_premade=True,
)

In [3]:
# Build the (data, labels) pair used for validation from the youtube and
# voxforge validation folders, shuffled.
# NOTE(review): `num=-1` presumably means "use every available sample" --
# confirm against utils/DataFeed.
val_data, val_labels = DataFeed.Dataset.create(
    data_path,
    ['val/youtube', 'val/voxforge'],
    num=-1,
    shuffle=True,
)

In [11]:
# Batch generator over the voxforge, youtube and librivox training folders,
# yielding batches of 64; it is what `fit_generator` consumes further down.
# NOTE(review): `num=-1` presumably means "use every available sample" --
# confirm against utils/DataFeed.
training_generator = DataFeed.DataGenerator(
    data_path,
    ['train/voxforge', 'train/youtube', 'train/librivox'],
    num=-1,
    batch_size=64,
)

In [5]:
# Callbacks passed to training:
#   * EarlyStopping   -- stop once `val_acc` has not improved for 5 epochs.
#   * ModelCheckpoint -- write 'conv2d_deep.h5' only when `val_loss` improves.
# NOTE(review): the two callbacks monitor different metrics (val_acc vs.
# val_loss); confirm that this is intentional.
callbacks = [
    keras.callbacks.EarlyStopping(monitor='val_acc', patience=5),
    keras.callbacks.ModelCheckpoint(
        'conv2d_deep.h5',
        monitor='val_loss',
        save_best_only=True,
    ),
]

## Topcoder_net
Architecture based on the [Language Classifier notebook in pietz/language-recognition](https://github.com/pietz/language-recognition/blob/master/Language%20Classifier.ipynb).

In [24]:
# Topcoder-style CNN: the mel-spectrogram is computed inside the model, so the
# network is fed raw waveforms of shape (1, 5 * 16000) at sr=16000.
model = models.Sequential()

# Front end: fixed (non-trainable) mel filterbank and STFT kernel, output in dB.
model.add(Melspectrogram(n_dft=512, input_shape=(1, 5 * 16000,),
                         padding='same', sr=16000, n_mels=192, n_hop=418,
                         fmin=0.0, fmax=8000, power_melgram=1.0,
                         return_decibel_melgram=True, trainable_fb=False,
                         trainable_kernel=False))

# Fix: the class imported from kapre.utils is `Normalization2D`; the previous
# spelling `Normalization2d` raised a NameError when this cell was run.
model.add(Normalization2D(str_axis='data_sample'))

# Five identical stages (3x3 ELU convolution followed by max pooling); only the
# number of filters changes, doubling at every stage.
for n_filters in (16, 32, 64, 128, 256):
    model.add(layers.Conv2D(n_filters, (3, 3), activation='elu', padding='same'))
    model.add(layers.MaxPooling2D())

# Classifier head: flatten, dropout, then a 3-way softmax.
model.add(layers.Flatten())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(3, activation='softmax'))
model.summary()

=> Reaches up to 86% validation accuracy.

## Resnet

In [14]:
from keras.applications import vgg16, resnet50
from keras_contrib.applications.resnet import ResNet18, ResNet

In [41]:
# Image-style augmentation pipeline (not referenced by any other visible cell).
# NOTE(review): the featurewise_* options rely on statistics computed by
# `datagen.fit(...)`; no such call is visible in this notebook.
datagen = ImageDataGenerator(
    # normalisation
    featurewise_center=True,
    featurewise_std_normalization=True,
    # geometric augmentation
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    dtype='float32',
)

In [43]:
# ResNet variant: a fixed mel-spectrogram front end followed by a small
# keras_contrib ResNet with 'basic' blocks and three stages of two blocks each.
model = models.Sequential()

# Front end: same settings as the Topcoder net, but without dB scaling.
model.add(
    Melspectrogram(
        n_dft=512,
        input_shape=(1, 5 * 16000,),
        padding='same',
        sr=16000,
        n_mels=192,
        n_hop=418,
        fmin=0.0,
        fmax=8000,
        power_melgram=1.0,
        return_decibel_melgram=False,
        trainable_fb=False,
        trainable_kernel=False,
    )
)

# TODO: try data augmentation here, e.g. model.add(layers.Lambda(baum))

# Backbone: consumes the 192x192x1 spectrogram and outputs 3 classes.
model.add(
    ResNet(
        input_shape=(192, 192, 1),
        classes=3,
        block='basic',
        repetitions=[2, 2, 2],
        dropout=0.5,
    )
)
model.summary()

reshaping via a convolution...
reshaping via a convolution...
reshaping via a convolution...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
melspectrogram_18 (Melspectr (None, 192, 192, 1)       312512    
_________________________________________________________________
model_8 (Model)              (None, 3)                 2788419   
Total params: 3,100,931
Trainable params: 2,783,811
Non-trainable params: 317,120
_________________________________________________________________


=> Reaches up to 76% validation accuracy, but overfits.

## Mobilenet

In [4]:
from keras.applications.mobilenetv2 import MobileNetV2

In [21]:
# MobileNetV2 variant built with the functional API: raw waveform in, mel
# front end, then a randomly initialised MobileNetV2 (alpha=0.25) with its own
# 3-class top.
input_tensor = layers.Input(shape=(1, 80000))

# Fixed (non-trainable) mel front end without dB scaling.
x = Melspectrogram(
    n_dft=512,
    padding='same',
    sr=16000,
    n_mels=223,
    n_hop=360,
    fmin=0.0,
    fmax=10000,
    power_melgram=1.0,
    return_decibel_melgram=False,
    trainable_fb=False,
    trainable_kernel=False,
)(input_tensor)

# NOTE(review): despite its name, `inception_model` holds a MobileNetV2.
inception_model = MobileNetV2(
    include_top=True,
    input_tensor=x,
    weights=None,
    alpha=0.25,
    classes=3,
)

# The full model maps the waveform input to MobileNetV2's classification output.
x = inception_model.output
model = models.Model(inputs=input_tensor, outputs=x)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_16 (InputLayer)           (None, 1, 80000)     0                                            
__________________________________________________________________________________________________
melspectrogram_16 (Melspectrogr (None, 223, 223, 1)  320479      input_16[0][0]                   
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 1)  0           melspectrogram_16[0][0]          
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 8)  72          Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (

### Train Model

In [8]:
# Compile whichever `model` was built last: categorical cross-entropy loss,
# RMSprop with default settings, accuracy reported during training.
model.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(),
    metrics=['accuracy'],
)

In [10]:
# Train for up to 20 epochs from the batch generator, validating on
# (val_data, val_labels); the callbacks may stop training early and
# checkpoint the best model.
history = model.fit_generator(
    generator=training_generator,
    validation_data=(val_data, val_labels),
    epochs=20,
    callbacks=callbacks,
    shuffle=True,
    # Batches are produced by 8 worker processes through a queue of size 20.
    use_multiprocessing=True,
    workers=8,
    max_queue_size=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
 1276/14135 [=>............................] - ETA: 14:46 - loss: 0.0769 - acc: 0.9735

Process ForkPoolWorker-107:
Process ForkPoolWorker-108:
Process ForkPoolWorker-110:
Process ForkPoolWorker-106:
Process ForkPoolWorker-111:
Process ForkPoolWorker-105:
Process ForkPoolWorker-112:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
 

KeyboardInterrupt: 

In [12]:
# Continue training for 5 more epochs with the same generator, validation
# data and callbacks; `history` is overwritten with this run's record.
history = model.fit_generator(
    generator=training_generator,
    validation_data=(val_data, val_labels),
    epochs=5,
    callbacks=callbacks,
    shuffle=True,
    # Batches are produced by 8 worker processes through a queue of size 20.
    use_multiprocessing=True,
    workers=8,
    max_queue_size=20,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
