## Preparation

In [1]:
from comet_ml import Experiment
import keras
from keras import models, layers
from keras.optimizers import RMSprop
import numpy as np
from kapre.time_frequency import Melspectrogram
from kapre.augmentation import AdditiveNoise
from kapre.utils import Normalization2D
from keras import regularizers

import sys
import os
sys.path.append('../')
from utils import DataFeed

Using TensorFlow backend.


In [2]:
# Fix the seed of NumPy's global random generator so that the weight
# initialization does not differ from one run to the next.
SEED = 42
np.random.seed(SEED)

## Data

In [3]:
# Root folder of the preprocessed dataset, relative to this notebook.
data_path = '../preprocessing/preprocessed_data'

# Training set: 50000 samples taken from the VoxForge and YouTube train folders.
train_sources = ['train/voxforge', 'train/youtube']
train_data, train_labels = DataFeed.Dataset.create(
    data_path,
    train_sources,
    num=50000,
    use_premade=True,
)

# Validation set, shuffled on load.
# NOTE(review): num=-1 presumably means "use every available sample" --
# confirm against DataFeed.Dataset.create.
val_sources = ['val/youtube', 'val/voxforge']
val_data, val_labels = DataFeed.Dataset.create(
    data_path,
    val_sources,
    num=-1,
    shuffle=True,
)

## Partly Frozen Model

In [5]:
from keras import Input

In [15]:
# Model input: one row of 5 * 16000 values per example
# (presumably 5 seconds of mono audio at the 16 kHz rate given to
# Melspectrogram below -- TODO confirm against the preprocessing step).
Input_tensor = Input(shape=(1, 5 * 16000))

# Mel-spectrogram front end with both of its trainable flags switched off,
# i.e. neither the kernel nor the filterbank is updated during training.
# NOTE(review): fmax=10000 is above sr / 2 = 8000; confirm this is intended,
# since the highest mel bands likely receive no signal at this sample rate.
frozen_melgram = Melspectrogram(
    n_dft=512,
    padding='same',
    sr=16000,
    n_mels=28,
    fmin=0.0,
    fmax=10000,
    power_melgram=1.0,
    return_decibel_melgram=False,
    trainable_fb=False,
    trainable_kernel=False,
)
Freezed_layer = frozen_melgram(Input_tensor)

# Classifier stack applied to the mel-spectrogram: three Conv2D + MaxPooling
# stages, dropout, then a dense head ending in a 3-way softmax.
main_layers = models.Sequential()
main_layers.add(layers.Conv2D(64, (3, 3), activation='relu'))
main_layers.add(layers.MaxPooling2D((2, 2)))
main_layers.add(layers.Conv2D(64, (3, 3), activation='relu'))
main_layers.add(layers.MaxPooling2D((2, 2)))
main_layers.add(layers.Conv2D(128, (3, 3), activation='relu'))
main_layers.add(layers.MaxPooling2D((2, 2)))
main_layers.add(layers.Dropout(0.3))
main_layers.add(layers.Flatten())
main_layers.add(layers.Dense(1048, activation='relu'))
main_layers.add(layers.Dense(3, activation='softmax'))

# End-to-end model: raw audio -> frozen mel-spectrogram -> classifier stack.
frozen_output = main_layers(Freezed_layer)
model_1 = models.Model(Input_tensor, frozen_output)

In [16]:
# Configure model_1 for training: RMSprop with its default arguments,
# categorical cross-entropy as the loss, and accuracy as the reported metric.
model_1.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(),
    metrics=['accuracy'],
)

## Unfrozen Model

In [17]:
# Same mel-spectrogram configuration as the frozen front end, except that
# trainable_kernel=True; trainable_fb stays False.
# NOTE(review): fmax=10000 is above sr / 2 = 8000 -- confirm this is intended.
trainable_melgram = Melspectrogram(
    n_dft=512,
    padding='same',
    sr=16000,
    n_mels=28,
    fmin=0.0,
    fmax=10000,
    power_melgram=1.0,
    return_decibel_melgram=False,
    trainable_fb=False,
    trainable_kernel=True,
)
Unfreezed_layer = trainable_melgram(Input_tensor)

# The existing `main_layers` instance is applied again here rather than a
# fresh copy, so model_2 uses the very same classifier layers as model_1.
unfrozen_output = main_layers(Unfreezed_layer)
model_2 = models.Model(Input_tensor, unfrozen_output)

In [21]:
# Configure model_2 for training: RMSprop with an explicit learning rate of
# 0.0005, categorical cross-entropy loss, accuracy as the reported metric.
model_2.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(lr=0.0005),
    metrics=['accuracy'],
)

## Training

In [20]:
# Train model_1 for 8 epochs in batches of 128, evaluating on the validation
# set after every epoch. The returned History object is the cell's output.
# NOTE(review): the stored output of this cell lists 10 epochs, so it was
# produced before `epochs` was set to 8 -- re-run the cell to refresh it.
model_1.fit(
    train_data,
    train_labels,
    batch_size=128,
    epochs=8,
    shuffle=True,
    validation_data=(val_data, val_labels),
)

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f96211eb8d0>

In [22]:
# Train model_2 for 3 epochs in batches of 128, evaluating on the validation
# set after every epoch. The returned History object is the cell's output.
model_2.fit(
    train_data,
    train_labels,
    batch_size=128,
    epochs=3,
    shuffle=True,
    validation_data=(val_data, val_labels),
)

Train on 50000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f961b7d0b70>