# Help
This document explains all lines of code that are used in the individual model notebooks.

### Importing Libraries

In [2]:
# Experiment can be used to log the training process on an external server
from comet_ml import Experiment

# Library for Deep Learning
import keras

# Keras.layers contains all different types of layers 
from keras import layers

# keras.models provides the model classes (e.g. Sequential) that turn a stack of layers into an actual model
from keras import models

# Contains different regularization methods to apply to layers
from keras import regularizers

# Numpy is used for arrays and numerical computation
import numpy as np

# Matplotlib can create Diagrams and other Figures
import matplotlib.pyplot as plt

# Keras Layer that applies the FFT to the raw input audio
from kapre.time_frequency import Melspectrogram

# Keras Layer that normalizes inputs
from kapre.utils import Normalization2D

# In order to import the self-written code that lives in another directory,
# the parent directory is added to the module search path
import sys
sys.path.append('../')

# Import the self-written DataFeed class to load Data
from utils import DataFeed

### Random Seed

In [None]:
# Fix NumPy's global random seed.
# The intent is that the network weights start from the same random values on
# every run, which makes different runs better comparable.
SEED = 42
np.random.seed(SEED)

### Loading Data

In [None]:
# Common parent folder of the voxforge, youtube and librivox data
data_path = '../preprocessing/preprocessed_data'

# Sub-folders (relative to data_path) holding the training and validation samples
train_folders = ['train/voxforge', 'train/youtube']
val_folders = ['val/youtube', 'val/voxforge']

# Load 50'000 samples from the training folders into RAM
#     use_premade=True: load the predefined selection of 50'000 samples
#                       (originally there are 100'000)
#     train_data is an array of 50'000 samples,
#     train_labels is the array of the 50'000 matching solutions
#     shuffle is not passed here, so DataFeed's default behaviour applies
train_data, train_labels = DataFeed.Dataset.create(
    data_path,
    list(train_folders),
    num=50000,
    use_premade=True)

# Same as above, but for the validation data
#     num=-1: load all available data
#     shuffle=True: shuffle the order of the samples
val_data, val_labels = DataFeed.Dataset.create(
    data_path,
    list(val_folders),
    num=-1,
    shuffle=True)

# Alternative to the procedure above (which loads the entire data into RAM): a generator
#     batch_size: how many samples are processed at once
#     the data then arrives batch by batch during training
training_generator = DataFeed.DataGenerator(
    data_path,
    list(train_folders),
    num=-1,
    batch_size=128)

### Callbacks

In [3]:
# Callbacks are executed at the end of each epoch
# (one epoch = every sample has been processed once)

# Watches val_acc and stops the training once the validation accuracy
# has not improved for three epochs in a row (patience=3)
early_stopping = keras.callbacks.EarlyStopping(monitor='val_acc', patience=3)

# Watches the validation loss; whenever it is the best value so far,
# the model is saved locally (as .h5)
best_model_checkpoint = keras.callbacks.ModelCheckpoint(
    'berlin_net.h5',
    monitor='val_loss',
    save_best_only=True)

callbacks = [early_stopping, best_model_checkpoint]

### Creating the Model

In [4]:
# Every audio clip is mono, 5 s long and sampled at 16 kHz
#     -> 5 * 16000 = 80000 samples per clip
SAMPLE_RATE = 16000
CLIP_SECONDS = 5

# Creates the instance of a sequential model
# 'Sequential' means that all layers are simply chained one after another
model = models.Sequential()

# model.add(layer) appends the layer to the end of the model
# Melspectrogram is a special layer that applies the FFT to the raw input audio
#     input_shape: (num_channels, samples); audio is always mono, so num_channels = 1
#     sr: sample rate
#     n_dft: number of DFT points, presumably a power of 2
#     n_mels: number of frequency buckets = height of the resulting image
#     fmin: minimal frequency
#     fmax: maximal frequency
#     return_decibel_melgram: convert the result to decibel
#     trainable_fb / trainable_kernel: whether the layer's own parameters may be
#         trained; both are switched off here
# NOTE(review): fmax=10000 lies above sr / 2 = 8000 Hz (the Nyquist frequency)
#     -- confirm that this is intended
mel_layer = Melspectrogram(
    input_shape=(1, CLIP_SECONDS * SAMPLE_RATE),
    sr=SAMPLE_RATE,
    n_dft=512,
    n_mels=28,
    fmin=0.0,
    fmax=10000,
    padding='same',
    power_melgram=1.0,
    return_decibel_melgram=True,
    trainable_fb=False,
    trainable_kernel=False)
model.add(mel_layer)

# Normalizes each data sample as a whole
model.add(Normalization2D(str_axis='data_sample'))

# A 2D convolutional layer
#     with 64 filters
#     each filter looks at a 3x3 patch of the previous layer's output
#     the activation function is ReLU
model.add(layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))

# A 2D max-pooling layer over areas of 2x2
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

# 30% dropout on the values that are passed on to the next layer
model.add(layers.Dropout(rate=0.3))

# The output of the previous layer is multi-dimensional, so flatten it
# e.g. [[1],[1]] -> [1, 1]
model.add(layers.Flatten())

# Fully connected layer with 1024 nodes
model.add(layers.Dense(units=1024, activation='relu'))

# Fully connected layer with 3 nodes and softmax as activation function
model.add(layers.Dense(units=3, activation='softmax'))

# Prints a summary of the network
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
melspectrogram_1 (Melspectro (None, 28, 313, 1)        270364    
_________________________________________________________________
normalization2d_1 (Normaliza (None, 28, 313, 1)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 26, 311, 64)       640       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 155, 64)       0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 13, 155, 64)       0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 128960)            0         
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              132056064 
__________

The summary shows the number of parameters per layer and the output shape of each layer.

### Finishing the Model

In [5]:
# Sets the important training settings of the network
#     loss: the network tries to minimise the categorical_crossentropy
#     optimizer: RMSprop is used; another option would be Stochastic Gradient Descent
#     metrics: the accuracy and the mae (= mean absolute error) are computed as well
model.compile(loss='categorical_crossentropy',
              optimizer='RMSprop',
              metrics=['accuracy', 'mae'])

### Training the Model

In [None]:
# model.fit launches the training loop and returns a History object;
# its .history attribute is a dictionary with the data about the training process
BATCH_SIZE = 128  # how many samples are processed at once
EPOCHS = 16       # the model processes every sample up to 16 times
                  # (the callbacks may end the training earlier)

history = model.fit(
    x=train_data,
    y=train_labels,
    validation_data=(val_data, val_labels),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    shuffle=True,
    callbacks=callbacks)

In [None]:
# Training is done with either model.fit or model.fit_generator.
# If the data comes in the form of a generator, choose the latter.
#     workers=8 together with use_multiprocessing=True: up to 8 worker
#         processes prepare the batches in parallel
#     max_queue_size: limits how many prepared batches may wait in the queue
#         for training
history = model.fit_generator(
    generator=training_generator,
    validation_data=(val_data, val_labels),
    epochs=10,
    shuffle=True,
    workers=8,
    use_multiprocessing=True,
    max_queue_size=20,
    callbacks=callbacks)

### Evaluating a Model

In [None]:
# Load a Keras model from a .h5 file
#     custom_objects={...} is required so that Keras recognizes the kapre layers
# NOTE(review): the ModelCheckpoint callback in this notebook writes to
#     'berlin_net.h5', whereas this cell reads 'berlin_net/berlin_net.h5'
#     -- confirm the path
kapre_layers = {'Melspectrogram': Melspectrogram,
                'Normalization2D': Normalization2D}
loaded_model = keras.models.load_model('berlin_net/berlin_net.h5',
                                       custom_objects=kapre_layers)

# Evaluate the model on the test data
#     model.evaluate returns the loss followed by the values of the compiled metrics
#     model.metrics_names returns the matching names in the same order
# NOTE(review): test_data / test_labels are not created by any of the cells
#     visible in this notebook -- they have to be loaded before this cell runs
test_scores = loaded_model.evaluate(x=test_data, y=test_labels)
test_scores, loaded_model.metrics_names