<a href="https://colab.research.google.com/github/opprud/ml/blob/master/walkthrough_MOJ.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
from tensorflow.keras.layers import Conv1D, Dense, Dropout, BatchNormalization, MaxPooling1D, Activation, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import plot_model
from tensorflow.keras.regularizers import l2
import preprocess
from tensorflow.keras.callbacks import TensorBoard
import numpy as np

In [0]:
import os
import zipfile

# Archive containing the dataset; looked up relative to the working directory.
local_zip = 'data.zip'

# The context manager closes the archive even when extraction raises,
# which the previous manual close() did not guarantee.
# NOTE(review): files are extracted to the absolute path '/data', while the
# data loading cell reads the relative path './data/0HP' -- confirm that both
# resolve to the same directory in your environment.
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/data')

In [0]:
# --- Training hyper-parameters ---
epochs = 20
batch_size = 128

# --- Dataset / pre-processing settings (forwarded to preprocess.prepro) ---
num_classes = 10        # number of output classes of the classifier
length = 2048           # passed to preprocess.prepro as `length`
number = 1000           # number of samples per class
normal = True           # standardize the data?
rate = [0.7, 0.2, 0.1]  # split ratios; presumably train/validation/test -- TODO confirm

# --- Model switch ---
BatchNorm = True        # whether to batch normalize

## Import train/validation/test data

In [0]:
# Folder holding the recordings, relative to the working directory.
path = r'./data/0HP'

# Load the six arrays (inputs and labels for the train / validation / test
# splits) through the project's preprocess module.
(x_train, y_train,
 x_valid, y_valid,
 x_test, y_test) = preprocess.prepro(
    d_path=path,
    length=length,
    number=number,
    normal=normal,
    rate=rate,
    enc=True,
    enc_step=28,
)

# Conv1D expects a channel axis, so append one of size 1 to every input array.
x_train = x_train[:, :, np.newaxis]
x_valid = x_valid[:, :, np.newaxis]
x_test = x_test[:, :, np.newaxis]

# Shape of a single sample (everything except the batch axis).
input_shape = x_train.shape[1:]

# Report the shape and the sample count of every split.
print('Training sample dimension:', x_train.shape)
print(len(x_train), 'Number of training samples')
print('Verify the dimensions of the sample', x_valid.shape)
print(len(x_valid), 'Verify the number of samples')
print('Test sample dimensions', x_test.shape)
print(len(x_test), 'Number of test samples')

## Assess class imbalance
Count the number of occurrences of each class in the training dataset, to see whether we have class imbalance.
The numbers show we have an equal number of active outputs ('1') for each class, so all is OK.

In [0]:
!pwd

In [0]:
import pandas as pd

# Inspect the *training* labels: class imbalance matters for the data the
# network is fitted on, and this is what the accompanying text describes.
# Assumes y_train holds one label column per class (enc=True) -- TODO confirm.
df = pd.DataFrame(y_train)
print(df.head())

# One value_counts() table per label column; iterating over the actual columns
# avoids hard-coding the number of classes.
for col in df.columns:
    print(df[col].value_counts())


# Define model
WDCNN layer definition; each layer contains:
* A 1D convolution filter
* Optional batch normalisation
* A ReLU activation
* A max-pooling layer for dimensionality reduction

In [0]:
# Reusable convolution stage: Conv1D -> (BatchNormalization) -> ReLU -> MaxPooling1D
def wdcnn(filters, kernerl_size, strides, conv_padding, pool_padding,  pool_size, BatchNormal, model=None):
    """Append one WDCNN convolution stage to a Keras model.

    The stage consists of a Conv1D layer (L2 kernel regularization, 1e-4),
    an optional BatchNormalization layer, a ReLU activation and a
    MaxPooling1D layer, added in that order.

    :param filters: Number of convolution kernels, integer
    :param kernerl_size: Convolution kernel size, integer
    :param strides: Convolution step size, integer
    :param conv_padding: Padding mode of the convolution, 'same' or 'valid'
    :param pool_padding: Padding mode of the pooling layer, 'same' or 'valid'
    :param pool_size: Pooling window size, integer
    :param BatchNormal: Whether to add a BatchNormalization layer, Boolean
    :param model: Model to extend. When omitted, the notebook-level variable
        ``model`` is used, which keeps existing calls working.
    :return: the model that was extended (the same object, modified in place)
    :raises NameError: if ``model`` is omitted and no notebook-level ``model``
        exists
    """
    if model is None:
        # Backward-compatible fallback to the global `model` used by older calls.
        try:
            model = globals()['model']
        except KeyError:
            raise NameError("name 'model' is not defined") from None

    model.add(Conv1D(filters=filters, kernel_size=kernerl_size, strides=strides,
                     padding=conv_padding, kernel_regularizer=l2(1e-4)))
    if BatchNormal:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=pool_size, padding=pool_padding))
    return model


## Build a sequential deep model
The model is composed of a number of layers, as proposed in the article.
The model is built from 1D convolutional filters combined with batch normalisation to ensure the full dynamic range of the layers is utilised.
The model is composed as follows:
* Five convolutional layers
* A flattening layer before the fully connected layers
* Two fully connected layers

**ReLU** functions are used for activation, and the last layer uses a **Softmax** activation to allow classification into **num_classes** categories.

In [0]:
# Instantiate the sequential model
model = Sequential()

# First convolution stage. It is written out instead of calling wdcnn()
# because the first layer has to declare input_shape.
model.add(Conv1D(filters=16, kernel_size=64, strides=16, padding='same',
                 kernel_regularizer=l2(1e-4), input_shape=input_shape))
if BatchNorm:
    # Honour the BatchNorm switch here as well, like the wdcnn() stages below.
    model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=2))

# Second convolution stage
model = wdcnn(filters=32, kernerl_size=3, strides=1, conv_padding='same',
              pool_padding='valid',  pool_size=2, BatchNormal=BatchNorm)
# Third convolution stage
model = wdcnn(filters=64, kernerl_size=3, strides=1, conv_padding='same',
              pool_padding='valid', pool_size=2, BatchNormal=BatchNorm)
# Fourth convolution stage
model = wdcnn(filters=64, kernerl_size=3, strides=1, conv_padding='same',
              pool_padding='valid', pool_size=2, BatchNormal=BatchNorm)
# Fifth convolution stage (the only one using 'valid' convolution padding)
model = wdcnn(filters=64, kernerl_size=3, strides=1, conv_padding='valid',
              pool_padding='valid', pool_size=2, BatchNormal=BatchNorm)

# Flatten the convolutional feature maps for the fully connected layers
model.add(Flatten())

# Fully connected hidden layer
model.add(Dense(units=100, activation='relu', kernel_regularizer=l2(1e-4)))
# Output layer: softmax over num_classes categories
model.add(Dense(units=num_classes, activation='softmax', kernel_regularizer=l2(1e-4)))


# Compile the model. The accuracy metric is reported during training and
# evaluation, but only the loss is used to drive the optimization.
model.compile(optimizer='Adam', loss='categorical_crossentropy',metrics=['accuracy'])

# TensorBoard callback that writes training logs to the 'logs' directory
tb_cb = TensorBoard(log_dir='logs')


In [0]:
# Print a layer-by-layer summary of the model built above.
model.summary()

In [0]:
!pwd

## Optional: skip training if a model has already been saved
Using Google Colab with a GPU for training is much more efficient.

In [0]:
# Fit the network on the training split, passing the validation split as
# validation data and the TensorBoard callback (tb_cb) for logging.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_valid, y_valid),
    shuffle=True,
    callbacks=[tb_cb],
)

## Save model

In [0]:
import os

# Target file: <working directory>/saved_models/keras_wdcnn_2.h5
model_name = 'keras_wdcnn_2.h5'
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_path = os.path.join(save_dir, model_name)

# Make sure the output folder exists, then write the model to it.
os.makedirs(save_dir, exist_ok=True)
model.save(model_path)
print('Saved trained model at %s ' % model_path)


## Load pretrained model instead

In [0]:
!ls savedModels/

In [0]:
# Restore the weights from the file written by the save cell, so training
# can be skipped when that file already exists.
weights_file = "./saved_models/keras_wdcnn_2.h5"
model.load_weights(weights_file)
print("Loaded model from disk")


In [0]:
# Evaluate on the training split. The labels read "Train" because
# x_train / y_train are scored here, not the test split.
score = model.evaluate(x=x_train, y=y_train, verbose=0)
print('Train loss:', score[0])
print('Train accuracy:', score[1])

In [0]:
# Score the held-out test split; with the compile settings used above,
# score[0] is the loss and score[1] the accuracy.
score = model.evaluate(x=x_test, y=y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)


## Plot model layers

In [0]:
# Draw the model graph, including layer shapes, and write it to 'wdcnn.png'.
plot_model(model=model, to_file='wdcnn.png', show_shapes=True)

In [0]:
!ls

In [0]:
!tensorboard --logdir='logs'