## Importing Dependencies

In [1]:
import os
import keras
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras.optimizers import SGD, Adam
from keras.regularizers import l2

## Reading Dataset

In [22]:
# Load the pickled dataset that sits next to the notebook's working directory.
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
gisette_path = os.path.join(os.getcwd(), 'gisette.pickle')
df = pd.read_pickle(gisette_path, compression='infer')

## Summary of the Loaded Dataset

In [23]:
# Show the top-level keys of the loaded object (one entry per dataset split).
df.keys()

dict_keys(['training', 'validation', 'testing'])

In [24]:
# Peek at the structure of one split: its container type, its keys,
# and a single example label.
training_split = df['training']
print(type(training_split))
print(training_split.keys())
print(training_split['labels'][44])

<class 'dict'>
dict_keys(['data', 'labels'])
1.0


## Train, Test and Validation split
#### Since the test data does not have labels, the labelled training data is split into 5k (training) and 1k (testing) examples

In [25]:
# Number of leading rows of the labelled 'training' split that are kept for
# training; every row after this index is held out as the labelled test set.
N_TRAIN = 5000

training_split = df['training']
validation_split = df['validation']

# Training set: first N_TRAIN rows of the labelled data.
dataset_train_x = training_split['data'][:N_TRAIN]
dataset_train_y = training_split['labels'][:N_TRAIN]

# Validation set: used as provided.
dataset_validation_x = validation_split['data']
dataset_validation_y = validation_split['labels']

# Test set: the remaining labelled rows (from N_TRAIN onwards).
dataset_test_x = training_split['data'][N_TRAIN:]
dataset_test_y = training_split['labels'][N_TRAIN:]

## Summary of Split

In [6]:
# Report the feature-matrix and label-vector shapes of every split.
for split_title, split_x, split_y in (
    ("Training   : ", dataset_train_x, dataset_train_y),
    ("validation : ", dataset_validation_x, dataset_validation_y),
    ("Testing    : ", dataset_test_x, dataset_test_y),
):
    print(split_title, split_x.shape, split_y.shape)

Training   :  (5000, 5000) (5000,)
validation :  (1000, 5000) (1000,)
Testing    :  (1000, 5000) (1000,)


## Pre-Processing the Values
#### Changing the labels to {0,1} and scaling the attribute values to [0,1]

In [26]:
# Scale every feature matrix by 1/1000 (see the section heading: values end up in [0,1]).
# NOTE: this rebinds the same names, so re-running this cell without re-running
# the split cell divides the data by 1000 a second time.
dataset_train_x, dataset_validation_x, dataset_test_x = (
    features / 1000
    for features in (dataset_train_x, dataset_validation_x, dataset_test_x)
)

def change_label(lt):
    """Convert class labels to the {0, 1} encoding.

    A label of -1 becomes 0. A label that is already 0 stays 0, so applying
    the function to labels that were converted earlier (e.g. when the
    notebook cell is re-run) does not flip every negative example to 1.
    Every other value becomes 1.

    Args:
        lt: Iterable of numeric labels (list or 1-D array).

    Returns:
        A Python list of ints (0 or 1), one per input label, in input order.
    """
    return [0 if (x == -1 or x == 0) else 1 for x in lt]

# Show a few raw test labels before they are converted.
print(dataset_test_y[0:5])

# Re-encode the labels of every split with change_label and store them as arrays.
dataset_train_y, dataset_validation_y, dataset_test_y = (
    np.array(change_label(raw_labels))
    for raw_labels in (dataset_train_y, dataset_validation_y, dataset_test_y)
)

# First five converted labels of each split.
for tag, converted in (
    ("tarin", dataset_train_y),
    ("val", dataset_validation_y),
    ("test", dataset_test_y),
):
    print(tag, converted[0:5])

# First feature row of each split.
for tag, scaled in (
    ("tarin", dataset_train_x),
    ("val", dataset_validation_x),
    ("test", dataset_test_x),
):
    print(tag, scaled[0])

[ 1.  1.  1. -1.  1.]
tarin [1 0 1 1 1]
val [1 1 0 1 1]
test [1 1 1 0 1]
tarin [0.55  0.    0.495 ... 0.    0.    0.983]
val [0.688 0.    0.    ... 0.    0.769 0.   ]
test [0. 0. 0. ... 0. 0. 0.]


## Analyzing the Attributes

In [28]:
# Largest and smallest attribute value in the first training example.
first_example = dataset_train_x[0]
print("MAX :", max(first_example))
print("MIN :", min(first_example))

MAX : 0.999
MIN : 0.0


## Callback to visualize the training epochs

In [29]:
class real_time_callback(tf.keras.callbacks.Callback):
    """Keras callback that re-draws the loss and accuracy curves after every epoch.

    Per-epoch values are read from the ``logs`` dict passed to
    ``on_epoch_end`` under the keys ``loss``, ``val_loss``,
    ``binary_accuracy`` and ``val_binary_accuracy``; a key missing from
    ``logs`` is recorded as ``None``.

    Attributes are plain lists with one entry per finished epoch:
    ``loss_list`` / ``val_loss`` for the losses and ``acc_list`` /
    ``val_acc`` for the accuracies.
    """

    def __init__(self):
        super().__init__()
        self.loss_list = []
        self.val_loss = []
        self.acc_list = []
        self.val_acc = []

    def on_epoch_end(self, epoch, logs=None):
        """Record this epoch's metrics and plot the history so far.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Dict of metric values for the epoch; may be None.
        """
        # Avoid a shared mutable default argument.
        logs = logs or {}

        self.loss_list.append(logs.get('loss'))
        self.val_loss.append(logs.get('val_loss'))
        self.acc_list.append(logs.get('binary_accuracy'))
        self.val_acc.append(logs.get('val_binary_accuracy'))

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 4))

        # Plot first and label each line, then build the legend: calling
        # legend() before the lines exist leaves it empty or incomplete.
        ax1.plot(self.loss_list, label='train')
        ax1.plot(self.val_loss, label='val')
        ax1.set_title('model loss')
        ax1.set_ylabel('loss')
        ax1.set_xlabel('epoch')
        ax1.legend(loc='upper left')

        ax2.plot(self.acc_list, label='train')
        ax2.plot(self.val_acc, label='val')
        ax2.set_title('model accuracy')
        ax2.set_ylabel('accuracy')
        ax2.set_xlabel('epoch')
        ax2.legend(loc='upper left')

        plt.show()
        # A new figure is created every epoch; close it so figures do not
        # accumulate in memory over a long training run.
        plt.close(fig)

## Performing a Search and Training Models to Find the Optimal Capacity

In [None]:
filter_1 = 0
filter_2 = 0

EPOCH = 1000

# Search grid. Each entry is
# [units in the first hidden layer, units in the optional second hidden layer];
# a second value of 0 means "no second hidden layer".
flters = [[2,0] , [4,0], [8,0], [16,0], [32,0],
          [2,2] , [4,2], [8,2], [16,2], [32,2],
          [2,4] , [4,4], [8,4], [16,4], [32,4],
          [2,8] , [4,8], [8,8], [16,8], [32,8],
          [2,16] , [4,16], [8,16], [16,16], [32,16],
          [2,32] , [4,32], [8,32], [16,32], [32,32]
]

# The loop writes into these directories; create them up front so a long
# training run does not fail at the very end when the history is dumped.
os.makedirs('model', exist_ok=True)
os.makedirs('history', exist_ok=True)

for filter_1, filter_2 in flters:
    model = Sequential()
    model.add(Dense(filter_1, input_dim=dataset_train_x.shape[1], activation='tanh'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    if filter_2 > 0:
        # Use the width from the grid; a fixed width here would make every
        # two-layer configuration identical in its second layer.
        model.add(Dense(filter_2, activation='relu', kernel_regularizer=l2(0.01)))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.000001), loss=keras.losses.BinaryCrossentropy(), metrics=['binary_accuracy'])

    # A fresh callback per model so the plotted curves start empty for each configuration.
    history = model.fit(dataset_train_x, dataset_train_y, validation_data=(dataset_validation_x, dataset_validation_y), epochs=EPOCH, batch_size=32, callbacks=[real_time_callback()])

    # Persist the trained model and its training history under a name that
    # encodes the configuration: model_<filter_1>_<filter_2>_<EPOCH>.
    file_name = 'model_' + str(filter_1) + '_' + str(filter_2) + '_' + str(EPOCH)
    model.save('model/' + file_name)
    with open('history/' + file_name, 'wb') as file_pi:
        pickle.dump(history.history, file_pi)

## Read History of the Trained model

In [None]:
# Load the pickled training history of one configuration and print it.
# NOTE: pickle.load can execute arbitrary code -- only open files from a trusted source.
with open('history/model_32_32_1000', "rb") as file_pi:
    hist = pickle.load(file_pi)
print(hist)

## Manual Prediction of labels

In [12]:
# Compare predicted probabilities with the true labels for the first five
# training examples. `model` is whatever the search loop assigned last, so
# this cell depends on that loop having been run in the current kernel.
sample_predictions = model.predict(dataset_train_x[:5])
print(sample_predictions, dataset_train_y[:5])

[[0.98584807]
 [0.01081104]
 [0.99679893]
 [0.9471442 ]
 [0.98803836]] [1 0 1 1 1]


## Loading the trained models

In [32]:
# Configuration of the saved model to load; the file name follows the
# model_<filter_1>_<filter_2>_<EPOCH> pattern.
EPOCH = 1000
filter_1 = 16
filter_2 = 0
file_name = f'model_{filter_1}_{filter_2}_{EPOCH}'
loaded_model = keras.models.load_model('model/' + file_name)

# Evaluate on the held-out test split.
dataset_x = dataset_test_x
dataset_y = dataset_test_y
count = len(dataset_test_y)

evaluation = loaded_model.evaluate(dataset_x[:count], dataset_y[:count])
print(evaluation)

[0.06745307892560959, 0.9800000190734863]


In [33]:
# Print the layer-by-layer architecture and parameter counts of the loaded model.
loaded_model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 16)                80016     
                                                                 
 batch_normalization_3 (Batc  (None, 16)               64        
 hNormalization)                                                 
                                                                 
 dropout_3 (Dropout)         (None, 16)                0         
                                                                 
 dense_7 (Dense)             (None, 1)                 17        
                                                                 
Total params: 80,097
Trainable params: 80,065
Non-trainable params: 32
_________________________________________________________________


## Draw Model Layer Sequence

In [34]:
import visualkeras

# Render the layer diagram and save it to output_model.png. The returned image
# is left as the cell's last expression so the notebook displays it inline,
# rather than calling .show(), which hands the image to an external viewer.
model_diagram = visualkeras.layered_view(loaded_model, legend=True, to_file='output_model.png')
model_diagram

Traceback (most recent call last):
  File "<string>", line 1, in <module>
KeyboardInterrupt
