<a href="https://colab.research.google.com/github/paulinakaszuba94/dataworkshop_matrix3/blob/master/day5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install hyperopt



In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from skimage import color, exposure
import os
import datetime

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score

%reload_ext tensorboard

from hyperopt import hp, STATUS_OK, tpe, Trials, fmin

In [3]:
cd '/content/drive/My Drive/Colab Notebooks/dataworkshop_matrix/matrix3/dataworkshop_matrix3'

/content/drive/My Drive/Colab Notebooks/dataworkshop_matrix/matrix3/dataworkshop_matrix3


In [0]:
# Load the pickled train / test splits.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
train = pd.read_pickle('data/train.p')
test = pd.read_pickle('data/test.p')

# Pull the 'features' and 'labels' entries out of each split.
X_train = train['features']
y_train = train['labels']
X_test = test['features']
y_test = test['labels']

# Mapping taken from column 'b' of the sign-name table (row index -> value).
df = pd.read_csv('data/signnames.csv')
labels_dict = df.to_dict()['b']

In [0]:
# One-hot encode 1-D label vectors; arrays that are already 2-D are left as-is.
if y_train.ndim == 1:
  y_train = to_categorical(y_train)
if y_test.ndim == 1:
  # Reuse the training width: if the test split lacked the highest class id,
  # an inferred width would be narrower than the model's output layer.
  y_test = to_categorical(y_test, num_classes=y_train.shape[1])

In [0]:
# Second dimension of the label matrix; used to size the final softmax layer.
num_classes = y_train.shape[1]
# Every axis of X_train after the first; passed to the first Conv2D layer.
input_shape = X_train.shape[1:]

In [0]:
def train_model(model, X_train, y_train, params_fit=None):
  """Compile `model`, fit it on the given data and return that same model.

  Args:
    model: object exposing Keras-style `compile` and `fit` methods.
    X_train: training features handed to `model.fit`.
    y_train: training targets handed to `model.fit`. The loss is categorical
      cross-entropy, so presumably one-hot encoded - confirm with the caller.
    params_fit: optional dict of fit options. Recognised keys are
      'batch size' (default 128), 'epochs' (default 5), 'verbose' (default 1)
      and 'validation data' (default: the training data itself). The
      underscore spellings ('batch_size', 'validation_data') are accepted
      as aliases.

  Returns:
    The `model` that was passed in, after fitting.
  """
  # None sentinel instead of a shared mutable `{}` default.
  options = {} if params_fit is None else params_fit

  def option(key, default):
    # The original spelling (with a space) wins; the underscore form is a fallback.
    if key in options:
      return options[key]
    return options.get(key.replace(' ', '_'), default)

  # `metrics` is given as a list, matching func_obj; newer Keras releases
  # reject a bare string here.
  model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

  # One timestamped TensorBoard log directory per call.
  logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
  tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

  model.fit(
      X_train,
      y_train,
      batch_size=option('batch size', 128),
      epochs=option('epochs', 5),
      verbose=option('verbose', 1),
      # NOTE: the default validates on the training data itself, so the
      # reported val_* metrics are not a held-out estimate.
      validation_data=option('validation data', (X_train, y_train)),
      callbacks=[tensorboard_callback]
  )
  return model

def predict(model_trained, X_test, y_test, scoring=accuracy_score):
  """Score a fitted model's predicted classes against the true classes.

  Both `y_test` and the output of `model_trained.predict(X_test)` are reduced
  with argmax over axis 1 (so both are expected to be 2-D, e.g. one-hot /
  per-class scores) and then handed to `scoring(true, predicted)`.

  Returns:
    Whatever `scoring` returns.
  """
  true_classes = np.argmax(y_test, axis=1)
  predicted_classes = np.argmax(model_trained.predict(X_test), axis=1)
  return scoring(true_classes, predicted_classes)

In [0]:
def get_cnn_v5(input_shape, num_classes):
  """Build the uncompiled v5 CNN: three conv blocks followed by two dense layers.

  Args:
    input_shape: shape of one input sample, given to the first Conv2D layer.
    num_classes: number of units in the final softmax layer.

  Returns:
    A `Sequential` model; compiling and fitting are left to the caller.
  """
  return Sequential([
    # Block 1. Only the first layer declares the input shape; it is not
    # needed on later layers, so the copy-pasted duplicate was dropped.
    # NOTE(review): this block stacks three convolutions while the other
    # blocks use two - confirm that is intended.
    Conv2D(filters=32, kernel_size=(3,3), activation='relu', input_shape=input_shape),
    Conv2D(filters=32, kernel_size=(3,3), activation='relu'),
    Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same'),
    MaxPool2D(),
    Dropout(0.3),

    # Block 2.
    Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same'),
    Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
    MaxPool2D(),
    Dropout(0.3),

    # Block 3.
    Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same'),
    Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
    MaxPool2D(),
    Dropout(0.3),

    Flatten(),

    # Classifier head: two dense layers with dropout, then softmax.
    Dense(1024, activation='relu'),
    Dropout(0.3),

    Dense(1024, activation='relu'),
    Dropout(0.3),

    Dense(num_classes, activation='softmax')
  ])

In [20]:
# Build the v5 network, then compile and fit it with train_model's default fit
# options. train_model returns the object it was given, so `model_trained`
# and `model` refer to the same instance.
model = get_cnn_v5(input_shape, num_classes)
model_trained = train_model(model, X_train, y_train)

# Last expression of the cell: the score from predict() on the test split is displayed.
predict(model_trained, X_test, y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


0.964172335600907

In [21]:
# Cross-check with the model's own evaluate(); the recorded output is
# [loss, accuracy] and the accuracy agrees with predict() above.
model_trained.evaluate(X_test, y_test)



[0.16561594605445862, 0.96417236328125]

In [0]:
def get_model(params):
  """Assemble the CNN whose five dropout rates are read from `params`.

  Keys read from `params`: 'dropout_cnn_block_one', 'dropout_cnn_block_two',
  'dropout_cnn_block_three', 'dropout_dense_block_one' and
  'dropout_dense_block_two'. The input shape and the class count come from
  the notebook-level `input_shape` and `num_classes` variables.

  Returns:
    An uncompiled `Sequential` model.
  """
  def conv(filters, **extra):
    # Every convolution is 3x3 with ReLU; only filters / padding / input vary.
    return Conv2D(filters=filters, kernel_size=(3,3), activation='relu', **extra)

  layers = []

  # Convolutional block 1 (32 filters).
  layers.append(conv(32, input_shape=input_shape))
  layers.append(conv(32, padding='same'))
  layers.append(MaxPool2D())
  layers.append(Dropout(params['dropout_cnn_block_one']))

  # Convolutional block 2 (64 filters).
  layers.append(conv(64, padding='same'))
  layers.append(conv(64))
  layers.append(MaxPool2D())
  layers.append(Dropout(params['dropout_cnn_block_two']))

  # Convolutional block 3 (128 filters).
  layers.append(conv(128, padding='same'))
  layers.append(conv(128))
  layers.append(MaxPool2D())
  layers.append(Dropout(params['dropout_cnn_block_three']))

  layers.append(Flatten())

  # Two dense layers, each followed by its own dropout rate.
  layers.append(Dense(1024, activation='relu'))
  layers.append(Dropout(params['dropout_dense_block_one']))
  layers.append(Dense(1024, activation='relu'))
  layers.append(Dropout(params['dropout_dense_block_two']))

  # Softmax output over the classes.
  layers.append(Dense(num_classes, activation='softmax'))

  return Sequential(layers)

In [0]:
def func_obj(params):
  """Hyperopt objective: train one candidate model and report its score.

  Builds a model with get_model(params), fits it for 5 epochs on the
  notebook-level X_train / y_train (batch size from params['batch size'],
  default 128, cast to int) and evaluates it on X_test / y_test.

  Returns:
    A dict with 'loss' (the negated accuracy), 'status' (STATUS_OK) and
    'model' (the fitted model).
  """
  candidate = get_model(params)
  candidate.compile(loss= 'categorical_crossentropy', optimizer='Adam', metrics=[ 'accuracy' ])

  fit_options = dict(
      batch_size=int(params.get('batch size', 128)),
      epochs=5,
      verbose=0,
  )
  candidate.fit(X_train, y_train, **fit_options)

  # evaluate() returns the loss first and the accuracy second; only the latter is used.
  # NOTE(review): candidates are scored on X_test / y_test, the same split the
  # notebook uses for its final evaluation - consider a separate validation split.
  accuracy = candidate.evaluate(X_test, y_test, verbose=0)[1]
  print(params, 'accuracy = {}'.format(accuracy))

  # hyperopt minimises 'loss', while a higher accuracy is better, so the
  # accuracy is negated: the best possible value becomes -1.
  return {'loss': -accuracy, 'status': STATUS_OK, 'model': candidate}


In [35]:
# Search space: batch size in steps of 10 between 100 and 200 (quniform yields
# floats, func_obj casts to int), plus one dropout rate per block.
space = {
    'batch size' : hp.quniform('batch size', 100, 200, 10),
    'dropout_cnn_block_one': hp.uniform('dropout_cnn_block_one', 0.3, 0.5),
    'dropout_cnn_block_two': hp.uniform('dropout_cnn_block_two', 0.3, 0.5),
    'dropout_cnn_block_three': hp.uniform('dropout_cnn_block_three', 0.3, 0.5),
    'dropout_dense_block_one': hp.uniform('dropout_dense_block_one', 0.3, 0.7),
    'dropout_dense_block_two': hp.uniform('dropout_dense_block_two', 0.3, 0.7)
}

# Keep a handle on the Trials object so every evaluation can be inspected afterwards.
trials = Trials()

# Keyword arguments on purpose: the positional order of fmin's parameters after
# `max_evals` is not the same in every hyperopt release (newer ones place
# `timeout` before `trials`), so a positional Trials() can bind to the wrong parameter.
best = fmin(
    fn=func_obj,
    space=space,
    algo=tpe.suggest,
    max_evals=30,
    trials=trials
)

{'batch size': 120.0, 'dropout_cnn_block_one': 0.4601859486602975, 'dropout_cnn_block_three': 0.47138276303558224, 'dropout_cnn_block_two': 0.4868418903047815, 'dropout_dense_block_one': 0.36623570062283045, 'dropout_dense_block_two': 0.40255604176628557}
accuracy = 0.9251700639724731
{'batch size': 200.0, 'dropout_cnn_block_one': 0.34474095229990936, 'dropout_cnn_block_three': 0.3055805801516353, 'dropout_cnn_block_two': 0.3113889668585235, 'dropout_dense_block_one': 0.5103362572793112, 'dropout_dense_block_two': 0.6637268085487114}
accuracy = 0.9442176818847656
{'batch size': 180.0, 'dropout_cnn_block_one': 0.446726375471451, 'dropout_cnn_block_three': 0.38515542764639044, 'dropout_cnn_block_two': 0.3971770171416992, 'dropout_dense_block_one': 0.5015997014444543, 'dropout_dense_block_two': 0.5430038289293266}
accuracy = 0.8795918226242065
{'batch size': 190.0, 'dropout_cnn_block_one': 0.466490658803859, 'dropout_cnn_block_three': 0.3481629524027034, 'dropout_cnn_block_two': 0.3881128