# Avaliando landmarks - ResNet18 - GridSearch(epoch vs batch size)

## Import data

In [1]:
# !pip install --upgrade pip

In [2]:
## Run on aws ec2 machine (conda_tersorflow_p36 kernel)
# !pip install tensorflow==1.13.1
# !pip install image-classifiers
# !pip install tensorflow-gpu==1.13.1

In [3]:
import os
import tensorflow as tf
from keras import callbacks, Model
import matplotlib.pyplot as plt
import numpy as np
import keras
from keras import optimizers
from keras.utils.io_utils import HDF5Matrix
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, Activation, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import random
import seaborn as sns
from sklearn import datasets, metrics

%matplotlib inline

Using TensorFlow backend.


In [4]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 4127605359904207207
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 12705287089786501085
physical_device_desc: "device: XLA_GPU device"
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 6732846262141161139
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 11330115994
locality {
  bus_id: 1
  links {
  }
}
incarnation: 10648547885565590165
physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:00:1e.0, compute capability: 3.7"
]


## Classificando Landmarks (Análise dos dados)

### Lendo o conjunto de dados

In [5]:
seed = random.seed(42)

sample_datagen = ImageDataGenerator(rescale=1./255)
base_path = '/home/ubuntu/landmarks/landmarks'
target_size = (224, 224)
input_shape = (224, 224, 3)
classes = ["47378", "120885", "85758", "180901", "48522", "101399", 
           "190822", "97734", "146250", "186080", "21253", "142644", 
           "31531", "165596", "56827", "38482", "20102", "178519", 
           "152827", "173511"]

seed = 7
np.random.seed(seed)
sample_generator = sample_datagen.flow_from_directory(base_path + '/subset_train',
                                                      target_size=target_size,
                                                      batch_size=32,
                                                      class_mode="sparse",
                                                      seed = seed)

sample_test_generator = sample_datagen.flow_from_directory(base_path + "/subset_test",
                                                           target_size = target_size,
                                                           batch_size = 32,
                                                           class_mode = "categorical",
                                                           seed = seed)

total_classes = np.max(sample_generator.labels) + 1

x_sample, y_sample = sample_generator.next()
x_sample_test, y_sample_test = sample_test_generator.next()
print('\n')
print('Showing y sample:', y_sample)
print('\n')
print('samples in train: %i' % sample_generator.labels.shape,
      'samples in test: %i' % sample_test_generator.labels.shape,
      'features: %s' % str(x_sample.shape[1:]),
      'classes: %i' % total_classes,
      sep='\n', end='\n\n')

print('shape:', x_sample.shape, x_sample_test.shape)

Found 12508 images belonging to 20 classes.
Found 3128 images belonging to 20 classes.


Showing y sample: [ 8.  6.  8.  3.  8. 16. 18.  8.  2. 19.  3.  7.  1. 16. 11. 15. 17.  5.
  2. 14.  5. 15.  1. 19. 15.  2.  7.  4.  9. 18.  8.  2.]


samples in train: 12508
samples in test: 3128
features: (224, 224, 3)
classes: 20

shape: (32, 224, 224, 3) (32, 224, 224, 3)


## Treinamento 
### Parâmetros para treinamento e validação

In [6]:
rms = optimizers.RMSprop(lr = 0.0002,
                         decay = 1e-6)

device = '/gpu:0'

epochs = 64
batch = 32

Instructions for updating:
Colocations handled automatically by placer.


## Funções de auxílio

In [7]:
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger, TerminateOnNaN, ReduceLROnPlateau
from keras.wrappers.scikit_learn import KerasClassifier

import types

## Adapted from taken from keras.wrappers.scikit_learn.KerasClassifier.fit 
class KerasBatchClassifier(KerasClassifier):

    
    def fit(self, X, y=None, **kwargs):

        # taken from keras.wrappers.scikit_learn.KerasClassifier.fit ###################################################
        if self.build_fn is None:
            self.model = self.__call__(**self.filter_sk_params(self.__call__))
        elif not isinstance(self.build_fn, types.FunctionType) and not isinstance(self.build_fn, types.MethodType):
            self.model = self.build_fn(**self.filter_sk_params(self.build_fn.__call__))
        else:
            self.model = self.build_fn(**self.filter_sk_params(self.build_fn))

        loss_name = self.model.loss
        if hasattr(loss_name, '__name__'):
            loss_name = loss_name.__name__

        ################################################################################################################
        epochs = self.sk_params['epochs'] if 'epochs' in self.sk_params else 100
        batch = self.sk_params['batch_size'] if 'batch_size' in self.sk_params else 32
        print('[Debug] - epochs=', epochs)
        print('[Debug] - batch=', batch)
        
        patience = epochs // 3
        
        base_path = kwargs['base_path']
        target_size = kwargs['target_size']
        
        datagen = ImageDataGenerator(rescale = 1./255, validation_split = 0.2)

        self.validation_flow = datagen.flow_from_directory(
            base_path + "/subset_train",
            target_size = target_size,
            batch_size = batch,
            class_mode = "categorical",
            subset ='validation')
        
        self.validation_steps = self.validation_flow.samples // batch
        
        train_flow = datagen.flow_from_directory(
            base_path + "/subset_train",
            target_size = target_size,
            batch_size = batch,
            class_mode = "categorical",
            subset = 'training')
        
        train_steps = train_flow.samples // batch

        model_checkpoint = ModelCheckpoint("results/best_weights.{epoch:02d}-{loss:.5f}.hdf5", 
                                           verbose=1, 
                                           save_best_only=True,
                                           mode="auto")
        terminate_onnan = TerminateOnNaN()
        reduce_plateau = ReduceLROnPlateau(patience=patience)
        
        callbacks = [model_checkpoint, terminate_onnan, reduce_plateau]

        self.__history = self.model.fit_generator(
            train_flow,  
            steps_per_epoch=train_steps,
            validation_data=self.validation_flow, 
            validation_steps=self.validation_steps, 
            epochs=epochs,
            callbacks=callbacks,
            verbose = 1
        )
        

        return self.__history

    def score(self, X, y=None, **kwargs):
        outputs = self.model.evaluate_generator(self.validation_flow, self.validation_steps)
        if type(outputs) is not list:
            outputs = [outputs]
        for name, output in zip(self.model.metrics_names, outputs):
            if name == 'acc':
                return output
        raise Exception('The model is not configured to compute accuracy. '
                        'You should pass `metrics=["accuracy"]` to '
                        'the `model.compile()` method.')

    @property
    def history(self):
        return self.__history

In [8]:
from sklearn.model_selection import GridSearchCV, ShuffleSplit

def grid_search(create_model, param_grid, train_epochs = epochs):
    model = KerasBatchClassifier(build_fn=create_model)

    grid = GridSearchCV(estimator=model, 
                        param_grid=param_grid, 
                        cv=ShuffleSplit(test_size=0.20, n_splits=1, random_state=0))
    with tf.device(device):
        return grid.fit((1, 1, 1), base_path = base_path, target_size = target_size, n_jobs=1)

### Definindo a rede

In [9]:
from classification_models.resnet import ResNet18, preprocess_input

def build_resNet18(optimizer = rms):
    model = ResNet18(input_shape = input_shape,
                   weights = "imagenet",
                   include_top=False)

    for layer in model.layers:
          layer.trainable = False

    output = model.output

    output = Flatten(name = 'flat_mdc')(output)

    output = Dense(total_classes,
                   activation ='softmax',
                   name = 'saida_mdc')(output)

    model = Model(inputs = model.input, outputs = output)

    model.compile(loss ='categorical_crossentropy', 
                  optimizer = optimizer, 
                  metrics=['accuracy'])

#     model.summary()
    return model

In [10]:
def build_resNet18_tuning():
    model = build_resNet18()
    for layer in model.layers:
        layer.trainable = True

    model.compile(loss ='categorical_crossentropy',
                  optimizer = rms,
                  metrics=['accuracy'])

#     model.summary()
    return model

### GridSearch 1 - batch size vs epochs

In [11]:
batch_size = [30, 60, 120]
epochs = [4, 8, 12, 24]

param_grid = dict(batch_size=batch_size, epochs=epochs)
grid_result = grid_search(build_resNet18_tuning, param_grid)

[Debug] - epochs= 4
[Debug] - batch= 30
Found 2492 images belonging to 20 classes.
Found 10016 images belonging to 20 classes.
Instructions for updating:
Use tf.cast instead.
Epoch 1/4

Epoch 00001: val_loss improved from inf to 3.75297, saving model to results/best_weights.01-2.25160.hdf5
Epoch 2/4

Epoch 00002: val_loss improved from 3.75297 to 3.08606, saving model to results/best_weights.02-0.85483.hdf5
Epoch 3/4

Epoch 00003: val_loss did not improve from 3.08606
Epoch 4/4

Epoch 00004: val_loss improved from 3.08606 to 0.86511, saving model to results/best_weights.04-0.16800.hdf5
[Debug] - epochs= 8
[Debug] - batch= 30
Found 2492 images belonging to 20 classes.
Found 10016 images belonging to 20 classes.
Epoch 1/8

Epoch 00001: val_loss improved from inf to 1.34431, saving model to results/best_weights.01-2.10293.hdf5
Epoch 2/8

Epoch 00002: val_loss improved from 1.34431 to 1.08602, saving model to results/best_weights.02-0.76558.hdf5
Epoch 3/8

Epoch 00003: val_loss improved fr

Epoch 4/8

Epoch 00004: val_loss improved from 2.42778 to 2.16829, saving model to results/best_weights.04-2.09274.hdf5
Epoch 5/8

Epoch 00005: val_loss improved from 2.16829 to 1.99449, saving model to results/best_weights.05-1.81525.hdf5
Epoch 6/8

Epoch 00006: val_loss improved from 1.99449 to 1.83161, saving model to results/best_weights.06-1.61709.hdf5
Epoch 7/8

Epoch 00007: val_loss improved from 1.83161 to 1.75677, saving model to results/best_weights.07-1.43788.hdf5
Epoch 8/8

Epoch 00008: val_loss improved from 1.75677 to 1.63324, saving model to results/best_weights.08-1.30145.hdf5
[Debug] - epochs= 12
[Debug] - batch= 60
Found 2492 images belonging to 20 classes.
Found 10016 images belonging to 20 classes.
Epoch 1/12

Epoch 00001: val_loss improved from inf to 3.41593, saving model to results/best_weights.01-3.88195.hdf5
Epoch 2/12

Epoch 00002: val_loss improved from 3.41593 to 2.82335, saving model to results/best_weights.02-3.02996.hdf5
Epoch 3/12

Epoch 00003: val_loss 

Epoch 4/12

Epoch 00004: val_loss improved from 2.90233 to 2.61500, saving model to results/best_weights.04-2.55686.hdf5
Epoch 5/12

Epoch 00005: val_loss improved from 2.61500 to 2.40362, saving model to results/best_weights.05-2.25341.hdf5
Epoch 6/12

Epoch 00006: val_loss improved from 2.40362 to 2.17195, saving model to results/best_weights.06-2.01466.hdf5
Epoch 7/12

Epoch 00007: val_loss improved from 2.17195 to 2.13185, saving model to results/best_weights.07-1.80561.hdf5
Epoch 8/12

Epoch 00008: val_loss improved from 2.13185 to 1.90137, saving model to results/best_weights.08-1.65127.hdf5
Epoch 9/12

Epoch 00009: val_loss improved from 1.90137 to 1.88232, saving model to results/best_weights.09-1.50435.hdf5
Epoch 10/12

Epoch 00010: val_loss improved from 1.88232 to 1.77318, saving model to results/best_weights.10-1.38013.hdf5
Epoch 11/12

Epoch 00011: val_loss improved from 1.77318 to 1.73751, saving model to results/best_weights.11-1.28157.hdf5
Epoch 12/12

Epoch 00012: val_



ResourceExhaustedError: OOM when allocating tensor with shape[512,512,3,3] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node stage4_unit1_conv2_9/convolution}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[{{node loss_19/mul}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


### Avaliando modelo treinado

In [12]:
print("Best: %f using %s\n" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

NameError: name 'grid_result' is not defined