## Load TensorBoard

In [1]:
# Load the TensorBoard IPython extension into this kernel.
%load_ext tensorboard

## Import Libraries

In [2]:
import os
import numpy as np
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import urllib
from sklearn import preprocessing

import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Input
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau

## Load Data

In [3]:
# Number of leading rows of labels.csv used for training; every remaining
# row becomes validation data.
N_TRAIN = 9199

data = pd.read_csv("./labels.csv")
# Append ".jpg" to each id so the `id` column holds file names that the
# image generators can resolve inside the image directory.
imgs = ["{}.jpg".format(x) for x in list(data.id)]
img_label = list(data['breed'])
ndata = pd.DataFrame({'id': imgs,'breed': img_label})
# Labels are forced to str for use as the categorical y_col further down.
ndata.breed = ndata.breed.astype(str)

# Positional slicing replaces np.split(ndata, [9199], axis=0): calling
# np.split on a DataFrame goes through the deprecated DataFrame.swapaxes,
# while .iloc yields the same two row ranges directly.
train_data = ndata.iloc[:N_TRAIN]
val_data = ndata.iloc[N_TRAIN:]
# Kept so any later cell that still indexes `spndata` continues to work.
spndata = [train_data, val_data]

In [38]:
# --- Training settings ---
BATCH_SIZE = 1024   # images per batch produced by each generator
EPOCHS = 10         # number of training epochs

# --- Image size (pixels) that every image is resized to ---
IMG_HEIGHT = IMG_WIDTH = 150

# --- Image directories ---
TRAIN_PATH = '../img/train/'
TEST_PATH = '../img/test/'

## Preprocess Image

In [5]:
# Both splits get the same preprocessing: pixel values are multiplied by
# 1/255. No other ImageDataGenerator option is configured.
pixel_scale = 1./255
train_image_generator = ImageDataGenerator(rescale=pixel_scale)
val_image_generator = ImageDataGenerator(rescale=pixel_scale)

In [37]:
# Pin the label order from the full label set. Without `classes`, each
# generator infers its own mapping from the rows it is given, so a breed
# missing from the validation split would silently shift the label indices
# between the two generators. A sorted list reproduces the order Keras
# infers on its own, so the resulting class_indices are unchanged.
all_breeds = sorted(ndata.breed.unique())

# Arguments shared by the training and validation generators.
flow_kwargs = dict(
    directory=TRAIN_PATH,
    x_col="id",
    y_col="breed",
    classes=all_breeds,
    class_mode="categorical",
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
)

train_data_gen = train_image_generator.flow_from_dataframe(
    dataframe=train_data,
    shuffle=True,
    **flow_kwargs
)
# Validation batches do not need shuffling; a fixed order keeps evaluation
# reproducible from run to run.
val_data_gen = val_image_generator.flow_from_dataframe(
    dataframe=val_data,
    shuffle=False,
    **flow_kwargs
)

Found 9199 validated image filenames belonging to 120 classes.
Found 1023 validated image filenames belonging to 120 classes.


## Define Model

In [7]:
# Define your model here
def get_base_model(input_shape=(150, 150, 3), num_classes=120):
    """Build and compile the baseline CNN classifier.

    Architecture: two Conv2D(3x3, relu) + MaxPooling2D(2) stages with 20 and
    40 filters, then Flatten, Dense(400, relu) and a softmax output layer.

    Args:
        input_shape: (height, width, channels) of the input images.
            Defaults to (150, 150, 3), the shape previously hard-coded here.
        num_classes: number of units in the softmax output layer.
            Defaults to 120, the value previously hard-coded here.

    Returns:
        A Model compiled with the Adam optimizer and categorical
        cross-entropy loss.
    """
    input1 = Input(shape=input_shape)
    # The Input layer already fixes the input shape, so Conv2D no longer
    # receives a redundant `input_shape` argument.
    x = Conv2D(20, kernel_size=3, activation='relu')(input1)
    x = MaxPooling2D(2)(x)
    x = Conv2D(40, kernel_size=3, activation='relu')(x)
    x = MaxPooling2D(2)(x)
    x = Flatten()(x)
    x = Dense(400, activation='relu')(x)
    out = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=input1, outputs=out)
    # The metric list is kept as-is so the logged metric names do not change.
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['categorical_crossentropy','categorical_accuracy','accuracy'])

    return model

In [8]:
# Instantiate the compiled baseline model returned by get_base_model().
base_model = get_base_model()

In [9]:
# Total params should not exceed 120M or the VM may crash.
base_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 150, 150, 3)]     0         
_________________________________________________________________
conv2d (Conv2D)              (None, 148, 148, 20)      560       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 74, 74, 20)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 72, 72, 40)        7240      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 40)        0         
_________________________________________________________________
flatten (Flatten)            (None, 51840)             0         
_________________________________________________________________
dense (Dense)                (None, 400)               20736400

## Train the model

In [11]:
# Path to save model parameters
weight_path_model_base ='model_base.h5'
# Path to write TensorBoard logs
tensorboard_path_model_base = 'Graphs/cnn_nn'

callbacks_list_model_base = [
    # `write_grads` is dropped: the TF2 TensorBoard callback ignores it with a
    # warning, and newer Keras releases reject the unknown keyword.
    TensorBoard(log_dir=tensorboard_path_model_base, histogram_freq=1, write_graph=True),
    # Keep only the weights of the epoch with the lowest validation loss.
    ModelCheckpoint(
            weight_path_model_base,
            save_best_only=True,
            save_weights_only=True,
            monitor='val_loss',
            mode='min',
            verbose=1
        ),
    # Multiply the learning rate by 0.2 after 2 epochs without val_loss
    # improvement, never going below 1e-5.
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=0.00001)
]

# Model.fit accepts generators directly; fit_generator is deprecated and
# has been removed from later TensorFlow/Keras releases.
history_base = base_model.fit(train_data_gen, epochs=EPOCHS, verbose=1, validation_data=val_data_gen,
                              callbacks=callbacks_list_model_base)

Epoch 1/10
Epoch 00001: val_loss improved from inf to 4.80280, saving model to model_base.h5
Epoch 2/10
Epoch 00002: val_loss improved from 4.80280 to 4.78545, saving model to model_base.h5
Epoch 3/10
Epoch 00003: val_loss improved from 4.78545 to 4.76427, saving model to model_base.h5
Epoch 4/10
Epoch 00004: val_loss improved from 4.76427 to 4.71281, saving model to model_base.h5
Epoch 5/10
Epoch 00005: val_loss improved from 4.71281 to 4.64529, saving model to model_base.h5
Epoch 6/10
Epoch 00006: val_loss improved from 4.64529 to 4.56248, saving model to model_base.h5
Epoch 7/10
Epoch 00007: val_loss did not improve from 4.56248
Epoch 8/10
Epoch 00008: val_loss did not improve from 4.56248
Epoch 9/10
Epoch 00009: val_loss did not improve from 4.56248
Epoch 10/10
Epoch 00010: val_loss did not improve from 4.56248


## Load class details

In [30]:
# Label -> integer index mapping assigned by the training generator.
classes = train_data_gen.class_indices

In [31]:
# Reverse lookup: integer index -> class label.
inverted_classes = {index: label for label, index in classes.items()}

In [32]:
# Class labels in the iteration order of the `classes` mapping.
class_list = list(classes.keys())

## Load test data

In [39]:
test_image_generator = ImageDataGenerator(rescale=1./255)
# shuffle=False is essential: flow_from_directory shuffles by default, which
# makes the batches (and therefore the predictions) come out in a random
# order while `test_data_gen.filenames` stays in directory order. The
# predictions are later paired with those filenames row by row, so any
# shuffling attaches each prediction to the wrong image id.
test_data_gen = test_image_generator.flow_from_directory(batch_size=BATCH_SIZE,
                                                           directory=TEST_PATH,
                                                           target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                           class_mode=None,
                                                           shuffle=False)

Found 10357 images belonging to 1 classes.


## Run on test data

In [41]:
# Model.predict accepts generators directly; predict_generator is deprecated
# and has been removed from later TensorFlow/Keras releases.
# NOTE(review): this uses the weights from the final epoch, not the best
# checkpoint written by ModelCheckpoint; confirm that is intended.
test_result = base_model.predict(test_data_gen, verbose=1)



## Prepare data for csv

In [72]:
# Derive each submission id from the image file name by dropping the
# directory part and the extension. os.path uses the platform's path
# separator and leaves a name without an extension intact, whereas the
# previous find('/') / rfind('.') slicing assumed '/' and cut the last
# character off any name that had no '.'.
filenames = [os.path.splitext(os.path.basename(i))[0] for i in test_data_gen.filenames]
# Column vector of ids, so it can be joined with the prediction matrix.
filenames_arr = np.array(filenames)[np.newaxis].T
# One row per test image: the id followed by the predicted class scores.
test_result_with_label = np.concatenate((filenames_arr, test_result), axis=1)
csv_header = ['id'] + class_list

## Export data to csv

In [73]:
# Be aware that an existing file with the same name will be overwritten.
result_dataframe = pd.DataFrame(test_result_with_label, columns=csv_header)
result_dataframe.to_csv('./result.csv', index=False)