In [1]:
# For folder processing
from scipy.io import loadmat
import os
import tarfile
import urllib.request

# For Image Processing and Display
from PIL import Image
from matplotlib import pyplot as plt
%matplotlib inline 
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

# For Visualization
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected = True)
from jupyter_plotly_dash import JupyterDash
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

# For data processing
from collections import defaultdict
import numpy as np
import pandas as pd

# For Training Image Classification Model
import keras
import os.path
from keras.models import load_model
from keras.applications.resnet50 import preprocess_input, decode_predictions, ResNet50
import shutil


from sklearn.preprocessing import LabelBinarizer
from keras.callbacks import ModelCheckpoint
from keras.utils import to_categorical
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
import numpy as np

Using TensorFlow backend.


In [2]:
def retrieve_dataset(url, target_folder):
    """
    Function:
        - Creates a subdirectory "./Data" in the current working directory if it doesn't exist already
        - Downloads the dataset into the newly created folder
        
    Arguments:
        - url: (str) URL of dataset
        - target_folder: (str) relative path of target folder
        
    Return:
        - A "./Data" folder containing the zipped dataset
    """
    # Path to downloaded file
    target_file = target_folder + "/" + url.split("/")[-1]
    
    # If the dataset has already been downloaded, terminate function
    if os.path.exists(target_file):
        print("Data has already been downloaded at '{}'.".format(target_file))
        return
    
    # If target folder doesn't exist yet, create it
    if not os.path.exists(target_folder):
        os.mkdir(target_folder.split("/")[-1])
        print("Created a new folder at '{}'.".format(target_folder))
        
    # Downloading the dataset
    print("Downloading dataset from '{}', please wait...".format(url))
    urllib.request.urlretrieve(url, target_file)  
    print("File successfully downloaded to '{}''.".format(path))

In [3]:
url_1 = "http://imagenet.stanford.edu/internal/car196/cars_train.tgz" 
target_folder = "./data"

# Preprocessing

In [4]:
# Load matlab file conntaining that maps the label and full name of the car
original_dict_metadata = loadmat('devkit/cars_meta.mat')
#dict_meta = dictionary containing metadata

In [5]:
dict_idx_carname = {int(index+1):classname[0] for index, classname in enumerate(original_dict_metadata["class_names"][0])}

In [6]:
# Load matlab file
original_dict_traindata = loadmat('devkit/cars_train_annos.mat')

master_dict_traindata = {}
# Annotations = {"fname": {"bbox_xmin": 12, "bbox_xmax": 13, "bbox_y1": , "bbox_y2"}}

master_dict_traindata = {image[5][0]:{"bbox_xmin": image[0][0][0], "bbox_xmin": image[0][0][0], "bbox_xmax": image[1][0][0],"bbox_ymin": image[2][0][0],"bbox_ymax": image[3][0][0],"classnumber": image[4][0][0], "classname": dict_idx_carname[image[4][0][0]]} for image in original_dict_traindata["annotations"][0]}

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
df = pd.DataFrame.from_dict(data = master_dict_traindata, orient = "index")

In [9]:
df_train, df_valid = train_test_split(df, train_size = 0.9, random_state = 88, stratify = df.classnumber)

In [10]:

def create_subfolders():
    
    # Creation of train and valid subfolder
    folders = ["train", "valid"]
    for folder in folders:
        if not os.path.exists(path +"/"+ folder):
            os.makedirs(path +"/"+ folder)
    
    # Creation of car subfolders
    for folder in folders:
        for carname in dict_idx_carname.values():
            if not os.path.exists(path + "/"+ folder + "/"+ carname):
                os.makedirs(path +"/"+ folder + "/" + carname)


In [11]:
# sorting image to its own folder
def sort_images_to_subfolders():
    for folder in folders:
        if folder == "train":
            df_focus = df_train
        else:
            df_focus = df_valid
        for image_filename in df_focus.index:
            carname = df_focus.loc[image_filename, "classname"]

            source_path_complete = "./data" + "/" + image_filename
            destination_path_complete = path + "/" + folder + "/" + carname + "/" + image_filename
            shutil.move(source_path_complete, destination_path_complete)


# Model

In [12]:
def load_untrained_transfer_model():
    """
    If model isn't saved - download model, save it to current directory and return it
    If model already exists, simply return the model
    """
    model_path = "./model/Untrained_transfer.h5"
    if os.path.exists(model_path):
        print(f"Retrieving saved model from {model_path}")
        untrained_transfer_model = load_model(model_path)
        print("Saved model retrieved!")
        return untrained_transfer_model
    else:
        ## Build model
        print("Downloading and building model...")
        untrained_transfer_model = Sequential()
        untrained_transfer_model.add(ResNet50(include_top = False, pooling = "avg", weights = "imagenet"))
        untrained_transfer_model.add(Dense(512, activation = "relu"))
        untrained_transfer_model.add(Dropout(0.5))
        untrained_transfer_model.add(Dense(512, activation = "relu"))
        untrained_transfer_model.add(Dropout(0.5))
        untrained_transfer_model.add(Dense(196, activation = "softmax"))
        
        untrained_transfer_model.layers[0].trainable = False
        
        ## Save model
        print(f"Model built and saved in {model_path}")
        untrained_transfer_model.save(model_path)
        return untrained_transfer_model

In [13]:
transfer_model = load_untrained_transfer_model()

Retrieving saved model from ./model/Untrained_transfer.h5
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Saved model retrieved!



No training configuration found in save file: the model was *not* compiled. Compile it manually.



In [15]:
transfer_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 2048)              23587712  
_________________________________________________________________
dense_1 (Dense)              (None, 512)               1049088   
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 196)               100548    
Total params: 25,000,004
Trainable params: 24,946,884
Non-trainable params: 53,120
___________________________________________________________

In [33]:
df_train.shape[0]

7329

In [51]:
transfer_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [52]:
def preprocess_image(np_image):
    resized_np_image = np.resize(np_image,(224,224,3))
    preprocessed_np_image = preprocess_input(resized_np_image)
    return preprocessed_np_image

In [53]:
#df_train["preprocessed_np_image"] = df_train["np_image"].apply(lambda x: preprocess_image(x))

In [54]:
from keras.preprocessing.image import ImageDataGenerator

In [55]:

train_data = './data2/train'
valid_data = './data2/valid'
n_train_samples = df_train.shape[0]
n_valid_samples = df_valid.shape[0]
verbose = 1
batch_size = 16
n_epochs = 5
patience = 3

In [56]:
train_ImageDataGenerator = ImageDataGenerator(rotation_range=20.,
                                    width_shift_range=0.1,
                                    height_shift_range=0.1,
                                    zoom_range=0.2,
                                    horizontal_flip=True,
                                    preprocessing_function = preprocess_image)


valid_ImageDataGenerator = ImageDataGenerator(preprocessing_function = preprocess_image)




In [57]:
# generators
train_generator = train_ImageDataGenerator.flow_from_directory(train_data, (224, 224), batch_size=batch_size,
                                                     class_mode='categorical')
valid_generator = valid_ImageDataGenerator.flow_from_directory(valid_data, (224, 224), batch_size=batch_size,
                                                     class_mode='categorical')


Found 7329 images belonging to 196 classes.
Found 815 images belonging to 196 classes.


In [58]:
from keras.callbacks import CSVLogger, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TensorBoard

In [59]:
# define callbacks
tensor_board = TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=True)

log_file_path = 'logs/training.log'
csv_logger = CSVLogger(log_file_path, append=False)

early_stop = EarlyStopping('val_acc', patience=patience)

reduce_lr = ReduceLROnPlateau('val_acc', factor=0.1, patience=int(patience / 4), verbose=1)

trained_models_path = 'model/model'
model_names = trained_models_path + '.{epoch:02d}-{val_acc:.2f}.hdf5'
model_checkpoint = ModelCheckpoint(model_names, monitor='val_acc', verbose=1, save_best_only=True)


callbacks = [tensor_board, model_checkpoint, csv_logger, early_stop, reduce_lr]



In [61]:
# fine tune the model
transfer_model.fit_generator(
    generator = train_generator,
    steps_per_epoch = n_train_samples / batch_size,
    validation_data = valid_generator,
    validation_steps = n_valid_samples / batch_size,
    epochs = 6,
    callbacks = callbacks,
    verbose = 1)

Epoch 1/6

Epoch 00001: val_acc improved from -inf to 0.01227, saving model to model/model.01-0.01.hdf5
Epoch 2/6

Epoch 00002: val_acc did not improve from 0.01227

Epoch 00002: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 3/6

Epoch 00003: val_acc improved from 0.01227 to 0.02454, saving model to model/model.03-0.02.hdf5
Epoch 4/6

Epoch 00004: val_acc did not improve from 0.02454

Epoch 00004: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 5/6

Epoch 00005: val_acc did not improve from 0.02454

Epoch 00005: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 6/6

Epoch 00006: val_acc did not improve from 0.02454

Epoch 00006: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.


<keras.callbacks.History at 0x7f917ab59668>