In [1]:
# For folder processing
from scipy.io import loadmat
import os
import tarfile
import urllib.request

# For Image Processing and Display
from PIL import Image
from matplotlib import pyplot as plt
%matplotlib inline 
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

# For Visualization
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected = True)
from jupyter_plotly_dash import JupyterDash
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

# For data processing
from collections import defaultdict
import numpy as np

# For Training Image Classification Model
import keras
import os.path
from keras.models import load_model
from keras.applications.resnet50 import preprocess_input, decode_predictions, ResNet50

from sklearn.preprocessing import LabelBinarizer
from keras.callbacks import ModelCheckpoint
from keras.utils import to_categorical
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
import numpy as np

Using TensorFlow backend.


In [2]:
def retrieve_dataset(url, target_folder):
    """
    Function:
        - Creates the target folder (and any missing parent folders) if it doesn't exist already
        - Downloads the dataset into that folder, unless the file is already there
        
    Arguments:
        - url: (str) URL of dataset; the text after the last "/" is used as the local file name
        - target_folder: (str) relative path of target folder
        
    Return:
        - None; the downloaded file is written to "<target_folder>/<file name>"
    """
    # Path to downloaded file
    target_file = os.path.join(target_folder, url.split("/")[-1])
    
    # If the dataset has already been downloaded, terminate function
    if os.path.exists(target_file):
        print("Data has already been downloaded at '{}'.".format(target_file))
        return
    
    # If target folder doesn't exist yet, create it. The full path is created
    # (not just its last component), so nested folders work as well.
    if not os.path.exists(target_folder):
        os.makedirs(target_folder, exist_ok=True)
        print("Created a new folder at '{}'.".format(target_folder))
        
    # Downloading the dataset. The data is first written to a ".part" file and
    # only renamed once complete, so an interrupted download is never mistaken
    # for a finished one by the "already downloaded" check above.
    print("Downloading dataset from '{}', please wait...".format(url))
    partial_file = target_file + ".part"
    try:
        urllib.request.urlretrieve(url, partial_file)
        os.replace(partial_file, target_file)
    finally:
        # Only still present when the download or the rename failed
        if os.path.exists(partial_file):
            os.remove(partial_file)
    print("File successfully downloaded to '{}'.".format(target_file))

In [3]:
# Remote location of the `cars_train.tgz` archive.
url_1 = "http://imagenet.stanford.edu/internal/car196/cars_train.tgz" 
# Relative path of the local data folder.
# NOTE(review): presumably meant for retrieve_dataset(url_1, target_folder), but no such
# call is visible in this section -- confirm the download step is run somewhere.
target_folder = "./data"

In [4]:
# Load the MATLAB metadata file; the path is relative, so the notebook has to be
# run from the folder that contains `devkit/`.
original_dict_metadata = loadmat('devkit/cars_meta.mat')
# original_dict_metadata = dictionary returned by loadmat (its "class_names" entry holds the class names)

In [5]:
# Map the 1-based class index to the class name found in the "class_names" entry.
dict_idx_carname = {
    class_idx: name_entry[0]
    for class_idx, name_entry in enumerate(original_dict_metadata["class_names"][0], start=1)
}

In [6]:
# Show the index -> class-name mapping as the cell output.
dict_idx_carname

{1: 'AM General Hummer SUV 2000',
 2: 'Acura RL Sedan 2012',
 3: 'Acura TL Sedan 2012',
 4: 'Acura TL Type-S 2008',
 5: 'Acura TSX Sedan 2012',
 6: 'Acura Integra Type R 2001',
 7: 'Acura ZDX Hatchback 2012',
 8: 'Aston Martin V8 Vantage Convertible 2012',
 9: 'Aston Martin V8 Vantage Coupe 2012',
 10: 'Aston Martin Virage Convertible 2012',
 11: 'Aston Martin Virage Coupe 2012',
 12: 'Audi RS 4 Convertible 2008',
 13: 'Audi A5 Coupe 2012',
 14: 'Audi TTS Coupe 2012',
 15: 'Audi R8 Coupe 2012',
 16: 'Audi V8 Sedan 1994',
 17: 'Audi 100 Sedan 1994',
 18: 'Audi 100 Wagon 1994',
 19: 'Audi TT Hatchback 2011',
 20: 'Audi S6 Sedan 2011',
 21: 'Audi S5 Convertible 2012',
 22: 'Audi S5 Coupe 2012',
 23: 'Audi S4 Sedan 2012',
 24: 'Audi S4 Sedan 2007',
 25: 'Audi TT RS Coupe 2012',
 26: 'BMW ActiveHybrid 5 Sedan 2012',
 27: 'BMW 1 Series Convertible 2012',
 28: 'BMW 1 Series Coupe 2012',
 29: 'BMW 3 Series Sedan 2012',
 30: 'BMW 3 Series Wagon 2012',
 31: 'BMW 6 Series Convertible 2007',
 32: 

In [7]:
# Count how many classes share the same first word of their class name.
# NOTE(review): the "brand" is only the text before the first space, so multi-word
# makes are truncated (e.g. 'Aston Martin ...' is counted under 'Aston').
dict_brand_count = defaultdict(int)
carname_list = dict_idx_carname.values()
for full_name in carname_list:
    dict_brand_count[full_name.partition(" ")[0]] += 1

In [8]:
# Load the MATLAB file holding the training annotations
original_dict_traindata = loadmat('devkit/cars_train_annos.mat')

# Build one record per annotated image, keyed by file name:
# {fname: {"bbox_xmin": ..., "bbox_xmax": ..., "bbox_ymin": ..., "bbox_ymax": ...,
#          "class_number": ..., "classname": ...}}
# Each annotation is indexed positionally: 0-3 are the bounding-box values,
# 4 is the class number and 5 is the file name.
master_dict_traindata = {
    image[5][0]: {
        "bbox_xmin": image[0][0][0],
        "bbox_xmax": image[1][0][0],
        "bbox_ymin": image[2][0][0],
        "bbox_ymax": image[3][0][0],
        "class_number": image[4][0][0],
        # Class numbers are the 1-based keys of dict_idx_carname
        "classname": dict_idx_carname[image[4][0][0]],
    }
    for image in original_dict_traindata["annotations"][0]
}

In [11]:
def load_images_to_dictionary(image_path = "./data", dictionary = master_dict_traindata):
    """
    Function: 
        - lists the files in the target folder
        - loads every file that has an entry in `dictionary` as a numpy array
        - stores that array under the "image" key of the file's entry (the dictionary is updated in place)
        - skips files without an entry (for example an archive stored in the same folder) instead of failing
    Arguments: 
        - image_path (str): the directory containing the images
        - dictionary (dict): maps image file name -> dict of annotations for that image
    Returns: 
        - dictionary (dict): the same object that was passed in, with an "image" array added for every loaded file
    """
    skipped_files = []

    for filename in os.listdir(image_path):
        entry = dictionary.get(filename)
        if entry is None:
            # Not an annotated image; nothing to attach the pixels to
            skipped_files.append(filename)
            continue

        # The context manager closes the file handle; np.array() forces the
        # pixel data to be read before that happens.
        with Image.open(os.path.join(image_path, filename)) as img:
            entry["image"] = np.array(img)

    if skipped_files:
        print(f"Skipped {len(skipped_files)} file(s) in '{image_path}' without an annotation entry.")
    return dictionary

In [None]:
# Load the images found in the default folder into the annotation dictionary
# (both arguments are left at the function's defaults).
master_dict_traindata = load_images_to_dictionary()

In [None]:
def show_image_in_dict(image_dict = master_dict_traindata, index = 0):
    """
    Function:
        - displays one image stored in the dictionary, then prints its file name and class name
    Arguments:
        - image_dict (dict): maps file name -> dict holding at least the "image" and "classname" keys
        - index (int): position of the wanted file name within the dictionary's keys
    Returns:
         none; the image is drawn with matplotlib and the file name and class are printed
    """
    # Keys are file names; pick the one at the requested position
    selected_name = list(image_dict.keys())[index]
    record = image_dict[selected_name]

    _, axes = plt.subplots(figsize=(9,5))
    axes.imshow(record["image"], interpolation='nearest')
    axes.set_title("Your original image after resizing:")
    plt.show()

    print(f"Image file name: {selected_name}")
    print(f"Image class: {record['classname']}")

In [None]:
# Browse the dataset with a slider over the file-name positions.
# `continuous_update` is an option of the slider widget, so it is set on the
# IntSlider itself: the image is redrawn when the handle is released rather than
# on every intermediate position.
_ = interact(
    show_image_in_dict,
    image_dict = fixed(master_dict_traindata),
    index = widgets.IntSlider(min=0, max=len(master_dict_traindata)-1, step=1, value=50, continuous_update=False),
)

In [None]:
# Tally how many entries of the training dictionary belong to each class name.
dict_classname_count = defaultdict(int)
for record in master_dict_traindata.values():
    dict_classname_count[record["classname"]] += 1

# One bar per class; the x axis is ordered by ascending count, ties broken by class name.
trace = go.Bar(
    x = list(dict_classname_count.keys()),
    y = list(dict_classname_count.values()),
)
data = [trace]
layout = go.Layout(
    title = "Count Per Class",
    xaxis = {
        'categoryorder': 'array',
        'categoryarray': sorted(
            dict_classname_count,
            key = lambda name: (dict_classname_count[name], name),
        ),
    },
)

figure = go.Figure(data = data, layout = layout)
iplot(figure)

In [None]:
def load_resnet50_model():
    """
    Function:
        - If the ResNet50 model isn't already saved, the function will download the model, save it to ./model/ResNet50.h5 and return it
        - If model is already saved, the function will simply return the model
    Return:
        - The original ResNet50 model
    """
    model_path = "./model/ResNet50.h5"

    if os.path.exists(model_path):
        print(f"Retrieving saved model from {model_path}...")
        model = load_model(model_path)
        print("Saved model retrieved!")
        return model

    print("Downloading model...")
    # The folder may already exist without the file (e.g. after an interrupted
    # earlier run), so an existing folder must not be treated as an error.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    model = ResNet50(weights='imagenet')
    model.save(model_path)
    print(f"Model retrieved and saved in {model_path}")
    return model

In [None]:
# Get the ResNet50 model: read from ./model/ResNet50.h5 when that file exists,
# otherwise downloaded and saved there first.
resnet50 = load_resnet50_model()

In [None]:
# Show the model's summary as the cell output.
resnet50.summary()

In [None]:
def test_model(model = resnet50, image_dict = master_dict_traindata, index = 0):
    """
    Function:
        - shows the image at position `index` of the dictionary
        - feeds it to `model` and plots the top 5 predicted classes with their probabilities
    Arguments:
        - model: model whose `predict` output can be decoded by `decode_predictions` (defaults to the loaded ResNet50)
        - image_dict (dict): maps file name -> dict holding an "image" array and, optionally, a ready-made "preprocessed_image" array
        - index (int): position of the wanted file name within the dictionary's keys
    Returns:
        none; the image and an interactive bar chart are displayed
    """
    # Grab a sample image from the dataset and show it
    print("==================================")
    print("Image:")
    show_image_in_dict(image_dict, index)
    print("==================================")
    
    # Get the model input for the sample image
    file_name = list(image_dict.keys())[index]
    entry = image_dict[file_name]
    sample_image = entry.get("preprocessed_image")
    if sample_image is None:
        # No cached input for this image: build it from the raw pixels.
        # ResNet50 with ImageNet weights expects 224x224 three-channel input,
        # while the stored images keep their original size and colour mode.
        target_size = (224, 224)
        rgb_image = Image.fromarray(entry["image"]).convert("RGB").resize(target_size)
        sample_image = preprocess_input(np.array(rgb_image, dtype="float32"))
    # The model works on batches, so add a leading batch axis
    sample_image = np.expand_dims(sample_image, axis=0)

    # Input it to the chosen model
    preds = model.predict(sample_image)
    
    # Get the top 5 prediction classes and their corresponding probabilities
    top_predictions = decode_predictions(preds, top=5)[0]
    prediction_classes = [item[1] for item in top_predictions]
    prediction_probabilities = [item[2]*100 for item in top_predictions]
    
    # Show these probabilities as interactive bar charts
    trace = go.Bar(x = prediction_classes, y = prediction_probabilities)
    data = [trace]
    layout = go.Layout(
        xaxis=dict(
            title='Predicted class',
            tickfont=dict(family='Old Standard TT, serif', size=15, color='black'),
        ),
        yaxis=dict(title='Probability (%)', hoverformat='.2f'),
        autosize=True,
        width=600,
        height=400,
    )
    figure = go.Figure(data = data, layout = layout)
    iplot(figure)
