In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import datetime as dt
import json
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pandas as pd
import os

from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.applications import xception
from keras.layers import GlobalAveragePooling2D, Dense
from keras import Model
from keras.optimizers import SGD
from keras import losses
import keras

from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [2]:
# Project layout; every path is relative to the notebook's working directory.
DATA_FOLDER = os.path.join("..", "data")
# Alias for the original (misspelled) constant name, kept so that existing
# references to DATA_FLODER keep working.
DATA_FLODER = DATA_FOLDER
TRAIN_FOLDER = os.path.join(DATA_FOLDER, "train")
TEST_FOLDER = os.path.join(DATA_FOLDER, "test")
SUBMISSIONS_FOLDER = os.path.join(DATA_FOLDER, "submissions")
LABELS_FILE = os.path.join(DATA_FOLDER, "labels.csv")
IMG_EXTENSION = ".jpg"

MODELS_FOLDER = os.path.join("..", "models")
MODEL_FILE = os.path.join(MODELS_FOLDER, "model.h5")

# Seed value for reproducible runs.
RANDOM_SEED = 42


def full_file_path(x, y):
    """Join directory ``x`` and file name ``y`` into a single path."""
    return os.path.join(x, y)

In [54]:
# Side length, in pixels, of the square images fed to the network; it is
# passed as target_size=(IMG_SIZE, IMG_SIZE) wherever images are loaded.
IMG_SIZE = 299

# Labels Dataframe

In [43]:
# Load the labels table (one row per training image: id + breed).
df = pd.read_csv(LABELS_FILE)

# Per-breed statistics: relative frequency of each breed and number of classes.
breeds = df['breed']
breed_freqs = breeds.value_counts(normalize=True)
n_classes = breeds.unique().size

# Derived columns: image file name on disk and the frequency of the row's breed.
df = df.assign(
    filename=df['id'] + IMG_EXTENSION,
    breed_freq=[breed_freqs[breed_name] for breed_name in breeds],
)

df.head()

Unnamed: 0,id,breed,filename,breed_freq
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull,000bec180eb18c7604dcecc8fe0dba07.jpg,0.008511
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo,001513dfcb2ffafc82cccf4d8bbaba97.jpg,0.007826
2,001cdf01b096e06d78e9e5112d419397,pekinese,001cdf01b096e06d78e9e5112d419397.jpg,0.007337
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick,00214f311d5d2247d5dfe4fe24b2303d.jpg,0.008315
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever,0021f9ceb3235effd7fcde7f7538ed62.jpg,0.006554


# Data generator

Create an Image Data Generator with data augmentation, and point the generator to the image files using the labels DataFrame.

In [55]:
# Augmenting generator: random zoom, rotation and horizontal flips, followed by
# the Xception input preprocessing.
img_generator = ImageDataGenerator(
        zoom_range=0.2,
        rotation_range=30,
        # NOTE(review): no `subset` is requested below, so this split appears
        # to be unused and the generator covers every row - confirm.
        validation_split=0.1,
        preprocessing_function=xception.preprocess_input,
        horizontal_flip=True)

# Stream (image, one-hot breed) batches from the files listed in the labels
# DataFrame. The seed makes the shuffling order reproducible across runs.
all_generator = img_generator.flow_from_dataframe(
        dataframe=df,
        directory=TRAIN_FOLDER,
        x_col="filename",
        y_col="breed",
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=32,
        class_mode='categorical',
        seed=RANDOM_SEED)

# Persist the breed -> class index mapping next to the model. The output folder
# is created if missing and the file is closed (and flushed) deterministically.
os.makedirs(MODELS_FOLDER, exist_ok=True)
with open(os.path.join(MODELS_FOLDER, "classes.json"), "w") as classes_file:
    json.dump(all_generator.class_indices, classes_file)

Found 9200 validated image filenames belonging to 120 classes.
Found 1022 validated image filenames belonging to 120 classes.


# Model

Load the Xception model pretrained on ImageNet, without its top layer. <br>
Add Average Pooling and last dense layer with n_classes units and Softmax.

In [9]:
# Pretrained backbone: Xception with ImageNet weights and no classification top.
base_model = xception.Xception(include_top=False, weights="imagenet")

# New head: global average pooling followed by a softmax over the breeds.
pooling_layer = GlobalAveragePooling2D()
classifier_layer = Dense(units=n_classes, activation="softmax")

avg = pooling_layer(base_model.output)
output = classifier_layer(avg)

# Full network: backbone input -> breed probabilities.
model = Model(inputs=base_model.input, outputs=output)

# Training

Fine tune the Xception model on the dogs data from Kaggle

## Freeze

Freeze already trained layers

In [11]:
# Mark every layer of the pretrained backbone as non-trainable so that only
# the layers added on top of it are updated.
n_base_layers = len(base_model.layers)
print("Number of layers in base model {}".format(n_base_layers))

for base_layer in base_model.layers:
    base_layer.trainable = False

## Train Head

Train the top of the network

In [None]:
# Head-only phase: compile with SGD + momentum and train for a few epochs,
# tracking accuracy and top-k accuracy.
tracked_metrics = ['accuracy', 'top_k_categorical_accuracy']
optimizer = SGD(lr=0.2, momentum=0.9, decay=0.01)

model.compile(
    optimizer=optimizer,
    loss=losses.categorical_crossentropy,
    metrics=tracked_metrics,
)

history = model.fit_generator(all_generator, epochs=5)

# Checkpoint the model once the head has been trained.
model.save(MODEL_FILE)

Epoch 1/5

## Unfreeze

Unfreeze other layers and train a bit with low learning rate.

In [None]:
# Progressive unfreezing: at each step a larger tail (the last layers) of the
# backbone is made trainable, then the model is recompiled and trained for one
# epoch with a low learning rate.
unfreezing_steps = 2
n_base_layers = len(base_model.layers)
step_len = n_base_layers / unfreezing_steps

for unfreezing_step in range(unfreezing_steps):
    # Number of trailing layers to unfreeze at this step, clamped so it never
    # exceeds the number of layers in the backbone.
    n_unfrozen = min(n_base_layers, int(step_len * (unfreezing_step + 1)) + 1)
    print("Unfreezing {}/{} layers".format(n_unfrozen, n_base_layers))

    # `[-n:]` selects the LAST n layers. The previous `[:-n]` slice selected
    # everything except the last n, and was empty on the final step.
    for layer in base_model.layers[-n_unfrozen:]:
        layer.trainable = True

    # Recompile after changing `trainable` flags before training again.
    optimizer = SGD(lr=0.01, momentum=0.9, decay=0.001)
    model.compile(loss=losses.categorical_crossentropy, optimizer=optimizer, metrics=['accuracy', 'top_k_categorical_accuracy'])
    history = model.fit_generator(all_generator, epochs=1)

    # Checkpoint after every unfreezing step.
    model.save(MODEL_FILE)

# Predict

Make some predictions on training set

In [40]:
# Inverse mapping of the generator's class indices: class index -> breed name.
class_labels = {index: breed for breed, index in all_generator.class_indices.items()}

# Show predictions for a few randomly chosen training images. `df.sample`
# replaces the previous `random.randint` call, which relied on a module that
# the notebook never imports.
n_previews = 5
preview_rows = df.sample(n=min(n_previews, len(df)))

for _, img_df in preview_rows.iterrows():
    image_path = os.path.join(TRAIN_FOLDER, img_df.filename)

    img = image.load_img(image_path, target_size=(IMG_SIZE, IMG_SIZE))

    plt.imshow(img)

    # Convert the PIL image to a float array and add a leading batch axis
    # before applying the Xception preprocessing.
    batch = np.expand_dims(image.img_to_array(img), axis=0)
    result = model.predict(xception.preprocess_input(batch))
    label = class_labels[np.argmax(result[0])]
    plt.title(f"Predicted: {label} - True: {img_df.breed}")

    plt.show()

Predict test

In [None]:
# Build the submission: one row per test image with a probability per breed.
# NOTE: DATA_FLODER is the name under which the config cell defines the data
# directory.
sample_submission = pd.read_csv(os.path.join(DATA_FLODER, "sample_submission.csv"))

# Collect rows in a list and build the DataFrame once at the end; appending to
# a DataFrame inside the loop copies it on every iteration.
submission_rows = []

for index, row in sample_submission.iterrows():

    # Progress indicator: fraction of test images processed so far.
    if index % 500 == 0:
        print(index/len(sample_submission))

    image_path = os.path.join(TEST_FOLDER, f"{row['id']}.jpg")
    img = image.load_img(image_path, target_size=(IMG_SIZE, IMG_SIZE))

    # Float array with a leading batch axis, preprocessed for Xception.
    batch = np.expand_dims(image.img_to_array(img), axis=0)
    result = model.predict(xception.preprocess_input(batch))

    new_row = {'id': row['id']}
    new_row.update({class_labels[i]: r for i, r in enumerate(result[0])})
    submission_rows.append(new_row)

# Use the sample submission's column order for the output.
submission = pd.DataFrame(submission_rows, columns=sample_submission.columns)

# `index=False` keeps the row index out of the CSV (`ignore_index` is not a
# `to_csv` argument).
submission.to_csv(os.path.join(DATA_FLODER, "submission.csv"), index=False)

submission.head()