In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

from keras.preprocessing.image import ImageDataGenerator
from keras.applications.xception import Xception
from keras.applications.inception_v3 import InceptionV3

from keras.applications.xception import preprocess_input as xception_preprocessor
from keras.applications.inception_v3 import preprocess_input as inception_v3_preprocessor

from sklearn.metrics import log_loss, accuracy_score
from keras.layers import Input, Dense, GlobalAveragePooling2D, Dropout
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras import regularizers

from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Load training data

In [2]:
LABELS = "/Users/junyan/Downloads/Homework/DeepLearning/Project/data/labels.csv"

# Read the label table; the code below relies on its 'breed' column.
train_df = pd.read_csv(LABELS)

# Names of the (at most) 120 most frequent breeds, in sorted order.
breed_counts = train_df['breed'].value_counts()
top_breeds = sorted(breed_counts.head(120).index)

# Keep only the rows whose breed is one of those names.
is_top_breed = train_df['breed'].isin(top_breeds)
train_df = train_df[is_top_breed]


In [3]:
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from sklearn.model_selection import train_test_split

SEED = 1234

TRAIN_FOLDER = "/Users/junyan/Downloads/Homework/DeepLearning/Project/data/train/"
TEST_FOLDER = "/Users/junyan/Downloads/Homework/DeepLearning/Project/data/test/"

# Side length (pixels) every image is resized to.
DIM = 299

# Build the image path of every row with vectorised string concatenation
# rather than a row-by-row apply().
train_df['image_path'] = TRAIN_FOLDER + train_df['id'] + ".jpg"

# Load every training image resized to DIM x DIM. The array is created
# directly as float32: a trailing .astype('float32') would allocate a second
# full-size copy of the whole image set.
train_data = np.array(
    [img_to_array(load_img(path, target_size=(DIM, DIM)))
     for path in train_df['image_path'].values.tolist()],
    dtype='float32')
train_labels = train_df['breed']

# Hold out 20% for validation, stratified by breed; SEED makes the split
# reproducible.
x_train, x_validation, y_train, y_validation = train_test_split(
    train_data, train_labels,
    test_size=0.2,
    stratify=np.array(train_labels),
    random_state=SEED)


In [4]:
# Sanity check: display the shape of the training image array produced by the split.
x_train.shape

(8177, 299, 299, 3)

One-hot encode the labels

In [5]:
def one_hot_encode(labels, classes):
    """Return a (len(labels), len(classes)) uint8 indicator matrix.

    Column j corresponds to classes[j]. Declaring the categories explicitly
    means every call yields the same columns in the same order, even if some
    class does not occur in `labels`; encoding each split on its own would
    otherwise derive the columns from whatever values that split contains.
    """
    categorical = pd.Categorical(labels, categories=classes)
    # .values instead of .as_matrix(): as_matrix() was removed in pandas 1.0.
    # The explicit cast keeps integer 0/1 output on pandas versions whose
    # get_dummies returns booleans.
    return pd.get_dummies(categorical).values.astype('uint8')


# Both splits are encoded against the same class list (top_breeds).
y_train = one_hot_encode(y_train.reset_index(drop=True), top_breeds)
y_validation = one_hot_encode(y_validation.reset_index(drop=True), top_breeds)

print(y_train[0])

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 0 0]


Load the pretrained models' weights

In [6]:
from os import makedirs
from os.path import expanduser, exists, join

!ls /Users/junyan/Downloads/Homework/DeepLearning/Project/data/pretrained-model/

cache_dir = expanduser(join('~', '.keras'))
if not exists(cache_dir):
    makedirs(cache_dir)
models_dir = join(cache_dir, 'models')
if not exists(models_dir):
    makedirs(models_dir)
    
!cp /Users/junyan/Downloads/Homework/DeepLearning/Project/data/pretrained-model/*notop* ~/.keras/models/
!cp /Users/junyan/Downloads/Homework/DeepLearning/Project/data/pretrained-model/imagenet_class_index.json ~/.keras/models/
!cp /Users/junyan/Downloads/Homework/DeepLearning/Project/data/pretrained-model/resnet50* ~/.keras/models/

Kuszma.JPG.zip
imagenet_class_index.json
inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5.zip
inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5.zip
inception_v3_weights_tf_dim_ordering_tf_kernels.h5.zip
inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5.zip
resnet50_weights_tf_dim_ordering_tf_kernels.h5.zip
resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5.zip
vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5.zip
xception_weights_tf_dim_ordering_tf_kernels.h5.zip
xception_weights_tf_dim_ordering_tf_kernels_notop.h5.zip


# Build models and train

In [51]:
num_classes = len(top_breeds)

# One entry per pretrained backbone used as a fixed feature extractor.
# Every field of an entry is consumed by the loop below.
models = {
    "InceptionV3": {
        "model": InceptionV3,
        "preprocessor": inception_v3_preprocessor,
        "input_shape": (299,299,3),
        "seed": 1234,
        "pooling": "avg"
    },
    # Disabled; uncomment to also train a classifier on Xception features.
    #"Xception": {
    #    "model": Xception,
    #    "preprocessor": xception_preprocessor,
    #    "input_shape": (299,299,3),
    #    "seed": 5512,
    #    "pooling": "avg"
    #},
}

for model_name, spec in models.items():
    print("Predicting : {}".format(model_name))
    filename = model_name + '_features.npy'
    validfilename = model_name + '_validfeatures.npy'

    # Reuse cached bottleneck features only when BOTH files are present;
    # otherwise recompute the pair so they always belong together.
    if exists(filename) and exists(validfilename):
        features = np.load(filename)
        validation_features = np.load(validfilename)
    else:
        # Image preprocessing: random zoom/shift for the training images,
        # none for validation; both go through the backbone's own
        # preprocessing function.
        preprocessor = spec["preprocessor"]
        train_datagen = ImageDataGenerator(
                zoom_range = 0.3,
                width_shift_range=0.1,
                height_shift_range=0.1,
                preprocessing_function=preprocessor)
        validation_datagen = ImageDataGenerator(preprocessing_function=preprocessor)

        # Use the pretrained model (taken from the entry, not hard-coded)
        # to generate bottleneck features.
        feature_extractor = spec["model"](weights='imagenet',
                                          include_top=False,
                                          input_shape=spec["input_shape"],
                                          pooling=spec["pooling"])
        if model_name == "InceptionV3":
            # The test-feature cell refers to the extractor by this name.
            # It is only bound on a cache miss.
            Inception_model = feature_extractor

        # shuffle=False keeps the feature rows aligned with y_train / y_validation.
        train_generator = train_datagen.flow(x_train, y_train, shuffle=False,
                                             batch_size=32, seed=spec["seed"])
        features = feature_extractor.predict_generator(train_generator, verbose=1)

        validation_generator = validation_datagen.flow(x_validation, y_validation, shuffle=False,
                                                       batch_size=32, seed=spec["seed"])
        validation_features = feature_extractor.predict_generator(validation_generator, verbose=1)

        np.save(filename, features)
        np.save(validfilename, validation_features)

    print(features.shape)

    # Build top layers: a single softmax layer on the bottleneck features.
    # Sizes come from the data instead of the literals 2048 / 120.
    inputs = Input(shape=(features.shape[1],))
    # Optional hidden block, currently disabled:
    #x = Dense(1024, activation='relu')(inputs)
    #x = BatchNormalization()(x)
    #x = Dropout(0.5)(x)
    predictions = Dense(num_classes, activation='softmax')(inputs)

    model_top = Model(inputs = inputs, outputs = predictions)
    model_top.summary()

    model_top.compile(optimizer = Adam(0.0001),
                      loss = categorical_crossentropy,
                      metrics = ['accuracy'])

    # Train the top model:
    model_top.fit(features, y_train,
                batch_size=256, epochs=130, verbose=1,
                validation_data=(validation_features, y_validation))
    


Predicting : InceptionV3
(8177, 2048)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_23 (InputLayer)        (None, 2048)              0         
_________________________________________________________________
dense_28 (Dense)             (None, 120)               245880    
Total params: 245,880
Trainable params: 245,880
Non-trainable params: 0
_________________________________________________________________
Train on 8177 samples, validate on 2045 samples
Epoch 1/130
Epoch 2/130
Epoch 3/130
Epoch 4/130
Epoch 5/130
Epoch 6/130
Epoch 7/130
Epoch 8/130
Epoch 9/130
Epoch 10/130
Epoch 11/130
Epoch 12/130
Epoch 13/130
Epoch 14/130
Epoch 15/130
Epoch 16/130
Epoch 17/130
Epoch 18/130
Epoch 19/130
Epoch 20/130
Epoch 21/130
Epoch 22/130
Epoch 23/130
Epoch 24/130
Epoch 25/130
Epoch 26/130
Epoch 27/130
Epoch 28/130
Epoch 29/130
Epoch 30/130
Epoch 31/130
Epoch 32/130
Epoch 33/130
Epoch 34/130
Epoch 35/130


Epoch 56/130
Epoch 57/130
Epoch 58/130
Epoch 59/130
Epoch 60/130
Epoch 61/130
Epoch 62/130
Epoch 63/130
Epoch 64/130
Epoch 65/130
Epoch 66/130
Epoch 67/130
Epoch 68/130
Epoch 69/130
Epoch 70/130
Epoch 71/130
Epoch 72/130
Epoch 73/130
Epoch 74/130
Epoch 75/130
Epoch 76/130
Epoch 77/130
Epoch 78/130
Epoch 79/130
Epoch 80/130
Epoch 81/130
Epoch 82/130
Epoch 83/130
Epoch 84/130
Epoch 85/130
Epoch 86/130
Epoch 87/130
Epoch 88/130
Epoch 89/130
Epoch 90/130
Epoch 91/130
Epoch 92/130
Epoch 93/130
Epoch 94/130
Epoch 95/130
Epoch 96/130
Epoch 97/130
Epoch 98/130
Epoch 99/130
Epoch 100/130
Epoch 101/130
Epoch 102/130
Epoch 103/130
Epoch 104/130
Epoch 105/130
Epoch 106/130
Epoch 107/130
Epoch 108/130
Epoch 109/130
Epoch 110/130
Epoch 111/130
Epoch 112/130
Epoch 113/130
Epoch 114/130
Epoch 115/130


Epoch 116/130
Epoch 117/130
Epoch 118/130
Epoch 119/130
Epoch 120/130
Epoch 121/130
Epoch 122/130
Epoch 123/130
Epoch 124/130
Epoch 125/130
Epoch 126/130
Epoch 127/130
Epoch 128/130
Epoch 129/130
Epoch 130/130


# Load testing data and predict

In [15]:
#loading test images
from tqdm import tqdm
import cv2
import os
from keras.preprocessing import image

# The CSV path gets its own name so that TEST_FOLDER is not overwritten with
# a file path that is not a folder.
SAMPLE_SUBMISSION = '/Users/junyan/Downloads/Homework/DeepLearning/Project/data/sample_submission.csv'
TEST_IMAGE_DIR = '/Users/junyan/Downloads/Homework/DeepLearning/Project/data/test/'
test_data = pd.read_csv(SAMPLE_SUBMISSION)

x_test = []
for i in tqdm(test_data['id'].values):
    path = os.path.join(TEST_IMAGE_DIR, '{}.jpg'.format(i))
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals a missing or unreadable file by returning None.
        raise IOError("Could not read test image: {}".format(path))
    # cv2.imread yields BGR channel order; the training images were loaded
    # with load_img, so convert to RGB for the network to see the same
    # channel layout. NOTE: a cached 'InceptionV3_testfeatures.npy' computed
    # from BGR input must be deleted for this fix to take effect.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    x_test.append(cv2.resize(img, (299, 299)))

x_test = np.array(x_test, np.float32)
print(x_test.shape)

100%|██████████| 10357/10357 [00:41<00:00, 251.84it/s]


(10357, 299, 299, 3)


In [52]:
# Bottleneck features of the test images, cached on disk between runs.
testfilename = 'InceptionV3_testfeatures.npy'
if exists(testfilename):
    test_features = np.load(testfilename)
else:
    # The training cell only creates Inception_model when its own feature
    # cache is missing, so the name may not exist here; build it on demand.
    if 'Inception_model' not in globals():
        Inception_model = InceptionV3(weights='imagenet', include_top=False,
                                      input_shape=(299, 299, 3), pooling="avg")
    test_datagen = ImageDataGenerator(preprocessing_function=inception_v3_preprocessor)
    # shuffle=False keeps the feature rows in the same order as x_test.
    test_generator = test_datagen.flow(x_test, shuffle=False, batch_size=32, seed = 1234)
    test_features = Inception_model.predict_generator(test_generator, verbose=1)
    # Save under the same name that the cache check above looks for.
    np.save(testfilename, test_features)

In [53]:
# Class probabilities for every test image, from the trained top model.
predictions = model_top.predict(test_features)
# Function-call form of print works on both Python 2 and Python 3.
print(predictions.shape)

(10357, 120)


In [54]:
#generating Kaggle upload file
# Re-read the label file; the indicator frame built from its 'breed' column
# supplies the column names for the submission.
df_train = pd.read_csv(
    '/Users/junyan/Downloads/Homework/DeepLearning/Project/data/labels.csv'
)
targets_series = df_train.loc[:, 'breed']
one_hot = pd.get_dummies(data=targets_series, sparse=True)
one_hot_labels = np.asarray(one_hot)

In [55]:
#generating Kaggle upload file
test_DF = pd.read_csv('/Users/junyan/Downloads/Homework/DeepLearning/Project/data/sample_submission.csv')

# One column per breed (named after the indicator columns), one row per test image.
sub = pd.DataFrame(predictions)
col_names = one_hot.columns.values
sub.columns = col_names
# Put the image ids in front as the first column.
sub.insert(0, 'id', test_DF['id'])
print(sub.shape)
sub.to_csv('Inception_sub2.csv', sep=',',index=False)

# Preview as the cell's final expression: in the middle of the cell its value
# was discarded and nothing was displayed.
sub.head(5)

(10357, 121)
