In [2]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

from keras.preprocessing.image import ImageDataGenerator
from keras.applications.xception import Xception
from keras.applications.inception_v3 import InceptionV3

from keras.applications.xception import preprocess_input as xception_preprocessor
from keras.applications.inception_v3 import preprocess_input as inception_v3_preprocessor

from sklearn.metrics import log_loss, accuracy_score
from keras.layers import Input, Dense, GlobalAveragePooling2D, Dropout
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras import regularizers

from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Load training data

In [98]:
# Path of the CSV that holds the training labels (a 'breed' column is read below).
LABELS = "/Users/junyan/Downloads/Homework/DeepLearning/Project/data/labels.csv"

train_df = pd.read_csv(LABELS)

# Take the 120 most frequent breeds, store their names in alphabetical order,
# and drop every row whose breed is not among them.
breed_counts = train_df['breed'].value_counts()
top_breeds = sorted(breed_counts.head(120).index)
keep_mask = train_df['breed'].isin(top_breeds)
train_df = train_df[keep_mask]


In [4]:
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from sklearn.model_selection import train_test_split

# Random state handed to train_test_split so the split is reproducible.
SEED = 1234

TRAIN_FOLDER = "/Users/junyan/Downloads/Homework/DeepLearning/Project/data/train/"
TEST_FOLDER = "/Users/junyan/Downloads/Homework/DeepLearning/Project/data/test/"

# Images are resized to DIM x DIM pixels when loaded.
DIM = 299

# assign() builds a new frame instead of writing a column into the filtered
# slice produced earlier (which triggers SettingWithCopyWarning), and the
# vectorised string concatenation replaces the row-wise apply().
train_df = train_df.assign(image_path=TRAIN_FOLDER + train_df['id'] + ".jpg")

# Fill a pre-allocated float32 array one image at a time. Building a Python
# list of arrays, stacking it with np.array() and then calling astype() keeps
# up to three full copies of the dataset in memory at once.
# NOTE(review): assumes load_img yields 3-channel images in channels-last
# layout, as the (N, 299, 299, 3) shape printed in the next cell suggests.
image_paths = train_df['image_path'].values.tolist()
train_data = np.empty((len(image_paths), DIM, DIM, 3), dtype='float32')
for position, img in enumerate(image_paths):
    train_data[position] = img_to_array(load_img(img, target_size=(DIM, DIM)))
train_labels = train_df['breed']


# Hold out 20% of the images for validation, stratified by breed.
x_train, x_validation, y_train, y_validation = train_test_split(
    train_data,
    train_labels,
    test_size=0.2,
    stratify=np.array(train_labels),
    random_state=SEED)


In [5]:
# Display the shape of the training split as a quick sanity check.
x_train.shape

(8177, 299, 299, 3)

One-hot encode the labels

In [6]:
# One-hot encode the breed labels of both splits.
#
# * reindex(columns=top_breeds) pins both matrices to the same column layout
#   (one column per entry of top_breeds, in that order) even if a breed were
#   missing from one split; get_dummies ignores its `columns=` argument when
#   it is given a Series, so it could not enforce this.
# * .values replaces DataFrame.as_matrix(), which is deprecated and was
#   removed in pandas 1.0.
#
# NOTE: this cell overwrites y_train / y_validation, so it can only be run
# once per kernel session after the train/validation split cell.
y_train = (pd.get_dummies(y_train.reset_index(drop=True))
             .reindex(columns=top_breeds, fill_value=0)
             .values)
y_validation = (pd.get_dummies(y_validation.reset_index(drop=True))
                  .reindex(columns=top_breeds, fill_value=0)
                  .values)

print(y_train[0])

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 0 0]


Load the pretrained models' weights

In [8]:
from os import makedirs
from os.path import expanduser, exists, join

!ls /Users/junyan/Downloads/Homework/DeepLearning/Project/data/pretrained-model/

cache_dir = expanduser(join('~', '.keras'))
if not exists(cache_dir):
    makedirs(cache_dir)
models_dir = join(cache_dir, 'models')
if not exists(models_dir):
    makedirs(models_dir)
    
!cp /Users/junyan/Downloads/Homework/DeepLearning/Project/data/pretrained-model/*notop* ~/.keras/models/
!cp /Users/junyan/Downloads/Homework/DeepLearning/Project/data/pretrained-model/imagenet_class_index.json ~/.keras/models/
!cp /Users/junyan/Downloads/Homework/DeepLearning/Project/data/pretrained-model/resnet50* ~/.keras/models/

Kuszma.JPG.zip
imagenet_class_index.json
inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5.zip
inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5.zip
inception_v3_weights_tf_dim_ordering_tf_kernels.h5.zip
inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5.zip
resnet50_weights_tf_dim_ordering_tf_kernels.h5.zip
resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5.zip
vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5.zip
xception_weights_tf_dim_ordering_tf_kernels.h5.zip
xception_weights_tf_dim_ordering_tf_kernels_notop.h5.zip


# Build models and train

In [106]:
# Batch size used when pushing images through the pretrained network.
# NOTE(review): no visible cell of this notebook assigns `batch_size`, so a
# fresh kernel raised NameError here; 32 is an assumed value - adjust to the
# available memory.
batch_size = 32

# Pretrained feature extractors to run. Each entry supplies the model
# constructor, its matching preprocessing function, the expected input shape,
# the seed passed to the data generators and the pooling mode.
models = {
    #"InceptionV3": {
    #    "model": InceptionV3,
    #    "preprocessor": inception_v3_preprocessor,
    #    "input_shape": (299,299,3),
    #    "seed": 1234,
    #    "pooling": "avg"
    #},
    "Xception": {
        "model": Xception,
        "preprocessor": xception_preprocessor,
        "input_shape": (299,299,3),
        "seed": 5512,
        "pooling": "avg"
    }
}

for model_name, config in models.items():
    print("Predicting : {}".format(model_name))
    filename = model_name + '_features.npy'
    validfilename = model_name + '_validfeatures.npy'
    # Both cache files are loaded below, so both must be present.
    if exists(filename) and exists(validfilename):
        features = np.load(filename)
        validation_features = np.load(validfilename)
    else:
        # Image preprocessing: random zoom/shift for the training images only;
        # both generators apply the preprocessing function of this model.
        train_datagen = ImageDataGenerator(
                zoom_range = 0.3,
                width_shift_range=0.1,
                height_shift_range=0.1,
                preprocessing_function=config["preprocessor"])
        validation_datagen = ImageDataGenerator(
                preprocessing_function=config["preprocessor"])

        # Use the pretrained model (taken from the config entry instead of a
        # hardcoded Xception) to generate bottleneck features.
        base_model = config["model"](weights='imagenet',
                                     include_top=False,
                                     input_shape=config["input_shape"],
                                     pooling=config["pooling"])
        if model_name == "Xception":
            # The test-feature cell refers to the extractor by this name.
            Xception_model = base_model

        # shuffle=False keeps the feature rows aligned with y_train / y_validation.
        train_generator = train_datagen.flow(x_train, y_train, shuffle=False,
                                             batch_size=batch_size, seed=config["seed"])
        features = base_model.predict_generator(train_generator, verbose=1)

        validation_generator = validation_datagen.flow(x_validation, y_validation, shuffle=False,
                                                       batch_size=batch_size, seed=config["seed"])
        validation_features = base_model.predict_generator(validation_generator, verbose=1)

        np.save(filename, features)
        np.save(validfilename, validation_features)

    # Cached features from an earlier run may not match the current split.
    assert features.shape[0] == len(y_train), \
        "cached training features do not match y_train; delete {} and re-run".format(filename)
    assert validation_features.shape[0] == len(y_validation), \
        "cached validation features do not match y_validation; delete {} and re-run".format(validfilename)

    # Build top layers: a single softmax layer on the bottleneck features,
    # sized from the data rather than from hardcoded 2048 / 120.
    inputs = Input(shape=(features.shape[1],))
    #x = Dense(256, activation='relu')(inputs)
    #x = BatchNormalization()(x)
    #x = Dropout(0.5)(x)
    predictions = Dense(y_train.shape[1], activation='softmax')(inputs)

    model_top = Model(inputs = inputs, outputs = predictions)
    model_top.summary()

    model_top.compile(optimizer = Adam(0.0001),
                      loss = categorical_crossentropy,
                      metrics = ['accuracy'])

    # Train the top model:
    model_top.fit(features, y_train,
                batch_size=256, epochs=150, verbose=1,
                validation_data=(validation_features, y_validation))
    


Predicting : Xception
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_29 (InputLayer)        (None, 2048)              0         
_________________________________________________________________
dense_56 (Dense)             (None, 120)               245880    
Total params: 245,880
Trainable params: 245,880
Non-trainable params: 0
_________________________________________________________________
Train on 8177 samples, validate on 2045 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epo

Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 80/150
Epoch 81/150
Epoch 82/150
Epoch 83/150
Epoch 84/150
Epoch 85/150
Epoch 86/150
Epoch 87/150
Epoch 88/150
Epoch 89/150
Epoch 90/150
Epoch 91/150
Epoch 92/150
Epoch 93/150
Epoch 94/150
Epoch 95/150
Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150
Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150
Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150


Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150


# Load testing data and predict

In [16]:
#loading test images
from tqdm import tqdm
import cv2
import os
from keras.preprocessing import image

# The sample submission file lists the ids of the test images. It gets its
# own name here so that TEST_FOLDER (the test image directory defined in the
# training-data cell) is no longer overwritten with a CSV path.
SAMPLE_SUBMISSION = '/Users/junyan/Downloads/Homework/DeepLearning/Project/data/sample_submission.csv'
test_data = pd.read_csv(SAMPLE_SUBMISSION)
test_ids = test_data['id'].values

# Load the test images exactly the way the training images were loaded
# (load_img + img_to_array). cv2.imread returns channels in BGR order, so
# images read with it reach the network with red and blue swapped relative
# to the training data; it also returns None for an unreadable file, which
# only surfaced as a cryptic cv2.resize error.
# NOTE: a Xception_testfeatures.npy file computed from the BGR-loaded images
# is stale and should be deleted before re-running the feature extraction.
x_test = np.empty((len(test_ids), DIM, DIM, 3), dtype=np.float32)
for position, image_id in enumerate(tqdm(test_ids)):
    image_path = os.path.join(TEST_FOLDER, '{}.jpg'.format(image_id))
    x_test[position] = image.img_to_array(image.load_img(image_path, target_size=(DIM, DIM)))

print(x_test.shape)

100%|██████████| 10357/10357 [00:40<00:00, 256.08it/s]


(10357, 299, 299, 3)


In [107]:
# Bottleneck features of the test images, cached on disk between runs.
testfilename = 'Xception_testfeatures.npy'
if exists(testfilename):
    test_features = np.load(testfilename)
else:
    # The training cell only creates Xception_model when its own feature cache
    # is missing, so build the extractor here if it does not exist yet.
    if 'Xception_model' not in globals():
        Xception_model = Xception(weights='imagenet', include_top=False,
                                  input_shape=(299, 299, 3), pooling="avg")

    # No random transforms are configured for this generator, so the batch
    # size only affects memory use. NOTE(review): `batch_size` is not assigned
    # in any visible cell; a local value is used so this cell runs on its own.
    test_batch_size = 32

    test_datagen = ImageDataGenerator(preprocessing_function=xception_preprocessor)
    # shuffle=False keeps feature rows in the same order as x_test.
    test_generator = test_datagen.flow(x_test, shuffle=False,
                                       batch_size=test_batch_size, seed=5512)
    test_features = Xception_model.predict_generator(test_generator, verbose=1)
    np.save(testfilename, test_features)

In [108]:
# Class probabilities for every test image, from the trained top model.
predictions = model_top.predict(test_features)
# Function-call form prints the same tuple under Python 2 and also runs on
# Python 3, matching the print(...) calls used elsewhere in this notebook.
print(predictions.shape)

(10357, 120)


In [109]:
# Generating the Kaggle upload file, step 1: one-hot encode the breed column
# of the label file; the resulting column names are used as the submission
# header further down.
df_train = pd.read_csv(
    '/Users/junyan/Downloads/Homework/DeepLearning/Project/data/labels.csv'
)
targets_series = df_train.loc[:, 'breed']
one_hot = pd.get_dummies(data=targets_series, sparse=True)
one_hot_labels = np.asarray(one_hot)

In [110]:
# Generating the Kaggle upload file, step 2: one row per test image, with the
# image id first and then one probability column per breed.
test_DF = pd.read_csv(
    '/Users/junyan/Downloads/Homework/DeepLearning/Project/data/sample_submission.csv'
)
col_names = one_hot.columns.values
sub = pd.DataFrame(predictions, columns=col_names)
sub.insert(loc=0, column='id', value=test_DF['id'])
print(sub.shape)
sub.to_csv('Xception_sub2.csv', index=False)

(10357, 121)
