In [53]:
%matplotlib inline
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import matplotlib as mpl
from mpl_toolkits.axes_grid1 import ImageGrid
from os import listdir, makedirs
from os.path import join, exists, expanduser
from tqdm import tqdm
from sklearn.metrics import log_loss, accuracy_score
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications import xception
from keras.applications import inception_v3
from keras.applications.vgg16 import preprocess_input, decode_predictions
from sklearn.linear_model import LogisticRegression
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.layers import Flatten, Dense, Dropout,Input
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from keras.preprocessing.image import ImageDataGenerator

In [2]:
SEED = 1987
data_dir = ''
labels = pd.read_csv(join(data_dir, 'labels.csv'))
sample_submission = pd.read_csv(join(data_dir, 'sample_submission.csv'))
print(len(listdir(join(data_dir, 'train'))), len(labels))
print(len(listdir(join(data_dir, 'test'))), len(sample_submission))

10222 10222
10357 10357


In [3]:
# This part is optional for if you want to train only on most popular classes. 
# NUM_CLASSES = 16
# selected_breed_list = list(labels.groupby('breed').count().sort_values(by='id', ascending=False).head(NUM_CLASSES).index)
# labels = labels[labels['breed'].isin(selected_breed_list)]

In [3]:
labels['target'] = 1
labels_pivot = labels.pivot('id', 'breed', 'target').reset_index().fillna(0)
np.random.seed(seed=SEED)
rnd = np.random.random(len(labels))
train_idx = rnd < 0.8
valid_idx = rnd >= 0.8
labels_pivot = labels_pivot.drop('id', axis=1)
y_train = labels_pivot.values
ytr = y_train[train_idx]
yv = y_train[valid_idx]

In [4]:
def read_img(img_id, train_or_test, size):
    """Read and resize image.
    # Arguments
        img_id: string
        train_or_test: string 'train' or 'test'.
        size: resize the original image.
    # Returns
        Image as numpy array.
    """
    img = image.load_img(join(data_dir, train_or_test, '%s.jpg' % img_id), target_size=size)
    img = image.img_to_array(img)
    return img

In [6]:
INPUT_SIZE = 299
x_train = np.zeros((len(labels), INPUT_SIZE, INPUT_SIZE, 3), dtype='float32')
for i, img_id in tqdm(enumerate(labels['id'])):
    img = read_img(img_id, 'train', (INPUT_SIZE, INPUT_SIZE))
    #x = xception.preprocess_input(np.expand_dims(img.copy(), axis=0))
    x_train[i] = img
print('Train Images shape: {} size: {:,}'.format(x_train.shape, x_train.size))

10222it [00:53, 191.96it/s]


Train Images shape: (10222, 299, 299, 3) size: 2,741,571,066


In [7]:
Xtr = x_train[train_idx]
Xv = x_train[valid_idx]
print((Xtr.shape, Xv.shape, ytr.shape, yv.shape))

((8140, 299, 299, 3), (2082, 299, 299, 3), (8140, 120), (2082, 120))


In [8]:
del x_train

In [17]:
### Loading Xception features
xception_bottleneck = xception.Xception(weights='imagenet', include_top=False, pooling='avg')
train_datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range = 0.3,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    horizontal_flip = True,
    preprocessing_function = xception.preprocess_input)

In [45]:
batch_size = 32
generator = train_datagen.flow(Xtr, ytr, shuffle=False, batch_size=batch_size, seed=SEED)

In [46]:
# xception_bottleneck_features = []
# xception_labels = []
# for i in range(len(Xtr) // batch_size):
#     x, y = next(generator)
#     xception_bottleneck_features.append(xception_bottleneck.predict(x))
#     xception_labels.append(y) 
#     print(i,len(Xtr) // batch_size)

# xception_bottleneck_features = np.concatenate(xception_bottleneck_features)
# xception_labels = np.concatenate(xception_labels)
xception_bottleneck_features = xception_bottleneck.predict_generator(generator)
xception_labels=ytr

In [51]:
xception_logreg = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=SEED)
xception_logreg.fit(xception_bottleneck_features, (xception_labels * range(120)).sum(axis=1))

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='multinomial',
          n_jobs=1, penalty='l2', random_state=1987, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)

In [56]:
batch_size = 32
validation_datagen = ImageDataGenerator(preprocessing_function = xception.preprocess_input)
generator = validation_datagen.flow(Xv, yv, shuffle=False, batch_size=batch_size, seed=SEED)
xception_validation_features = xception_bottleneck.predict_generator(generator)

In [57]:
valid_probs = xception_logreg.predict_proba(xception_validation_features)
valid_preds = xception_logreg.predict(xception_validation_features)
print('Validation Xception LogLoss {}'.format(log_loss(yv, valid_probs)))
print('Validation Xception Accuracy {}'.format(accuracy_score((yv * range(120)).sum(axis=1), valid_preds)))

Validation Xception LogLoss 0.3515817905870127
Validation Xception Accuracy 0.8952929875120077


Let's try Inception V3 module. 

In [58]:
### Loading Inception features
inception_v3_bottleneck = inception_v3.InceptionV3(weights='imagenet', include_top=False, pooling='avg')
train_datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range = 0.3,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    horizontal_flip = True,
    preprocessing_function = inception_v3.preprocess_input)

In [59]:
batch_size = 32
generator = train_datagen.flow(Xtr, ytr, shuffle=False, batch_size=batch_size, seed=SEED)

# inception_v3_bottleneck_features = []
# inception_v3_labels = []
# for i in range(len(Xtr) // batch_size):
#     x, y = next(generator)
#     inception_v3_bottleneck_features.append(inception_v3_bottleneck.predict(x))
#     inception_v3_labels.append(y) 
#     print(i,len(Xtr) // batch_size)

# inception_v3_bottleneck = np.concatenate(inception_v3_bottleneck)
# inception_v3_labels = np.concatenate(inception_v3_labels)
inception_v3_bottleneck_features = inception_v3_bottleneck.predict_generator(generator,verbose=1)
inception_v3_labels=ytr



In [61]:
inception_v3_logreg = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=SEED)
inception_v3_logreg.fit(inception_v3_bottleneck_features, (inception_v3_labels * range(120)).sum(axis=1))

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='multinomial',
          n_jobs=1, penalty='l2', random_state=1987, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)

In [62]:
batch_size = 32
validation_datagen = ImageDataGenerator(preprocessing_function = inception_v3.preprocess_input)
generator = validation_datagen.flow(Xv, yv, shuffle=False, batch_size=batch_size, seed=SEED)
inception_v3_validation_features = inception_v3_bottleneck.predict_generator(generator)

In [63]:
valid_probs = inception_v3_logreg.predict_proba(inception_v3_validation_features)
valid_preds = inception_v3_logreg.predict(inception_v3_validation_features)
print('Validation Xception LogLoss {}'.format(log_loss(yv, valid_probs)))
print('Validation Xception Accuracy {}'.format(accuracy_score((yv * range(120)).sum(axis=1), valid_preds)))

Validation Xception LogLoss 0.36028253045998804
Validation Xception Accuracy 0.8928914505283382


Let's Try to stack the features of Xception and Inception and see how that works. 

In [67]:
X = np.hstack([xception_bottleneck_features, inception_v3_bottleneck_features])
V = np.hstack([xception_validation_features, inception_v3_validation_features])
print('Full train bottleneck features shape: {} size: {:,}'.format(X.shape, X.size))
print('Full valid bottleneck features shape: {} size: {:,}'.format(V.shape, V.size))
logreg = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=SEED)
logreg.fit(X, (ytr * range(120)).sum(axis=1))
valid_probs = logreg.predict_proba(V)
valid_preds = logreg.predict(V)
print('Validation Xception + Inception LogLoss {}'.format(log_loss(yv, valid_probs)))
print('Validation Xception + Inception Accuracy {}'.format(accuracy_score((yv * range(120)).sum(axis=1), valid_preds)))

Full train bottleneck features shape: (8140, 4096) size: 33,341,440
Full valid bottleneck features shape: (2082, 4096) size: 8,527,872
Validation Xception + Inception LogLoss 0.31424559722120965
Validation Xception + Inception Accuracy 0.9087415946205571


In [None]:
# def generate_features(model_info, data, labels, datagen):
#     print("generating features...")
#     datagen.preprocessing_function = model_info["preprocessor"]
#     generator = datagen.flow(data, labels, shuffle=False, batch_size=batch_size, seed=model_info["seed"])
#     bottleneck_model = model_info["model"](weights='imagenet', include_top=False, input_shape=model_info["input_shape"], pooling=model_info["pooling"])
#     return bottleneck_model.predict_generator(generator)

# models = {
#     "InceptionV3": {
#         "model": InceptionV3,
#         "preprocessor": inception_v3_preprocessor,
#         "input_shape": (299,299,3),
#         "seed": 1234,
#         "pooling": "avg"
#     },
#     "Xception": {
#         "model": Xception,
#         "preprocessor": xception_preprocessor,
#         "input_shape": (299,299,3),
#         "seed": 5512,
#         "pooling": "avg"
#     }
# }

Creating submission file

In [78]:
INPUT_SIZE = 224
for i, img_id in tqdm(enumerate(sample_submission['id'])):
    img = read_img(img_id, 'test', (INPUT_SIZE, INPUT_SIZE))
    x1 = inception_v3.preprocess_input(np.expand_dims(img.copy(), axis=0))
    x2 = xception.preprocess_input(np.expand_dims(img.copy(), axis=0))
    y1 = inception_v3_bottleneck.predict(x1)
    y2 = xception_bottleneck.predict(x2)
    y3=np.hstack([y2, y1])
    P = logreg.predict(y3)
    y=np.zeros((1,120))
    y[0,P.astype('int')-1]=1
    sample_submission.iloc[i,1:121] = np.asarray(y[0,0:120])

10357it [16:30, 10.46it/s]


In [79]:
#sample_submission.iloc[1,1:121] = np.asarray(y[0,0:120])

In [80]:
#sample_submission.head()

In [82]:
sample_submission.to_csv('submission6.csv',index=False)

In [77]:
sample_submission.head()

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,00102ee9d8eb90812350685311fe5890,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0012a730dfa437f5f3613fb75efcd4ce,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,001510bc8570bbeee98c8d80c8a95ec1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,001a5f3114548acdefa3d4da05474c2e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
