In [1]:
import numpy as np
import pandas as pd 
import keras
from keras.applications.vgg19 import VGG19
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten


import os
from tqdm import tqdm
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [2]:
df_train = pd.read_csv('../input/labels.csv')
df_test = pd.read_csv('../input/sample_submission.csv')

In [3]:
targets_series = pd.Series(df_train['breed'])
one_hot = pd.get_dummies(targets_series, sparse = True)

In [4]:
one_hot_labels = np.asarray(one_hot)

In [5]:
def read_and_resize_images(df,nrow=224,ncol=224,channels=3):
    from scipy import misc
    i = 0
    x = []
    y = []
    for f, breed in tqdm(df.values):
        img = misc.imread('../input/train/{}.jpg'.format(f))
        label = one_hot_labels[i]
        x.append(misc.imresize(img, (nrow, ncol,channels)))
        y.append(label)
        i += 1
    return x,y

In [6]:
x_train,y_train = read_and_resize_images(df_train)

`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  import sys
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  if __name__ == '__main__':
100%|████████████████████████████████████████████████████████████████████████████| 10222/10222 [02:00<00:00, 85.17it/s]


In [7]:
def read_and_resize_images_test(df,nrow=224,ncol=224,channels=3):
    from scipy import misc
    x = []
    for f in tqdm(df['id'].values):
        img = misc.imread('../input/test/{}.jpg'.format(f))
        x.append(misc.imresize(img, (nrow, ncol,channels)))
    return x

In [8]:
x_test = read_and_resize_images_test(df_test)

`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  """
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  
100%|████████████████████████████████████████████████████████████████████████████| 10357/10357 [02:59<00:00, 57.57it/s]


In [9]:
y_train_raw = np.array(y_train, np.uint8)
x_train_raw = np.array(x_train, np.float32) / 255.
x_test  = np.array(x_test, np.float32) / 255.

In [10]:
print(x_train_raw.shape)
print(y_train_raw.shape)
print(x_test.shape)

(10222, 224, 224, 3)
(10222, 120)
(10357, 224, 224, 3)


In [11]:
num_class = y_train_raw.shape[1]

In [12]:
X_train, X_valid, Y_train, Y_valid = train_test_split(x_train_raw, y_train_raw, test_size=0.3, random_state=1)

In [13]:
base_model = VGG19(#weights='imagenet',
    weights = None, include_top=False, input_shape=(224, 224, 3))

# Add a new top layer
x = base_model.output
x = Flatten()(x)
predictions = Dense(num_class, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# First: train only the top layers (which were randomly initialized)
for layer in base_model.layers:
    layer.trainable = False

model.compile(loss='categorical_crossentropy', 
              optimizer='adam', 
              metrics=['accuracy'])

callbacks_list = [keras.callbacks.EarlyStopping(monitor='val_acc', patience=3, verbose=1)]
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [14]:
model.fit(X_train, Y_train, epochs=1, validation_data=(X_valid, Y_valid), verbose=1)

Train on 7155 samples, validate on 3067 samples
Epoch 1/1






<keras.callbacks.History at 0x27f97d03ac8>

In [15]:
preds = model.predict(x_test, verbose=1)



In [16]:
sub = pd.DataFrame(preds)
col_names = one_hot.columns.values
sub.columns = col_names
# Insert the column id from the sample_submission at the start of the data frame
sub.insert(0, 'id', df_test['id'])
sub.head(5)

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.00805,0.009947,0.008731,0.008837,0.007782,0.007521,0.009322,0.009846,0.00807,...,0.007892,0.007811,0.007052,0.00768,0.008373,0.008222,0.007906,0.008442,0.007829,0.007658
1,00102ee9d8eb90812350685311fe5890,0.007917,0.010251,0.008766,0.008906,0.00771,0.007367,0.009454,0.010038,0.008031,...,0.007867,0.007679,0.006871,0.007595,0.008436,0.008229,0.007926,0.008456,0.007753,0.007598
2,0012a730dfa437f5f3613fb75efcd4ce,0.008029,0.009849,0.008694,0.008789,0.007842,0.007619,0.009269,0.009753,0.008102,...,0.007926,0.007824,0.007121,0.007721,0.00836,0.008246,0.007971,0.008424,0.007826,0.007699
3,001510bc8570bbeee98c8d80c8a95ec1,0.007976,0.010337,0.00874,0.008917,0.007741,0.007405,0.009515,0.010022,0.00793,...,0.007893,0.007553,0.006838,0.007592,0.008421,0.0082,0.007841,0.008407,0.007621,0.007562
4,001a5f3114548acdefa3d4da05474c2e,0.008043,0.010079,0.008693,0.008868,0.007804,0.007516,0.009406,0.00985,0.008018,...,0.007929,0.007652,0.006996,0.007646,0.008404,0.008233,0.007875,0.008398,0.007724,0.007655
