In [41]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import keras
from keras.applications.vgg19 import VGG19
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten

import os
from tqdm import tqdm
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import cv2
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

from subprocess import check_output

In [42]:
print(check_output(["ls", "../dog"]).decode("utf8"))

dog_0130.ipynb
labels.csv
sample_submission.csv
test
train



In [43]:
df_train = pd.read_csv('../dog/labels.csv')
df_test = pd.read_csv('../dog/sample_submission.csv')

In [44]:
df_train.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [45]:
targets_series = pd.Series(df_train['breed'])
one_hot = pd.get_dummies(targets_series, sparse = True)

In [46]:
one_hot_labels = np.asarray(one_hot)

In [47]:
img_size = 90

In [48]:
x_train = []
y_train = []
x_test = []

In [49]:
i = 0
for f , breed in tqdm(df_train.values):
    img = cv2.imread('../dog/train/{}.jpg'.format(f))
    label = one_hot_labels[i]
    x_train.append(cv2.resize(img, (img_size, img_size)))
    y_train.append(label)
    i += 1

100%|██████████| 10222/10222 [00:46<00:00, 221.33it/s]


In [50]:
i

10222

In [51]:
for f in tqdm(df_test['id'].values):
    img = cv2.imread('../dog/test/{}.jpg'.format(f))
    x_test.append(cv2.resize(img, (img_size, img_size)))

100%|██████████| 10357/10357 [00:47<00:00, 217.98it/s]


In [53]:
y_train_raw = np.array(y_train, np.uint8)
x_train_raw = np.array(x_train, np.float32)/255.
x_test = np.array(x_test, np.float32)/255.

In [54]:
x_test.shape

(10357, 90, 90, 3)

In [29]:
num_class = y_train_raw.shape[1]

In [31]:
X_train, X_valid, Y_train, Y_valid = train_test_split(x_train_raw, y_train_raw, test_size = 0.3, random_state = 1)

In [34]:
base_model = VGG19(weights = None, include_top = False, input_shape= (img_size, img_size, 3))

x = base_model.output
x = Flatten()(x)
predictions = Dense(num_class, activation = 'softmax')(x)

model = Model(inputs = base_model.input, outputs = predictions)
for layer in base_model.layers:
    layer.trainable = False

model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics= ['accuracy'])

callbacks_list = [keras.callbacks.EarlyStopping(monitor = 'val_acc', patience = 3, verbose = 1)]
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 90, 90, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 90, 90, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 90, 90, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 45, 45, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 45, 45, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 45, 45, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 22, 22, 128)       0         
__________

In [35]:
model.fit(X_train, Y_train, epochs = 1, validation_data = (X_valid, Y_valid), verbose = 1)

Train on 7155 samples, validate on 3067 samples
Epoch 1/1


<keras.callbacks.History at 0x125313b00>

In [55]:
preds = model.predict(x_test, verbose = 1)



In [57]:
sub = pd.DataFrame(preds)
col_names = one_hot.columns.values
sub.columns = col_names
sub.insert(0, 'id', df_test['id'])
sub.head()

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.008258,0.008732,0.008427,0.00852,0.008185,0.008148,0.008666,0.008803,0.008313,...,0.008326,0.008216,0.008049,0.008233,0.008397,0.008324,0.008362,0.008297,0.008271,0.008181
1,00102ee9d8eb90812350685311fe5890,0.008245,0.008764,0.008433,0.008529,0.008177,0.008134,0.008683,0.008834,0.008309,...,0.008324,0.008204,0.008028,0.008231,0.008403,0.008324,0.008363,0.008296,0.008269,0.00817
2,0012a730dfa437f5f3613fb75efcd4ce,0.008262,0.008724,0.008424,0.008515,0.008189,0.008154,0.008661,0.008791,0.008316,...,0.008327,0.008217,0.008055,0.008234,0.008396,0.008325,0.008362,0.008297,0.008271,0.008184
3,001510bc8570bbeee98c8d80c8a95ec1,0.008247,0.008769,0.008435,0.008531,0.008177,0.008133,0.008686,0.008837,0.008305,...,0.008325,0.008201,0.008025,0.008231,0.0084,0.008321,0.00836,0.008295,0.008264,0.008167
4,001a5f3114548acdefa3d4da05474c2e,0.008259,0.008741,0.008426,0.008521,0.008186,0.008147,0.008669,0.008804,0.008309,...,0.008328,0.008209,0.008044,0.008232,0.008398,0.008325,0.00836,0.008296,0.008269,0.008179


In [59]:
sub.to_csv('submitto_0130.csv', index = False)