In [102]:
import keras
from pandas import DataFrame as df
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import cv2
%matplotlib inline

In [103]:
# Dataset locations. Every path is derived from root_dir, so moving the data
# only requires editing this one line.
root_dir = 'e:/Kaggle/03_dog_breed_identification/'

# Image folder prefixes; the loading cells append '<id>.jpg' to them.
train_dir, test_dir = ('{}{}/'.format(root_dir, sub) for sub in ('train', 'test'))

# CSV files: the training labels and the sample submission.
labels_file = '{}labels.csv'.format(root_dir)
rst_file = '{}sample_submission.csv'.format(root_dir)


In [104]:
def resize_img(img, dsize=(64,64)):
    """Scale a BGR image so it covers ``dsize``, crop it, and return it as RGB.

    The image is resized with its aspect ratio preserved so that it is at
    least as large as the target in both dimensions. The overflow is then
    cropped away (keeping the top-left region) and the channel order is
    converted from BGR to RGB.

    Args:
        img: 3-dimensional image array (height, width, channels) in BGR
            order, e.g. the result of ``cv2.imread``.
        dsize: target ``(height, width)`` in pixels.

    Returns:
        RGB image array whose first two dimensions equal ``dsize``.

    Raises:
        ValueError: if ``img`` is None (``cv2.imread`` returns None when it
            cannot read a file).
    """
    if img is None:
        raise ValueError('resize_img received None; the image file could not be read')

    ori_y, ori_x = img.shape[:2]
    d_y, d_x = dsize

    # Compare aspect ratios by cross-multiplication. The integer-division
    # form (ori_y//ori_x >= d_y//d_x) truncates both ratios, which picks the
    # wrong branch for non-square targets and leaves one side too short.
    if ori_y * d_x >= d_y * ori_x:
        # Source is relatively taller: match the width, height overflows.
        new_y, new_x = ori_y * d_x // ori_x, d_x
    else:
        # Source is relatively wider: match the height, width overflows.
        new_y, new_x = d_y, ori_x * d_y // ori_y

    # cv2.resize expects dsize as (width, height).
    img = cv2.resize(img, dsize=(new_x, new_y))
    img = img[:d_y, :d_x, :]  # crop the overflow, keeping the top-left region
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

In [105]:
# Load the label table; the first CSV column becomes the index and is used
# below as the image file stem.
# pd.read_csv(..., index_col=0) replaces DataFrame.from_csv, which was
# deprecated in pandas 0.21 and removed in pandas 1.0.
labels = pd.read_csv(labels_file, index_col=0)

# Read and resize every training image, in label-table order.
all_data = []
for f in labels.index:
    img_path = train_dir+f+'.jpg'
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising when a file is
        # missing or unreadable; fail here with the offending path.
        raise IOError('cannot read training image: ' + img_path)
    all_data.append(resize_img(img))

# Stack into a single array (one entry per image along the first axis).
all_data = np.array(all_data)

In [106]:
# One-hot encode the breed column: one indicator column per distinct breed.
# labels['breed'] is already a Series, so no extra pd.Series wrap is needed,
# and a sparse encoding would be densified by np.asarray straight away.
targets_series = labels['breed']
one_hot = pd.get_dummies(targets_series)
# Explicit float32 so the target dtype does not depend on the pandas version
# (get_dummies has returned uint8 or bool depending on the release).
y = np.asarray(one_hot, dtype=np.float32)

In [107]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for validation; the fixed random_state keeps
# the split identical between runs.
(X_train, X_val,
 y_train, y_val) = train_test_split(
    all_data, y,
    random_state=16,
    test_size=0.33,
)

In [108]:
from keras import applications, optimizers
from keras.layers import Dense, Dropout, Flatten
from keras.models import Model, Sequential, load_model

# Network input geometry (rows, columns, colour channels).
img_rows = img_cols = 64
img_channel = 3

# VGG16 convolutional stack with ImageNet weights and without its
# fully-connected classifier (include_top=False).
base_model = applications.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(img_rows, img_cols, img_channel),
)

In [109]:
# One output unit per distinct breed in the label table.
num_class = labels.breed.nunique()

# Classification head: flatten the VGG16 feature map and map it to a softmax
# distribution over the breeds.
add_model = Sequential()
add_model.add(Flatten(input_shape=base_model.output_shape[1:]))
add_model.add(Dense(num_class, activation='softmax'))

# Full network: VGG16 base followed by the head.
model = Model(inputs=base_model.input, outputs=add_model(base_model.output))

# A softmax over mutually exclusive classes with one-hot targets needs
# categorical cross-entropy. 'binary_crossentropy' scores every output unit
# as an independent yes/no problem, which is the wrong objective here and
# makes the 'accuracy' metric look far better than the real top-1 accuracy.
model.compile(loss='categorical_crossentropy', optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 90, 90, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 90, 90, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 90, 90, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 45, 45, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 45, 45, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 45, 45, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 22, 22, 128)       0         
__________

In [111]:
# Print the layer table of the classification head (add_model) on its own.
add_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_4 (Flatten)          (None, 2048)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 120)               245880    
Total params: 245,880
Trainable params: 245,880
Non-trainable params: 0
_________________________________________________________________


In [110]:
# Train for a single epoch, evaluating on the held-out validation split.
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=1,
    verbose=1,
)

Train on 6848 samples, validate on 3374 samples
Epoch 1/1


<keras.callbacks.History at 0x1f96f319be0>

In [112]:
# Save the trained model under a relative path (current working directory).
# NOTE(review): '.m' is not a standard Keras extension and the on-disk format
# may depend on the Keras version — confirm the file reloads with load_model.
model.save('dog_breed_identification.m')

In [113]:
# Load the sample submission; its index supplies the test image file stems.
# pd.read_csv(..., index_col=0) replaces DataFrame.from_csv, which was
# deprecated in pandas 0.21 and removed in pandas 1.0.
test_labels = pd.read_csv(rst_file, index_col=0)

# Read and resize every test image, in sample-submission order.
test_data = []
for f in test_labels.index:
    img_path = test_dir+f+'.jpg'
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising when a file is
        # missing or unreadable; fail here with the offending path.
        raise IOError('cannot read test image: ' + img_path)
    # Append to test_data: the previous code appended to the undefined name
    # val_data, so this cell raised NameError and test_data stayed empty.
    test_data.append(resize_img(img))

# Stack into a single array (one entry per image along the first axis).
test_data = np.array(test_data)

In [115]:
# Run the trained network on the stacked test images. The final layer is a
# softmax Dense(num_class), so each prediction is a score vector over classes.
predict = model.predict(test_data)

In [126]:
# Label the prediction matrix with the sample submission's row ids and column
# names, then write it as CSV into the test image folder.
# NOTE(review): assumes the sample submission's columns are in the same order
# as the one-hot training columns — confirm before submitting.
predict_df = pd.DataFrame(
    data=predict,
    index=test_labels.index,
    columns=test_labels.columns,
)
predict_df.to_csv('{}predict.csv'.format(test_dir))