In [1]:
import numpy as np
import pandas as pd

from tqdm import tqdm

import keras

from keras.models import Model, Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_v3 import InceptionV3

from sklearn import preprocessing
from sklearn.model_selection import train_test_split, KFold

import cv2

Using TensorFlow backend.


In [10]:
#https://www.kaggle.com/orangutan/keras-vgg19-starter
df_train = pd.read_csv('Data/labels.csv')

In [13]:
df_train.head()

Unnamed: 0.1,Unnamed: 0,id,breed
0,0,n02098413_2584,lhasa
1,1,n02098413_6354,lhasa
2,2,n02098413_12117,lhasa
3,3,n02098413_10144,lhasa
4,4,n02098413_13405,lhasa


In [4]:
targets_series = pd.Series(df_train['breed'])
one_hot = pd.get_dummies(targets_series, sparse = True)
one_hot_labels = np.asarray(one_hot)

In [5]:
img_size = 224

x_train = []
y_train = []

In [6]:
i = 0 
for f, breed in tqdm(df_train.values):
    img = cv2.imread('Data/train/{}.jpg'.format(f))
    label = one_hot_labels[i]
    x_train.append(cv2.resize(img, (img_size, img_size)))
    y_train.append(label)
    i += 1

100%|██████████| 10222/10222 [00:33<00:00, 307.86it/s]


In [7]:
y_train_raw = np.array(y_train, np.uint8)
x_train_raw = np.array(x_train, np.float32) / 255

In [8]:
print(x_train_raw.shape)
print(y_train_raw.shape)

(10222, 224, 224, 3)
(10222, 120)


In [9]:
num_class = y_train_raw.shape[1]
num_class

120

## Take 10% of data as test data

In [10]:
X_train, X_test, y_train, y_test = train_test_split(x_train_raw, y_train_raw, test_size=0.1, random_state=1, stratify=y_train_raw)

In [11]:
# Create the base pre-trained model
# Can't download weights in the kernel
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(img_size, img_size, 3))

# Add a new top layer
x = base_model.output
x = Flatten()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(num_class, activation='softmax')(x)

# This is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

# First: train only the top layers (which were randomly initialized)
for layer in base_model.layers:
    layer.trainable = False

model.compile(loss='categorical_crossentropy', 
              optimizer=keras.optimizers.SGD(lr=0.01, momentum=0.0, decay=1e-5, nesterov=True), 
              metrics=['accuracy'])



## Cross Validation for Model

In [24]:
kf = KFold(n_splits = 3, shuffle=True)
i = 0
for train_index, test_index in kf.split(x_train_raw):
    X_train_split, y_train_split = X_train[train_index], y_train[train_index]
    X_valid_split, y_valid_split = X_train[test_index], y_train[test_index]
    print('**** Split {} ****'.format(i))
    i += 1
    model.fit(X_train_split, y_train_split, epochs=30, validation_data=(X_valid, y_valid), verbose=1)

MemoryError: 

In [12]:
model.fit(X_train, y_train, epochs=10, batch_size=128, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fa3b55e32b0>

In [19]:
model.evaluate(x=X_test, y=y_test)



[5.520991241477452, 0.01075268814291073]

In [16]:
print(model.metrics_names)

['loss', 'acc']


In [22]:
keras.backend.clear_session()