In [1]:
# Colab library to upload files to notebook
from google.colab import files

# Install Kaggle library
!pip install -q kaggle

# Upload kaggle API key file
uploaded = files.upload()

Saving kaggle.json to kaggle.json


In [2]:
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json

kaggle.json


In [0]:
!pip uninstall -y kaggle
!pip install --upgrade pip
!pip install kaggle==1.5.6
!kaggle -v

In [4]:
!kaggle competitions download -c dog-breed-identification

Downloading dog-breed-identification.zip to /content
100% 690M/691M [00:21<00:00, 28.9MB/s]
100% 691M/691M [00:21<00:00, 33.4MB/s]


In [0]:
!unzip \*.zip

In [1]:
import csv
import glob
from google.colab.patches import cv2_imshow
import cv2
import os
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, Activation
from keras.optimizers import adam 
from sklearn.model_selection import train_test_split
EPOCHS = 10
data = []
label = []

#import the neccesary packages
import pandas as pd
import numpy as np

#constants
num_classes = 120 # the number of breeds we want to classify
seed = 42 # makes the random numbers in numpy predictable
im_size = 299 # This size of the images
batch_size = 32

#read the csv into a dataframe, group the breeds and 
df = pd.read_csv('labels.csv')
selected_breed_list = list(df.groupby('breed').count().sort_values(by='id', ascending=False).head(num_classes).index)
df = df[df['breed'].isin(selected_breed_list)]
df['filename'] = df.apply(lambda x: ('train/' + x['id'] + '.jpg'), axis=1)


breeds = pd.Series(df['breed'])
print("total number of breeds to classify",len(breeds.unique()))

print(df.head())
from keras.preprocessing import image

def read_img(img_id, train_or_test, size):
    """Read and resize image.
    # Arguments
        img_id: string
        train_or_test: string 'train' or 'test'.
        size: resize the original image.
    # Returns
        Image as numpy array.
    """
    path =  train_or_test + "/" + img_id + ".jpg"
    img = image.load_img(path, target_size=size)
    return image.img_to_array(img)

from sklearn.preprocessing import LabelEncoder
label_enc = LabelEncoder()
np.random.seed(seed=seed)
rnd = np.random.random(len(df))
train_idx = rnd < 0.9
valid_idx = rnd >= 0.9
y_train = label_enc.fit_transform(df["breed"].values)
ytr = y_train[train_idx]
yv = y_train[valid_idx]
from tqdm import tqdm
from keras.applications import xception
x_train = np.zeros((train_idx.sum(), im_size, im_size, 3), dtype='float32')
x_valid = np.zeros((valid_idx.sum(), im_size, im_size, 3), dtype='float32')
train_i = 0
valid_i = 0
for i, img_id in tqdm(enumerate(df['id'])):
    img = read_img(img_id, 'train', (im_size, im_size))
    x = xception.preprocess_input(np.expand_dims(img.copy(), axis=0))
    if train_idx[i]:
        x_train[train_i] = x
        train_i += 1
    elif valid_idx[i]:
        x_valid[valid_i] = x
        valid_i += 1
print('Train Images shape: {} size: {:,}'.format(x_train.shape, x_train.size))
from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rotation_range=45,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.25,
                                   horizontal_flip=True,
                                   fill_mode='nearest')

train_generator = train_datagen.flow(x_train, 
                                     ytr, 
                                     batch_size=batch_size)


valid_datagen = ImageDataGenerator()

valid_generator = valid_datagen.flow(x_valid, 
                                     yv, 
                                     batch_size=batch_size)

from keras.layers import GlobalAveragePooling2D, Dense, BatchNormalization, Dropout
from keras.optimizers import Adam, SGD, RMSprop
from keras.models import Model, Input

# create the base pre-trained model
base_model = xception.Xception(weights='imagenet', include_top=False)
# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional Xception layers
for layer in base_model.layers:
    layer.trainable = False

# add a global spatial average pooling layer
x = base_model.output
x = BatchNormalization()(x)
x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dropout(0.5)(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
# and a logistic layer and set it to the number of breeds we want to classifiy, 
predictions = Dense(num_classes, activation='softmax')(x)

# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

import datetime
from keras.callbacks import EarlyStopping, ModelCheckpoint

epochs = 10
learning_rate = 0.001

# checkpoints
early_stopping = EarlyStopping(monitor='val_acc', patience=5)
STAMP = "{}_dog_breed_model".format(datetime.date.today().strftime("%Y-%m-%d"))

bst_model_path = "{}.h5".format(STAMP)
model_checkpoint = ModelCheckpoint(bst_model_path,
                                   save_best_only=True,
                                   save_weights_only=False,
                                  verbose=1)



# compile the model (should be done *after* setting layers to non-trainable)
optimizer = RMSprop(lr=learning_rate, rho=0.9)
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=["accuracy"])

hist = model.fit_generator(train_generator,
                           steps_per_epoch=train_idx.sum() // batch_size,
                           epochs=epochs, callbacks=[early_stopping, model_checkpoint],
                           validation_data=valid_generator,
                           validation_steps=valid_idx.sum() // batch_size)

# serialize weights to HDF5
model.save(bst_model_path)
print("Saved model to disk")


Using TensorFlow backend.


total number of breeds to classify 120
                                 id  ...                                    filename
0  000bec180eb18c7604dcecc8fe0dba07  ...  train/000bec180eb18c7604dcecc8fe0dba07.jpg
1  001513dfcb2ffafc82cccf4d8bbaba97  ...  train/001513dfcb2ffafc82cccf4d8bbaba97.jpg
2  001cdf01b096e06d78e9e5112d419397  ...  train/001cdf01b096e06d78e9e5112d419397.jpg
3  00214f311d5d2247d5dfe4fe24b2303d  ...  train/00214f311d5d2247d5dfe4fe24b2303d.jpg
4  0021f9ceb3235effd7fcde7f7538ed62  ...  train/0021f9ceb3235effd7fcde7f7538ed62.jpg

[5 rows x 3 columns]


10222it [00:44, 229.65it/s]


Train Images shape: (9242, 299, 299, 3) size: 2,478,732,126
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.26639, saving model to 2020-05-07_dog_breed_model.h5




Epoch 2/10

Epoch 00002: val_loss did not improve from 0.26639
Epoch 3/10

Epoch 00003: val_loss improved from 0.26639 to 0.13373, saving model to 2020-05-07_dog_breed_model.h5
Epoch 4/10

Epoch 00004: val_loss did not improve from 0.13373
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.13373
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.13373
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.13373
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.13373
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.13373
Epoch 10/10

Epoch 00010: val_loss did not improve from 0.13373
Saved model to disk


In [16]:
from keras.models import load_model
from keras.preprocessing import image
import matplotlib.pyplot as plt
import numpy as np
import os
from google.colab import files

def predict_from_image(img_path):
    img = image.load_img(img_path, target_size=(299, 299))
    img_tensor = image.img_to_array(img) # (height, width, channels)
    img_tensor = np.expand_dims(img_tensor, axis=0) # (1, height, width, channels), add a dimension because the model expects this shape: (batch_size, height, width, channels)
    img_tensor /= 255.

    pred = model.predict(img_tensor)
    predicted_class = [np.argmax(pred)]

    return predicted_class

#img_path1 = '/content/Lakeland-Terrier.jpg'
#img_path2 = '/content/Golden_Retriever.jpg'
img_path3 = '/content/Dhole.jpg'
predict_from_image(img_path3)


[19 37 85 ...  3 75 28]


[36]