In [1]:
import tensorflow as tf
import tensorflow_hub as hub

from tensorflow import keras

from keras import models
from keras import layers
from keras import callbacks
from keras import optimizers

Using TensorFlow backend.


In [2]:
tf.__version__

'1.15.3'

In [3]:
import os, cv2, math
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [4]:
import logging

logger = tf.get_logger()
logger.setLevel(logging.ERROR)

import warnings

warnings.simplefilter('ignore')

In [5]:
exec_path = os.getcwd()
data_path = '/home/naivoder/hd/kaggle/datasets/dog-breed-identification/'

labels_path = os.path.join(data_path, 'labels.csv')

train_folder = os.path.join(data_path, 'train/')
test_folder = os.path.join(data_path, 'test/')

df = pd.read_csv(labels_path)

In [6]:
num_train = len(next(os.walk(train_folder))[2])
num_test = len(next(os.walk(test_folder))[2])

print("Training images:", num_train)
print("Testing images:", num_test)

Training images: 10222
Testing images: 10357


In [7]:
df.describe()

Unnamed: 0,id,breed
count,10222,10222
unique,10222,120
top,49cdf9df49fbf1068c791dcecbb2031f,scottish_deerhound
freq,1,126


In [8]:
breed = set(df['breed'])

num_images = len(df)
num_classes = len(breed)

class_to_num = dict(zip(breed, range(num_classes)))
num_to_class = dict(zip(range(num_classes), breed))

In [9]:
width = 224
batch_size = 8
epochs = 500
drop_out = 0.5

In [10]:
rgb = True
num_channels = 3 if rgb else 1

In [11]:
images = np.zeros((num_train, width, width, num_channels), dtype=np.float16)
labels = np.zeros((num_train, num_classes), dtype=np.int8)

for i in tqdm(range(num_images)):
    images[i] = cv2.resize(cv2.imread(train_folder + '%s.jpg' % df['id'][i]), (width, width))
    labels[i][class_to_num[df['breed'][i]]] = 1

100%|██████████| 10222/10222 [00:28<00:00, 364.42it/s]


In [12]:
from sklearn.model_selection import train_test_split
x_train, y_train, x_test, y_test = train_test_split(images, labels, test_size=0.2)

In [23]:
from keras.preprocessing.image import ImageDataGenerator

traingen = ImageDataGenerator(rescale=1./255)
train_data = traingen.flow(x_train, y_train)

ValueError: `x` (images tensor) and `y` (labels) should have the same length. Found: x.shape = (8177, 224, 224, 3), y.shape = (2045, 224, 224, 3)

In [11]:
inputs = layers.Input((224,224,3))

x = layers.Conv2D(filters=18, kernel_size=(3,3), padding='same')(inputs)
x = layers.Dropout(drop_out)(x)
x = layers.ReLU()(x)
x = layers.MaxPooling2D(pool_size=(2,2))(x)

x = layers.Conv2D(filters=36, kernel_size=(3,3), padding='same')(x)
x = layers.Dropout(drop_out)(x)
x = layers.ReLU()(x)
x = layers.MaxPooling2D(pool_size=(2,2))(x)

x = layers.Conv2D(filters=64, kernel_size=(3,3), padding='same')(x)
x = layers.Dropout(drop_out)(x)
x = layers.ReLU()(x)
x = layers.MaxPooling2D(pool_size=(2,2))(x)

x = layers.Conv2D(filters=128, kernel_size=(3,3), padding='same')(x)
x = layers.Dropout(drop_out)(x)
x = layers.ReLU()(x)
x = layers.MaxPooling2D(pool_size=(2,2))(x)

x = layers.Conv2D(filters=256, kernel_size=(3,3), padding='same')(x)
x = layers.Dropout(drop_out)(x)
x = layers.ReLU()(x)
x = layers.MaxPooling2D(pool_size=(2,2))(x)

x = layers.Conv2D(filters=512, kernel_size=(3,3), padding='same')(x)
x = layers.Dropout(drop_out)(x)
x = layers.ReLU()(x)
x = layers.Dense(1028, activation='relu')(x)
x = layers.Flatten()(x)

predictions = layers.Dense(num_classes, activation='softmax')(x)

model = models.Model(inputs=inputs, outputs=predictions)

In [12]:
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 224, 224, 18)      504       
_________________________________________________________________
dropout_1 (Dropout)          (None, 224, 224, 18)      0         
_________________________________________________________________
re_lu_1 (ReLU)               (None, 224, 224, 18)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 112, 112, 18)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 112, 112, 36)      5868      
_________________________________________________________________
dropout_2 (Dropout)          (None, 112, 112, 36)      0   

In [13]:
weight_path = 'dogs_weights.h5'

completion = callbacks.EarlyStopping(monitor='val_acc', verbose=1, patience=50)
checkpoint = callbacks.ModelCheckpoint(weight_path, monitor='val_acc', verbose=1, save_best_only=True)

callbacks_list = [completion, checkpoint]

In [14]:
model.compile(optimizer=optimizers.Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['acc'])

In [15]:
history = model.fit(images,
                    labels,
                    epochs=epochs,
                    batch_size=batch_size,
                    validation_split=0.2,
                    callbacks=callbacks_list)

Train on 8177 samples, validate on 2045 samples
Epoch 1/500

Epoch 00001: val_acc improved from -inf to 0.01076, saving model to dogs_weights.h5
Epoch 2/500

Epoch 00002: val_acc did not improve from 0.01076
Epoch 3/500

Epoch 00003: val_acc did not improve from 0.01076
Epoch 4/500

Epoch 00004: val_acc did not improve from 0.01076
Epoch 5/500

Epoch 00005: val_acc improved from 0.01076 to 0.01125, saving model to dogs_weights.h5
Epoch 6/500

Epoch 00006: val_acc improved from 0.01125 to 0.01174, saving model to dogs_weights.h5
Epoch 7/500

Epoch 00007: val_acc did not improve from 0.01174
Epoch 8/500

Epoch 00008: val_acc did not improve from 0.01174
Epoch 9/500

Epoch 00009: val_acc did not improve from 0.01174
Epoch 10/500

Epoch 00010: val_acc did not improve from 0.01174
Epoch 11/500

Epoch 00011: val_acc did not improve from 0.01174
Epoch 12/500

Epoch 00012: val_acc improved from 0.01174 to 0.01418, saving model to dogs_weights.h5
Epoch 13/500

Epoch 00013: val_acc did not impro

KeyboardInterrupt: 

In [None]:
test_images = np.zeros((num_test, width, width, num_channels), dtype=np.float16)
test_labels = np.zeros((num_test, num_classes), dtype=np.int8)

for i in tqdm(range(num_images)):
    test_images[i] = cv2.resize(cv2.imread(test_folder + '%s.jpg' % df['id'][i]), (width, width))
    test_labels[i][class_to_num[df['breed'][i]]] = 1
    
test_data = (test_images, test_labels)

In [None]:
model.load_weights("dogs_weights.h5")

results = model.evaluate(test_data)
dict(zip(model.metrics_names, results))