# Dog breed classification

## Libraries

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import random
import tensorflow

# Seed every random number generator the notebook relies on, not only Python's
# built-in one: NumPy and TensorFlow each keep their own independent generator.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
tensorflow.random.set_seed(SEED)

import warnings
# NOTE(review): this silences *all* warnings, including deprecation notices
# from the ML libraries; consider narrowing the filter.
warnings.filterwarnings('ignore')

# Mount Google Drive under /content/drive (Colab only) so files stored there
# can be read like local files.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Let's extract the train, test, labels and sample-submission zip files

In [None]:
# Google Drive folder that holds the project files. The trailing slash matters:
# file names are appended to this value with plain string concatenation.
path = "/content/drive/My Drive/Computer Vision/cvweek2data/"

In [None]:
from zipfile import ZipFile

# Archives to unpack, in extraction order. Each one is read from the Drive
# folder `path` and extracted into the current working directory.
ARCHIVE_NAMES = (
  "train.zip",
  "test.zip",
  "sample_submission.csv.zip",
  "labels.csv.zip",
)

# A single loop handles every archive so the extraction logic lives in one place.
for archive_name in ARCHIVE_NAMES:
  with ZipFile(path + archive_name, "r") as z:
    z.extractall()

In [None]:
!ls

drive  labels.csv  sample_data	sample_submission.csv  test  train


In [None]:
# Read the label table from the working directory.
labels = pd.read_csv(filepath_or_buffer="./labels.csv")
# Preview the first five rows to check the columns.
labels.head(n=5)

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


We will now look at the distribution of the breeds

In [None]:
# Count the rows per breed; value_counts() lists the most frequent breed first.
labels['breed'].value_counts()

scottish_deerhound      126
maltese_dog             117
afghan_hound            116
entlebucher             115
bernese_mountain_dog    114
                       ... 
brabancon_griffon        67
komondor                 67
golden_retriever         67
eskimo_dog               66
briard                   66
Name: breed, Length: 120, dtype: int64

We will now use a label encoder and the `to_categorical` function to one-hot encode the breed labels

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the breed -> integer mapping first, then apply it to the same column.
label_encoder = LabelEncoder()
label_encoder.fit(labels['breed'])
integer_encoded = label_encoder.transform(labels['breed'])
print(integer_encoded)

[19 37 85 ...  3 75 28]


In [None]:
import tensorflow
# One-hot encode the integer class ids (one row per label, a single 1 at the
# class index); dtype='int' yields an integer array rather than floats.
y = tensorflow.keras.utils.to_categorical(integer_encoded,dtype='int')

In [None]:
# Inspect the first row of the one-hot label matrix.
y[0]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
from tqdm import tqdm
import cv2

# Load every training image as a 128x128 grayscale array and pair it with the
# matching row of the one-hot label matrix `y`.
x_feature = []
y_feature = []

# enumerate() supplies the row position, so the index into `y` cannot drift
# out of step with the rows of `labels`.
for i, (f, breed) in enumerate(tqdm(labels.values)):
  image_path = './train/{}.jpg'.format(f)
  train_img = cv2.imread(image_path, 0)  # flag 0 = read as single-channel grayscale
  if train_img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising; fail here with the offending path.
    raise FileNotFoundError('Could not read image: {}'.format(image_path))
  label = y[i]
  train_img_resize = cv2.resize(train_img, (128, 128))
  x_feature.append(train_img_resize)
  y_feature.append(label)

100%|██████████| 10222/10222 [00:16<00:00, 624.12it/s]


In [None]:
# Stack the image list into one float32 array and divide by 255 to rescale
# the pixel values.
x_train_data = np.asarray(x_feature, dtype=np.float32) / 255.0
print(x_train_data.shape)

(10222, 128, 128)


In [None]:
# Append the trailing channel axis: (N, 128, 128) -> (N, 128, 128, 1).
# The ndim guard keeps the cell idempotent: running it again does not stack
# a further axis onto an array that already has its channel dimension.
if x_train_data.ndim == 3:
  x_train_data = np.expand_dims(x_train_data, axis=3)
print(x_train_data.shape)

(10222, 128, 128, 1)


In [None]:
# Combine the per-image one-hot vectors into a single 2-D label matrix.
y_train_data = np.asarray(y_feature)
print(y_train_data.shape)

(10222, 120)


Let's split the data into training and validation sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation; random_state pins the shuffle
# so the split is the same on every run.
split_parts = train_test_split(
    x_train_data,
    y_train_data,
    test_size=0.2,
    random_state=2,
)
x_train, x_val, y_train, y_val = split_parts

for image_subset in (x_train, x_val):
  print(image_subset.shape)

(8177, 128, 128, 1)
(2045, 128, 128, 1)


Let's import the relevant libraries

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Conv2D,MaxPooling2D,Flatten,GlobalAveragePooling2D

Let's now define model and layers

In [None]:
# Input geometry of the CNN: 128x128 pixels with a single channel.
img_rows, img_cols = 128, 128
num_channel = 1

In [None]:
# CNN trained from scratch: three conv/pool/dropout stages, one extra
# convolution, then a dense classifier with 120 softmax outputs.
model = Sequential([
    # Stage 1 - the only layer that declares the input shape.
    Conv2D(32, kernel_size=3, activation='relu', input_shape=(img_rows, img_cols, num_channel)),
    MaxPooling2D(pool_size=3),
    Dropout(0.2),

    # Stage 2
    Conv2D(64, kernel_size=3, activation='relu', padding='same'),
    MaxPooling2D(pool_size=2),
    Dropout(0.2),

    # Stage 3
    Conv2D(128, kernel_size=3, activation='relu', padding='same'),
    MaxPooling2D(pool_size=2),
    Dropout(0.2),

    # Final convolution, not followed by pooling or dropout.
    Conv2D(128, kernel_size=3, activation='relu', padding='same'),

    # Classifier head.
    Flatten(),
    Dense(500, activation='relu'),
    Dropout(0.2),
    Dense(120, activation='softmax'),
])

Let's compile and summarize the model

In [None]:
from tensorflow.keras import regularizers,optimizers

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by recent Keras releases.
# NOTE(review): 0.01 is ten times Adam's documented default of 0.001 - confirm
# this is intentional.
optimizer = optimizers.Adam(learning_rate=0.01)
# The targets are one-hot vectors, hence categorical cross-entropy.
model.compile(optimizer=optimizer,loss="categorical_crossentropy",metrics=["accuracy"])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 126, 126, 32)      320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 42, 42, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 42, 42, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 42, 42, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 21, 21, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 21, 21, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 21, 21, 128)       7

Let's fit the model

In [None]:
# Train the CNN for 20 epochs, evaluating on the held-out split after each one.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=20,
    validation_data=(x_val, y_val),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f36ec613b90>

Let's now use Transfer Learning

In [None]:
# Import from tensorflow.keras like the rest of the notebook; mixing the
# standalone `keras` package with `tensorflow.keras` objects can break when
# the two installed versions do not match.
from tensorflow.keras.applications.vgg16 import VGG16,preprocess_input

In [None]:
# VGG16 without its classification head (include_top=False) and with global
# average pooling on the output, using a weights file read from `path`.
vgg16_weights_file = path + "vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5"
base_model = VGG16(
    weights=vgg16_weights_file,
    include_top=False,
    pooling='avg',
)
base_model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None, None, 3)]   0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0     

Let's load the images once again, this time in colour

In [None]:
from tqdm import tqdm
import cv2

# Reload every training image, this time as a 128x128 colour array, and
# rebuild the matching list of one-hot labels from `y`.
x_feature = []
y_feature = []

# enumerate() supplies the row position, so the index into `y` cannot drift
# out of step with the rows of `labels`.
for i, (f, breed) in enumerate(tqdm(labels.values)):
  image_path = "./train/{}.jpg".format(f)
  train_img = cv2.imread(image_path, 1)  # flag 1 = 3-channel colour (OpenCV order is BGR)
  if train_img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising; fail here with the offending path.
    raise FileNotFoundError("Could not read image: {}".format(image_path))
  label = y[i]
  train_image_resize = cv2.resize(train_img, (128, 128))
  x_feature.append(train_image_resize)
  y_feature.append(label)

100%|██████████| 10222/10222 [00:33<00:00, 307.57it/s]


In [None]:
import tensorflow

from tensorflow.keras.preprocessing import image

In [None]:
# Extract one VGG16 feature vector per training image.
# Images go through the network in batches: calling predict() once per image
# pays Keras' per-call overhead for every single file and is far slower.
FEATURE_BATCH_SIZE = 64
image_ids = [f for f, label in labels.values]

X_arr = []
for start in tqdm(range(0, len(image_ids), FEATURE_BATCH_SIZE)):
  batch_ids = image_ids[start:start + FEATURE_BATCH_SIZE]
  # Shape (batch, 128, 128, 3): every image is resized on load, so the
  # per-image arrays stack cleanly.
  batch = np.stack([
    image.img_to_array(image.load_img('./train/'+f+'.jpg',target_size=[128,128]))
    for f in batch_ids
  ])
  # verbose=0 keeps Keras from printing its own progress output under tqdm's bar.
  batch_features = base_model.predict(preprocess_input(batch), verbose=0)
  # extend() adds one feature row per image, so X_arr remains a flat list of vectors.
  X_arr.extend(batch_features)

X = pd.DataFrame(X_arr)

100%|██████████| 10222/10222 [38:44<00:00,  4.40it/s]


In [None]:
# Inputs are the extracted VGG16 features; targets are the one-hot label list.
# NOTE(review): this rebinds `y_train`, which previously held the labels of
# the train/validation split.
X_train, y_train = X, y_feature

In [None]:
# Imported from tensorflow.keras so every layer in the model comes from the
# same Keras implementation as Sequential and Dense.
from tensorflow.keras.layers import Activation

# Fully connected classifier trained on the 512-value VGG16 feature vectors.
# NOTE(review): this rebinds `model`, replacing the CNN defined earlier.
model = Sequential([
    Dense(1024, input_shape=(512,)),
    Activation('relu'),
    # Only the first layer declares input_shape; this layer receives the
    # 1024-wide output of the layer above, so it takes no input_shape.
    Dense(256),
    Activation('relu'),
    Dense(120),
    Activation('softmax'),
])

# The targets are one-hot vectors, hence categorical cross-entropy.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the classifier on all extracted feature vectors; no validation data
# is passed, so only training metrics are reported.
model.fit(
    x=X_train,
    y=np.asarray(y_train),
    epochs=100,
    batch_size=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f870dc81e50>