## Download the dataset
Getting dataset from Kaggle requires kaggle.json file. Obtain it from the Account page.

In [6]:
%mkdir /root/.kaggle/

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [8]:
%cp kaggle.json /root/.kaggle/

In [9]:
!kaggle datasets download -d mengcius/cinic10

Downloading cinic10.zip to /content
 99% 750M/754M [00:08<00:00, 57.6MB/s]
100% 754M/754M [00:09<00:00, 87.7MB/s]


In [10]:
import shutil
shutil.unpack_archive("cinic10.zip", "/content")

## Read the dataset

In [1]:
%matplotlib inline
import numpy as np
import cv2
import os
import random
from tqdm import tqdm
from keras.layers import Dropout, Flatten, Dense, Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras.applications.vgg16 import VGG16
from keras.models import Sequential, Model
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from tensorflow.keras.applications.resnet50 import ResNet50

In [2]:
# Hyperparameters
image_size = 32  # for VGG and Resnet, assign 224
color_channel = 3

input_shape = (image_size, image_size, color_channel)

traindir = "/content/train"
testdir = "/content/test"

In [3]:
# get all classes from dataset
categories = []
for dir in os.listdir(traindir):
  categories.append(dir)
categories

['automobile',
 'airplane',
 'bird',
 'ship',
 'dog',
 'deer',
 'truck',
 'frog',
 'cat',
 'horse']

In [4]:
# VGG and Resnet requires 224x224 images, therefore more memory
# will be used. To save memory, maximum max_images images for each class 
# can be used
max_images = 300 
iterations = 0

train_data, test_data = [], []

# read train and test datasets
for category in categories:

    train_path = os.path.join(traindir, category)  
    test_path = os.path.join(testdir, category)

    category_index = categories.index(category)

    # iterate through train images
    for img in tqdm(os.listdir(train_path)):

      # set limit for maximum number of images
      # due to memory limitations
      #iterations += 1
      #if iterations > max_images:
      #  break
      
      image_data = cv2.imread(os.path.join(train_path, img))
      # resized = cv2.resize(image_data, (image_size, image_size)) 
      train_data.append([image_data, category_index])

    iterations = 0

    # iterate through test images
    for img in tqdm(os.listdir(test_path)):

      # set limit for maximum number of images
      # due to memory limitations
      #iterations += 1
      #if iterations > max_images:
      #  break

      image_data = cv2.imread(os.path.join(test_path, img))
      # resized = cv2.resize(image_data, (image_size, image_size)) 
      test_data.append([image_data, category_index])
    iterations = 0

print('\n')
print('Training data length:', len(train_data))
print('Test data length:', len(test_data))

100%|██████████| 9000/9000 [00:00<00:00, 9746.64it/s]
100%|██████████| 9000/9000 [00:00<00:00, 9600.08it/s]
100%|██████████| 9000/9000 [00:01<00:00, 8903.22it/s]
100%|██████████| 9000/9000 [00:00<00:00, 9248.29it/s]
100%|██████████| 9000/9000 [00:01<00:00, 7916.27it/s]
100%|██████████| 9000/9000 [00:01<00:00, 7170.13it/s]
100%|██████████| 9000/9000 [00:00<00:00, 9473.79it/s]
100%|██████████| 9000/9000 [00:00<00:00, 9666.64it/s]
100%|██████████| 9000/9000 [00:00<00:00, 9712.02it/s]
100%|██████████| 9000/9000 [00:00<00:00, 9310.45it/s]
100%|██████████| 9000/9000 [00:00<00:00, 9524.63it/s]
100%|██████████| 9000/9000 [00:00<00:00, 9363.18it/s]
100%|██████████| 9000/9000 [00:00<00:00, 10185.45it/s]
100%|██████████| 9000/9000 [00:00<00:00, 9812.12it/s]
100%|██████████| 9000/9000 [00:00<00:00, 9291.11it/s]
100%|██████████| 9000/9000 [00:01<00:00, 8389.63it/s]
100%|██████████| 9000/9000 [00:00<00:00, 9600.16it/s]
100%|██████████| 9000/9000 [00:00<00:00, 9379.84it/s]
100%|██████████| 9000/9000 



Training data length: 90000
Test data length: 90000





In [5]:
# shuffle the data
random.shuffle(train_data)
for sample in train_data[:10]:
  print(sample[1])

3
4
2
0
4
3
4
2
0
4


In [6]:
X_train, X_test, y_train, y_test = [], [], [], []

for features, label in train_data:
    X_train.append(features)
    y_train.append(label)

for features, label in test_data:
    X_test.append(features)
    y_test.append(label)

# reshape new ndarrays to desired image size
X_train = np.array(X_train).reshape(-1, image_size, image_size, color_channel)
X_test = np.array(X_test).reshape(-1, image_size, image_size, color_channel)

In [7]:
# clear variables from colab to save memory
%reset_selective -f "^train_data$"
%reset_selective -f "^test_data$"

In [8]:
# Normalization
X_train = np.array(X_train/255.0)
# X_test will be normalized later

y_train = np.array(y_train)
y_test = np.array(y_test)

# one-hot encoding
y_train = to_categorical(y_train, len(categories))
y_test = to_categorical(y_test, len(categories))

print('X_train:', X_train.shape)
print('X_test:', X_test.shape)
print('y_train:', y_train.shape)
print('y_test:', y_test.shape)

X_train: (90000, 32, 32, 3)
X_test: (90000, 32, 32, 3)
y_train: (90000, 10)
y_test: (90000, 10)


## Models

### CNN Model

In [22]:
model = Sequential()

model.add(Conv2D(50, kernel_size=(3, 3), kernel_initializer='GlorotNormal', 
                 input_shape=input_shape, activation='relu'))

model.add(Conv2D(75, kernel_size=(3, 3), kernel_initializer='GlorotNormal', 
                 activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.2))

model.add(Conv2D(125, kernel_size=(3, 3), kernel_initializer='GlorotNormal', 
                 activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.2))

model.add(Flatten())

model.add(Dense(len(categories), activation="softmax"))

model.compile(loss='categorical_crossentropy', 
              optimizer="adam", 
              metrics=['accuracy'])

model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 30, 30, 50)        1400      
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 28, 28, 75)        33825     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 14, 14, 75)        0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 14, 14, 75)        0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 12, 12, 125)       84500     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 6, 6, 125)         0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 6, 6, 125)        

In [23]:
model.fit(X_train, y_train, batch_size=128,  epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f40b54fc8d0>

In [25]:
# Normalize the test data
X_test = np.array(X_test/255.0)

In [26]:
# Calculate prediction
y_pred = model.predict(X_test)
y_pred_indices = np.argmax(y_pred, axis = 1)
y_test_indices = np.argmax(y_test, axis = 1)

print(confusion_matrix(y_test_indices, y_pred_indices))
print(classification_report(y_test_indices, y_pred_indices))

[[6381  309   69  534   31   24 1454   29   59  110]
 [ 202 6753  365 1125   21   54  255   30   69  126]
 [  80  551 5582  616  236  426   62  634  578  235]
 [ 250  665  265 7123   47   63  309   71  121   86]
 [ 136  239  916  450 3055  855  140  247 2022  940]
 [  54  283  939  445  531 4235  113  316  896 1188]
 [1559  320   68  609   41   40 6125   21   86  131]
 [  55   72  794  235  114  184   35 6576  862   73]
 [  75  176  898  425  813  548  129  644 4950  342]
 [  93  226  350  258  385  515  179   25  402 6567]]
              precision    recall  f1-score   support

           0       0.72      0.71      0.71      9000
           1       0.70      0.75      0.73      9000
           2       0.54      0.62      0.58      9000
           3       0.60      0.79      0.68      9000
           4       0.58      0.34      0.43      9000
           5       0.61      0.47      0.53      9000
           6       0.70      0.68      0.69      9000
           7       0.77      0.73   

### VGG-16

This model requires input shape of 224x224x3, therefore resize the images before proceeding.

In [10]:
model = VGG16(weights="imagenet")

# remove output layer
model = Model(inputs=model.inputs, 
                  outputs=model.layers[-2].output)

# add 1 fully-connected and 1 prediction layer
fc = Dense(1024, activation='relu', name='fc')(model.layers[-2].output)
pred = Dense(len(categories), activation='softmax', name='prediction')(fc)

# create new model with VGG16 and our custom layers
myModel = Model(model.input, pred)

# make only last 4 layers trainable
for i in range(0, 19):
  myModel.layers[i].trainable=False

myModel.compile(optimizer='adam', 
                loss='categorical_crossentropy',
                metrics=["accuracy"])

myModel.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0   

In [None]:
myModel.fit(X_train, y_train, batch_size=32, epochs=10)

In [None]:
# clear varibales from colab to save memory
%reset_selective -f "^X_train$"
%reset_selective -f "^y_train$"

In [None]:
# Normalize the test data
X_test = np.array(X_test/255.0)

In [None]:
# Calculate prediction with tests data
y_pred = model.predict(X_test)
y_pred_indices = np.argmax(y_pred, axis = 1)
y_test_indices = np.argmax(y_test, axis = 1)

print(confusion_matrix(y_test_indices, y_pred_indices))
print(classification_report(y_test_indices, y_pred_indices))

### Resnet-50

This model also requires input shape of 224x224x3, therefore resize the images to the specified size before training.

In [None]:
model = ResNet50(weights='imagenet')

# add 1 fully-connected and 1 prediction
fc = Dense(1024, name='fc')(model.layers[-2].output)
pred = Dense(len(categories), activation='softmax', name='prediction')(fc)

# create new model with VGG16 and our custom layers
myModel = Model(model.input, pred)

# make resnet layers untrainable
# only last one will be trainable
for i in range(0, 175):
  myModel.layers[i].trainable=False

myModel.compile(optimizer='adam', 
                loss='categorical_crossentropy',
                metrics=["accuracy"])

myModel.summary()

In [None]:
myModel.fit(X_train, y_train, batch_size=32, epochs=10)

In [None]:
# clear varibales from colab to save memory
%reset_selective -f "^X_train$"
%reset_selective -f "^y_train$"

In [None]:
# Normalize the test data
X_test = np.array(X_test/255.0)

In [None]:
# Calculate prediction with tests data
y_pred = model.predict(X_test)
y_pred_indices = np.argmax(y_pred, axis = 1)
y_test_indices = np.argmax(y_test, axis = 1)

print(confusion_matrix(y_test_indices, y_pred_indices))
print(classification_report(y_test_indices, y_pred_indices))