<a href="https://colab.research.google.com/github/lee-thien-tuyen/image-classification/blob/main/image_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Image classification: clean / dirty road

## Load the dataset from GitHub

In [None]:
# Clone the repository that holds the dataset into ./image-classification
! git clone https://github.com/lee-thien-tuyen/image-classification.git


## Import libraries

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow_hub as hub
import cv2
import requests
from PIL import Image
from io import BytesIO

## Load data, formatting

In [None]:
# Read the metadata table shipped with the cloned repository and preview its first rows
dataset = pd.read_csv(filepath_or_buffer="image-classification/data_set/metadata.csv")
dataset.head()

In [None]:
# Column names, dtypes and non-null counts of the metadata table
dataset.info()

In [None]:
# Summary statistics of the numeric metadata columns
dataset.describe()

In [None]:
# Class names indexed by the integer label stored in the metadata
class_names = 'clean', 'dirty'
num_classes = len(class_names)

# Shape every image is resized to and the model's input shape: (height, width, channels)
img_size = 128, 128, 3

print(f'{num_classes} classes: {class_names}')
print("image size:", img_size)

In [None]:
# Peek at the second column of the ninth row. A single positional (row, column)
# lookup avoids indexing the intermediate row Series with an integer key, which
# pandas treats as a deprecated positional fallback when the labels are strings.
dataset.iloc[8, 1]

In [None]:
# Directory with the image files, relative to the working directory like the
# metadata CSV path above (the repository is cloned into ./image-classification).
image_path = os.path.join('image-classification', 'data_set', 'Images', 'Images')

labels = []
images = []
# Column 0 of the metadata holds the file name, column 1 the integer class index.
for file_name, label_index in zip(dataset.iloc[:, 0], dataset.iloc[:, 1]):
    file_path = os.path.join(image_path, file_name)

    # cv2.imread signals an unreadable/missing file by returning None instead of raising
    bgr_image = cv2.imread(file_path, cv2.IMREAD_COLOR)
    if bgr_image is None:
        raise FileNotFoundError(f'could not read image: {file_path}')

    # cv2.resize expects (width, height) while img_size is (height, width, channels)
    resized = cv2.resize(bgr_image, (img_size[1], img_size[0]))
    # OpenCV loads BGR; convert to RGB for matplotlib and the model
    images.append(cv2.cvtColor(resized, cv2.COLOR_BGR2RGB))

    # labels are one-hot encoded vectors: [1, 0] for class 0, [0, 1] for class 1
    label = np.zeros(num_classes)
    label[int(label_index)] = 1
    labels.append(label)

labels = np.asarray(labels)
images = np.asarray(images)
print("labels shape:", labels.shape)
print("images shape:", images.shape)

plt.imshow(images[1])

## Data visualisation

In [None]:
# Draw a 4x4 grid of randomly picked samples, each captioned with its class name
fig, axs = plt.subplots(4, 4, figsize=(10, 10))

for axes_row in axs:
    for ax in axes_row:
        sample = np.random.randint(0, len(images))

        ax.imshow(images[sample])

        # hide the ticks and use the x label as the caption
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlabel(class_names[np.argmax(labels[sample])])

## Split Train/Validation

In [None]:
# Hold out 20% of the samples for validation; the fixed random_state keeps the split reproducible
x_train, x_val, y_train, y_val = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
)

print(f"train images shape: {x_train.shape}\ntrain labels shape: {y_train.shape}\n")
print(f"validation images shape: {x_val.shape}\nvalidation labels shape: {y_val.shape}\n")

## Data augmentation

In [None]:
# Training images: random shear / rotation / zoom / flips plus rescaling of the
# pixel values from [0, 255] to [0, 1].
train_augmenter = tf.keras.preprocessing.image.ImageDataGenerator(
    shear_range=0.3,
    rotation_range=15,
    zoom_range=0.3,
    vertical_flip=True,
    horizontal_flip=True,
    rescale=1./255,
)
train_images_generator = train_augmenter.flow(x_train, y_train)

# Validation images: rescaling only. Random augmentation (and shuffling) of the
# validation set would make val_loss / val_accuracy vary from epoch to epoch for
# reasons unrelated to the model, so the images are fed unchanged and in order.
val_rescaler = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
val_images_generator = val_rescaler.flow(x_val, y_val, shuffle=False)



## CNNs model

In [None]:

def get_model():
    """Build and compile a fresh, untrained CNN classifier.

    Architecture: three convolutional blocks (two 'same'-padded ReLU Conv2D
    layers followed by 2x2 max pooling) with 64, 128 and 256 filters, global
    average pooling, four ReLU dense layers (512, 256, 128, 64 units) and a
    softmax output layer.

    Reads the module-level ``img_size`` (input shape) and ``num_classes``
    (width of the output layer).

    Returns:
        A ``tf.keras.Sequential`` model compiled with the Adam optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    keras_layers = tf.keras.layers
    # (filters, kernel size) of each convolutional block
    conv_blocks = ((64, (3, 3)), (128, (2, 2)), (256, (2, 2)))

    stack = []
    for block_index, (filters, kernel) in enumerate(conv_blocks):
        for conv_index in range(2):
            # only the very first layer declares the input shape
            is_input_layer = block_index == 0 and conv_index == 0
            extra = {'input_shape': img_size[:]} if is_input_layer else {}
            stack.append(keras_layers.Conv2D(filters, kernel, activation='relu', padding='same', **extra))
        stack.append(keras_layers.MaxPooling2D(pool_size=2))
    stack.append(keras_layers.GlobalAveragePooling2D())

    # Dense block
    for units in (512, 256, 128, 64):
        stack.append(keras_layers.Dense(units, activation='relu'))
    stack.append(keras_layers.Dense(num_classes, activation='softmax'))

    cnn_model = tf.keras.Sequential(stack)
    cnn_model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return cnn_model

## Lottery Ticket Hypothesis implementation
About the lottery ticket hypothesis: https://arxiv.org/abs/1803.03635

In [None]:
# Seed search inspired by the lottery ticket hypothesis: build the model under
# several random seeds, train each briefly and keep the one with the lowest
# training loss as the starting point for the full training run.
min_loss = float('inf')  # no arbitrary ceiling, so the first finite loss always wins
best_model = None
for seed in np.linspace(1, 257654, 15).astype(int):
    tf.random.set_seed(seed)
    cnn_model = get_model()

    # NOTE(review): steps_per_epoch counts batches, not images; 189 may have been
    # meant as the number of training images - confirm, or drop the argument to
    # run exactly one pass over the generator.
    seed_loss = cnn_model.fit(train_images_generator, epochs=1, verbose=1, steps_per_epoch=189).history['loss'][0]
    if seed_loss < min_loss:
        min_loss = seed_loss
        best_model = cnn_model

# A NaN loss never compares as smaller, so every candidate can be rejected
if best_model is None:
    raise RuntimeError('no candidate model produced a finite training loss')

print(f'\n\nmin loss: {min_loss}')
# summary() prints the table itself and returns None, so it must not be passed to print()
best_model.summary()

## Training and Results

In [None]:
# ModelCheckpoint callback: writes the model after every epoch to cnn_model/model<epoch>
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath='cnn_model/model{epoch:02d}')

# Full training run of the selected model, validated after each epoch
history = best_model.fit(
    train_images_generator,
    validation_data=val_images_generator,
    epochs=200,
    verbose=1,
    callbacks=[checkpoint_callback],
)


minimum val_loss: 0.0273 - val_accuracy: 1.0000 | epoch 192


In [None]:
# Load the checkpoint of the epoch with the lowest validation loss instead of a
# hard-coded epoch number, which goes stale on every re-run of the training.
# history lists are 0-indexed while checkpoint file names use 1-based epochs.
best_epoch = int(np.argmin(history.history['val_loss'])) + 1
print(f'loading checkpoint of epoch {best_epoch}')
cnn_model = tf.keras.models.load_model(f'cnn_model/model{best_epoch:02d}')

## Accuracy and Loss Plot

In [None]:
# Per-epoch metric curves recorded during the full training run
accuracy, val_accuracy, loss, val_loss = (
    history.history[metric]
    for metric in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
)

# x axis for the plots: one point per recorded epoch
epochs = range(len(accuracy))

In [None]:
# One figure per data split, each overlaying that split's accuracy and loss curves
for figure_title, accuracy_curve, accuracy_label, loss_curve, loss_label in (
    ('Training Accuracy and Loss', accuracy, 'Training Accuracy', loss, 'Training Loss'),
    ('Validation Accuracy and Loss', val_accuracy, 'Validation Accuracy', val_loss, 'Validation Loss'),
):
    plt.figure()
    plt.plot(epochs, accuracy_curve, label=accuracy_label)
    plt.plot(epochs, loss_curve, label=loss_label)
    plt.legend()
    plt.title(figure_title)

plt.show()

## Prediction

In [None]:
# Show the first validation images in a 12x4 grid, captioned with the predicted
# and the true class; wrong predictions are captioned in red.
n_rows, n_cols = 12, 4
n_shown = min(n_rows * n_cols, len(x_val))  # do not index past the validation set

# The model was trained on pixel values rescaled to [0, 1] (rescale=1./255 in the
# generators), so the raw 0-255 images need the same scaling before prediction.
# A single batched predict call replaces one call per image.
predictions = cnn_model.predict(x_val[:n_shown].astype('float32') / 255.0, verbose=0)

fig, axs = plt.subplots(n_rows, n_cols, figsize=(12, 12))

for i, ax in enumerate(axs.flat):
    ax.set_xticks([])
    ax.set_yticks([])

    if i >= n_shown:
        # fewer validation images than grid cells: leave the remaining cells empty
        ax.axis('off')
        continue

    predicted_class = np.argmax(predictions[i])
    true_class = np.argmax(y_val[i])
    caption = f'prediction: {class_names[predicted_class]} | label: {class_names[true_class]}'

    if predicted_class != true_class:
        ax.set_xlabel(caption, color='red')
    else:
        ax.set_xlabel(caption)

    # display the original, unscaled image
    ax.imshow(x_val[i])

plt.show()

In [None]:
# Write the weights of the loaded checkpoint model to an HDF5 file
cnn_model.save_weights('cleanroad_model.h5')