# Image Classification with Convolutional Neural Networks

In [None]:
#@title Import the packages if not installed (Optional if the Colab throws error while importing packages)
# !pip install -q tensorflow
# !pip install -q tqdm

## Import dependencies and packages


In [None]:
import tensorflow as tf

In [None]:
# Import TensorFlow Datasets
import tensorflow_datasets as tfds
tfds.disable_progress_bar()
# Helper libraries
import math
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Raise the TensorFlow logger's threshold to ERROR so that lower-severity
# messages do not clutter the cell outputs below.
import logging
logger = tf.get_logger()
logger.setLevel(logging.ERROR)

## Import the Fashion MNIST dataset

This notebook uses the Fashion MNIST dataset, which contains 70,000 grayscale images in 10 categories. The images show individual articles of clothing at low resolution (28 $\times$ 28 pixels), as seen here:

<table>
  <tr><td>
    <img src="https://tensorflow.org/images/fashion-mnist-sprite.png"
         alt="Fashion MNIST sprite" width="600">
  </td></tr>
  
</table>



**Added a split to the training dataset, reserving 10% for use during validation**

* The model is trained using `train_dataset`.
* The model validates as it is being trained using the `validation_dataset`
* The model is tested against `test_dataset`.

In [None]:
# Request three splits in a single call: the first 90% of the official
# training split, its remaining 10% (held out for validation), and the test split.
split_spec = ['train[:90%]', 'train[90%:]', 'test']
dataset, metadata = tfds.load(
    'fashion_mnist',
    split=split_spec,
    as_supervised=True,  # yield (image, label) pairs
    with_info=True,      # also return the dataset info as `metadata`
)
train_dataset, validation_dataset, test_dataset = dataset



The images are 28 $\times$ 28 arrays, with pixel values in the range `[0, 255]`. The *labels* are an array of integers, in the range `[0, 9]`. These correspond to the *class* of clothing the image represents:

<table>
  <tr>
    <th>Label</th>
    <th>Class</th>
  </tr>
  <tr>
    <td>0</td>
    <td>T-shirt/top</td>
  </tr>
  <tr>
    <td>1</td>
    <td>Trouser</td>
  </tr>
    <tr>
    <td>2</td>
    <td>Pullover</td>
  </tr>
    <tr>
    <td>3</td>
    <td>Dress</td>
  </tr>
    <tr>
    <td>4</td>
    <td>Coat</td>
  </tr>
    <tr>
    <td>5</td>
    <td>Sandal</td>
  </tr>
    <tr>
    <td>6</td>
    <td>Shirt</td>
  </tr>
    <tr>
    <td>7</td>
    <td>Sneaker</td>
  </tr>
    <tr>
    <td>8</td>
    <td>Bag</td>
  </tr>
    <tr>
    <td>9</td>
    <td>Ankle boot</td>
  </tr>
</table>

Each image is mapped to a single label. Since the *class names* are not included with the dataset, store them here to use later when plotting the images:

In [None]:
# Human-readable class names; a name's position in the list is its integer label.
class_names = [
    'T-shirt/top',  # 0
    'Trouser',      # 1
    'Pullover',     # 2
    'Dress',        # 3
    'Coat',         # 4
    'Sandal',       # 5
    'Shirt',        # 6
    'Sneaker',      # 7
    'Bag',          # 8
    'Ankle boot',   # 9
]

### Data Exploration before training

The output below shows that there are 54,000 images in the training set, 6,000 images in the validation set, and 10,000 images in the test set:

In [None]:
# Record the size of every split; the counts are reused by later cells
# (shuffle buffer size, number of evaluation steps).
num_train_examples, num_validation_examples, num_test_examples = (
    len(split) for split in (train_dataset, validation_dataset, test_dataset)
)

for template, count in (
    ("Number of training examples: {}", num_train_examples),
    ("Number of validation examples: {}", num_validation_examples),
    ("Number of test examples:     {}", num_test_examples),
):
    print(template.format(count))

## Data Pre-processing

The value of each pixel in the image data is an integer in the range `[0,255]`. For the model to work properly, these values need to be normalized to the range `[0,1]`.

The `normalize` function preprocesses each image together with its label: `tf.cast` converts the pixel values to `tf.float32` (32-bit floating-point numbers), and dividing by 255 then rescales them to the range `[0,1]`. The cast is needed because the raw pixel values are integers, while the model's layers and loss operate on floating-point tensors. The labels are returned unchanged.

In [None]:
def normalize(images, labels):
  """Cast images to float32 and divide by 255; labels pass through untouched.

  Args:
    images: image tensor; pixel values are assumed to lie in [0, 255], so the
      result lies in [0, 1].
    labels: labels paired with `images`.

  Returns:
    A `(images, labels)` tuple with `images` as `tf.float32` divided by 255
    and `labels` exactly as received.
  """
  scaled = tf.cast(images, tf.float32) / 255
  return scaled, labels

# Apply `normalize` to every element of the train, validation and test
# datasets, then cache the result: the first pass reads the data from disk,
# later passes reuse the cached elements, which makes training faster.
train_dataset = train_dataset.map(normalize).cache()
validation_dataset = validation_dataset.map(normalize).cache()
test_dataset = test_dataset.map(normalize).cache()

### Visualizing the processed data



In [None]:
# Pull the first test example and reshape it to 28x28, dropping the color
# dimension so it can be drawn as a plain 2-D image.
image, label = next(iter(test_dataset.take(1)))
image = image.numpy().reshape((28, 28))

plt.figure()
plt.imshow(image, cmap=plt.cm.binary)
plt.colorbar()
plt.grid(False)
plt.show()

### Visualize the first 10 images from the *test set* and display the class name below each image

In [None]:
plt.figure(figsize=(10, 10))
# Draw ten examples into the leading cells of a 5x5 subplot grid, each one
# captioned with its class name.
for i, (image, label) in enumerate(test_dataset.take(10)):
    image = image.numpy().reshape((28, 28))
    plt.subplot(5, 5, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(image, cmap=plt.cm.binary)
    plt.xlabel(class_names[label])
plt.show()

## Building the neural network requires configuring the layers of the model, then compiling the model

### Setting up the layers for network

**Our network layers configuration:**
* 2D Convolution layer - 32 filters, 3x3 kernel, ReLU activation, `same` padding (the input is zero-padded so that the output keeps the input's height and width)
* Max pooling layer - 2x2 kernel, stride 2
* 2D Convolution layer - 64 filters, 3x3 kernel, ReLU activation, `same` padding
* Max pooling layer - 2x2 kernel, stride 2
* Flatten layer
* Dense layer - 128 nodes output, ReLU activation
* Dense layer - 10 nodes output, Softmax activation

<font color='orange'><b>Stride controls how much the filter moves across the image in each step.</b></font>
<ul>
    <li><font color='orange'><b>Stride of 1</b>: The filter moves one pixel at a time, creating a detailed output.</font></li>
    <li><font color='orange'><b>Stride of 2</b>: The filter skips every other pixel, making the output smaller and faster to compute.</font></li>
</ul>




In [None]:
# Two convolution + max-pooling stages followed by a dense classification head.
keras_layers = tf.keras.layers
model = tf.keras.Sequential([
    # Stage 1: 32 filters of 3x3 on the 28x28x1 input, then 2x2 pooling with stride 2.
    keras_layers.Conv2D(filters=32, kernel_size=(3, 3), padding='same',
                        activation=tf.nn.relu, input_shape=(28, 28, 1)),
    keras_layers.MaxPool2D(pool_size=(2, 2), strides=2),
    # Stage 2: 64 filters of 3x3, then 2x2 pooling with stride 2.
    keras_layers.Conv2D(filters=64, kernel_size=(3, 3), padding='same',
                        activation=tf.nn.relu),
    keras_layers.MaxPool2D(pool_size=(2, 2), strides=2),
    # Head: flatten the feature maps, one hidden layer, then a 10-way softmax.
    keras_layers.Flatten(),
    keras_layers.Dense(units=128, activation=tf.nn.relu),
    keras_layers.Dense(units=10, activation='softmax'),
])

### Using `Model.compile` to compile the model

**The following settings have been implemented**
* *Loss function* — SparseCategoricalCrossentropy
* *Optimizer* — Adam
* *Metrics* — accuracy


In [None]:
# The final layer already applies softmax, so the loss is built with its
# default settings (probabilities rather than logits).
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

## Using `Model.fit` to train the model

Training is performed by calling the `model.fit` method,
1. Feed the training data to the model using `train_dataset`.
2. The model learns to associate images and labels.
3. The `epochs=10` parameter limits training to 10 full iterations of the training dataset


In [None]:
BATCH_SIZE = 32

# The three datasets were already cached right after normalization, so they
# are not cached a second time here; another `.cache()` would only keep a
# duplicate copy of the same elements.
# Only the training data is shuffled; the buffer spans the whole training set.
train_dataset = train_dataset.shuffle(num_train_examples).batch(BATCH_SIZE)
validation_dataset = validation_dataset.batch(BATCH_SIZE)
test_dataset = test_dataset.batch(BATCH_SIZE)

In [None]:
# Train for 10 epochs, passing the held-out validation split as `validation_data`.
model.fit(x=train_dataset, validation_data=validation_dataset, epochs=10)

## Evaluate accuracy with `Model.evaluate`

Comparing how the model performs on the test dataset.

In [None]:
# Derive the number of evaluation steps from BATCH_SIZE (the size the test set
# was batched with) instead of repeating the literal 32, so the two cannot
# drift apart if the batch size is changed.
test_steps = math.ceil(num_test_examples / BATCH_SIZE)
test_loss, test_accuracy = model.evaluate(test_dataset, steps=test_steps)
print('Accuracy on test dataset:', test_accuracy)

## The model has been trained, now, to make predictions about some images

In [None]:
# Convert the first batch of the test set to NumPy arrays and predict on it.
for batch_images, batch_labels in test_dataset.take(1):
  test_images = batch_images.numpy()
  test_labels = batch_labels.numpy()
predictions = model.predict(test_images)

In [None]:
# Shape of the prediction array for the batch: (images in the batch, classes).
predictions.shape


The model has predicted the label for each image in the first batch of the test set. Here is the first prediction:

In [None]:
# Class scores the model produced for the first image of the batch.
predictions[0]

A prediction is an array of 10 numbers, one per class, describing the model's confidence that the image belongs to that class. To see which label has the highest confidence value:

In [None]:
# Index of the highest score, i.e. the label predicted for the first image.
np.argmax(predictions[0])

So the model is usually most confident that this image is a coat, or `class_names[4]`.

In [None]:
# True label of that same first image, for comparison with the predicted label.
test_labels[0]

Visualize full set of 10 class predictions

In [None]:
def plot_image(i, predictions_array, true_labels, images):
  """Draw example `i` with a caption comparing predicted and true class.

  Args:
    i: index of the example to draw.
    predictions_array: per-example score vectors; entry `i` is used.
    true_labels: per-example integer labels; entry `i` is used.
    images: per-example images; channel 0 of image `i` is shown.

  The caption reads "<predicted class> <top score as %>% (<true class>)" and
  is blue when the predicted label equals the true label, red otherwise.
  """
  scores = predictions_array[i]
  actual = true_labels[i]

  plt.grid(False)
  plt.xticks([])
  plt.yticks([])
  plt.imshow(images[i][..., 0], cmap=plt.cm.binary)

  guess = np.argmax(scores)
  caption = "{} {:2.0f}% ({})".format(class_names[guess],
                                      100*np.max(scores),
                                      class_names[actual])
  plt.xlabel(caption, color='blue' if guess == actual else 'red')

def plot_value_array(i, predictions_array, true_label):
  """Draw a bar chart of the class scores for example `i`.

  Args:
    i: index of the example to draw.
    predictions_array: per-example score vectors; entry `i` is plotted.
    true_label: per-example integer labels (indexed with `i` despite the
      singular name).

  All bars start grey; the predicted class is then coloured red and the true
  class blue.
  """
  scores, actual = predictions_array[i], true_label[i]
  plt.grid(False)
  plt.xticks([])
  plt.yticks([])
  # One bar per score rather than a hard-coded 10, so the helper also works
  # for models with a different number of output classes.
  bars = plt.bar(range(len(scores)), scores, color="#777777")
  plt.ylim([0, 1])
  guess = np.argmax(scores)

  # Order matters: for a correct prediction both indices coincide and the
  # bar ends up blue.
  bars[guess].set_color('red')
  bars[actual].set_color('blue')

Display 0th image, predictions, and prediction array.

In [None]:
i = 0  # position of the example within the predicted batch
plt.figure(figsize=(6, 3))

# Left panel: the image with its predicted / true class caption.
plt.subplot(121)
plot_image(i, predictions, test_labels, test_images)

# Right panel: bar chart of the class scores for the same example.
plt.subplot(122)
plot_value_array(i, predictions, test_labels)

In [None]:
i = 12  # position of the example within the predicted batch
plt.figure(figsize=(6, 3))

# Left panel: the image with its predicted / true class caption.
plt.subplot(121)
plot_image(i, predictions, test_labels, test_images)

# Right panel: bar chart of the class scores for the same example.
plt.subplot(122)
plot_value_array(i, predictions, test_labels)

## Visualize several images with their prediction
 Correct prediction labels are blue and incorrect prediction labels are red.

In [None]:
# Show the first 15 examples of the batch on a 5x3 layout. Each example takes
# two neighbouring subplot slots: its image (caption blue when the prediction
# is correct, red when it is not) and, to the right, its score bar chart.
num_rows = 5
num_cols = 3
num_images = num_rows*num_cols
plt.figure(figsize=(2*2*num_cols, 2*num_rows))
for i in range(num_images):
  image_slot = 2*i + 1
  plt.subplot(num_rows, 2*num_cols, image_slot)
  plot_image(i, predictions, test_labels, test_images)
  plt.subplot(num_rows, 2*num_cols, image_slot + 1)
  plot_value_array(i, predictions, test_labels)


Using the trained model to make prediction for one image

In [None]:
# Grab an image from the test dataset
img = test_images[0]

print(img.shape)

In [None]:
# Add the image to a batch where it's the only member.
img = np.array([img])

print(img.shape)

Prediction:

In [None]:
# Predict on the one-image batch built from `img` and print the raw scores.
predictions_single = model.predict(img)

print(predictions_single)

In [None]:
# Bar chart for the single prediction, with the class names as rotated tick
# labels; assigning to `_` keeps the return value of `plt.xticks` out of the
# cell output.
plot_value_array(0, predictions_single, test_labels)
tick_positions = range(10)
_ = plt.xticks(tick_positions, class_names, rotation=45)

`model.predict` returns a 2-D array with one row of class scores for each image in the batch. Check the prediction for the single image in the batch:

In [None]:
# Index of the highest score in the only row, i.e. the predicted label.
np.argmax(predictions_single[0])

The model predicts a label of 4 (coat).

## **Simple Image Classification is done ✨ Now, experiment by making the network complex or different data and enjoy !!😎🙃**