In [None]:
pip install --upgrade matplotlib

In [8]:
import os

os.environ["KERAS_BACKEND"] = "jax" 

import keras
from keras import layers
#from keras.backend import ops
import tensorflow as tf

import numpy as np
import matplotlib.pyplot as plt

In [3]:
# CIFAR-100 setup: number of target classes and the shape of one input image.
classes = 100
input_shape = (32, 32, 3)

# Load the train/test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()

# Print the array shapes as a quick sanity check on what was loaded.
print(
    f"x_train shape: {x_train.shape}",
    f"y_train shape: {y_train.shape}",
)
print(
    f"x_test shape: {x_test.shape}",
    f"y_test shape: {y_test.shape}",
)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
x_train shape: (50000, 32, 32, 3) y_train shape: (50000, 1)
x_test shape: (10000, 32, 32, 3) y_test shape: (10000, 1)


In [4]:
# --- Training hyperparameters ---
learning_rate = 1e-3
batch_size = 128
epochs = 20

# --- Image / patch geometry ---
image_size = 72  # side length the input images are resized to
patch_size = 6  # side length of each square patch extracted from an image
# Patches per image: (image_size // patch_size) along each side, squared.
num_patches = (image_size // patch_size) ** 2

# --- Transformer dimensions ---
projection_dim = 64  # dimension of the patch embeddings
num_heads = 4  # number of heads in the multi-head attention
# Sizes of the transformer layers.
transformer_units = [projection_dim * 2, projection_dim]

# Sizes of the MLP head.
mlp_head_units = [2048, 1024]



## What is Data augmentation?

>Ans: Data augmentation is a technique to artificially create new training data from existing training data. This is done by applying domain-specific techniques to examples from the training data that create new and different training examples. This means that the training dataset can be artificially expanded by creating transformed versions of images in the dataset. Training deep learning neural network models on more data can result in more skillful models, and the augmentation techniques can create variations of the images that can improve the ability of the fit models to generalize what they have learned to new images.

In [6]:
# Preprocessing + augmentation pipeline applied to the input images.
data_augmentation = keras.Sequential(
    [
        layers.Normalization(),  # normalizes with the statistics computed by adapt() below
        layers.Resizing(image_size, image_size),  # resize images to image_size x image_size
        layers.RandomFlip("horizontal"),  # random horizontal flip
        layers.RandomRotation(factor=0.02),  # small random rotation
        layers.RandomZoom(height_factor=0.2, width_factor=0.2),  # random zoom in/out
    ],
    name="data_augmentation",
)
# The Normalization layer has no dataset statistics until it is adapted, so
# compute the mean and variance of the training images here.
data_augmentation.layers[0].adapt(x_train)


## What is a multi-layer perceptron?

>Ans: MLP (multi-layer perceptron) is a feedforward artificial neural network that has one or more hidden layers between the input and output layers.

In [7]:
def mlp(x, hidden_units, dropout_rate):
    """Apply a stack of Dense (GELU) + Dropout layers to `x`.

    Args:
        x: Input tensor passed through the stack.
        hidden_units: Iterable of ints; one Dense layer followed by one
            Dropout layer is created for each entry, in order.
        dropout_rate: Rate given to every Dropout layer.

    Returns:
        The output of the last Dropout layer, or `x` unchanged when
        `hidden_units` is empty.
    """
    for units in hidden_units:
        # Use the backend-agnostic Keras GELU: the notebook selects the JAX
        # backend, so a TensorFlow op (tf.nn.gelu) must not be used here.
        x = layers.Dense(units, activation=keras.activations.gelu)(x)
        x = layers.Dropout(dropout_rate)(x)  # dropout to reduce overfitting
    return x

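### Why do we need to reshape the patches?
>Ans: The transformer encoder consumes each image as a sequence of vectors (a 2D tensor per image), so every extracted patch must be flattened into a single vector of length `patch_size * patch_size * num_channels`. After reshaping, a batch of images has shape `(batch_size, num_patches, patch_size * patch_size * num_channels)`, i.e. one row per patch, which is the form we feed into the transformer encoder.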

In [9]:
class Patches(layers.Layer):
    """Layer that cuts images into square patches and flattens each patch.

    Args:
        patch_size: Side length, in pixels, of each square patch.
        **kwargs: Standard base-layer keyword arguments (e.g. `name`),
            forwarded to `layers.Layer` so that a config produced by
            `get_config()` can be passed back to the constructor.
    """

    def __init__(self, patch_size, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        """Split `images` into patches.

        Args:
            images: 4D tensor indexed as (batch, height, width, channels).

        Returns:
            Tensor reshaped to
            (batch, num_patches_h * num_patches_w,
             patch_size * patch_size * channels).
        """
        # keras.ops is backend-agnostic, so this works with the JAX backend
        # selected at the top of the notebook (tf.* ops would not).
        input_shape = keras.ops.shape(images)
        batch_size = input_shape[0]
        height = input_shape[1]
        width = input_shape[2]
        num_channels = input_shape[3]
        num_patches_h = height // self.patch_size
        num_patches_w = width // self.patch_size
        patches = keras.ops.image.extract_patches(images, size=self.patch_size)
        # Collapse the patch grid into one axis and flatten every patch into
        # a vector. No trailing comma after the call: that would wrap the
        # result in a tuple instead of returning the tensor.
        patches = keras.ops.reshape(
            patches,
            (
                batch_size,
                num_patches_h * num_patches_w,
                self.patch_size * self.patch_size * num_channels,
            ),
        )
        return patches

    def get_config(self):
        """Return the layer config, including `patch_size`, for serialization."""
        config = super().get_config()
        config.update({"patch_size": self.patch_size})
        return config


### Explanation of the code above, in brief

1. We get the shape of the images.
2. We get the batch size.
3. We get the height of the images.
4. We get the width of the images.
5. We get the number of channels of the images.
6. We compute the number of patches along the height.
7. We compute the number of patches along the width.
8. We extract the patches and reshape them so that each patch is a flat vector.
9. We return the patches.

In [None]:
# Display one randomly chosen training image in a 4x4-inch figure.
plt.figure(figsize=(4, 4))
# Pick a random index along the first axis of x_train and take that image.
image = x_train[np.random.choice(range(x_train.shape[0]))]
plt.imshow(image.astype("uint8"))  # cast to uint8 before handing to imshow
plt.axis("off")  # hide the axes