In [None]:
# Colab-only: mount Google Drive at /content/gdrive. The flower dataset loaded
# later in this notebook is read from a path under this mount point.
from google.colab import drive
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
import tensorflow as tf
import numpy

In [None]:
num_classes = 5
# Images are resized to 72x72 on load (see `image_size` in the split options
# below), so the declared input shape has to agree with that; the previous
# value (32, 32, 3) did not match the data actually produced by the loaders.
input_shape = (72, 72, 3)

flowers_dir = "/content/gdrive/MyDrive/flower_photos"

# NOTE(review): this unsplit load is only read for `class_names` in this cell.
# `train.class_names` presumably yields the same list without a further scan of
# the Drive folder -- confirm nothing else needs `dataset` before removing it.
dataset = tf.keras.preprocessing.image_dataset_from_directory(flowers_dir)

# Options shared by both splits. `validation_split` and `seed` must be the same
# in the two calls so that the training and validation subsets do not overlap;
# keeping them in one place makes it impossible for them to drift apart.
split_options = dict(
    validation_split=0.2,
    labels="inferred",
    image_size=input_shape[:2],
    batch_size=200,
    seed=123,
)
train = tf.keras.preprocessing.image_dataset_from_directory(
    flowers_dir, subset="training", **split_options
)
validation = tf.keras.preprocessing.image_dataset_from_directory(
    flowers_dir, subset="validation", **split_options
)

# Class names are read here, before any `.map()` call replaces the datasets
# with transformed ones.
class_names = dataset.class_names
print(class_names)


Found 3670 files belonging to 5 classes.
Found 3670 files belonging to 5 classes.
Using 2936 files for training.
Found 3670 files belonging to 5 classes.
Using 734 files for validation.
['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']


In [None]:
# Disabled debugging snippet: it is wrapped in a string literal, so none of it
# runs. When enabled it pulls one batch from each split via as_numpy_iterator()
# and prints the shape of the training images.
'''train_iterator = train.as_numpy_iterator()
train_ds = train_iterator.next()
print(train_ds[0].shape)
validation_iterator = validation.as_numpy_iterator()
validation_ds = validation_iterator.next()'''

In [None]:
image_size = 72

# Multiplies every pixel value by 1/255 (maps 8-bit intensities to [0, 1]).
preprocessingModel = tf.keras.layers.Rescaling(1./255)

# Random augmentation pipeline applied to training images only.
# The layers are taken from the stable `tf.keras.layers` namespace -- the same
# one `Rescaling` above already comes from -- instead of the deprecated
# `tf.keras.layers.experimental.preprocessing` aliases, which newer TensorFlow
# releases no longer ship.
augmentedModel = tf.keras.Sequential(
    [
        tf.keras.layers.RandomTranslation(0.1, 0.1),
        tf.keras.layers.RandomFlip("vertical"),
        tf.keras.layers.RandomRotation(factor=0.02),
        tf.keras.layers.RandomZoom(height_factor=0.2, width_factor=0.2),
    ]
)

In [None]:
# Rescale both splits; augment the training split only.
# NOTE: this cell rebinds `train` and `validation`, so running it a second time
# without reloading the datasets stacks another rescale (and augmentation) on
# top of the previous one.
train = train.map(lambda x, y: (preprocessingModel(x), y),
                  num_parallel_calls=tf.data.AUTOTUNE)
validation = validation.map(lambda x, y: (preprocessingModel(x), y),
                            num_parallel_calls=tf.data.AUTOTUNE)
# `training=True` is passed explicitly (as convert_to_dataset does) so the
# random layers are guaranteed to be active inside the tf.data pipeline rather
# than relying on Keras' implicit resolution of the training flag.
train = train.map(lambda x, y: (augmentedModel(x, training=True), y),
                  num_parallel_calls=tf.data.AUTOTUNE)

In [None]:
# Prefetch both splits so input preparation can overlap with model execution;
# the buffer size is left to tf.data (AUTOTUNE).
train, validation = (
    split.prefetch(tf.data.AUTOTUNE) for split in (train, validation)
)

In [None]:
class PatchEmbedding(tf.keras.layers.Layer):
  """Turn images into a sequence of embedded patches for a ViT encoder.

  The layer cuts each image into non-overlapping ``size`` x ``size`` patches,
  flattens and linearly projects them, prepends a learnable class token and
  adds a learned position embedding to every token.

  Args:
    size: Side length of each square patch.
    num_of_patches: Number of patches per image, NOT counting the class token
      (one extra position is reserved for it internally).
    projection_dim: Width of each token embedding.
    **kwargs: Forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``).

  Call returns a tensor of shape ``(batch, num_of_patches + 1, projection_dim)``
  with the class token at index 0 of axis 1. The number of patches actually
  extracted from the input must equal ``num_of_patches``; otherwise the
  position-embedding addition cannot broadcast.
  """

  def __init__(self , size , num_of_patches , projection_dim, **kwargs):
    super().__init__(**kwargs)

    self.size=size
    # +1 reserves a position for the class token.
    self.num_of_patches= num_of_patches + 1
    self.projection_dim=projection_dim

    self.projection=tf.keras.layers.Dense(projection_dim)

    self.clsToken = self.add_weight(
            name="clsToken",
            shape=(1, 1, projection_dim),
            initializer="HeNormal",  # Experiment with different initializers
            trainable=True,
        )
    self.positionalEmbedding = tf.keras.layers.Embedding(self.num_of_patches , projection_dim)


  def call(self, inputs):
    batch_size = tf.shape(inputs)[0]

    patches = tf.image.extract_patches(
        inputs,
        sizes=[1, self.size, self.size, 1],
        strides=[1, self.size, self.size, 1],
        rates=[1, 1, 1, 1],
        padding="VALID",
    )

    # extract_patches flattens each patch into the last axis
    # (size * size * channels). Read that width from the static shape instead
    # of hard-coding 3 channels, so non-RGB inputs work too; if it is not
    # known statically, keep the original RGB assumption.
    patch_dim = patches.shape[-1]
    if patch_dim is None:
      patch_dim = self.size * self.size * 3

    patches = tf.reshape(patches, (batch_size, -1, patch_dim))
    patches = self.projection(patches)

    # One copy of the class token per image, placed in front of the patches.
    clsToken = tf.repeat(self.clsToken, batch_size, axis=0)
    patches = tf.concat((clsToken, patches), axis=1)

    # Position ids 0..num_of_patches-1 with a leading axis of 1 so the
    # embeddings broadcast over the batch.
    positions = tf.range(0, self.num_of_patches, 1)[tf.newaxis, ...]
    positionalEmbedding = self.positionalEmbedding(positions)

    return patches + positionalEmbedding

In [None]:
class TransformerLayer(tf.keras.layers.Layer):
    """Single Transformer encoder block.

    Structure: LayerNorm -> self-attention -> residual add, then
    LayerNorm -> two-layer GELU MLP -> residual add, and finally one more
    LayerNorm on the block output.

    Args:
        d_model: Token embedding width; also the MLP output width.
        heads: Number of attention heads (per-head key size is d_model // heads).
        mlp_rate: Multiplier for the hidden width of the MLP (d_model * mlp_rate).
        dropout_rate: Dropout used inside attention and after each MLP layer.

    Note: ``training`` defaults to True in ``call``; pass ``training=False``
    explicitly when calling the layer directly for inference.
    """

    def __init__(self, d_model, heads, mlp_rate, dropout_rate=0.1):
        super().__init__()

        norm_epsilon = 1e-6

        self.layernorm_1 = tf.keras.layers.LayerNormalization(epsilon=norm_epsilon)
        self.mha = tf.keras.layers.MultiHeadAttention(
            num_heads=heads,
            key_dim=d_model // heads,
            dropout=dropout_rate,
        )

        self.layernorm_2 = tf.keras.layers.LayerNormalization(epsilon=norm_epsilon)
        self.layernorm_3 = tf.keras.layers.LayerNormalization(epsilon=norm_epsilon)

        # Feed-forward sub-network: expand with GELU, then project back to d_model.
        feed_forward_layers = [
            tf.keras.layers.Dense(d_model * mlp_rate, activation="gelu"),
            tf.keras.layers.Dropout(dropout_rate),
            tf.keras.layers.Dense(d_model),
            tf.keras.layers.Dropout(dropout_rate),
        ]
        self.mlp = tf.keras.Sequential(feed_forward_layers)

    def call(self, inputs, training=True):
        # Attention sub-block with residual connection (query and value are the
        # same normalized tensor, i.e. self-attention).
        normed_inputs = self.layernorm_1(inputs)
        attended = inputs + self.mha(normed_inputs, normed_inputs, training=training)

        # MLP sub-block with residual connection.
        transformed = attended + self.mlp(self.layernorm_2(attended), training=training)

        return self.layernorm_3(transformed)


In [None]:
class TransformerEncoder(tf.keras.layers.Layer):
  """Stack of ``num_layers`` TransformerLayer blocks applied one after another.

  Args:
    d_model, heads, mlp_rate, dropout_rate: Passed unchanged to every
      TransformerLayer in the stack.
    num_layers: How many blocks to stack.
  """

  def __init__(self , d_model , heads , mlp_rate , num_layers=1 , dropout_rate=0.2):
    super().__init__()

    blocks = []
    for _ in range(num_layers):
      blocks.append(TransformerLayer(d_model, heads, mlp_rate, dropout_rate))
    self.encoders = blocks

  def call(self , inputs , training=True):
    # Feed the output of each block into the next one.
    hidden = inputs
    for block in self.encoders:
      hidden = block(hidden, training=training)
    return hidden

In [None]:
class ViT(tf.keras.Model):
  """Vision Transformer classifier.

  Pipeline: PatchEmbedding -> TransformerEncoder -> take the class token
  (index 0) -> LayerNorm -> MLP head ending in a softmax over ``num_classes``.

  Args:
    num_classes: Size of the softmax output.
    patch_size: Side length of each square image patch.
    num_of_patches: Patches per image (class token not included).
    d_model: Token embedding width.
    heads: Attention heads per encoder block.
    num_layers: Number of encoder blocks.
    mlp_rate: Hidden-width multiplier for the encoder MLPs and the head.
    dropout_rate: Dropout inside the encoder. The head's two Dropout layers
      use a fixed rate of 0.2 regardless of this value.
  """

  def __init__(self , num_classes , patch_size , num_of_patches , d_model , heads , num_layers , mlp_rate , dropout_rate=0.2):
    super().__init__()

    self.patchEmbedding = PatchEmbedding(patch_size, num_of_patches, d_model)
    self.encoder = TransformerEncoder(d_model, heads, mlp_rate, num_layers, dropout_rate)
    self.encoderNormalization = tf.keras.layers.LayerNormalization(epsilon=1e-6)

    # Classification head; outputs class probabilities (softmax), not logits.
    head = tf.keras.Sequential()
    head.add(tf.keras.layers.Dropout(0.2))
    head.add(tf.keras.layers.Dense(mlp_rate * d_model, activation="gelu"))
    head.add(tf.keras.layers.Dropout(0.2))
    head.add(tf.keras.layers.Dense(num_classes, activation="softmax"))
    self.prediction = head

  def call(self , inputs ,  training=True):
    # Tokens already carry positional information and a leading class token.
    tokens = self.patchEmbedding(inputs)
    encoded = self.encoder(tokens, training=training)

    # Only the class token's representation is used for classification.
    cls_features = self.encoderNormalization(encoded[:, 0, :])
    return self.prediction(cls_features, training=training)

In [None]:
def convert_to_dataset(data,batch_size,shuffle = False,augment = False):
  """Rescale, optionally shuffle, batch and optionally augment a dataset.

  Args:
    data: tf.data.Dataset yielding ``(image, label)`` pairs.
    batch_size: Batch size; incomplete final batches are dropped.
    shuffle: If True, shuffle elements before batching.
    augment: If True, apply ``augmentedModel`` to every batch in training mode.

  Returns:
    The transformed dataset with prefetching enabled.
  """
  # NOTE(review): `[0]` keeps only index 0 along the first axis of the rescaled
  # tensor. That is only correct if every element carries a leading axis of
  # length 1; for plain (H, W, C) images or real batches it discards data.
  # Left unchanged because the intended input is not visible here -- confirm
  # against the caller.
  dataset1 = data.map(lambda x,y:(preprocessingModel(x)[0],y),num_parallel_calls=tf.data.AUTOTUNE)

  if shuffle:
    # len(dataset) raises TypeError when the cardinality is unknown or infinite
    # (e.g. after unbatch() or filter()). cardinality() reports those cases as
    # negative sentinels instead, so fall back to a fixed buffer for them (and
    # for an empty dataset, where a buffer of 0 would be rejected by shuffle).
    fallback_buffer = 1000  # arbitrary default, only used when the size is unknown
    cardinality = int(dataset1.cardinality().numpy())
    buffer_size = cardinality if cardinality > 0 else fallback_buffer
    dataset1 = dataset1.shuffle(buffer_size)

  dataset1 = dataset1.batch(batch_size,drop_remainder = True)
  if augment:
    # training=True forces the random layers to be active inside the pipeline.
    dataset1 = dataset1.map(lambda x,y:(augmentedModel(x,training = True),y),num_parallel_calls=tf.data.AUTOTUNE)

  return dataset1.prefetch(tf.data.AUTOTUNE)

In [None]:
# Keyword arguments make the role of each hyper-parameter explicit.
vitClassifier = ViT(
    num_classes=5,
    patch_size=6,
    num_of_patches=(72 // 6) ** 2,  # 72x72 images cut into 6x6 patches
    d_model=64,
    heads=5,
    num_layers=4,
    mlp_rate=3,
    dropout_rate=0.1,
)

vitClassifier.compile(
  optimizer="adam",
  # The model's last layer already applies softmax, so the loss is used with
  # its default from_logits=False.
  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
  metrics=[
      tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
      # Top-k is only informative for k < number of classes. With 5 classes the
      # previous k=10 counted every prediction as a hit (always 1.0), so top-2
      # is reported instead. This renames the history key to "top-2-accuracy".
      tf.keras.metrics.SparseTopKCategoricalAccuracy(2, name="top-2-accuracy"),
  ],
)

In [None]:
# `train` and `validation` are tf.data.Datasets that were already batched
# (batch_size=200) when they were loaded, so `batch_size` must not be passed to
# fit(): for dataset inputs the batches come from the dataset itself.
history = vitClassifier.fit(train, validation_data=validation, epochs=10)