<a href="https://colab.research.google.com/github/sneakatyou/ViT-Tensorflow-2.0/blob/main/VIT_PRELIM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import math

import six
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard

from tensorflow.keras import datasets

import logging
import numpy as np

from fastprogress import master_bar, progress_bar
import matplotlib.pyplot as plt


In [None]:
def get_angles(pos, i, d_model):
  """Return the sinusoid arguments pos / 10000^(2*(i//2)/d_model).

  Args:
    pos: position index (scalar or array; broadcast against `i`).
    i: feature index (scalar or array). Indices 2k and 2k+1 share a rate
      because of the integer division by two.
    d_model: total embedding width used to normalise the exponent.

  Returns:
    The element-wise product of `pos` with the per-feature angle rate.
  """
  exponent = (2 * (i // 2)) / np.float32(d_model)
  return pos * (1 / np.power(10000, exponent))
  
def positional_encoding(position, d_model):
  """Build a sinusoidal positional-encoding table.

  Args:
    position: number of positions (rows) to encode.
    d_model: embedding width (columns).

  Returns:
    A float32 tensor of shape (1, position, d_model); even feature columns
    hold sines and odd feature columns hold cosines of the angles produced
    by `get_angles`.
  """
  row_idx = np.arange(position)[:, np.newaxis]
  col_idx = np.arange(d_model)[np.newaxis, :]
  table = get_angles(row_idx, col_idx, d_model)

  # Overwrite the raw angles in place: sine on even columns, cosine on odd.
  table[:, 0::2] = np.sin(table[:, 0::2])
  table[:, 1::2] = np.cos(table[:, 1::2])

  # Add a leading axis of size 1 so the table broadcasts over a batch.
  return tf.cast(table[np.newaxis, ...], dtype=tf.float32)


In [None]:
# Visual sanity check of the positional encoding for 2048 positions x 512 dims.
n, d = 2048, 512
pos_encoding = positional_encoding(n, d)
print(pos_encoding.shape)

# Drop the leading axis, then split the feature axis into (pair, sin/cos) and
# move the sin/cos axis to the front so that, after flattening to (d, n), all
# sine rows come first and all cosine rows come second.
pos_encoding = tf.reshape(pos_encoding[0], (n, d//2, 2))
pos_encoding = tf.reshape(tf.transpose(pos_encoding, (2,1,0)), (d, n))

plt.pcolormesh(pos_encoding, cmap='RdBu')
plt.xlabel('Position')
plt.ylabel('Depth')
plt.colorbar()
plt.show()


# Multi-Head Attention (MHA)

In [None]:
class MultiHeadAttention(tf.keras.layers.Layer):
    """Multi-head self-attention over a single input sequence.

    Queries, keys and values are all projected from the same input. The
    projections are split into `num_heads` heads of width
    `d_model // num_heads`, attended independently, merged back to `d_model`
    and passed through a final dense layer.

    Args:
        num_heads: number of attention heads; must divide `d_model`.
        d_model: width of the input/output embeddings.
    """

    def __init__(self,num_heads,d_model):
        super(MultiHeadAttention,self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        assert d_model%num_heads==0, "d_model must be divisible by num_heads"
        # Plain Python int (not a tensor) so it can be used directly as a
        # reshape dimension and as the scaling depth.
        self.multihead_d_model = d_model // num_heads
        self.dense1 = tf.keras.layers.Dense(d_model)  # query projection
        self.dense2 = tf.keras.layers.Dense(d_model)  # key projection
        self.dense3 = tf.keras.layers.Dense(d_model)  # value projection
        self.dense4 = tf.keras.layers.Dense(d_model)  # output projection

    def SelfAttention(self,q,k,v,d_model):
        """Scaled dot-product attention.

        Args:
            q, k, v: query/key/value tensors whose last two axes are
                (sequence, depth).
            d_model: depth used for scaling; the logits are divided by
                sqrt(d_model). `call` passes the per-head depth here.

        Returns:
            (output, attention_weights), where the weights are the softmax
            over the last axis of the scaled q @ k^T logits.
        """
        logits = tf.matmul(q,k,transpose_b = True)
        logits = tf.divide(logits,tf.math.sqrt(tf.cast(d_model,tf.float32)))
        attention_weights = tf.keras.activations.softmax(logits,axis = -1)

        x = tf.matmul(attention_weights,v)
        return x,attention_weights

    def create_multi_heads(self,x,batch_size):
        """Reshape (batch, seq, d_model) into (batch, num_heads, seq, depth)."""
        x = tf.reshape(x,(batch_size,-1,self.num_heads,self.multihead_d_model))
        x = tf.transpose(x,perm = [0,2,1,3])
        return x

    def call(self,x,training=None):
        """Apply self-attention to `x`.

        Args:
            x: input tensor; reshaped here as (batch, seq, d_model).
            training: accepted so callers can pass the Keras training flag
                explicitly; unused because this layer has no
                training-dependent behaviour.

        Returns:
            (output, attention_weights): output is reshaped to
            (batch, seq, d_model); the weights keep a separate head axis.
        """
        # Dynamic batch size: the static shape may be None in graph mode.
        batch_size = tf.shape(x)[0]
        q = self.dense1(x)
        k = self.dense2(x)
        v = self.dense3(x)

        q = self.create_multi_heads(q,batch_size)
        k = self.create_multi_heads(k,batch_size)
        v = self.create_multi_heads(v,batch_size)

        # Scale by the per-head depth (d_k), not by the full model width.
        x,attention_weights = self.SelfAttention(q,k,v,self.multihead_d_model)
        # Move the head axis back next to depth and merge heads.
        x = tf.transpose(x, perm = [0,2,1,3])
        x = tf.reshape(x,(batch_size,-1,self.d_model))
        x = self.dense4(x)
        return x,attention_weights


In [None]:
# Smoke test: one sequence of 60 tokens, 512 features, split over 8 heads.
temp_mha = MultiHeadAttention(num_heads=8, d_model=512)
y = tf.random.uniform(shape=(1, 60, 512))  # (batch_size, encoder_sequence, d_model)
out, attn = temp_mha(y)
(out.shape, attn.shape)

# Other layers

In [None]:
class Add(tf.keras.layers.Layer):
    """Layer wrapper around element-wise addition of two tensors."""

    def __init__(self):
        super().__init__()

    def call(self,x,x1):
        """Return the element-wise sum `x + x1` computed with `tf.add`."""
        total = tf.add(x,x1)
        return total


In [None]:
class MLP(tf.keras.layers.Layer):
    """Two-layer feed-forward block: Dense -> GELU -> Dropout -> Dense -> Dropout.

    Args:
        hidden_shape1: number of units in the first dense layer.
        hidden_shape2: number of units in the second dense layer.
        dropout_rate: rate shared by both dropout layers.
    """

    def __init__(self,hidden_shape1,hidden_shape2,dropout_rate = 0.1):
        super().__init__()
        # Biases start from a very narrow normal distribution (stddev 1e-6).
        self.dense1 = tf.keras.layers.Dense(
            hidden_shape1,
            bias_initializer=tf.keras.initializers.RandomNormal(stddev=1e-6),
        )
        self.dense2 = tf.keras.layers.Dense(
            hidden_shape2,
            bias_initializer=tf.keras.initializers.RandomNormal(stddev=1e-6),
        )
        self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
        self.dropout2 = tf.keras.layers.Dropout(dropout_rate)

    def call(self,x,training =True):
        """Run the block on `x`; `training` is forwarded to both dropouts."""
        hidden = tf.keras.activations.gelu(self.dense1(x))
        hidden = self.dropout1(hidden,training = training)
        projected = self.dense2(hidden)
        return self.dropout2(projected,training = training)



In [None]:
class ImgPatches(tf.keras.layers.Layer):
    """Split an image batch into non-overlapping patches and embed each one.

    A `Conv2D` whose kernel size and stride both equal `patch_size` produces
    one `d_model`-wide vector per patch; the spatial grid of patches is then
    flattened into a sequence.

    Args:
        d_model: number of convolution filters (embedding width per patch).
        patch_size: kernel size and stride of the convolution.
    """

    def __init__(self,d_model,patch_size,):
        super(ImgPatches,self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(d_model,patch_size,patch_size,padding = 'valid')
        self.patch_size = patch_size
        self.d_model = d_model

    def call(self,x):
        """Return patch embeddings flattened to (batch, rows*cols, channels).

        Args:
            x: image batch fed to the convolution
               (assumes channels-last layout, the Keras default — TODO confirm).
        """
        x = self.conv1(x)
        rows, cols = x.shape[1], x.shape[2]
        # Keep the sequence length static when the grid size is known so that
        # downstream code can still read it from the static shape; otherwise
        # let reshape infer it.
        num_patches = -1 if rows is None or cols is None else rows * cols
        # The batch size is read dynamically because its static value may be
        # None when the layer is traced with an unknown batch dimension.
        return tf.reshape(x,(tf.shape(x)[0],num_patches,x.shape[3]))

In [None]:
# Smoke test: a single 224x224 RGB image cut into 32x32 patches, 5 features each.
x = tf.random.uniform(shape=(1,224,224,3))
img = ImgPatches(patch_size=32, d_model=5)
y = img(x)
y

In [None]:
class AddCls(tf.keras.layers.Layer):
    """Prepend a learnable class token to every sequence in the batch.

    Args:
        d_model: embedding width; the token has shape (1, 1, d_model).
    """

    def __init__(self,d_model):
        super(AddCls,self).__init__()
        self.d_model = d_model
        # Registered through add_weight so Keras tracks it as a trainable
        # weight of this layer. Initialised to zeros.
        self.cls = self.add_weight(
            name="cls",
            shape=(1,1,d_model),
            initializer="zeros",
            dtype="float32",
            trainable=True,
        )

    def call(self,x):
        """Return `x` with the class token inserted at sequence index 0.

        Args:
            x: tensor of shape (batch, seq, d_model).

        Returns:
            Tensor of shape (batch, seq + 1, d_model).
        """
        batch_size = tf.shape(x)[0]
        # Tile into a local tensor: `self.cls` must stay the (1, 1, d_model)
        # variable, otherwise the weight is lost after the first call and
        # later calls tile an already-tiled tensor.
        cls_tokens = tf.tile(self.cls,[batch_size,1,1])
        # Token goes first so that index 0 along the sequence axis is the
        # class token.
        return tf.concat([cls_tokens,x],axis=1)

In [None]:
# Smoke test: 32 sequences of 49 patch embeddings, 5 features each.
x = tf.random.uniform(shape=(32,49,5))
img = AddCls(d_model=5)
y = img(x)
y.shape

# Encoder

In [None]:
class EncoderBlock(tf.keras.layers.Layer):
    """Pre-norm Transformer encoder block.

    Computes `x = inp + Dropout(MHA(LayerNorm(inp)))` followed by
    `x + MLP(LayerNorm(x))`.

    Args:
        num_heads: number of attention heads.
        d_model: embedding width (also the MLP output width).
        hidden_layer_shape: width of the MLP hidden layer.
        dropout_rate: dropout rate used after attention and inside the MLP.
    """

    def __init__(self,num_heads,d_model,hidden_layer_shape,dropout_rate = 0.1):
        super(EncoderBlock,self).__init__()
        self.norm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.norm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.mlp = MLP(hidden_shape1=hidden_layer_shape,hidden_shape2=d_model,dropout_rate=dropout_rate)
        self.mha = MultiHeadAttention(num_heads=num_heads,d_model = d_model)
        self.dropout1 = tf.keras.layers.Dropout(dropout_rate)

    def call(self,inp,training = True):
        """Apply the block to `inp`.

        Args:
            inp: input tensor; the residual additions require the attention
                and MLP outputs to have the same shape as `inp`.
            training: forwarded to every sub-layer that has
                training-dependent behaviour.

        Returns:
            Tensor with the same shape as `inp`.
        """
        # Attention sub-layer with residual connection.
        x = self.norm1(inp)
        x,_ = self.mha(x,training=training)
        x = self.dropout1(x,training = training)
        x = x + inp

        # Feed-forward sub-layer with residual connection. `training` is
        # passed explicitly, like for the other sub-layers, so the MLP's
        # dropout mode never depends on implicit propagation.
        y = self.norm2(x)
        y = self.mlp(y,training = training)
        y = x+y

        return y

In [None]:
# Smoke test: one encoder block on a single 60-token sequence.
temp_mha = EncoderBlock(num_heads=8, d_model=512, hidden_layer_shape=2048)
y = tf.random.uniform(shape=(1, 60, 512))  # (batch_size, encoder_sequence, d_model)
out = temp_mha(y, training=True)
out.shape

In [None]:
class Encoder(tf.keras.layers.Layer):
    """Stack of `num_layers` EncoderBlocks followed by a final LayerNorm.

    Args:
        num_heads: attention heads per block.
        d_model: embedding width.
        hidden_layer_shape: MLP hidden width per block.
        num_layers: how many blocks to stack.
        dropout_rate: dropout rate handed to every block.
    """

    def __init__(self,num_heads,d_model,hidden_layer_shape,num_layers,dropout_rate = 0.1):
        super().__init__()
        blocks = []
        for _ in range(num_layers):
            blocks.append(
                EncoderBlock(
                    num_heads=num_heads,
                    d_model=d_model,
                    hidden_layer_shape=hidden_layer_shape,
                    dropout_rate=dropout_rate,
                )
            )
        self.encoder_layers = blocks
        self.num_layers = num_layers
        self.norm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

    def call(self,x,training = True):
        """Feed `x` through every block in order, then normalise the result."""
        for block in self.encoder_layers:
            x = block(x,training=training)
        return self.norm1(x)

In [None]:
# Smoke test: an 8-block encoder on a single 60-token sequence.
temp_mha = Encoder(num_heads=8, d_model=512, hidden_layer_shape=2048, num_layers=8)
y = tf.random.uniform(shape=(1, 60, 512))  # (batch_size, encoder_sequence, d_model)
out = temp_mha(y, training=True)
out.shape

# ViT Model

In [None]:
class ViT(tf.keras.Model):
    """Vision Transformer classifier.

    Pipeline: patch embedding -> class token -> sinusoidal positional
    encoding -> dropout -> encoder stack -> dense classification head.

    Args:
        num_heads: attention heads per encoder block.
        d_model: embedding width.
        hidden_layer_shape: MLP hidden width inside each encoder block.
        num_layers: number of encoder blocks.
        patch_size: side length of the square image patches.
        num_classes: number of output units of the classification head.
        dropout_rate: dropout rate for the embeddings and the encoder.
        training: accepted for backward compatibility; not used by the
            constructor (the mode is chosen per call via `call(training=...)`).
    """

    def __init__(self,num_heads,d_model,hidden_layer_shape,num_layers,patch_size,num_classes,dropout_rate = 0.1,training = True):
        super(ViT,self).__init__()
        self.encoder = Encoder(num_heads=num_heads,d_model=d_model,hidden_layer_shape=hidden_layer_shape,num_layers=num_layers,dropout_rate=dropout_rate)
        self.img_patches = ImgPatches(d_model=d_model,patch_size=patch_size)
        self.addcls = AddCls(d_model=d_model)
        self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
        # Zero-initialised kernel for the classification head.
        self.dense1 = tf.keras.layers.Dense(units = num_classes,kernel_initializer = tf.keras.initializers.zeros)

    def call(self,input,training = True):
        """Compute class scores for a batch of images.

        Args:
            input: image batch passed to the patch-embedding layer.
            training: forwarded to the dropout layer and the encoder.

        Returns:
            Output of the dense head, one row of `num_classes` values per
            batch element.
        """
        x = self.img_patches(input)
        x = self.addcls(x)
        # The positional table is sized from the static sequence length and
        # width, so both must be known here.
        emb = positional_encoding(x.get_shape()[1],x.get_shape()[2])
        x = tf.add(x,emb)
        x = self.dropout1(x,training = training)
        x = self.encoder(x,training = training)
        # Only the token at sequence index 0 feeds the classifier.
        # NOTE(review): this must be where AddCls places the class token — confirm.
        x = x[:,0]
        x = self.dense1(x)
        return x


In [None]:
# Smoke test: the full model on a batch of 32 random 224x224 RGB images.
temp_mha = ViT(
    num_heads=8,
    d_model=512,
    hidden_layer_shape=2048,
    num_layers=12,
    patch_size=32,
    num_classes=10,
    dropout_rate=0.1,
    training=True,
)
y = tf.random.uniform(shape=(32, 224, 224, 3))  # (batch_size, height, width, channels)
out = temp_mha(y, training=True)
out.shape

32
(32, 50, 512)


TensorShape([32, 10])

# Config

In [None]:
class Configs:
    """Plain container that stores every constructor argument as an attribute.

    All ten arguments are required and are kept unchanged under the same
    names.
    """

    def __init__(
        self,
        num_classes,
        num_heads,
        num_layers,
        d_model,
        hidden_layer_shape1,
        hidden_layer_shape2,
        dropout_rate,
        training,
        batch_size,
        patch_size,
    ):
        # Model architecture.
        self.num_classes = num_classes
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.d_model = d_model
        self.hidden_layer_shape1 = hidden_layer_shape1
        self.hidden_layer_shape2 = hidden_layer_shape2
        # Regularisation and run mode.
        self.dropout_rate = dropout_rate
        self.training = training
        # Input pipeline.
        self.batch_size = batch_size
        self.patch_size = patch_size

In [None]:
# Hyper-parameters for the model. `Configs` requires all ten arguments, so
# `batch_size` and `patch_size` must be supplied as well; they reuse the
# values from the ViT smoke test above (batch of 32, 32x32 patches).
configs = Configs(num_classes = 10,
                  num_heads = 12,
                  num_layers = 12,
                  # Must be divisible by num_heads (MultiHeadAttention asserts
                  # this); 712 is not a multiple of 12, 768 is.
                  d_model = 768,
                  hidden_layer_shape1 = 2048,
                  hidden_layer_shape2 = 1024,
                  dropout_rate = 0.1,
                  training = True,
                  batch_size = 32,
                  patch_size = 32)