## Prepare training and validation data

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow.keras.layers as L
from tensorflow.keras.losses import MeanSquaredError
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
import cv2
import os

# Run-wide settings: RNG seed handed to the data generators, the dataset
# root directory, and the square edge length (in pixels) of the model input.
seed = 1
base_dir='/kaggle/input/petfinder-pawpularity-score/'
image_size=224

# Training metadata table; the columns used here are 'Id' and 'Pawpularity'.
df = pd.read_csv(os.path.join(base_dir, "train.csv"))
# Derived columns are computed with vectorised column operations rather than
# a row-wise apply(): the values are the same, without a Python call per row.
# 'filename' is the image file name looked up by flow_from_dataframe.
df['filename'] = df['Id'] + '.jpg'
# Regression target: the score divided by 100 (the model's final layer is a
# sigmoid, so it can only emit values between 0 and 1).
df['y'] = df['Pawpularity'] / 100

def prep_fn(img):
    """Return a float32 copy of ``img`` with every value divided by 255.

    Passed to ``ImageDataGenerator`` as its ``preprocessing_function``.
    The input array is not modified.
    """
    scaled = img.astype(np.float32)  # astype() copies, so the caller's array is untouched
    scaled /= 255.0
    return scaled

# Augmentation settings for the training images. validation_split reserves
# a fraction of the dataframe rows for the "validation" subset.
data_gen_args = dict(preprocessing_function=prep_fn,
                     width_shift_range=0.2,
                     height_shift_range=0.2,
                     zoom_range=0.1,
                     rotation_range=20,
                     horizontal_flip=True,
                     vertical_flip=False,
                     validation_split=0.1)

train_datagen = ImageDataGenerator(**data_gen_args)
# Validation images get the same pixel scaling but no augmentation. The
# validation_split value has to match the training generator's so that the
# two subsets partition the dataframe consistently.
val_datagen = ImageDataGenerator(preprocessing_function=prep_fn,
                                 validation_split=0.1)

# Training batches: shuffled, augmented, resized to the model's input size.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=df,
    directory=base_dir+'train/',
    x_col='filename',
    y_col=["y"],
    subset="training",
    batch_size=32,
    seed=seed,
    shuffle=True,
    class_mode='raw',
    # Use the shared constant so the generator cannot drift away from the
    # input shape the model is built with.
    target_size=(image_size, image_size))

# Validation batches. Shuffling is disabled: sample order has no effect on
# the loss over the whole subset, and a fixed order means that a validation
# pass covering only part of the subset scores the same images every epoch,
# which keeps val_loss comparable between epochs for early stopping.
val_generator = val_datagen.flow_from_dataframe(
    dataframe=df,
    directory=base_dir+'train/',
    x_col='filename',
    y_col=["y"],
    subset="validation",
    batch_size=32,
    seed=seed,
    shuffle=False,
    class_mode='raw',
    target_size=(image_size, image_size))

## Prepare test data

In [None]:
# Test metadata table; only the 'Id' column is used here.
df_test = pd.read_csv(os.path.join(base_dir, "test.csv"))
# Vectorised string concatenation instead of a row-wise apply().
df_test['filename'] = df_test['Id'] + '.jpg'
# Placeholder target: class_mode='raw' requires a y column, but the values
# are never used for prediction.
df_test['y'] = 0

# No augmentation at inference time, only the pixel scaling.
test_datagen = ImageDataGenerator(preprocessing_function=prep_fn)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=df_test,
    directory=base_dir+'test/',
    x_col='filename',
    y_col=['y'],
    batch_size=32,
    # Keep dataframe order so predictions line up row-for-row.
    shuffle=False,
    class_mode='raw',
    # Shared constant keeps this in step with the model's input shape.
    target_size=(image_size, image_size))

## Configuration

In [None]:
# Width of each token embedding; also the output size of the positional
# embedding table in the tokenizer.
projection_dim = 128
# Output channels of the tokenizer's successive convolution stages. The last
# stage emits projection_dim channels.
conv_filters = [32, 64, projection_dim]
# Every convolution stage is followed by a 2x2, stride-2 max-pool, so each
# stage halves the height and width; the tokens are the cells of the final
# square feature map.
num_patches = (image_size // (1 << len(conv_filters))) ** 2
# Number of attention heads in each MultiHeadAttention layer.
num_heads = 3

## Model

In [None]:
def mlp(x, hidden_units, dropout_rate):
    """Apply a stack of GELU Dense layers, each followed by Dropout.

    Args:
        x: input tensor.
        hidden_units: iterable of layer widths, applied in order.
        dropout_rate: rate used for every Dropout layer.

    Returns:
        The output of the last Dropout layer, or ``x`` itself when
        ``hidden_units`` is empty.
    """
    out = x
    for width in hidden_units:
        projected = L.Dense(width, activation=tf.nn.gelu)(out)
        out = L.Dropout(dropout_rate)(projected)
    return out

In [None]:
class CCTTokenizer(L.Layer):
    """Convolutional tokenizer: image -> sequence of position-aware tokens.

    A stack of (3x3 'same' Conv2D + ReLU, 2x2 stride-2 MaxPool2D) stages
    reduces the image to a feature map. The map is flattened along its two
    spatial axes into a token sequence, and a learned positional embedding
    is added to every token.

    Args:
        filters: output channels of each convolution stage, in order.
            Defaults to the module-level ``conv_filters``.
        patches: number of tokens the positional embedding covers. Defaults
            to the module-level ``num_patches``. It has to equal
            height * width of the final feature map, otherwise the addition
            in ``call`` cannot broadcast.
        embed_dim: size of each positional embedding vector. Defaults to the
            module-level ``projection_dim``. It has to equal the last entry
            of ``filters`` for the same reason.
        **kwargs: forwarded to ``keras.layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, filters=None, patches=None, embed_dim=None, **kwargs):
        super().__init__(**kwargs)
        # Fall back to the notebook-level configuration so that the existing
        # zero-argument call CCTTokenizer() behaves exactly as before.
        self.num_patches = num_patches if patches is None else patches
        self.projection_dim = projection_dim if embed_dim is None else embed_dim
        stage_filters = conv_filters if filters is None else filters

        self.conv_model = keras.Sequential()
        for n_filters in stage_filters:
            self.conv_model.add(
                L.Conv2D(n_filters, (3, 3), activation='relu', padding='same')
            )
            self.conv_model.add(
                L.MaxPool2D((2, 2), strides=(2, 2))
            )

        self.position_embedding = L.Embedding(
            input_dim=self.num_patches, output_dim=self.projection_dim
        )

    def call(self, images):
        """Return the token sequence for a batch of images.

        The result is the conv feature map reshaped to
        (batch, height * width, channels) plus the positional embedding.
        """
        feature_map = self.conv_model(images)
        # Read the dynamic shape once instead of once per dimension.
        fm_shape = tf.shape(feature_map)
        tokens = tf.reshape(
            feature_map,
            (-1, fm_shape[1] * fm_shape[2], fm_shape[-1]),
        )
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        # The (num_patches, projection_dim) embedding broadcasts over the
        # batch axis.
        return tokens + self.position_embedding(positions)

In [None]:
def attention_block(inputs, key_dim, mlp_dim, dropout=0.1):
    """Pre-norm transformer encoder block: self-attention followed by an MLP.

    Args:
        inputs: token tensor fed to the block.
        key_dim: per-head size of the attention query/key projections.
        mlp_dim: iterable of Dense widths for the feed-forward part.
        dropout: dropout rate used inside attention and after every Dense.

    Returns:
        The block output. The MLP result is added to its input only when
        both have the same last dimension; otherwise the bare MLP result
        is returned.
    """
    normed = L.LayerNormalization(epsilon=1e-6)(inputs)
    attention_output = L.MultiHeadAttention(
        num_heads=num_heads, key_dim=key_dim, dropout=dropout
    )(normed, normed)
    # With no explicit output_shape, MultiHeadAttention projects back to the
    # query's feature width, so the residual add is shape-compatible for any
    # key_dim. The previous guard (inputs.shape[-1] == key_dim) compared
    # unrelated sizes and, when false, dropped the attention output entirely.
    x = L.Add()([inputs, attention_output])

    skip = x
    x = L.LayerNormalization(epsilon=1e-6)(x)
    x = mlp(x, hidden_units=mlp_dim, dropout_rate=dropout)
    # The MLP may change the feature width; a residual is only possible when
    # it does not.
    if skip.shape[-1] == x.shape[-1]:
        x = L.Add()([skip, x])
    return x

def vision_transformer(num_layers=4):
    """Build the image-regression model.

    Pipeline: CCTTokenizer -> ``num_layers`` attention blocks ->
    LayerNormalization -> first token -> Dense(64, relu) ->
    Dense(1, sigmoid).

    Args:
        num_layers: number of stacked attention blocks (default 4).

    Returns:
        An uncompiled ``tf.keras.Model`` mapping
        (image_size, image_size, 3) images to one value between 0 and 1.
    """
    inputs = L.Input(shape=(image_size, image_size, 3))

    # Encode the image as a sequence of tokens.
    x = CCTTokenizer()(inputs)

    # Block sizes are derived from projection_dim instead of repeating the
    # literals 128 / 256, so changing the configuration cannot leave the
    # blocks out of step with the tokenizer's output width.
    for _ in range(num_layers):
        x = attention_block(
            x, projection_dim, [2 * projection_dim, projection_dim], 0.05
        )

    x = L.LayerNormalization(epsilon=1e-6)(x)

    # Reduce the sequence to one vector per image by taking the first token.
    # NOTE(review): the tokenizer adds no dedicated class token, so this is
    # the first patch token; L.GlobalAveragePooling1D() over the sequence is
    # an alternative readout worth comparing.
    x = x[:, 0, :]

    x = L.Dense(64, activation='relu')(x)
    out = L.Dense(1, activation='sigmoid')(x)

    return tf.keras.Model(inputs=inputs, outputs=out)

In [None]:
from keras import backend as K
def root_mean_squared_error(y_true, y_pred):
    """Return the root of the mean squared difference over all elements.

    Implemented with TensorFlow ops so that the loss comes from the same
    package as the ``tensorflow.keras`` model it is compiled into, rather
    than from the separately imported ``keras`` backend helpers.
    NOTE(review): those backend math helpers are legacy and are believed to
    be missing from newer standalone Keras releases -- confirm.

    Args:
        y_true: target values.
        y_pred: predicted values.

    Returns:
        A scalar tensor: sqrt(mean((y_pred - y_true) ** 2)).
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # Align dtypes so the subtraction also works when the function is called
    # directly with targets of a different float type.
    y_true = tf.cast(y_true, y_pred.dtype)
    return tf.sqrt(tf.reduce_mean(tf.square(y_pred - y_true)))
# Build the network and configure it to minimise RMSE with Adam.
model = vision_transformer()
model.compile(
    loss=root_mean_squared_error,
    optimizer=Adam(learning_rate=0.001),
)

In [None]:
# Print a summary of the model built above.
model.summary()

## Training

In [None]:
# Stop as soon as val_loss fails to improve for one epoch. With
# restore_best_weights the model is rolled back to the best epoch instead
# of keeping the weights from the epoch that triggered the stop.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=1,
                                            restore_best_weights=True)
# Step counts come from the generators themselves, so every epoch covers the
# whole training subset and the whole validation subset whatever the dataset
# size or batch size is.
r = model.fit(train_generator,
              validation_data=val_generator,
              steps_per_epoch=len(train_generator),
              validation_steps=len(val_generator),
              epochs=100,
              callbacks=[callback])

In [None]:
# Predict on the test images. The model's last layer has one unit, so the
# result has one column per row.
ypred = model.predict(test_generator)
submission = pd.read_csv(os.path.join(base_dir, "sample_submission.csv"))
# Flatten to 1-D before assigning so the column assignment does not depend
# on pandas squeezing an (n, 1) array, then undo the /100 target scaling.
# NOTE(review): this assumes sample_submission.csv lists ids in the same
# order as test.csv -- confirm.
submission["Pawpularity"] = ypred.reshape(-1) * 100
submission.to_csv("submission.csv", index=False)
submission.head()