<a href="https://colab.research.google.com/github/skillnerve/DataScience-Projects/blob/main/Kaggle_Pawpularity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Libraries

In [1]:
! pip install kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
!pip install -U -q kaggle==1.5.8

[K     |████████████████████████████████| 59 kB 3.0 MB/s 
[?25h  Building wheel for kaggle (setup.py) ... [?25l[?25hdone
  Building wheel for slugify (setup.py) ... [?25l[?25hdone


In [2]:
# Download the petfinder-pawpularity-score competition archive with the Kaggle CLI
# (relies on the kaggle.json credentials copied into ~/.kaggle earlier in the notebook).
! kaggle competitions download -c petfinder-pawpularity-score

Downloading petfinder-pawpularity-score.zip to /content
100% 983M/983M [00:04<00:00, 276MB/s]
100% 983M/983M [00:04<00:00, 236MB/s]


In [3]:
# Fetch the TensorFlow Swin Transformer implementation; the import cell adds this
# checkout to sys.path before importing `swintransformer`.
# NOTE(review): the clone is not pinned to a commit, so upstream changes may alter results — consider pinning.
!git clone https://github.com/rishigami/Swin-Transformer-TF

Cloning into 'Swin-Transformer-TF'...
remote: Enumerating objects: 39, done.[K
remote: Counting objects: 100% (39/39), done.[K
remote: Compressing objects: 100% (32/32), done.[K
remote: Total 39 (delta 13), reused 19 (delta 4), pack-reused 0[K
Unpacking objects: 100% (39/39), done.


In [4]:
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image  as img
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Model
import keras.backend as K
from tqdm import tqdm
import sys
sys.path.append('/content/Swin-Transformer-TF')
from swintransformer import SwinTransformer
from sklearn.model_selection import StratifiedKFold

# Data

In [None]:
!unzip /content/petfinder-pawpularity-score.zip

In [6]:
# Read the train and test tables in one pass over the two CSV paths.
train_csv, test_csv = map(pd.read_csv, ('/content/train.csv', '/content/test.csv'))

In [7]:
# Peek at the first two training rows to check the columns that were loaded.
train_csv.head(2)

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,Pawpularity
0,0007de18844b0dbbb5e1f607da0606e0,0,1,1,1,0,0,1,0,0,0,0,0,63
1,0009c66b9439883ba2750fb825e1d7db,0,1,1,0,0,0,0,0,0,0,0,0,42


In [8]:
# Peek at the first two test rows to check the columns that were loaded.
test_csv.head(2)

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur
0,4128bae22183829d2b5fea10effdb0c3,1,0,1,0,0,1,1,0,0,1,0,1
1,43a2262d7738e3d420d453815151079e,0,1,0,0,0,0,1,1,0,0,0,0


In [9]:
# Lookup table from image Id to its Pawpularity score (indexed by the data generator).
labels = dict(zip(train_csv['Id'], train_csv['Pawpularity']))

# Data Generators

In [10]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields ``(images, targets)`` batches.

    Each image is read from ``/content/<part>/<ID>.jpg``, converted to RGB and
    resized to 224x224.

    Args:
        list_IDs: indexable collection of image ids (file names without ``.jpg``).
        labels: mapping indexed by image id that returns the target value.
        batch_size: maximum number of samples per batch.
        n_channels: stored on the instance; not used by this class.
        part: name of the sub-folder of ``/content`` holding the images.
        shuffle: when true, the sample order is reshuffled at the end of every epoch.
    """

    def __init__(self, list_IDs, labels, batch_size=1, n_channels=1,part='train', shuffle=False):
        super().__init__()
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.part=part
        # Build the initial (optionally shuffled) index order.
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch, counting a final partial batch."""
        # ceil (not floor) so the trailing samples are not silently dropped when
        # len(list_IDs) is not a multiple of batch_size.
        return int(np.ceil(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(x, y)``."""
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        return self.__data_generation(list_IDs_temp)

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the images and targets for the given ids.

        Returns:
            ``x`` of shape ``(len(list_IDs_temp), 224, 224, 3)`` and
            ``y`` of shape ``(len(list_IDs_temp), 1)``.
        """
        # Size the arrays by the actual batch length so a final partial batch
        # is not padded with all-zero images and zero targets.
        n_samples = len(list_IDs_temp)
        x = np.zeros((n_samples,224,224,3))
        y = np.zeros((n_samples,1))
        for i, ID in enumerate(list_IDs_temp):
            path = '/content/'+str(self.part)+'/'+ str(ID)+'.jpg'
            # The context manager closes the file handle; convert('RGB') guarantees
            # three channels so non-RGB files still fit the (224, 224, 3) slot.
            with Image.open(path) as image:
                x[i] = np.asarray(image.convert('RGB').resize((224,224)))
            y[i] = self.labels[ID]

        return x,y

In [11]:
class TestDataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields image-only batches for inference.

    Each image is read from ``/content/<part>/<ID>.jpg``, converted to RGB and
    resized to 224x224.

    Args:
        list_IDs: indexable collection of image ids (file names without ``.jpg``).
        batch_size: maximum number of samples per batch.
        n_channels: stored on the instance; not used by this class.
        part: name of the sub-folder of ``/content`` holding the images.
        shuffle: when true, the sample order is reshuffled at the end of every epoch.
    """

    def __init__(self, list_IDs, batch_size=1, n_channels=1,part='test', shuffle=False):
        super().__init__()
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.part=part
        # Build the initial (optionally shuffled) index order.
        self.on_epoch_end()

    def __len__(self):
        """Number of batches, counting a final partial batch."""
        # ceil (not floor) so every id gets a prediction even when
        # len(list_IDs) is not a multiple of batch_size.
        return int(np.ceil(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as an image array."""
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        return self.__data_generation(list_IDs_temp)

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the images for the given ids.

        Returns:
            Array of shape ``(len(list_IDs_temp), 224, 224, 3)``.
        """
        # Size the array by the actual batch length so a final partial batch
        # is not padded with all-zero images.
        x = np.zeros((len(list_IDs_temp),224,224,3))
        for i, ID in enumerate(list_IDs_temp):
            path = '/content/'+str(self.part)+'/'+ str(ID)+'.jpg'
            # The context manager closes the file handle; convert('RGB') guarantees
            # three channels so non-RGB files still fit the (224, 224, 3) slot.
            with Image.open(path) as image:
                x[i] = np.asarray(image.convert('RGB').resize((224,224)))

        return x

# Inference-time generator over every test image id (default batch size of 1).
test_generator = TestDataGenerator(list_IDs=test_csv['Id'].values)

# Callback

In [12]:
class CustomCallBacks(tf.keras.callbacks.Callback):
    """Checkpoint the best weights of one fold and stop once validation stops improving.

    After every epoch the training and validation RMSE are recorded. Weights are
    written to ``model<n>.h5`` after the first epoch, and again after any later
    epoch whose validation RMSE is no worse than the best seen so far AND no worse
    than that epoch's training RMSE. The first later epoch that fails this test
    stops training.

    Args:
        n: identifier appended to the weights file name (``model<n>.h5``).
    """

    def __init__(self,n):
        super().__init__()
        self.n=n

    def on_train_begin(self, logs=None):
        """Reset the per-run history and best-score tracking."""
        self.loss     ={'loss':[]}
        self.val_loss ={'loss':[]}
        self.epoch    =None
        self.b        =0
        self.c        =0
        self.min_t    =None

    def on_epoch_begin(self, epoch, logs=None):
        """Remember the index of the epoch that is starting."""
        self.epoch=epoch

    def on_epoch_end(self, epoch, logs=None):
        """Record the epoch's metrics, then checkpoint or stop training."""
        logs = logs or {}
        print('{} ended'.format(epoch))
        train_rmse = logs.get('root_mean_squared_error')
        val_rmse = logs.get('val_root_mean_squared_error')
        self.loss['loss'].append(train_rmse)
        self.val_loss['loss'].append(val_rmse)
        weights_path = 'model'+str(self.n)+'.h5'

        if epoch==0:
            # The first epoch always seeds the checkpoint and the best score.
            self.model.save_weights(weights_path)
            self.min_t=val_rmse
            return

        if train_rmse is None or val_rmse is None or self.min_t is None:
            # A metric is missing from `logs` (e.g. no validation data or a
            # differently named metric); comparing None would raise TypeError,
            # so leave the checkpoint untouched and let training continue.
            print('RMSE metrics missing from logs; checkpoint left unchanged')
            return

        if val_rmse<=self.min_t and val_rmse<=train_rmse:
            self.min_t=val_rmse
            self.model.save_weights(weights_path)
            print('*'*5)
            print("weight updated")
            print('*'*5)
        else:
            self.model.stop_training = True

# Model

In [13]:
def root_mean_squared_error(y_true, y_pred):
    """Return the square root of the mean squared difference between predictions and targets."""
    residual = y_pred - y_true
    mean_squared = K.mean(K.square(residual))
    return K.sqrt(mean_squared)

In [None]:
# 10-fold cross-validation: one Swin-based regressor is trained per fold, and
# CustomCallBacks(q) writes that fold's weights to model<q>.h5.
skf = StratifiedKFold(n_splits=10)
q=0
for train_index, test_index in skf.split(train_csv['Id'].values,train_csv['Pawpularity'].values):
    # Drop the previous fold's Keras state so models do not pile up in memory across folds.
    tf.keras.backend.clear_session()

    training_generator   = DataGenerator(train_csv['Id'].values[train_index],labels)
    validation_generator = DataGenerator(train_csv['Id'].values[test_index],labels)
    cb=CustomCallBacks(q)

    model = tf.keras.Sequential([
      tf.keras.layers.Lambda(lambda data: tf.keras.applications.imagenet_utils.preprocess_input(tf.cast(data, tf.float32), mode="torch"), input_shape=(224,224,3)),
      SwinTransformer('swin_large_224', include_top=False, pretrained=True),
      tf.keras.layers.Dropout(0.4),
      tf.keras.layers.Dense(1, activation='linear')
    ])
    # Layers are [Lambda, SwinTransformer, Dropout, Dense], so index -3 is the
    # pretrained backbone; freezing it leaves only the Dense head trainable.
    model.layers[-3].trainable=False

    opt = tf.keras.optimizers.Adam(learning_rate=0.01)
    model.compile(loss=root_mean_squared_error,optimizer=opt,metrics=[root_mean_squared_error])
    # `fit` accepts Sequence inputs directly and replaces the deprecated `fit_generator`.
    # The former `workers=-1, use_multiprocessing=True` arguments are omitted: -1 is
    # not a meaningful worker count, so Keras' default data loading is used instead.
    history=model.fit(training_generator,validation_data=validation_generator,epochs=1,callbacks=[cb])
    print('*'*100)
    print(q)
    print('*'*100)
    q+=1



****************************************************************************************************
0
****************************************************************************************************
****************************************************************************************************
1
****************************************************************************************************
****************************************************************************************************
2
****************************************************************************************************
****************************************************************************************************
3
****************************************************************************************************
****************************************************************************************************
4
*********************************************************************************

In [None]:
# NOTE(review): `jfsofjdo` is not defined anywhere in the visible notebook, so running
# this cell raises NameError — presumably a deliberate stop for "Run All"; confirm or remove the cell.
jfsofjdo