In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    full_paths = [os.path.join(folder, name) for name in names]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import sys
sys.path.append('../input/timmmaster/')
sys.path.append('../input/tez-lib/')
!pip install GPUtil

In [None]:
import tez
import albumentations
import pandas as pd
import cv2
import numpy as np
import timm
import torch.nn as nn
from sklearn import metrics
import torch
from tez.callbacks import EarlyStopping
from tqdm import tqdm
import gc
from GPUtil import showUtilization as gpu_usage
import random

In [None]:
def seed_everything(seed=999):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: Value handed to every generator (default 999).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True


# Seed once with the default value before anything random is created.
seed_everything()

In [None]:
class args:
    """Hyper-parameters read by the augmentation, model and training cells."""

    # Training batch size; the validation loader uses twice this value.
    batch_size = 16
    # Number of epochs passed to `model.fit`.
    epochs = 20
    # Height and width (in pixels) handed to the crop/resize augmentation.
    image_size = 224
    # Beta-distribution parameters read by `cutmixandmixup`.
    mixup_alpha = 0.2
    cutmix_alpha = 20

In [None]:
def cutmixandmixup(x,z,y,prob,mixup_alpha=None,cutmix_alpha=None,mixup_prob=0.2):
    """Apply MixUp or CutMix to one batch of images, dense features and targets.

    MixUp is used when ``prob < mixup_prob``, CutMix otherwise. Whether a batch
    is augmented at all is decided by the caller.

    Args:
        x: Image batch indexed as (batch, channel, height, width).
        z: Dense features, indexed as (batch, feature).
        y: Targets whose first dimension is the batch.
        prob: Random draw selecting the augmentation.
        mixup_alpha: Beta parameter for the MixUp weight; ``None`` falls back
            to ``args.mixup_alpha``. A value <= 0 disables mixing (weight 1).
        cutmix_alpha: Beta parameter for the CutMix box size; ``None`` falls
            back to ``args.cutmix_alpha``.
        mixup_prob: Upper bound of ``prob`` for which MixUp is chosen.

    Returns:
        Tuple ``(mixed_x, mixed_z, y_a, y_b, lam)``. ``y_a`` is ``y`` and
        ``y_b`` is ``y`` permuted like the partner samples.
        For MixUp ``lam`` is a scalar: the weight of the original sample (``y_a``).
        For CutMix ``lam`` is a (batch, 1) float tensor: the fraction of each
        image replaced by its partner, i.e. the weight of ``y_b``.
    """
    batch_size=x.size(0)
    # Create the permutation on the device of the data instead of hard-coding CUDA.
    index=torch.randperm(batch_size,device=x.device)
    y_a,y_b=y,y[index]

    if prob<mixup_prob:
        if mixup_alpha is None:
            mixup_alpha=args.mixup_alpha
        lam=np.random.beta(mixup_alpha,mixup_alpha) if mixup_alpha>0 else 1
        mixed_x=lam*x+(1-lam)*x[index]
        mixed_z=lam*z+(1-lam)*z[index]
        return mixed_x,mixed_z,y_a,y_b,lam

    if cutmix_alpha is None:
        cutmix_alpha=args.cutmix_alpha
    # Spatial size comes from the last two axes (axis 1 is the channel axis).
    img_h,img_w=x.size(2),x.size(3)
    partners=index.tolist()
    # Work on a copy so the caller's batch is left untouched.
    image=x.clone()
    lam=torch.zeros(batch_size,dtype=torch.float,device=x.device)

    for i in range(batch_size):
        width=int(np.round(np.random.beta(cutmix_alpha,cutmix_alpha)*img_w))
        height=int(np.round(np.random.beta(cutmix_alpha,cutmix_alpha)*img_h))
        # Random box centre; the box is clipped to the image borders.
        xx=np.random.randint(0,img_w)
        yy=np.random.randint(0,img_h)
        ya=max(0,yy-height//2)
        yb=min(img_h,yy+height//2)
        xa=max(0,xx-width//2)
        xb=min(img_w,xx+width//2)
        # Paste the partner's pixels into the box.
        image[i,:,ya:yb,xa:xb]=x[partners[i],:,ya:yb,xa:xb]
        lam[i]=(yb-ya)*(xb-xa)/(img_h*img_w)

    # (batch, 1) so it broadcasts over the feature axis of z.
    lam=lam.unsqueeze(1)
    mixed_z=(1-lam)*z+lam*z[index]

    return image,mixed_z,y_a,y_b,lam
        
    
        

def mixup_loss(loss_fn,pred,y_a,y_b,lam):
    """Blend the loss of ``pred`` against both MixUp targets.

    The loss against ``y_a`` is weighted by ``lam`` and the loss against
    ``y_b`` by ``1 - lam``.
    """
    original_term=lam*loss_fn(pred,y_a)
    partner_term=(1-lam)*loss_fn(pred,y_b)
    return original_term+partner_term

def cutmix_loss(loss_fn,pred,y_a,y_b,lam):
    """Per-sample CutMix loss, averaged over the batch.

    For sample ``i`` the loss against ``y_b[i]`` is weighted by ``lam[i]`` and
    the loss against ``y_a[i]`` by ``1 - lam[i]``.

    The per-sample terms are averaged (not summed) so the result has the same
    scale as a mean-reduced loss on an un-mixed batch.

    Args:
        loss_fn: Callable ``loss_fn(prediction, target)``.
        pred: Predictions, indexable by sample.
        y_a: Targets of the original samples.
        y_b: Targets of the partner samples.
        lam: Per-sample weight of ``y_b``.

    Returns:
        The mean of the per-sample losses, or ``0`` for an empty batch.
    """
    per_sample=[
        lam[i]*loss_fn(pred[i],y_b[i])+(1-lam[i])*loss_fn(pred[i],y_a[i])
        for i in range(len(pred))
    ]
    if not per_sample:
        # Nothing to average; mirrors what summing an empty list yields.
        return 0
    return sum(per_sample)/len(per_sample)

In [None]:
class PawpularDataset:
    """Dataset yielding an image tensor, its dense features and its scaled target.

    Args:
        image_paths: Sequence of image file paths.
        dense_features: 2-D array indexed as ``dense_features[item, :]``.
        targets: Sequence of raw scores; each is divided by 100 when read.
        augmentations: ``None``, or a callable invoked as
            ``augmentations(image=...)`` that returns a mapping with an
            ``"image"`` entry.
    """

    def __init__(self,image_paths,dense_features,targets,augmentations):
        self.image_paths=image_paths
        self.dense_features=dense_features
        self.targets=targets
        self.augmentations=augmentations

    def __len__(self):
        """Return the number of images."""
        return len(self.image_paths)

    def __getitem__(self,item):
        """Load sample ``item``.

        Returns:
            Dict with float tensors ``"image"`` (channel-first), ``"features"``
            and ``"targets"``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        path=self.image_paths[item]
        image=cv2.imread(path)
        if image is None:
            # cv2.imread reports an unreadable/missing file by returning None;
            # fail here with the path instead of an opaque error in cvtColor.
            raise FileNotFoundError(f"Could not read image: {path}")
        image=cv2.cvtColor(image,cv2.COLOR_BGR2RGB)

        if self.augmentations is not None:
            augmented=self.augmentations(image=image)
            image=augmented["image"]

        # Channel-last -> channel-first.
        image=np.transpose(image,(2,0,1)).astype(np.float32)

        features=self.dense_features[item,:]
        # Scale the raw score by 1/100.
        targets=self.targets[item]/100.

        return{
            "image":torch.tensor(image,dtype=torch.float),
            "features":torch.tensor(features,dtype=torch.float),
            "targets":torch.tensor(targets,dtype=torch.float)
        }
    

In [None]:

class PawpularModel(tez.Model):
    """timm BEiT backbone combined with 12 dense features, producing one logit.

    The backbone head is replaced by a 128-unit linear layer. Its output goes
    through dropout, is concatenated with the dense features and passed through
    ``dense1 -> ReLU -> dense2``. The loss is ``BCEWithLogitsLoss``; the
    reported metric is the RMSE of ``sigmoid(logit) * 100`` against
    ``targets * 100``.
    """

    # Thresholds on the per-batch random draw:
    #   [0, MIXUP_PROB)            -> MixUp
    #   [MIXUP_PROB, AUGMENT_PROB) -> CutMix
    #   [AUGMENT_PROB, 1)          -> no mixing
    # MIXUP_PROB must agree with the threshold used inside `cutmixandmixup`.
    MIXUP_PROB=0.2
    AUGMENT_PROB=0.5

    def __init__(self):
        super().__init__()

        self.model=timm.create_model("beit_large_patch16_224",pretrained=True,in_chans=3)
        self.model.head=nn.Linear(self.model.head.in_features,128)
        self.dropout=nn.Dropout(0.2)
        # 128 backbone outputs + 12 dense features.
        self.dense1=nn.Linear(128+12,64)
        self.relu=nn.ReLU()
        self.dense2=nn.Linear(64,1)
        self.step_scheduler_after='epoch'

    def monitor_metrics(self,outputs,targets):
        """Return ``{'rmse': ...}`` for the given predictions and targets."""
        outputs=outputs.cpu().detach().numpy()
        targets=targets.cpu().detach().numpy()
        # sqrt(MSE) rather than `squared=False`, which newer scikit-learn
        # releases no longer accept.
        rmse=np.sqrt(metrics.mean_squared_error(targets,outputs))
        return {'rmse': rmse}

    def fetch_scheduler(self):
        """Cosine annealing with warm restarts on the model's optimizer."""
        sch=torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(self.optimizer,T_0=10,T_mult=1,eta_min=1e-6,last_epoch=-1)
        return sch

    def fetch_optimizer(self):
        """Adam over all parameters with a learning rate of 1e-4."""
        opt=torch.optim.Adam(self.parameters(),lr=1e-4)
        return opt

    def forward(self,image,features,targets=None):
        """Run the network and, when targets are given, compute loss and metrics.

        Args:
            image: Image batch.
            features: Dense feature batch with 12 columns.
            targets: Optional targets; when ``None`` no loss is computed.

        Returns:
            ``(logits, loss, metrics_dict)``; ``(logits, 0, {})`` without targets.
        """
        # NOTE(review): `_train_state` is an attribute of the tez base class that
        # is not visible here. Confirm it really is a boolean in the bundled tez
        # version; otherwise this comparison is never true and MixUp/CutMix never
        # run (nn.Module's `self.training` is the standard train/eval flag).
        is_training=(targets is not None) and (self._train_state==True)

        # Which loss to use below: "mixup", "cutmix" or "plain".
        mode="plain"
        if is_training:
            # Follow the model's device instead of hard-coding CUDA.
            device=next(self.parameters()).device
            prob=float(torch.rand(1))

            if prob<self.AUGMENT_PROB:
                # Mix only for draws that also get a mixed loss below, so images
                # and targets always stay consistent.
                mode="mixup" if prob<self.MIXUP_PROB else "cutmix"
                image,features,target_a,target_b,lam=cutmixandmixup(image,features,targets.view(-1,1),prob)
                image=image.to(device=device,dtype=torch.float)
                features=features.to(device=device,dtype=torch.float)
                target_a=target_a.to(device=device,dtype=torch.float)
                target_b=target_b.to(device=device,dtype=torch.float)
                if torch.is_tensor(lam):
                    lam=lam.to(device=device,dtype=torch.float)
            else:
                image=image.to(device=device,dtype=torch.float)
                features=features.to(device=device,dtype=torch.float)
                targets=targets.to(device=device,dtype=torch.float)

        x=self.model(image)
        x=self.dropout(x)
        x=torch.cat([x,features],dim=1)
        x=self.dense1(x)
        x=self.relu(x)
        x=self.dense2(x)

        if targets is not None:
            loss_fn=nn.BCEWithLogitsLoss()
            if mode=="mixup":
                loss=mixup_loss(loss_fn,x,target_a,target_b,lam)
            elif mode=="cutmix":
                loss=cutmix_loss(loss_fn,x,target_a,target_b,lam)
            else:
                loss=loss_fn(x,targets.view(-1,1))
            # Named `batch_metrics` so the sklearn `metrics` module is not shadowed.
            batch_metrics=self.monitor_metrics(torch.sigmoid(x)*100,targets*100)
            return x,loss,batch_metrics
        return x,0,{}
                
    

In [None]:
# Training pipeline: random crop to the model input size, colour and geometric
# jitter, coarse dropout, then normalisation.
# NOTE(review): the 0.2 shift limits of HueSaturationValue and the 5/255
# variance limit of GaussNoise look very small if those parameters are
# expressed in 8-bit pixel units - confirm against the albumentations docs.
train_aug=albumentations.Compose([
    albumentations.RandomResizedCrop(args.image_size,args.image_size,p=1),
    albumentations.HueSaturationValue(hue_shift_limit=0.2,sat_shift_limit=0.2,val_shift_limit=0.2,p=0.5),
    albumentations.RandomBrightnessContrast(brightness_limit=(-0.1,0.1),contrast_limit=(-0.1,0.1),p=0.5),
    albumentations.HorizontalFlip(p=0.5),
    albumentations.VerticalFlip(p=0.5),
    albumentations.GaussNoise(var_limit=5.0 / 255.0, p=0.50),
    albumentations.Rotate(limit=180,p=0.7),
    albumentations.ShiftScaleRotate(shift_limit=0.1,scale_limit=0.1,rotate_limit=45,p=0.5),
    albumentations.CoarseDropout(max_holes=15,max_width=10,max_height=10,min_holes=6,p=0.5),
    albumentations.Normalize(
        mean=[0.485,0.456,0.406],
        std=[0.229,0.224,0.225],
        max_pixel_value=255.0,
        p=1
    ),
],p=1)

# Validation pipeline: deterministic resize + the same normalisation.
# Random crops/flips are deliberately not used here, so the validation RMSE
# that drives early stopping is reproducible from epoch to epoch.
valid_aug=albumentations.Compose([
    albumentations.Resize(args.image_size,args.image_size,p=1),
    albumentations.Normalize(
        mean=[0.485,0.456,0.406],
        std=[0.229,0.224,0.225],
        max_pixel_value=255.0,
        p=1
    ),
],p=1)


In [None]:
# Columns of the fold CSV that are fed to the model as dense features.
dense_features = [
    'Subject Focus',
    'Eyes',
    'Face',
    'Near',
    'Action',
    'Accessory',
    'Group',
    'Collage',
    'Human',
    'Occlusion',
    'Info',
    'Blur',
]


In [None]:
FOLDS_CSV='../input/creating-folds/train_10folds.csv'
TRAIN_IMAGE_DIR='../input/petfinder-pawpularity-score/train'

# The fold assignment table is the same for every fold, so it is read once
# instead of on every iteration.
df=pd.read_csv(FOLDS_CSV)

# Only folds 5-9 are trained in this loop.
# NOTE(review): the CSV name suggests 10 folds - presumably folds 0-4 are
# trained elsewhere; confirm.
for fold_ in range(5,10):

    print(f'--- Fold {fold_} ---')
    # Hold out the current fold for validation and train on all the others.
    df_train=df[df.kfold!=fold_].reset_index(drop=True)
    df_valid=df[df.kfold==fold_].reset_index(drop=True)
    train_img_paths=[f"{TRAIN_IMAGE_DIR}/{x}.jpg" for x in df_train["Id"].values]
    valid_img_paths=[f"{TRAIN_IMAGE_DIR}/{x}.jpg" for x in df_valid["Id"].values]

    train_dataset=PawpularDataset(
        image_paths=train_img_paths,
        dense_features=df_train[dense_features].values,
        targets=df_train['Pawpularity'].values,
        augmentations=train_aug
    )

    valid_dataset=PawpularDataset(
        image_paths=valid_img_paths,
        dense_features=df_valid[dense_features].values,
        targets=df_valid['Pawpularity'].values,
        augmentations=valid_aug
    )

    model=PawpularModel()

    # Stop when the validation RMSE has not improved for 5 epochs; the
    # callback is given one weights file name per fold.
    es=EarlyStopping(
        monitor="valid_rmse",
        model_path=f"model_f{fold_}.bin",
        patience=5,
        mode="min",
        save_weights_only=True
    )

    model.fit(
        train_dataset,
        valid_dataset=valid_dataset,
        train_bs=args.batch_size,
        valid_bs=2*args.batch_size,
        device="cuda",
        epochs=args.epochs,
        callbacks=[es],
        fp16=True,
        n_jobs=-1
    )

    # Drop the per-fold objects and release cached GPU memory before the next fold.
    del train_dataset,valid_dataset,model,df_train,df_valid,train_img_paths,valid_img_paths
    gc.collect()
    torch.cuda.empty_cache()