In [None]:
!pip install timm

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import pandas as pd
import numpy as np
import timm
from PIL import Image
import pytorch_lightning as pl
import os

In [None]:
from sklearn.model_selection import train_test_split

# Read the labelled training table once; both splits below are carved out of it.
main_df = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')

# Shuffle and hold out 10% of the rows for validation; the fixed seed keeps
# the split identical from run to run.
train_df, valid_df = train_test_split(
    main_df,
    shuffle=True,
    random_state=42,
    test_size=0.1,
)

In [None]:
# Inspect the training split produced by train_test_split.
train_df

In [None]:
# Inspect the held-out validation split produced by train_test_split.
valid_df

In [None]:
class Config:
    """Notebook-wide hyper-parameters and the image preprocessing pipeline.

    The values are read elsewhere through the module-level ``config`` instance
    created right after this class.
    """

    # Per-channel statistics handed to ``transforms.Normalize`` below.
    IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
    IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)

    transform = transforms.Compose(
        [
            # RandomCrop's second positional argument is ``padding``, not a
            # height/width pair: ``RandomCrop(500, 500)`` padded every border
            # by 500 px of zeros before cropping. Ask for a 500x500 crop
            # explicitly and pad only images that are smaller than the crop.
            transforms.RandomCrop((500, 500), pad_if_needed=True),
            # The crop is square, so this yields a 384x384 image.
            transforms.Resize(384),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.RandomRotation(45),
            transforms.ToTensor(),
            transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
        ]
    )
    BATCH_SIZE = 8
    LR = 3e-4


config = Config()

In [None]:
class ImageFolder(Dataset):
    """Dataset yielding ``(image, metadata_features, target)`` for one photo.

    Args:
        base_dir: Directory that contains the ``<Id>.jpg`` image files.
        dataframe: Table with an ``Id`` column, the metadata feature columns
            and, for labelled data, the ``target_col`` column.
        transform: Callable applied to the RGB PIL image. ``None`` (default)
            means ``config.transform``, looked up when an item is fetched.
        target_col: Name of the label column. When the column is absent from
            ``dataframe`` every target is a ``0.0`` placeholder.
    """

    def __init__(self, base_dir, dataframe, transform=None, target_col='Pawpularity'):
        self.basedir = base_dir
        self.transform = transform
        self.img_id = dataframe['Id'].to_numpy()

        if target_col in dataframe.columns:
            # Use the real labels. The targets used to be torch.rand(...) noise,
            # and the label column was left inside the feature vector.
            self.targets = torch.from_numpy(
                dataframe[target_col].to_numpy(dtype='float32', copy=True)
            )
            non_feature_cols = ['Id', target_col]
        else:
            # No labels available: deterministic placeholder so that the
            # (image, feature, target) item layout stays the same.
            self.targets = torch.zeros(len(self.img_id), dtype=torch.float32)
            non_feature_cols = ['Id']

        self.features = torch.from_numpy(
            dataframe.drop(columns=non_feature_cols).to_numpy(dtype='float32', copy=True)
        )

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, idx):
        # Resolve the default lazily so the dataset can be constructed before
        # (or without) touching the global ``config``.
        transform = self.transform if self.transform is not None else config.transform

        path = os.path.join(self.basedir, self.img_id[idx] + '.jpg')
        # convert() decodes the pixels into a new image, so the file handle can
        # be closed as soon as the ``with`` block exits.
        with Image.open(path) as raw:
            rgb = raw.convert('RGB')
        image = transform(rgb).to(torch.float32)

        return image, self.features[idx], self.targets[idx]

In [None]:
# Dataset over the test images, paired with the rows of test.csv.
test_dataset = ImageFolder(
    '../input/petfinder-pawpularity-score/test',
    pd.read_csv('../input/petfinder-pawpularity-score/test.csv'),
)

In [None]:
# Fetch one (image, feature, target) item to sanity-check the dataset.
test_dataset[1]

In [None]:
# Unshuffled loader over the test dataset, 20 samples per batch.
test_loader = DataLoader(
    dataset=test_dataset,
    num_workers=2,
    shuffle=False,
    batch_size=20,
)

In [None]:
# Build the pretrained backbone only to inspect it; the result is not assigned.
timm.create_model('swin_large_patch4_window12_384', pretrained=True)

In [None]:
class EffNet(nn.Module):
    """Image backbone fused with tabular metadata for scalar regression.

    Despite the class name, the default backbone is a Swin transformer.

    Args:
        model_name: timm architecture name of the pretrained backbone.
        n_meta_features: Width of the metadata vector concatenated to the
            image embedding (default 12).
        embed_dim: Size of the image embedding produced by the backbone head
            (default 128).
        dropout: Dropout probability applied to the image embedding
            (default 0.3).
    """

    def __init__(self, model_name='swin_large_patch4_window12_384',
                 n_meta_features=12, embed_dim=128, dropout=0.3):
        super().__init__()
        # Let timm build the head at the requested width via ``num_classes``
        # instead of overwriting ``model.head`` by hand, which relies on the
        # backbone's head being a plain ``nn.Linear``.
        self.model = timm.create_model(model_name, pretrained=True, num_classes=embed_dim)
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(embed_dim + n_meta_features, 1)

    def forward(self, img, feature):
        """Return one prediction per sample, shaped ``(batch, 1)``."""
        embedding = self.dropout(self.model(img))
        # Concatenate along the feature axis: (batch, embed_dim + n_meta_features).
        fused = torch.cat([embedding, feature], dim=1)
        return self.linear(fused)

In [None]:
class LitRegressor(pl.LightningModule):
    """Lightning module that trains ``EffNet`` with an RMSE objective.

    Args:
        batch_size: Batch size used by both dataloaders.
        lr: Learning rate for AdamW. It is read from ``self.lr`` when the
            optimizer is configured, so it can be overwritten after
            construction.

    The dataloaders are built from the notebook-level ``train_df`` and
    ``valid_df`` frames.
    """

    def __init__(self, batch_size=config.BATCH_SIZE, lr=config.LR):
        super().__init__()
        self.batch_size = batch_size
        self.lr = lr
        self.model = EffNet('swin_large_patch4_window12_384')
        self.criterion = nn.MSELoss()

    def forward(self, x, feature):
        return self.model(x, feature)

    def _batch_rmse(self, batch):
        """Compute the RMSE of the model's predictions on one batch."""
        imgs, features, targets = batch
        # squeeze(-1) removes only the output dimension. A bare squeeze() would
        # also drop a batch dimension of size 1 and leave a 0-d tensor that no
        # longer lines up with ``targets``.
        preds = self(imgs, features).squeeze(-1)
        # Compare in float32 regardless of the dtype the targets arrive in.
        return torch.sqrt(self.criterion(preds.float(), targets.float()))

    def training_step(self, batch, batch_idx):
        rmse = self._batch_rmse(batch)
        self.log("Train RMSE", rmse, prog_bar=True)
        return rmse

    def validation_step(self, batch, batch_idx):
        # NOTE: this logs a per-batch RMSE; the logged epoch value is therefore
        # an aggregate of batch RMSEs rather than the RMSE over the whole split.
        rmse = self._batch_rmse(batch)
        self.log("Valid RMSE", rmse, prog_bar=True)
        return rmse

    def configure_optimizers(self):
        return torch.optim.AdamW(self.model.parameters(), lr=self.lr)

    def _make_loader(self, frame, shuffle):
        """Wrap ``frame`` in an ``ImageFolder`` over the train image directory."""
        dataset = ImageFolder('../input/petfinder-pawpularity-score/train', frame)
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            num_workers=2,
            shuffle=shuffle,
            pin_memory=True
        )

    def train_dataloader(self):
        return self._make_loader(train_df, shuffle=True)

    def val_dataloader(self):
        # Validation images live in the same directory as the training images;
        # only the rows (valid_df) differ.
        return self._make_loader(valid_df, shuffle=False)

In [None]:
# Ask PyTorch to release its cached CUDA memory, then print the GPU status
# so the memory available before training is visible.
torch.cuda.empty_cache()
!nvidia-smi

In [None]:
model = LitRegressor()
# Stop when the logged "Valid RMSE" has not improved for 6 validation runs.
early_stopping = pl.callbacks.EarlyStopping(monitor="Valid RMSE", patience=6, verbose=True)
trainer = pl.Trainer(gpus=1, precision=16, callbacks=[early_stopping], log_every_n_steps=35)

# Run learning rate finder (raw sweep data is available as lr_finder.results)
lr_finder = trainer.tuner.lr_find(model)

# Plot with
fig = lr_finder.plot(suggest=True)
fig.show()

# Pick point based on plot, or get suggestion
new_lr = lr_finder.suggestion()

# update hparams of the model, but only when the finder produced a value:
# suggestion() may return None, and a None learning rate would break the
# optimizer setup inside trainer.fit.
if new_lr is not None:
    model.lr = new_lr
else:
    print(f"LR finder returned no suggestion; keeping LR: {model.lr}")

print(f"Initial LR: {model.lr:}")
# Fit model
trainer.fit(model)

In [None]:
# target: 18.04930

In [None]:
# trainer.save_checkpoint('./swin-model.ckpt')

In [None]:

# Inference needs dropout disabled, and the inputs must sit on the same device
# as the model weights.
model.eval()
device = next(model.parameters()).device

batch_preds = []
with torch.no_grad():
    for img, feature, _ in test_loader:
        out = model(img.to(device), feature.to(device))
        batch_preds.append(out.float().cpu())

# Keep every batch: `preds` used to hold only the output of the final batch,
# which made the submission shorter than the test table.
preds = torch.cat(batch_preds, dim=0)

In [None]:
# Check the shape of the prediction tensor before flattening it.
preds.shape

In [None]:
# reshape(-1) always yields a flat list; squeeze() would collapse a single-row
# prediction to a bare float instead of a one-element list.
final_preds = preds.reshape(-1).tolist()

In [None]:
# Start the submission from the test table so each prediction lines up with its row.
submission_df = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')

In [None]:
# Attach the predictions as a new column (lengths must match the test table),
# then display the frame for a visual check.
submission_df['Pawpularity'] = final_preds
submission_df

In [None]:
# Write only the identifier and the prediction, without the pandas index.
# The previous call also emitted an unnamed index column and every metadata
# column. NOTE(review): presumably the expected format is `Id,Pawpularity` —
# confirm against sample_submission.csv.
submission_df[['Id', 'Pawpularity']].to_csv('./submission.csv', index=False)

In [None]:
# !zip -r './lightning_logs.zip' './lightning_logs'