In [None]:
!pip install gradsflow -U --pre -q

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T


from gradsflow import Model, AutoDataset
from gradsflow.data import image_dataset_from_directory
from gradsflow.data import random_split_dataset


import torch
from torch import nn
from timm import create_model

In [None]:
# Metadata columns read from the dataframe for every sample and passed to the
# model next to the image.
FEATURES = (
    'Subject Focus',
    'Eyes',
    'Face',
    'Near',
    'Action',
    'Accessory',
    'Group',
    'Collage',
    'Human',
    'Occlusion',
    'Info',
    'Blur',
)

# Batch size used by the training and validation DataLoaders.
BS = 64
# Default side length (in pixels) of the square image resize.
IMAGE_SIZE = 224

In [None]:
class RegressionDataset(Dataset):
    """Dataset yielding an image, its metadata features and a regression target.

    Every item is a dict of the form::

        {"inputs": {"images": <transformed image>, "features": <float32 tensor>},
         "targets": <float32 scalar tensor>}

    Args:
        df: DataFrame with an ``Id`` column, every column listed in
            ``FEATURES`` and, for labelled data, a ``Pawpularity`` column.
            When ``Pawpularity`` is absent the dataset runs in test mode:
            images are read from the ``test`` folder and the target is a
            ``0.0`` placeholder.
        resize: Size handed to ``T.Resize`` in the default transform pipeline.
        transforms: Optional callable applied to the loaded PIL image. When
            omitted, a default resize -> to-tensor pipeline is built, with a
            random horizontal flip inserted only for labelled data.
        data_dir: Root directory that contains the ``train`` and ``test``
            image folders.
    """

    def __init__(self, df, resize=(IMAGE_SIZE, IMAGE_SIZE), transforms=None,
                 data_dir="/kaggle/input/petfinder-pawpularity-score"):
        self.df = df
        self.test = "Pawpularity" not in df.columns
        self.data_dir = data_dir
        if transforms is not None:
            self.transforms = transforms
        else:
            steps = [T.Resize(resize)]
            if not self.test:
                # Random augmentation only makes sense for labelled data;
                # keeping it out of test mode makes predictions repeatable.
                steps.append(T.RandomHorizontalFlip())
            steps.append(T.ToTensor())
            self.transforms = T.Compose(steps)

    def __len__(self):
        """Return the number of rows in the wrapped dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load and return the sample stored at positional index ``idx``."""
        # One positional lookup feeds both the features and the Id/target, so
        # they always describe the same row even if the dataframe index is not
        # the default 0..n-1 range (e.g. after filtering or shuffling).
        row = self.df.iloc[idx]
        # list(...) because a tuple indexer can be mistaken for a single key.
        features_data = row[list(FEATURES)].to_numpy().astype(np.float32)
        features_data = torch.as_tensor(features_data).float()

        image_id = row.Id
        if self.test:
            folder, target = "test", 0.0
        else:
            # Targets are scaled by 1/100 for training.
            folder, target = "train", row.Pawpularity / 100.0

        image_path = f"{self.data_dir}/{folder}/{image_id}.jpg"
        # The context manager releases the file handle promptly; convert("RGB")
        # guarantees three channels so samples can be stacked into a batch.
        with Image.open(image_path) as raw_image:
            image = self.transforms(raw_image.convert("RGB"))

        return {
            "inputs": {"images": image, "features": features_data},
            "targets": torch.as_tensor(target).float(),
        }

In [None]:
# Read the labelled training table and wrap it in the regression dataset.
df = pd.read_csv("/kaggle/input/petfinder-pawpularity-score/train.csv")
ds = RegressionDataset(df)

# Split into training and validation subsets.
# NOTE(review): 0.9 is presumably the fraction kept for training - confirm
# against the gradsflow `random_split_dataset` documentation.
train_ds, val_ds = random_split_dataset(ds, 0.9)

# Number of worker processes used by every DataLoader in this notebook.
num_workers = 4

# Only the training loader shuffles; validation keeps dataset order.
train_dl = DataLoader(
    train_ds,
    batch_size=BS,
    num_workers=num_workers,
    pin_memory=True,
    shuffle=True,
)
val_dl = DataLoader(
    val_ds,
    batch_size=BS,
    num_workers=num_workers,
    pin_memory=True,
    shuffle=False,
)

In [None]:
class PawpularModel(nn.Module):
    """Image backbone combined with a small metadata branch for scalar regression.

    The backbone output is concatenated with a 32-unit projection of the
    metadata vector and passed through two hidden layers (1024 and 128 units),
    each followed by leaky-ReLU, batch norm and dropout, then a single-unit
    output layer.

    Args:
        backbone: Model name passed to ``timm.create_model``.
        pretrained: Whether to ask timm for pretrained weights. Set to
            ``False`` to build the architecture without fetching weights,
            e.g. before loading a saved ``state_dict``.
        num_meta_features: Length of the metadata vector in ``batch["features"]``.
        dropout: Probability used by the shared dropout layer.
    """

    def __init__(self, backbone: str = "efficientnet_b0", pretrained: bool = True,
                 num_meta_features: int = 12, dropout: float = 0.3):
        super().__init__()
        # num_classes=0 requests the backbone without a classification head
        # (see the timm documentation); its output width is `num_features`.
        self.backbone = create_model(backbone, pretrained=pretrained, num_classes=0)
        num_features = self.backbone.num_features

        self.dropout = nn.Dropout(dropout)
        self.dense1 = nn.Linear(num_meta_features, 32)
        concat_outputs = 32 + num_features
        self.bn1 = nn.BatchNorm1d(concat_outputs)
        self.dense2 = nn.Linear(concat_outputs, 1024)
        self.bn2 = nn.BatchNorm1d(1024)
        self.dense3 = nn.Linear(1024, 128)
        self.bn3 = nn.BatchNorm1d(128)
        self.output = nn.Linear(128, 1)

    def forward(self, batch):
        """Return one prediction per sample as a 1-D tensor.

        Args:
            batch: Dict with an ``"images"`` tensor for the backbone and a
                ``"features"`` tensor whose last dimension is the metadata width.
        """
        images, features = batch['images'], batch['features']

        x1 = self.backbone(images)
        x2 = self.dense1(features)

        x = torch.cat([x1, x2], dim=1)
        x = nn.functional.leaky_relu(x)
        x = self.bn1(x)
        x = self.dropout(x)

        x = self.dense2(x)
        x = nn.functional.leaky_relu(x)
        x = self.bn2(x)
        x = self.dropout(x)

        x = self.dense3(x)
        x = nn.functional.leaky_relu(x)
        x = self.bn3(x)
        x = self.dropout(x)

        x = self.output(x)
        # squeeze(1) drops the trailing unit dimension without reading the
        # batch size as a Python int. `x.view(len(x))` would freeze the example
        # batch size into a torch.jit.trace graph and break other batch sizes.
        return x.squeeze(1)

In [None]:
# Build the network and wrap it in gradsflow's `Model`, passing an accelerator
# config that turns on fp16.
model = PawpularModel("efficientnet_b0")
gf_model = Model(model, accelerator_config={"fp16":True})
# Loss and optimizer are selected by name.
gf_model.compile("mseloss", "adam")
# Bundle the training and validation loaders into one object for `fit`.
autodata = AutoDataset(train_dl, val_dl)

# These strings match the top-level keys of the dict returned by
# RegressionDataset.__getitem__.
# NOTE(review): presumably the wrapper uses them to pull the model input and
# the target out of each batch - confirm against the gradsflow docs.
gf_model.INPUT_KEY = "inputs"
gf_model.OUTPUT_KEY = "targets"

In [None]:
# Train on the bundled loaders with max_epochs set to 10.
gf_model.fit(autodata, max_epochs=10)

In [None]:
# Export a TorchScript trace of the wrapped network, using one batch from the
# training loader as the example input, and save it to "model.jit.pt".
batch = next(iter(autodata.train_dataloader))
# Switch to eval mode before tracing.
gf_model.eval()
# NOTE(review): a trace only generalises to other batch sizes if forward()
# avoids Python-level shape reads such as len(tensor) - verify before reuse.
jit_model = torch.jit.trace(gf_model.learner, batch["inputs"])
torch.jit.save(jit_model, "model.jit.pt")

In [None]:
# pd.DataFrame(gf_model.tracker.logs)

In [None]:
# Read the test table and wrap it in the regression dataset.
test_df = pd.read_csv("/kaggle/input/petfinder-pawpularity-score/test.csv")
test_ds = RegressionDataset(test_df)

# Unshuffled loader, passed through the accelerator's loader preparation
# before it is used for inference.
test_dl = gf_model.accelerator.prepare_data_loader(
    DataLoader(
        test_ds,
        batch_size=8,
        num_workers=num_workers,
        pin_memory=True,
        shuffle=False,
    )
)

In [None]:
# Run the model over the test loader and collect one tensor of predictions
# per batch, in loader order.
gf_model.eval()
predictions = []
# Inference needs no gradients: no_grad() avoids building an autograd graph
# for every batch.
with torch.no_grad():
    for data in test_dl:
        preds = gf_model(data["inputs"])
        # Keep only the values, on the CPU, so stored predictions do not pin
        # device memory or graph references.
        predictions.append(preds.detach().cpu())

In [None]:
# Pair every test Id with its prediction, multiplying by 100 to undo the /100
# scaling applied to the training targets.
# The Ids are taken in dataframe order, which is the order the unshuffled test
# loader produced the predictions in. (A counter that is never advanced would
# give every row the first Id.)
test_ids = test_ds.df["Id"].tolist()
scores = [score.item() for preds in predictions for score in preds * 100]
if len(scores) != len(test_ids):
    raise ValueError(
        f"Got {len(scores)} predictions for {len(test_ids)} test rows; "
        "cannot build a submission."
    )
results = [
    {"Id": image_id, "Pawpularity": score}
    for image_id, score in zip(test_ids, scores)
]

In [None]:
# Write the Id/Pawpularity records to "submission.csv" without the index column.
pd.DataFrame(results).to_csv("submission.csv", index=False)

In [None]:
# Display the same records as the cell output for a quick visual check.
pd.DataFrame(results)