In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# !pip install --upgrade --force-reinstall kaggle
# !pip install wandb
# !pip install timm
# !pip install albumentations


In [None]:
# Install timm and albumentations from package files attached as a Kaggle input
# dataset: --no-index stops pip from contacting PyPI and --find-links points it
# at the local directory holding the distributions.
!pip install timm --no-index --find-links=file:../input/petfinderpawpularitypackages/timm
!pip install albumentations --no-index --find-links=../input/petfinderpawpularitypackages/albumentations

In [None]:
# Run only to download dataset

# !rm -rf /content/drive/MyDrive/Kaggle\ Datasets/petfinder-pawpularity-score
# %cd /content
# !mkdir -p ~/.kaggle/ && cp /content/drive/MyDrive/Kaggle/kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json
# !mkdir -p /content/drive/MyDrive/Kaggle\ Datasets/petfinder-pawpularity-score
# %cd /content/drive/MyDrive/Kaggle Datasets/petfinder-pawpularity-score
# !kaggle competitions download -c petfinder-pawpularity-score
# !unzip petfinder-pawpularity-score.zip
# !rm petfinder-pawpularity-score.zip
# %cd /content

In [None]:
import torch
# Root folder of the competition data; the trailing slash matters because later
# cells build paths by plain string concatenation (f'{BASE_DIR}train.csv').
BASE_DIR = "/kaggle/input/petfinder-pawpularity-score/"

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
import pandas as pd
# Training annotations; the Dataset class below reads the "Id" and
# "Pawpularity" columns from this same file.
train_csv_path = f'{BASE_DIR}train.csv'
train_df = pd.read_csv(train_csv_path)
train_df.head()

In [None]:
# Test annotations (reloaded again when the submission file is written).
test_csv_path = f'{BASE_DIR}test.csv'
test_df = pd.read_csv(test_csv_path)
# test_df.head()

In [None]:
# Training configuration shared by the cells below.
IMAGE_SIZE = 96  # side length (pixels) every image is resized to
BATCH_SIZE = 32  # samples per training mini-batch
EPOCHS = 10      # full passes over the training set
DROPOUT = 0.3    # dropout probability applied before the final linear layer

In [None]:
import albumentations

# Per-channel normalisation statistics shared by both pipelines (these are the
# widely used ImageNet mean/std values). Kept as tuples and copied into fresh
# lists per transform so the two pipelines never share a mutable object.
_NORM_MEAN = (0.485, 0.456, 0.406)
_NORM_STD = (0.229, 0.224, 0.225)

# Training pipeline: resize, two colour jitters applied with probability 0.5
# each, then normalisation.
# NOTE(review): albumentations documents the HueSaturationValue limits as
# integer-scale shifts (library defaults are in the tens); 0.2 probably yields
# almost no colour change on uint8 images -- confirm the intended strength.
_train_steps = [
    albumentations.Resize(IMAGE_SIZE, IMAGE_SIZE, p=1),
    albumentations.HueSaturationValue(
        hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5
    ),
    albumentations.RandomBrightnessContrast(
        brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5
    ),
    albumentations.Normalize(
        mean=list(_NORM_MEAN), std=list(_NORM_STD), max_pixel_value=255.0, p=1.0
    ),
]
train_aug = albumentations.Compose(_train_steps)

# Evaluation pipeline: deterministic resize + normalisation only.
_valid_steps = [
    albumentations.Resize(IMAGE_SIZE, IMAGE_SIZE, p=1),
    albumentations.Normalize(
        mean=list(_NORM_MEAN), std=list(_NORM_STD), max_pixel_value=255.0, p=1.0
    ),
]
valid_aug = albumentations.Compose(_valid_steps)

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
from skimage import io, transform
import os
from PIL import Image
import cv2
import numpy as np

class PetDataset(Dataset):
    """Image dataset backed by a CSV file and a sibling folder of JPEG images.

    Rows are read from ``{annotations_path}/{csv_name}.csv``; the image for a
    row is loaded from ``{annotations_path}/{csv_name}/{Id}.jpg``.

    Args:
        annotations_path: Directory holding the CSV file and the image folder.
        csv_name: Split name, used both as the CSV stem and the image folder
            name. Every split other than ``"test"`` also yields a target read
            from the ``Pawpularity`` column.
        transforms: Optional callable invoked as ``transforms(image=array)``
            that returns a mapping with an ``"image"`` entry (the
            albumentations calling convention).
        nrows: Maximum number of CSV rows to load. ``None`` (the default)
            loads every row, so the dataset no longer depends on a global
            ``train_df`` and a test split is never capped at the training
            set's length.
    """

    def __init__(self, annotations_path, csv_name, transforms=None, nrows=None):
        self.annotations_path = annotations_path
        self.csv_name = csv_name

        self.transforms = transforms
        self.img_labels = pd.read_csv(f'{annotations_path}/{csv_name}.csv', nrows=nrows)

    def __len__(self):
        """Return the number of rows loaded from the CSV file."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, target)`` for non-test splits, or just ``image`` for the
            ``"test"`` split. ``image`` is a float tensor in channel-first
            layout; ``target`` is a scalar float tensor.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be
                decoded.
        """
        row = self.img_labels.iloc[idx]
        img_path = os.path.join(self.annotations_path, self.csv_name, f'{row["Id"]}.jpg')

        # cv2.imread signals failure by returning None instead of raising;
        # surface the offending path here rather than failing inside cvtColor.
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")

        # OpenCV decodes to BGR; the rest of the pipeline works in RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transforms is not None:
            transformed = self.transforms(image=image)
            image = transformed["image"]

        # Height-width-channel -> channel-height-width, as the model expects.
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        image = torch.tensor(image, dtype=torch.float)

        if self.csv_name == "test":
            return image

        target = torch.tensor(row["Pawpularity"], dtype=torch.float)
        return (image, target)
    
# Training split with augmentation; batches are reshuffled every epoch.
train_dataset = PetDataset(annotations_path=BASE_DIR, csv_name="train", transforms=train_aug)
train_dl = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)


In [None]:
# for (img, target) in train_dl:
#     print(img.shape, target.shape)
#     break

# for (img) in test_dl:
#     print(img.shape)
#     break

In [None]:
# # img, target = next(iter(train_dl))
# # img, target
# import matplotlib.pyplot as plt

# (train_features, train_labels) = next(iter(train_dl))
# print(f"Feature batch shape: {train_features.size()}")
# print(f"Labels batch shape: {train_labels.size()}")
# img_source = train_features[0]
# # img_source = img_source.reshape(3, 96, 96)

# print(img_source.shape)
# plt.imshow(img_source.permute(1, 2, 0)  )


In [None]:
import timm
class Model(nn.Module):
    """EfficientNet-B0 backbone with a small regression head.

    The backbone's classifier is replaced by a 64-unit linear layer, followed
    by dropout and a final linear layer producing one value per image.
    """

    def __init__(self):
        super().__init__()

        # Build the architecture without downloading weights, then restore
        # them from the checkpoint file shipped as a notebook input.
        backbone = timm.create_model("tf_efficientnet_b0_ns", pretrained=False, in_chans=3)
        checkpoint = torch.load(
            "../input/timm-pretrained-efficientnet/efficientnet/tf_efficientnet_b0_ns-c0e6a31c.pth",
            map_location=device,
        )
        backbone.load_state_dict(checkpoint)

        # Swap the stock classifier for a 64-feature projection; this must
        # happen after the checkpoint is loaded so its keys still match.
        backbone.classifier = nn.Linear(backbone.classifier.in_features, 64)

        self.model = backbone
        self.dropout = nn.Dropout(DROPOUT)
        self.dense = nn.Linear(64, 1)

    def forward(self, image, targets=None):
        """Return ``(predictions, targets)``.

        ``predictions`` has a trailing dimension of size 1 (one value per
        image); ``targets`` is handed back untouched.
        """
        features = self.model(image)
        predictions = self.dense(self.dropout(features))
        return predictions, targets

In [None]:
# Instantiate the network and move its parameters to the selected device
# (nn.Module.to returns the module itself, so the calls can be chained).
model = Model().to(device)
# model

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

import torch.optim as optim
from torchvision.transforms import transforms
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import math
import time
from sklearn.metrics import r2_score
from termcolor import cprint
import warnings
import numpy as np
from sklearn.model_selection import train_test_split
warnings.filterwarnings('ignore')
# Seed both NumPy and PyTorch: DataLoader shuffling and dropout draw from
# torch's random number generator, which np.random.seed alone does not control.
np.random.seed(42)
torch.manual_seed(42)

# validation_size= 0.2
# train_dataset = PetDataset(BASE_DIR, "train", train_aug, nrows=BATCH_SIZE)
# print(len(train_dataset))
# num_train = len(train_dataset)
# indices = list(range(num_train))
# split = int(np.floor(validation_size * num_train))
# np.random.shuffle(indices)

# train_idx, valid_idx = indices[split:], indices[:split]
# train_sampler = SubsetRandomSampler(train_idx)
# valid_sampler = SubsetRandomSampler(valid_idx)

# train_dl = DataLoader(train_dataset, shuffle=False, batch_size = BATCH_SIZE, sampler=train_sampler)
# val_dl = DataLoader(train_dataset, shuffle=False, batch_size = BATCH_SIZE, sampler=valid_sampler)

# EPOCHS = 1
# Mean-squared-error objective and Adam optimiser over every model parameter.
criterion = nn.MSELoss()
optm = optim.Adam(model.parameters(), lr=1e-4)

# Loss histories: one entry per batch (*_step_loss) and one per epoch (*_loss).
# The validation lists stay empty while the validation pass is disabled.
train_step_loss, val_step_loss = [], []
train_loss, val_loss = [], []
val_best_loss = np.inf

for epoch in range(EPOCHS):
    start_time = time.time()
    print(f"Epoch {epoch + 1} : ")
    epoch_loss = 0.0
    num_batches = 0
    model.train()
    for (img, target) in train_dl:
        optm.zero_grad()
        img = img.float().to(device)
        target = target.float().to(device)
        output, _ = model(img)
        # The model emits (batch, 1) while the targets are (batch,). Passing
        # them as-is makes MSELoss broadcast to (batch, batch) and average over
        # every prediction/target pair, so flatten both sides first.
        loss = criterion(output.reshape(-1), target.reshape(-1))
        step_loss = loss.item()
        epoch_loss += step_loss
        train_step_loss.append(step_loss)
        num_batches += 1
        loss.backward()
        optm.step()

    # Record the summed batch losses for the epoch and report the per-batch
    # mean together with the wall-clock time the epoch took.
    train_loss.append(epoch_loss)
    mean_loss = epoch_loss / max(num_batches, 1)
    print(f"training data --> loss : {'%.4f'%(mean_loss)} ({'%.1f'%(time.time() - start_time)}s)")

# train_loss.append(epoch_loss)
# model.eval()
# val_ep_loss = 0.0
# with torch.no_grad():
#     for (img, target) in val_dl:
#         img = img.float().to(device)
#         target = target.float().to(device)
#         output, _ = model(img, target)
#         loss = criterion(output, target)
#         val_step_loss.append(loss.item())
#         val_ep_loss += loss.item()


# print(f"validation data --> loss : {'%.4f'%(val_ep_loss)}")
# val_loss.append(val_ep_loss)
 

In [None]:
# Test-set inference: images are fed one at a time and unshuffled, so the
# predictions come out in the same order as the rows of test.csv.
test_dataset = PetDataset(annotations_path=BASE_DIR, csv_name="test", transforms=valid_aug)
test_dl = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Switch dropout / batch-norm layers to evaluation behaviour.
model.eval()
preds = []

with torch.no_grad():
    for batch in test_dl:
        batch = batch.float().to(device)
        scores, _ = model(batch)
        # Flatten the (batch, 1) output into plain Python floats.
        preds += scores.ravel().tolist()


In [None]:
# Attach the predictions to the test ids (row order matches the unshuffled
# inference loop) and write the two-column submission file.
test_df = (
    pd.read_csv(f'{BASE_DIR}test.csv')
    .assign(Pawpularity=preds)
    [["Id", "Pawpularity"]]
)
test_df.to_csv("submission.csv", index=False)

In [None]:
# test_df

In [None]:
# pd.read_csv("submission.csv")