In [None]:
# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# # You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# # You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Install timm 0.4.12 from a local wheel file (path below) rather than from a package index.
!pip install ../input/3rd-party/timm-0.4.12-py3-none-any.whl

In [None]:
import timm, torch, random, os
import pandas as pd
import numpy as np
from PIL import Image
import torch.nn as nn
import torchvision.transforms as transforms

from torch.utils.data import DataLoader, Dataset

In [None]:
class CFG:
    """Plain namespace of notebook-wide settings, read as ``CFG.<name>``.

    Only a few of these values are read by the visible inference cells
    (``size``, ``target_size``, ``seed``); the rest look like training-time
    settings carried over with the model definition.
    """

    # -- runtime switches --------------------------------------------------
    apex = False
    debug = False
    print_freq = 10
    num_workers = 4

    # -- input / architecture ----------------------------------------------
    size = 512  # images are resized to (size, size) by the transform pipeline
    model_name = 'tf_efficientnet_b8'

    # -- optimisation schedule ---------------------------------------------
    # Supported names: 'ReduceLROnPlateau', 'CosineAnnealingLR',
    # 'CosineAnnealingWarmRestarts'
    scheduler = 'CosineAnnealingLR'
    epochs = 3
    T_max = 3  # CosineAnnealingLR
    T_0 = 3  # CosineAnnealingWarmRestarts
    lr = 1e-4
    min_lr = 1e-6
    batch_size = 24
    weight_decay = 1e-6
    gradient_accumulation_steps = 1
    max_grad_norm = 1000

    # -- reproducibility / target ------------------------------------------
    seed = 42
    target_size = 1  # width of the model's final linear layer
    target_col = 'Pawpularity'

    # -- cross-validation ---------------------------------------------------
    n_fold = 5
    trn_fold = [0, 1, 2, 3, 4]

    # -- mode flags ----------------------------------------------------------
    train = True
    grad_cam = True

In [None]:
# Test-set table; later cells read its 'Id' column and derive the feature columns from it.
test = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')

In [None]:
def get_image_file_path(image_id):
    """Return the path of the test JPEG that belongs to ``image_id``.

    The path is built purely from the identifier; the file is not checked
    for existence.
    """
    file_name = f'{image_id}.jpg'
    return '../input/petfinder-pawpularity-score/test/' + file_name
# Attach the on-disk image path to every row of the test table.
test['file_path'] = test['Id'].apply(get_image_file_path)

# Every remaining column is treated as a metadata feature for the model.
non_feature_cols = ('Id', 'Pawpularity', 'file_path')
feature_cols = [name for name in test.columns if name not in non_feature_cols]

In [None]:
class test_data(Dataset):
    """Dataset yielding ``(image, meta)`` pairs for the rows of ``df``.

    ``df`` must provide a ``'file_path'`` column and the columns listed in
    the module-level ``feature_cols``. ``transform``, when given, is applied
    to the RGB PIL image before it is returned.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform
        # Cache both columns as arrays so item access is plain positional indexing.
        self.file_name = df['file_path'].values
        self.meta = df[feature_cols].values

    def __len__(self):
        """Number of rows in the wrapped frame."""
        return len(self.df)

    def __getitem__(self, index):
        """Load the image at position ``index`` and return it with its metadata row."""
        image = Image.open(self.file_name[index]).convert('RGB')
        meta = self.meta[index, :]
        image = self.transform(image) if self.transform else image
        return image, meta

# Deterministic preprocessing for inference: resize to the configured square
# size, convert to a tensor, then normalise each channel.
# RandomHorizontalFlip was removed on purpose: it is a training-time
# augmentation, and leaving it in made every submitted score depend on a
# random per-image mirror.
get_transforms = transforms.Compose([
    transforms.Resize((CFG.size, CFG.size)),
    transforms.ToTensor(),
    # Per-channel mean/std: the commonly used ImageNet statistics.
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
])

In [None]:
class custom_model(nn.Module):
    """Regression head on top of a timm backbone, fused with tabular metadata.

    The backbone's classifier is replaced by ``nn.Identity`` so it returns a
    feature vector of width ``n_features``. That vector is concatenated with
    the metadata vector and passed through three fully connected stages.

    Args:
        cfg: configuration object. ``cfg.target_size`` sets the output width;
            ``cfg.model_name`` (if present) selects the timm architecture,
            otherwise ``'tf_efficientnet_b8'`` is used.
        pretrained: forwarded to ``timm.create_model``.
        n_meta_features: number of metadata values per sample. Defaults to
            12, the value previously hard-coded in the first linear layer.
    """

    def __init__(self, cfg, pretrained=False, n_meta_features=12):
        super().__init__()
        self.cfg = cfg
        self.n_meta_features = n_meta_features

        # Use the configured architecture rather than a hard-coded name; the
        # fallback keeps configs without `model_name` working as before.
        model_name = getattr(cfg, 'model_name', 'tf_efficientnet_b8')
        self.model = timm.create_model(model_name, pretrained=pretrained)
        self.n_features = self.model.classifier.in_features
        self.model.classifier = nn.Identity()
        self.dropout = nn.Dropout(p=0.3)

        self.fc1 = nn.Sequential(
            nn.Linear(self.n_features + n_meta_features, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3)
        )

        self.fc2 = nn.Sequential(
            nn.Linear(1024, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3)
        )

        self.fc3 = nn.Linear(256, self.cfg.target_size)

    def image_feature(self, image):
        """Run the backbone (classifier removed) and return its feature tensor."""
        feature = self.model(image)
        return feature

    def forward(self, image, meta):
        """Predict from an image batch and its metadata batch.

        Args:
            image: batch passed unchanged to the backbone.
            meta: metadata batch concatenated with the image features along
                dim 1; it is cast to the feature dtype first.

        Returns:
            Output of the last linear layer, ``cfg.target_size`` values per sample.
        """
        feature = self.image_feature(image)
        feature = self.dropout(feature)
        # Metadata typically arrives as integers from the data frame; cast it
        # explicitly instead of relying on implicit promotion inside torch.cat.
        meta = meta.to(dtype=feature.dtype)
        ensembled_feature = torch.cat([feature, meta], dim=1)  # (bs, n_features + n_meta_features)

        ensembled_feature = self.fc1(ensembled_feature)
        ensembled_feature = self.fc2(ensembled_feature)

        output = self.fc3(ensembled_feature)
        return output

In [None]:
def seed_torch(seed=42):
    """Seed Python, NumPy and PyTorch RNGs with ``seed``.

    Also exports ``PYTHONHASHSEED`` and switches cuDNN to its deterministic
    mode.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Same value for every generator: stdlib, NumPy, torch CPU, torch CUDA.
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True

# Fix all RNGs once, using the seed from the notebook configuration.
seed_torch(CFG.seed)

In [None]:
def infer(model, device, test_loader):
    """Run ``model`` over every batch of ``test_loader`` without gradients.

    The model is moved to ``device`` and put into eval mode first. Each batch
    must unpack into ``(images, meta)``; both are moved to ``device`` before
    the forward pass.

    Returns:
        A list with one model output per batch, in loader order. The outputs
        are left on ``device``.
    """
    model.to(device)
    model.eval()

    def _forward(batch):
        images, meta = batch
        return model(images.to(device), meta.to(device))

    with torch.no_grad():
        return [_forward(batch) for batch in test_loader]

In [None]:
# Choose the device first so the checkpoint can be mapped onto it while loading.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# load the model
model_path = '../input/trained-model/tf_efficientnet_b8_fold4_best.pth'
model_infer = custom_model(CFG, pretrained=False)
# map_location keeps the load working on a CPU-only session even if the
# checkpoint tensors were stored on a CUDA device.
model_infer.load_state_dict(torch.load(model_path, map_location=device))
model_infer.eval()

testset = test_data(test, transform=get_transforms)
testloader = DataLoader(testset, batch_size=1, shuffle=False, num_workers=0)

ypred_test = infer(model_infer, device, testloader)

# Flatten the per-batch outputs into one list of Python floats. Unlike a
# per-batch `.item()`, this works for any batch size.
y_sub = [
    score
    for batch_pred in ypred_test
    for score in batch_pred.view(-1).cpu().tolist()
]
assert len(y_sub) == len(test), "expected exactly one prediction per test row"

submission_transfer_nn = pd.DataFrame({'Id': test['Id'], 'Pawpularity': y_sub})

submission_transfer_nn

In [None]:
# Write the predictions to submission.csv without the DataFrame index column.
submission_transfer_nn.to_csv("submission.csv", index=False)