In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
from random import sample
import cv2

import imgaug.augmenters as ia
import albumentations
from albumentations.pytorch.transforms import ToTensorV2

# Deep Learning
import torch
from torchvision import transforms
from torch.autograd import Variable
from torch.nn import * #Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD
from torchvision import models
from torch.utils.data import Dataset, DataLoader

from transformers import ViTFeatureExtractor, ViTModel
from PIL import Image
import requests

In [None]:
# Read the training table and keep the Pawpularity column, divided by 100,
# as a NumPy array.
dat = pd.read_csv("../input/petfinder-pawpularity-score/train.csv")
y = dat["Pawpularity"].to_numpy() / 100

In [None]:
# Preview the augmentation pipeline on a single training image.
DEMO_SIZE = (224, 224)  # same size the Data loader defaults to (was 244, a typo)
p = "../input/petfinder-pawpularity-score/train/0007de18844b0dbbb5e1f607da0606e0.jpg"
demo_bgr = cv2.imread(p)
if demo_bgr is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(p)
img = cv2.cvtColor(cv2.resize(demo_bgr, DEMO_SIZE), cv2.COLOR_BGR2RGB)
plt.imshow(img)
plt.show()

# Eight references to the same image, so each one gets its own random draw.
images = [img] * 8

# With probability 0.75 apply rotate + random flips + crop; otherwise leave
# the image untouched. `seq` is also used by Data.load_batch when aug is on.
seq = ia.Sequential([ia.Sometimes(0.75,ia.Sequential([
    ia.Affine(rotate=(-60, 59)),
    ia.flip.Fliplr(0.5),
    ia.flip.Flipud(0.5),
    ia.Crop(percent=(0, 0.2))])),
])

images_aug = seq(images=images)

print("Augmented:")
plt.figure(figsize=(100,20))
plt.imshow(np.hstack(images_aug))
plt.show()

In [None]:
# Sample image fetched over HTTP; it is not referenced by the other cells
# visible in this notebook.
url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
# A timeout keeps the cell from hanging forever, and raise_for_status turns an
# HTTP error into an exception instead of handing an error page to PIL.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)

# Both the preprocessor and the backbone come from the same checkpoint.
VIT_CHECKPOINT = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(VIT_CHECKPOINT)
vit_model = ViTModel.from_pretrained(VIT_CHECKPOINT)

In [None]:
# Freeze the backbone: none of the ViT weights will receive gradients.
for frozen_param in vit_model.parameters():
    frozen_param.requires_grad = False

In [None]:
# Prefer the GPU when one is visible to torch; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')

In [None]:
class model(Module):
    """Small regression head stacked on a ViT backbone.

    The backbone's ``pooler_output`` goes through a two-layer MLP
    (768 -> 32 -> 1), so the module emits one value per input row.
    """

    def __init__(self,vit_model,feature_extractor):
        super().__init__()
        self.vit_model = vit_model
        # Kept as an attribute; forward() does not use it.
        self.feature_extractor = feature_extractor
        head_layers = [Linear(768, 32), ReLU(), Linear(32, 1)]
        self.network = Sequential(*head_layers)

    def forward(self, x):
        """Feed ``x`` to the backbone and map its pooled output through the head."""
        pooled = self.vit_model(x).pooler_output
        return self.network(pooled)
    
# Build the network around the frozen backbone and move it to `device`.
# NOTE: this rebinds the name `model` from the class to the instance, so the
# class can no longer be reached by name once this line has run.
model = model(vit_model,feature_extractor).to(device)
# print(model)

In [None]:
# Mean-squared-error objective, optimised with Adam at a learning rate of 1e-4.
criterion = MSELoss()
optimizer = Adam(params=model.parameters(), lr=1e-4)

In [None]:
class Data:
    """Reads batches of resized RGB images, plus their scaled labels, from disk.

    Parameters
    ----------
    path : str
        Prefix prepended to ``<id>.jpg`` when an image is read.
    ids : list
        Image ids this loader draws from.
    x, y : int
        Passed to ``cv2.resize`` as the ``(x, y)`` target size.
    labels : optional
        Object indexed by id through ``.loc``; values are divided by 100 on
        load. When omitted, ``load_batch`` returns ``None`` for the labels.
    aug_arg : bool
        When true, every batch is passed through the module-level ``seq``
        augmenter.
    """

    def __init__(self,path,ids,x=224,y=224,labels=None,aug_arg=False):
        self.x = x
        self.y = y
        self.labels = labels
        self.image_list = ids
        self.path = path
        self.batch = 0  # cursor (in batches) used by the in-order branch
        self.aug = aug_arg

    def _next_ids(self, batch_size, shuffle):
        """Choose the ids of the next batch; see ``load_batch`` for ``shuffle``."""
        if not shuffle:
            return sample(self.image_list, batch_size)
        start = self.batch * batch_size
        batch_list = self.image_list[start:start + batch_size]
        self.batch += 1
        # Wrap as soon as the cursor has passed the last id. The previous
        # check (batch > len // batch_size) let one empty batch through
        # whenever len(ids) was an exact multiple of batch_size.
        if self.batch * batch_size >= len(self.image_list):
            self.batch = 0
        return batch_list

    def _read_image(self, image_id):
        """Read one image, resize it to ``(x, y)`` and convert BGR to RGB."""
        file_name = self.path + image_id + ".jpg"
        bgr = cv2.imread(file_name)
        if bgr is None:
            # cv2.imread returns None rather than raising; fail with the path
            # instead of an opaque error from cv2.resize.
            raise FileNotFoundError("could not read image: " + file_name)
        return cv2.cvtColor(cv2.resize(bgr, (self.x, self.y)), cv2.COLOR_BGR2RGB)

    def load_batch(self,batch_size=1,shuffle=False):
        """Return ``(images, labels)`` for one batch.

        NOTE(review): the flag reads inverted relative to its name.
        ``shuffle=False`` (the default) draws ``batch_size`` ids at random
        without replacement; ``shuffle=True`` walks the id list in order and
        wraps at the end. The mapping is kept because existing callers rely
        on the default.

        Returns
        -------
        images : numpy.ndarray
            Stack of the loaded images (augmented when ``aug_arg`` was set).
        labels : numpy.ndarray or None
            Label values divided by 100, or ``None`` when no labels were given.
        """
        batch_list = self._next_ids(batch_size, shuffle)
        images = np.array([self._read_image(image) for image in batch_list])
        if self.aug:
            images = seq(images=images)
        if self.labels is None:
            labels = None
        else:
            labels = self.labels.loc[batch_list].to_numpy()/100

        return images,labels

    def loader(self,batch_size=1,shuffle=False):
        """Endless generator yielding successive ``load_batch`` results."""
        while True:
            x,y = self.load_batch(batch_size,shuffle)
            yield x,y

# Split the ids 80 / 10 / 10 into train / validation / test, in table order.
ids = dat["Id"].to_list()
train_end = int(len(ids)*0.8)
val_end = int(len(ids)*0.9)
train_ids, val_ids, test_ids = ids[:train_end], ids[train_end:val_end], ids[val_end:]

path = "../input/petfinder-pawpularity-score/train/"
labels = dat.set_index("Id")["Pawpularity"]

# One loader per split, plus `c` covering every id; augmentation stays off.
c, c_train, c_val, c_test = (
    Data(path, labels=labels, ids=split_ids)
    for split_ids in (ids, train_ids, val_ids, test_ids)
)

In [None]:
# l = np.array([t,t,t,t])
# l = l.transpose((0,3,1,2))
# # model.forward(l)
# k = torch.Tensor(l)#transforms.ToTensor()(l[0])
# # feature_extractor(images=k,do_resize=False, size = None)
# o = vit_model(k).pooler_output
# o.shape

In [None]:
epochs = 10
steps = 60        # batches drawn per epoch
batch_size = 256
log_every = 10    # report the mean loss over this many steps
for epoch in range(epochs):

    running_loss = 0.0
    for i in range(steps):
        x,y = c_train.load_batch(batch_size)
        # Move the channel axis from last to second, then shift and scale.
        # NOTE(review): the ViT checkpoint's own preprocessing presumably
        # normalises with mean 0.5 / std 0.5 (range [-1, 1]); this gives
        # roughly [-0.5, 0.5]. Left as-is so it matches the evaluation cell;
        # confirm against the model card.
        x = (torch.Tensor(x.transpose((0,3,1,2)))-128)/255
        # Shape the target as (batch, 1) to match the model output. A flat
        # (batch,) target is broadcast by MSELoss against the (batch, 1)
        # output into a (batch, batch) matrix, which trains towards the
        # batch mean instead of the per-sample value.
        y = torch.Tensor(y).reshape(-1, 1)
        # Follow `device` instead of hard-coding .cuda(), so CPU runs work too.
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        del x,y, outputs

        running_loss += loss.item()
        if i % log_every == log_every - 1:
            # Average over the steps actually accumulated (was divided by 2000).
            print('[%d, %5d] loss: %.6f' % (epoch + 1, i + 1, running_loss / log_every))
            running_loss = 0.0

print('Finished Training')

In [None]:
# Save the trained network's weights. The original referenced `net`, which is
# not defined at this point in the notebook; the trained object is `model`.
PATH = './cifar_net.pth'
torch.save(model.state_dict(), PATH)

In [None]:
# Load every test image with the same resize / colour conversion / scaling
# that the training loop applies.
images = np.array([cv2.cvtColor(cv2.resize(cv2.imread(path+image+".jpg"),(224,224)),cv2.COLOR_BGR2RGB) for image in test_ids])
y = labels.loc[test_ids]
images = (torch.Tensor(images.transpose((0,3,1,2)))-128)/255
# Follow `device` rather than hard-coding .cuda(), so this also runs on CPU.
images = images.to(device)
# Inference only: skip autograd bookkeeping, and call the module itself
# rather than .forward() so module hooks are honoured.
with torch.no_grad():
    y_pred = model(images)

In [None]:
# Column vectors of the raw targets and of the model outputs.
y_true = y.to_numpy().reshape((-1,1))
pred = y_pred.cpu().detach().numpy()

# Histogram of absolute errors with the targets divided by 100.
plt.hist(np.abs(y_true/100-pred))
# Root-mean-squared error with the predictions multiplied by 100.
print("METRIC : ",np.sqrt(np.sum((y_true-pred*100)**2)/(y_pred.shape[0])))

In [None]:
# Drop the test image tensor; only targets and predictions are needed below.
del images
pred_values = y_pred.cpu().detach().numpy()

# Target distribution, then prediction distribution, each in its own figure.
plt.figure()
plt.hist(y)
plt.figure()
plt.hist(pred_values)

In [None]:
# Reload the saved weights. `Net` is not defined anywhere in the visible
# notebook, so the checkpoint is loaded back into the existing `model`;
# `net` is kept as an alias for it.
net = model
net.load_state_dict(torch.load(PATH))

In [None]:
# Evaluate on the test split. The original cell iterated an undefined
# `testloader`, overwrote the global `labels`, and computed arg-max accuracy,
# which carries no information for a single-output regressor. This reports
# the root-mean-squared error with predictions and targets multiplied by 100.
eval_batch_size = 64
n_eval_batches = -(-len(test_ids) // eval_batch_size)  # ceiling division
squared_error = 0.0
total = 0
c_test.batch = 0  # restart the loader's in-order cursor
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for _ in range(n_eval_batches):
        # shuffle=True is the flag value that selects Data.load_batch's
        # in-order branch, so each test id is visited exactly once.
        batch_images, batch_targets = c_test.load_batch(eval_batch_size, shuffle=True)
        batch_images = (torch.Tensor(batch_images.transpose((0,3,1,2)))-128)/255
        outputs = model(batch_images.to(device)).reshape(-1).cpu()
        batch_targets = torch.Tensor(batch_targets)
        squared_error += (((outputs - batch_targets) * 100) ** 2).sum().item()
        total += batch_targets.size(0)

print('RMSE of the network on the %d test images: %.3f' % (
    total, (squared_error / total) ** 0.5))