In [None]:
import numpy as np 
import pandas as pd 


In [None]:
import glob
import torch
import torchvision
import matplotlib.pyplot as plt
import PIL

In [None]:
# Load the competition's training table into a DataFrame for a first look.
train_meta = pd.read_csv(
    filepath_or_buffer='/kaggle/input/petfinder-pawpularity-score/train.csv',
)

In [None]:
# Preview the first rows of the training table.
train_meta.head()

In [None]:
# Collect every file sitting in a `train` folder of any attached input dataset,
# then pick one of those paths at random for a visual spot check.
train_image_paths = glob.glob('/kaggle/input/*/train/*')
img = np.random.choice(train_image_paths)

In [None]:
# Open the randomly chosen path as a PIL image.
image = PIL.Image.open(img)

In [None]:
# Bare expression: the notebook renders the opened image as the cell output.
image

In [None]:
import os

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [None]:
class PawDataset(Dataset):
    """Map-style dataset pairing each image with its ``Pawpularity`` score.

    Parameters
    ----------
    img_dir : str
        Directory holding the ``<Id>.jpg`` image files.
    label_file : str
        CSV file read with pandas; its ``Id`` and ``Pawpularity`` columns
        are used.
    transform : callable, optional
        Called with the RGB PIL image; its return value becomes the sample.
        When omitted, the sample is a float32 array scaled to [0, 1] in
        channel-first (C, H, W) layout.
    """

    def __init__(self, img_dir, label_file, transform=None):
        super().__init__()
        self.labels = pd.read_csv(label_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.labels)

    def __getitem__(self, ind):
        """Return ``{'sample': image, 'label': float32 array of shape (1,)}``.

        ``ind`` is looked up as a label in the DataFrame index, which for a
        freshly read CSV is the row number.
        """
        # Scalar lookups keep each column's own dtype; pulling both columns
        # as one row would go through an object-dtype Series first.
        img_name = self.labels.at[ind, 'Id']
        label = np.array([self.labels.at[ind, 'Pawpularity']], dtype=np.float32)

        img_path = os.path.join(self.img_dir, img_name + '.jpg')
        # The context manager closes the file handle; convert() loads the
        # pixels and guarantees three channels even for grayscale/RGBA files.
        with PIL.Image.open(img_path) as img_file:
            img = img_file.convert('RGB')

        if self.transform is not None:
            image = self.transform(img)
        else:
            # Only build the raw array when no transform will replace it.
            image = np.asarray(img, dtype=np.float32) / 255
            image = np.transpose(image, [2, 0, 1])

        return dict(
            sample=image,
            label=label
        )
        
        
        
        

In [None]:
from torchvision.transforms import Compose, Resize, ToTensor, Normalize

In [None]:
# Preprocessing for the pretrained ResNet used further down: fixed 224x224
# input, conversion to a [0, 1] float tensor, then normalisation with the
# ImageNet channel statistics that torchvision's pretrained weights expect
# (the previous mean=0.5 / std=1 did not match them).
transforms = Compose(
    [
        Resize((224, 224)),
        ToTensor(),
        Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]
)

In [None]:
# Kaggle input locations: the label CSV and the folder with the training images.
label_file, img_dir = (
    '/kaggle/input/petfinder-pawpularity-score/train.csv',
    '/kaggle/input/petfinder-pawpularity-score/train',
)

In [None]:
# Two views over the same files: one yielding raw arrays (no transform),
# one running every image through the `transforms` pipeline.
purr_dataset_not_transformed = PawDataset(img_dir=img_dir, label_file=label_file)
purr_dataset = PawDataset(img_dir=img_dir, label_file=label_file, transform=transforms)

In [None]:
# The untransformed sample is channel-first; imshow wants channels last.
raw_sample_chw = purr_dataset_not_transformed[10]['sample']
plt.imshow(np.transpose(raw_sample_chw, (1, 2, 0)))

In [None]:
# After Normalize the pixel values no longer lie in [0, 1], and imshow clips
# float RGB data to that range. Rescale to [0, 1] for display only (this is a
# contrast stretch, not an exact inverse of the normalisation).
normalized_hwc = purr_dataset[10]['sample'].numpy().transpose(1, 2, 0)
value_span = max(float(np.ptp(normalized_hwc)), 1e-8)  # guard against a constant image
plt.imshow((normalized_hwc - normalized_hwc.min()) / value_span);

In [None]:
# Shape of the first sample when no transform is applied (channel-first array).
purr_dataset_not_transformed[0]['sample'].shape

In [None]:
# Shape of the same sample after the `transforms` pipeline (includes the 224x224 resize).
purr_dataset[0]['sample'].shape

In [None]:
# Total number of samples; reused below to size the train/test split.
len_dataset = len(purr_dataset)
len_dataset

In [None]:
# 80/20 train/hold-out split. The explicitly seeded generator makes the split
# identical on every "Restart & Run All", so evaluation numbers are comparable
# between runs.
n_train = int(0.8 * len_dataset)
train_set, test_set = torch.utils.data.random_split(
    purr_dataset,
    [n_train, len_dataset - n_train],
    generator=torch.Generator().manual_seed(42),
)

In [None]:
# Both loaders share batch size and pinned memory; only the training loader
# reshuffles its samples.
loader_options = dict(batch_size=64, pin_memory=True)
train_dataloader = DataLoader(train_set, shuffle=True, **loader_options)
test_dataloader = DataLoader(test_set, shuffle=False, **loader_options)

In [None]:
# Sanity check: shape of a sample fetched through the training subset.
train_set[0]['sample'].shape

In [None]:
# Pull a single batch from the test loader and inspect the batched sample shape.
test_batches = iter(test_dataloader)
example = next(test_batches)
example['sample'].shape

### Ignore this part for now

In [None]:
from torchvision.models import resnet18

# ImageNet-pretrained ResNet-18. `pretrained=True` is deprecated in newer
# torchvision releases in favour of the `weights` argument; IMAGENET1K_V1 is
# the checkpoint `pretrained=True` used to load. Older installs that lack the
# weights enum fall back to the legacy flag.
try:
    from torchvision.models import ResNet18_Weights
except ImportError:
    model = resnet18(pretrained=True)
else:
    model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)

In [None]:
# Freeze every weight currently in the network so no gradients are computed
# for them.
for param in model.parameters():
    param.requires_grad_(False)

In [None]:
# Swap the classification head for a single-output regression layer. The input
# width is read from the layer being replaced instead of hard-coding 512, so
# the cell keeps working if the backbone is changed. The new layer's
# parameters are trainable by default.
model.fc = torch.nn.Linear(model.fc.in_features, 1)

In [None]:
from tqdm.notebook import tqdm

In [None]:
# Mean-squared-error objective; Adam is handed only the parameters of the
# replaced `fc` head.
criterion = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(params=model.fc.parameters(), lr=1e-2)

In [None]:
def train_one_epoch(model, optimizer, criterion, train_loader):
    """Run one optimisation pass over ``train_loader``.

    Parameters
    ----------
    model : torch.nn.Module
        Network to train; it is switched to training mode.
    optimizer : torch.optim.Optimizer
        Optimizer stepping the trainable parameters.
    criterion : callable
        Loss called as ``criterion(prediction, labels)``.
    train_loader : iterable
        Yields dicts with ``'sample'`` and ``'label'`` tensors.

    The current batch loss is shown in the progress-bar description.
    """
    # Run batches on whatever device the model's parameters live on; leave the
    # tensors untouched if the model has no parameters at all.
    first_param = next(model.parameters(), None)
    device = None if first_param is None else first_param.device

    pbar = tqdm(train_loader)
    # NOTE(review): train() also puts normalisation layers in training mode,
    # so a frozen backbone's BatchNorm running statistics would presumably
    # still update — confirm that is intended.
    model.train()
    for sample in pbar:
        imgs, labels = sample['sample'], sample['label']
        if device is not None:
            imgs, labels = imgs.to(device), labels.to(device)

        # Clear gradients *before* backward so anything left over from code
        # run earlier cannot leak into this step.
        optimizer.zero_grad()
        prediction = model(imgs)
        loss = criterion(prediction, labels)
        loss.backward()
        optimizer.step()

        # .item() gives a plain Python float on any device.
        pbar.set_description(f'Loss: {loss.item()}')

def test(model, optimizer, criterion, test_loader):
    predictions = []
    labels_list = []
    pbar = tqdm(test_loader)
    model.eval()
    with torch.no_grad():
        loss = 0
        for sample in pbar:
            imgs = sample['sample']
            labels = sample['label']
            prediction = model(imgs)
            loss += ((prediction - labels) ** 2).sum()
            labels = labels.numpy()
            prediction = prediction.numpy()
            predictions.append(prediction)
            labels_list.append(labels)
        predictions = np.concatenate(predictions)
        labels_list = np.concatenate(labels_list)
        print(f'Loss {(loss / len(test_loader) / test_loader.batch_size) ** 0.5}')


def train(model, optimizer, criterion, train_loader, test_loader, n_epochs):
    """Alternate a training pass and an evaluation pass ``n_epochs`` times.

    Each epoch first calls ``train_one_epoch`` on ``train_loader`` and then
    ``test`` on ``test_loader`` with the same model, optimizer and criterion.
    """
    for _ in range(n_epochs):
        train_one_epoch(
            model=model,
            optimizer=optimizer,
            criterion=criterion,
            train_loader=train_loader,
        )
        test(
            model=model,
            optimizer=optimizer,
            criterion=criterion,
            test_loader=test_loader,
        )


In [None]:
#train(model, optimizer, criterion, train_dataloader, test_dataloader, 1)