In [None]:
!pip install wandb --upgrade --q

In [None]:
import os
import shutil
import multiprocessing as mproc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('fivethirtyeight')

from sklearn.model_selection import KFold
from kaggle_secrets import UserSecretsClient
import wandb

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader,random_split,Dataset
from torchvision.datasets import ImageFolder
from torchvision import transforms
import torchvision.models as models
from PIL import Image

import pytorch_lightning as pl
from pytorch_lightning.metrics.functional import accuracy
from pytorch_lightning.callbacks import Callback
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint

In [None]:
# Fetch the W&B API key from Kaggle secrets and authenticate through the Python
# API; this keeps the key off the shell command line.
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("api-key")
wandb.login(key=secret_value_0)

In [None]:
# Pick the first CUDA device when one is available, otherwise run on the CPU.
device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# Start the Weights & Biases run for this notebook.
wandb.init(
    project="wandb-lightning-PetFinder",
    name="PetFinder-VGG",
)

In [None]:
# Load the three competition CSV files; they share one directory and differ
# only by file stem.
train_data, test_data, sample_submission = (
    pd.read_csv(f'../input/petfinder-pawpularity-score/{stem}.csv')
    for stem in ('train', 'test', 'sample_submission')
)

In [None]:
# Directories holding the training and test JPEG images.
_competition_root = '../input/petfinder-pawpularity-score'
train_dir = _competition_root + '/train'
test_dir = _competition_root + '/test'

In [None]:
# Build the full image path for every row: <dir>/<Id>.jpg.
# Vectorized string concatenation replaces the row-wise apply + lambda.
train_data['Path'] = train_dir + '/' + train_data['Id'] + '.jpg'
test_data['Path'] = test_dir + '/' + test_data['Id'] + '.jpg'

In [None]:
# 3-fold split with a fixed seed so the fold assignment is reproducible.
kfold = KFold(n_splits=3, shuffle=True,random_state=12345)

# KFold.split yields *positional* row indices, so the fold ids are collected in
# a positional array and attached as a column afterwards. This stays correct
# even if train_data does not carry a default 0..n-1 index.
fold_of_row = np.full(len(train_data), -1, dtype=np.int64)

for fold, (_train_ids, valid_ids) in enumerate(kfold.split(train_data)):
    fold_of_row[valid_ids] = fold

train_data['fold'] = fold_of_row

In [None]:
# Sanity check: number of rows assigned to each fold.
train_data['fold'].value_counts()

In [None]:
# Fold 0 is held out for validation; every other fold is used for training.
holdout_mask = train_data['fold'] == 0
model_columns = ['Path', 'Pawpularity']

train = train_data.loc[~holdout_mask, model_columns].reset_index(drop=True)
valid = train_data.loc[holdout_mask, model_columns].reset_index(drop=True)

# The test frame only needs the image path.
test = test_data[['Path']]

In [None]:
# Show the image path of the first test row.
test['Path'].iloc[0]

In [None]:
class MarkeDataset(Dataset):
    """Image dataset backed by a DataFrame.

    Column 0 of ``data`` is read as the image file path. When ``is_test`` is
    false, column 1 is read as the regression target.

    Args:
        data: DataFrame with the image path in column 0 (and the target in
            column 1 for non-test data).
        is_test: If true, ``__getitem__`` returns only the image; otherwise it
            returns ``(image, target)``.
        transform: Callable applied to the PIL image. Its result is moved to
            the module-level ``device``. If falsy, the PIL image is returned
            unchanged.
    """

    def __init__(self, data,is_test, transform=transforms.ToTensor()):
        self.data = data
        self.transform = transform
        self.device = device
        self.is_test = is_test

    def __len__(self):
        return len(self.data)

    def _load_image(self, idx):
        """Open the image at row ``idx`` as RGB and apply the transform."""
        img_name = self.data.iloc[idx, 0]
        # The context manager closes the file handle; convert() loads the pixel
        # data and guarantees three channels, which the 3-channel Normalize
        # used in this notebook requires.
        with Image.open(img_name) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image).to(self.device)

        return image

    def __getitem__(self, idx):
        """Return the image (test mode) or ``(image, float32 target)``."""
        image = self._load_image(idx)
        if self.is_test:
            return image

        y_label = torch.tensor(self.data.iloc[idx, 1], dtype=torch.float32).to(self.device)
        return image, y_label

In [None]:
class PetFinderDM(pl.LightningDataModule):
    """Lightning data module wrapping the module-level train/valid/test frames.

    Args:
        batch_size: Batch size used by the train and validation loaders.
        num_workers: Number of DataLoader worker processes. ``None`` (the
            default) means 0, i.e. loading happens in the main process.
    """

    dataset_cls = MarkeDataset

    def __init__(
        self,
        batch_size: int = 32,
        num_workers: int = None,
    ):
        super().__init__()
        self.batch_size = batch_size
        # Honour the argument instead of always forcing 0. The default stays 0.
        # NOTE(review): the dataset moves tensors to `device` inside
        # __getitem__, which may not work in worker processes when the device
        # is CUDA; confirm before using num_workers > 0.
        self.num_workers = 0 if num_workers is None else num_workers
        self.train_dataset = None
        self.valid_dataset = None
        self.test_dataset = None

        # Training-time augmentation: random crop/rotation/flip, then the same
        # 224x224 center crop and normalization used for evaluation.
        self.augmentation = transforms.Compose([
              transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
              transforms.RandomRotation(degrees=15),
              transforms.RandomHorizontalFlip(),
              transforms.CenterCrop(size=224),
              transforms.ToTensor(),
              transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
        ])
        # Deterministic preprocessing for validation and test images.
        self.transform = transforms.Compose([
              transforms.Resize(size=256),
              transforms.CenterCrop(size=224),
              transforms.ToTensor(),
              transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
        ])

    def prepare_data(self):
        """Nothing to download or preprocess."""
        pass

    def setup(self, stage=None):
        """Build the train/valid/test datasets; ``stage`` is not used."""
        self.train_dataset = self.dataset_cls(train,is_test=False,transform=self.augmentation)
        print(f"training dataset: {len(self.train_dataset)}")
        self.valid_dataset = self.dataset_cls(valid,is_test=False,transform=self.transform)
        print(f"validation dataset: {len(self.valid_dataset)}")
        self.test_dataset = self.dataset_cls(test,is_test=True,transform=self.transform)
        print(f"test dataset: {len(self.test_dataset)}")

    def _make_loader(self, dataset, shuffle):
        """Create a DataLoader with this module's batch size and worker count."""
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=shuffle,
        )

    def train_dataloader(self):
        """Shuffled loader over the training dataset."""
        return self._make_loader(self.train_dataset, shuffle=True)

    def val_dataloader(self):
        """Unshuffled loader over the validation dataset."""
        return self._make_loader(self.valid_dataset, shuffle=False)

    def test_dataloader(self):
        """Disabled: no test loader is exposed; always returns ``None``.

        ``setup`` still builds ``self.test_dataset``.
        """
        return None

In [None]:
# Instantiate the data module with its default arguments and build its datasets.
dm = PetFinderDM()
dm.setup()

In [None]:
# Pull a single batch from the training loader for visual inspection.
first_train_batch = next(iter(dm.train_dataloader()))
sample_images, scores = first_train_batch

In [None]:
# The batch was normalized with these per-channel statistics in the data
# module's transforms; undo that so imshow receives values in [0, 1] instead of
# clipping out-of-range pixels.
NORM_MEAN = np.array([0.485, 0.456, 0.406])
NORM_STD = np.array([0.229, 0.224, 0.225])

fig,ax = plt.subplots(2,5,figsize=(15,5))
for i,axi in enumerate(ax.flatten()):
    axi.axis('off')
    if i >= len(sample_images):
        # Batch smaller than the grid: leave the remaining panels empty.
        continue
    img, lab = sample_images[i],scores[i]
    pixels = img.cpu().numpy().transpose((1, 2, 0))  # CHW -> HWC for matplotlib
    pixels = np.clip(pixels * NORM_STD + NORM_MEAN, 0.0, 1.0)
    axi.imshow(pixels)
    axi.text(x = 112,y =2,s =f'Score :{lab.item()}',ha='center',backgroundcolor='y')
plt.tight_layout()
plt.show()

In [None]:
class theModel(pl.LightningModule):
    """Regressor: frozen pretrained VGG11-BN followed by one linear layer.

    The complete torchvision network is used as a fixed feature extractor (all
    of its parameters have ``requires_grad=False``); only ``self.regressor`` is
    trainable. The loss is the per-batch RMSE.

    Args:
        input_shape: Shape of one input sample without the batch dimension,
            e.g. ``(3, 224, 224)``. Used to infer the extractor's output size.
        output_units: Number of outputs of the final linear layer.
        learning_rate: Learning rate for Adam.
    """

    def __init__(self, input_shape, output_units, learning_rate=2e-3):
        super().__init__()

        self.save_hyperparameters()
        self.learning_rate = learning_rate
        self.dim = input_shape
        self.num_classes = output_units
        self.batch_size = 32

        self.feature_extractor = models.vgg11_bn(pretrained=True)
        for param in self.feature_extractor.parameters():
            param.requires_grad = False
        self.feature_extractor.eval()

        n_sizes = self._get_conv_output(input_shape)

        self.regressor = nn.Linear(n_sizes, self.num_classes)

    def train(self, mode=True):
        """Switch train/eval mode but keep the frozen extractor in eval mode.

        ``nn.Module.train`` recursively flips every submodule, which would undo
        the ``feature_extractor.eval()`` call made in ``__init__`` and let the
        frozen backbone's BatchNorm statistics drift and its Dropout fire
        during fitting.
        """
        super().train(mode)
        self.feature_extractor.eval()
        return self

    def _get_conv_output(self, shape):
        """Return the flattened feature size for one sample of ``shape``."""
        batch_size = 1
        # A plain tensor under no_grad replaces the deprecated Variable wrapper.
        with torch.no_grad():
            dummy = torch.rand(batch_size, *shape)
            output_feat = self._forward_features(dummy)
        return output_feat.view(batch_size, -1).size(1)

    def _forward_features(self, x):
        """Run the frozen feature extractor."""
        return self.feature_extractor(x)

    def forward(self, x):
        """Return predictions with one row per input sample."""
        x = self._forward_features(x)
        x = torch.flatten(x, 1)
        return self.regressor(x)

    def _rmse_step(self, batch):
        """Shared train/val logic: RMSE between predictions and targets."""
        x, y = batch
        # Give the targets a column dimension so they match the predictions.
        y = y.reshape(x.shape[0],-1)
        preds = self(x)
        return torch.sqrt(F.mse_loss(preds, y))

    def training_step(self, batch, batch_idx):
        """Compute and log the training RMSE for one batch."""
        loss = self._rmse_step(batch)

        self.log('train_loss', loss, on_step=True, on_epoch=True, logger=True)

        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation RMSE for one batch."""
        loss = self._rmse_step(batch)
        self.log('val_loss', loss, prog_bar=True)
        return loss

    def configure_optimizers(self):
        """Adam with cosine annealing over ``trainer.max_epochs`` down to 0."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, self.trainer.max_epochs, 0)
        return [optimizer], [scheduler]

In [None]:
# One regression output on 3x224x224 inputs; print the layer structure.
net = theModel(input_shape=(3, 224, 224), output_units=1)
print(net)

In [None]:
# Local CSV logger. The experiment name now matches the VGG model used in this
# notebook (it previously said 'Densenet').
logger = pl.loggers.CSVLogger(save_dir='logs/', name='VGG11-BN')

In [None]:
# Lightning logger that reports to the W&B project used by this notebook.
wandb_logger = WandbLogger(
    job_type='train',
    project='wandb-lightning-PetFinder',
)

In [None]:
trainer = pl.Trainer(
    # Request a GPU only when one exists, so the notebook still runs on CPU.
    gpus=1 if torch.cuda.is_available() else 0,
    logger=wandb_logger,
    max_epochs=5,
    accumulate_grad_batches=8,  # optimizer step every 8 batches
    val_check_interval=0.25,    # validate four times per training epoch
    progress_bar_refresh_rate=1,
    weights_summary='top',
)

trainer.fit(model=net, datamodule=dm)
# Close the W&B run once training is done.
wandb.finish()

In [None]:
# Batched, unshuffled loader over the test images (the DataLoader default would
# feed the model one image at a time).
# NOTE(review): this rebinds `test_data`, which held the test DataFrame earlier
# in the notebook; the name is kept because the prediction cell iterates it.
test_data = DataLoader(
    MarkeDataset(test,is_test=True,transform=dm.transform),
    batch_size=dm.batch_size,
    shuffle=False,
)

In [None]:
# Persist the trained weights, then restore them into a fresh model instance.
checkpoint_file = "baseline-petfinder.ckpt"
trainer.save_checkpoint(checkpoint_file)
loaded_model = theModel.load_from_checkpoint(checkpoint_path=checkpoint_file)

In [None]:
# Inference mode, on the same device the datasets place their tensors on
# (falls back to CPU when CUDA is unavailable instead of failing on .cuda()).
loaded_model.eval()
loaded_model.to(device)

In [None]:
# Predict the validation set one image at a time. no_grad() stops autograd from
# recording a graph for every stored prediction.
valid_predictions = []
with torch.no_grad():
    for X, _y in dm.valid_dataset:
        valid_predictions.extend(loaded_model(X.unsqueeze(0)))

In [None]:
# Predict the test set batch by batch. no_grad() stops autograd from recording
# a graph for every stored prediction.
test_predictions = []
with torch.no_grad():
    for X in test_data:
        test_predictions.extend(loaded_model(X))

In [None]:
# Display the collected test-set predictions.
test_predictions