In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for root, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(root, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Unzipping Data

In [None]:
# Extract both archives into the session-scoped temp directory.
#   -q : quiet, so the cell does not print one line per extracted image
#   -n : never overwrite existing files, so re-running the cell is a no-op
#        instead of stopping at an interactive overwrite prompt
!mkdir -p /kaggle/temp/
!unzip -q -n /kaggle/input/dogs-vs-cats/test1.zip -d /kaggle/temp/
!unzip -q -n /kaggle/input/dogs-vs-cats/train.zip -d /kaggle/temp/

In [None]:
# Locations used by the rest of the notebook: the two extracted image folders
# under /kaggle/temp/ and the sample submission shipped with the input data.
sample_submission_path = "/kaggle/input/dogs-vs-cats/sampleSubmission.csv"
test_data_path = "/kaggle/temp/test1/"
train_data_path = "/kaggle/temp/train/"

# Loading Data

In [None]:
# Read the sample submission CSV and preview its first five rows.
sample_submission = pd.read_csv(filepath_or_buffer=sample_submission_path)
sample_submission.head(n=5)

## Creating Dataset

In [None]:
import torch
import cv2
import pytorch_lightning as pl
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as T
from typing import Dict, Callable, Optional, Any, Tuple
import matplotlib.pyplot as plt
from PIL import Image
import matplotlib.pyplot as plt
import multiprocessing

In [None]:
class ResizeImage:
    """Plain container bundling a resized image with its resize metadata.

    Attributes:
        image: the image object handed to the constructor.
        ratio: the scale factor handed to the constructor.
        pad: the two-element padding tuple handed to the constructor.
    """

    def __init__(self, image:Image, ratio:float, pad:Tuple[float, float]):
        # Store the three values verbatim; no validation or copying is done.
        self.image, self.ratio, self.pad = image, ratio, pad

In [None]:
def load_image(path:str, new_shape: Tuple[int, int]) -> Tuple[Any, ResizeImage]:
    """Load an image and letterbox it to ``new_shape`` keeping its aspect ratio.

    The image is scaled by a single ratio so that it fits inside ``new_shape``
    and the remaining space is filled with black padding split between the two
    opposite sides.

    Args:
        path: path of the image file to open.
        new_shape: target size as ``(height, width)``.

    Returns:
        A tuple ``(img, resized)`` where ``img`` is the decoded RGB image at its
        original size and ``resized`` is a ``ResizeImage`` holding the
        letterboxed image, the scale ratio, and the (possibly fractional)
        per-side padding ``(dw, dh)``.
    """
    target_h, target_w = new_shape[0], new_shape[1]

    # Image.open is lazy and keeps the file open; decode inside a context
    # manager so the handle is released. Converting to RGB guarantees three
    # channels, which the 3-tuple pad fill below requires.
    with Image.open(path) as handle:
        img = handle.convert("RGB")
    w0, h0 = img.size # Pillow gives us [Width, Height]

    # Scale ratio (new / old) -> min(h_new/h_old, w_new/w_old).
    # Taking the minimum makes the more constrained dimension fit exactly.
    r = min(target_h / h0, target_w / w0)

    # Un-padded size keeping the aspect ratio, as (height, width). Clamp to at
    # least one pixel so extreme aspect ratios cannot round a side down to 0.
    new_unpad = max(1, int(round(h0 * r))), max(1, int(round(w0 * r)))

    # Padding per side; the +/- 0.1 nudges make an odd total padding split as
    # floor/ceil between the two sides instead of rounding both the same way.
    dw = (target_w - new_unpad[1]) / 2
    dh = (target_h - new_unpad[0]) / 2
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))

    # First-stage preprocessing: resize, then pad with black.
    inteli_resize = T.Compose([
        T.Resize(new_unpad),
        T.Pad((left, top, right, bottom), fill=(0,0,0))
    ])

    return (img, ResizeImage(inteli_resize(img), r, (dw, dh)))

In [None]:
class CatsVsDogs(Dataset):
    """Dataset over a flat directory of cat/dog images.

    In training mode the label is taken from the file-name prefix
    (``cat.<n>.jpg`` -> 0, ``dog.<n>.jpg`` -> 1). Otherwise the integer
    file-name prefix (``<id>.jpg``) is used as the target, so each sample
    carries its image id instead of a class.

    Args:
        path: directory containing the image files.
        train: whether file names encode class labels (True) or image ids.
        transforms: callable applied to the letterboxed image; when None,
            ``ToTensor`` followed by a fixed mean/std ``Normalize`` is used.
        new_shape: ``(height, width)`` every image is letterboxed to.
    """

    def __init__(self, path: str, train: bool,
                transforms: Optional[Callable] = None,
                new_shape: Optional[Tuple[int, int]] = (224, 224)) -> None:

        self.name_classes = {'cat': 0,
                                'dog': 1}
        self.new_shape = new_shape
        self.transforms = transforms
        # Built lazily on first use when no transforms were supplied.
        self._default_transforms = None

        # os.listdir returns entries in arbitrary order; sort them so that
        # sample indices (and any index-based split) are reproducible.
        file_names = sorted(os.listdir(path))

        if train:
            self.classes = [self.name_classes[name.split(".")[0]] for name in file_names]
        else:
            # In this case classes will contain image ids
            self.classes = [int(name.split(".")[0]) for name in file_names]

        self.img_paths = [os.path.join(path, name) for name in file_names]

    def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return ``(image_tensor, target)``; the target is a float32 scalar tensor."""
        _, resize_image = load_image(self.img_paths[index], self.new_shape)
        transforms = self.transforms
        if transforms is None:
            # Create the fallback pipeline once instead of on every access.
            if self._default_transforms is None:
                self._default_transforms = T.Compose([
                    T.ToTensor(),
                    T.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])
                ])
            transforms = self._default_transforms
        tensor_img = transforms(resize_image.image)
        return tensor_img, torch.tensor(self.classes[index], dtype=torch.float32)

    def __len__(self) -> int:
        """Number of image files found in the directory."""
        return len(self.img_paths)

In [None]:
class CatsVsDogsDataModule(pl.LightningDataModule):
    """Lightning data module providing train/val/test loaders for cats vs dogs.

    Args:
        train_dir: directory of labelled training images.
        test_dir: directory of test images named by id.
        batch_size: batch size used by all three loaders.
        num_workers: loader worker processes; None means one per CPU reported
            by ``multiprocessing.cpu_count()``.
        train_fraction: share of the training directory used for training;
            the remainder becomes the validation set.
        seed: seed of the generator that draws the train/val split, so that
            repeated ``setup`` calls always produce the same split.
    """

    def __init__(self, train_dir: str, test_dir: str,
                 batch_size: int = 16,
                 num_workers: Optional[int] = None,
                 train_fraction: float = 0.8,
                 seed: int = 42):
        super().__init__()
        self.train_dir = train_dir
        self.test_dir = test_dir
        self.batch_size = batch_size
        self.num_workers = multiprocessing.cpu_count() if num_workers is None else num_workers
        self.train_fraction = train_fraction
        self.seed = seed
        self.transform = T.Compose([
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])
        ])
        # self.dims is returned when you call dm.size()
        # Setting default dims here because we know them.
        # Could optionally be assigned dynamically in dm.setup()
        self.dims = (3, 224, 224)

    def prepare_data(self):
        """Nothing to download; the images are expected to be on disk already."""
        pass

    def setup(self, stage=None):
        """Create the datasets needed for ``stage`` ('fit', 'test', or None for both)."""
        # Assign train/val datasets for use in dataloaders
        if stage == 'fit' or stage is None:
            dataset_full = CatsVsDogs(self.train_dir, train=True,
                                    transforms=self.transform, new_shape=self.dims[1:])
            n_total = len(dataset_full)
            n_train = int(n_total * self.train_fraction)
            # A freshly seeded generator makes the split identical on every
            # call, so re-running setup cannot move trained-on images into
            # the validation set.
            split_rng = torch.Generator().manual_seed(self.seed)
            self.train_dataset, self.val_dataset = random_split(
                dataset_full, [n_train, n_total - n_train], generator=split_rng)

        # Assign test dataset for use in dataloader(s)
        if stage == 'test' or stage is None:
            self.test_dataset = CatsVsDogs(self.test_dir, train=False,
                                           transforms=self.transform, new_shape=self.dims[1:])

    def _make_loader(self, dataset, shuffle: bool) -> DataLoader:
        """Build a DataLoader with the module-wide batch size and worker count."""
        return DataLoader(dataset, batch_size=self.batch_size, shuffle=shuffle,
                          num_workers=self.num_workers)

    def train_dataloader(self):
        """Shuffled loader over the training split."""
        return self._make_loader(self.train_dataset, shuffle=True)

    def val_dataloader(self):
        """Sequential loader over the validation split."""
        return self._make_loader(self.val_dataset, shuffle=False)

    def test_dataloader(self):
        """Sequential loader over the test images."""
        return self._make_loader(self.test_dataset, shuffle=False)

# Define Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import models

In [None]:
class VGG16(nn.Module):
    """Pretrained torchvision VGG16 whose last classifier layer outputs one value."""

    def __init__(self):
        super().__init__()
        backbone = models.vgg16(pretrained=True)
        # Swap the final classifier layer for a single-output linear layer.
        backbone.classifier[-1] = nn.Linear(4096, 1)
        self.vgg16 = backbone

    def forward(self, x):
        """Run the backbone and flatten its output to a 1-D tensor."""
        logits = self.vgg16(x)
        return logits.view(-1)

In [None]:
from pytorch_lightning.callbacks import LearningRateMonitor
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

class CatVsDogLitModel(pl.LightningModule):
    """Lightning wrapper training a single-logit binary classifier.

    Args:
        model: network returning one logit per sample (a 1-D tensor per batch).
        lr: learning rate passed to Adam. It is kept as the ``lr`` attribute,
            which the notebook overwrites after running the LR finder.
    """

    def __init__(self, model: nn.Module, lr: float):
        super().__init__()
        self.model = model
        self.lr = lr

    def forward(self, x):
        """Delegate to the wrapped model."""
        return self.model(x)

    def _bce_loss(self, batch):
        """Binary cross-entropy (on logits) for one ``(inputs, targets)`` batch."""
        x, y = batch
        return F.binary_cross_entropy_with_logits(self(x), y)

    def training_step(self, batch, batch_idx):
        """Compute, log, and return the training loss."""
        loss = self._bce_loss(batch)
        self.log('train_loss', loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss under the key 'val_loss'."""
        loss = self._bce_loss(batch)
        self.log('val_loss', loss, prog_bar=True)

    def configure_optimizers(self):
        """Adam plus a ReduceLROnPlateau scheduler keyed on 'val_loss'."""
        optimizer = optim.Adam(self.parameters(), lr=self.lr)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', verbose=True)
        return {
           'optimizer': optimizer,
           'lr_scheduler': scheduler,
           # Metric name the plateau scheduler is driven by; it must match the
           # key logged in validation_step.
           'monitor': 'val_loss'
       }

In [None]:
# Data module over the extracted train/test image folders.
data = CatsVsDogsDataModule(train_dir=train_data_path, test_dir=test_data_path)

# 1e-3 is only the initial learning rate; it is replaced below when the LR
# finder produces a suggestion.
ae_model = CatVsDogLitModel(VGG16(), 1e-3)
lr_monitor = LearningRateMonitor(logging_interval='step')

trainer = pl.Trainer(gpus=1, max_epochs=25, amp_level='O2', precision=16,
                     callbacks=[lr_monitor, EarlyStopping(monitor='val_loss')])

# Sweep learning rates and plot the loss curve with the suggested point.
lr_finder = trainer.tuner.lr_find(ae_model, data)

fig = lr_finder.plot(suggest=True)
new_lr = lr_finder.suggestion()
# Keep the initial learning rate when no suggestion is available, rather than
# storing None on the model and failing later when the optimizer is built.
if new_lr is not None:
    ae_model.lr = new_lr
else:
    print(f"LR finder gave no suggestion; keeping lr={ae_model.lr}")

In [None]:
# Train the model on the data module's train/val loaders.
trainer.fit(model=ae_model, datamodule=data)

# Evaluate the model on the validation dataset

In [None]:
from tqdm.notebook import tqdm

# Score the validation split one sample at a time on the GPU and report the
# percentage of predictions that match the labels.
pbar = tqdm(desc="Metric", total=len(data.val_dataset))

ae_model.eval()
ae_model.cuda()

real_, pred_ = [], []
with torch.no_grad():
    for image, target in data.val_dataset:
        # Add a batch dimension, then squash the logit to a probability.
        prob = ae_model(image.unsqueeze(0).cuda()).sigmoid()
        # Threshold at 0.5 to get a hard 0/1 prediction.
        hard_label = (prob > 0.5).to(dtype=torch.float32)

        real_.append(target.item())
        pred_.append(hard_label.item())

        pbar.update(1)

real_ = np.array(real_)
pred_ = np.array(pred_)

n_correct = sum(real_ == pred_)
accuracy = n_correct/len(real_) * 100
accuracy

In [None]:
import random
# Positions (within the validation split) where prediction and label disagree.
indexes = np.where(real_ != pred_)[0]

# Sample WITHOUT replacement so no image is shown twice, and never ask for more
# samples than there are misclassifications (also covers the empty case).
n_show = min(10, len(indexes))
choices = random.sample(range(len(indexes)), k=n_show)

# Inverse of Normalize(mean, std): x * std + mean, written as another Normalize.
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
unormalize = T.Normalize(mean=-mean/std,
            std=1.0/std)

print(choices)
# Creates data.test_dataset, whose name_classes mapping supplies the class
# names used in the panel titles below.
data.setup(stage="test")
class_names = list(data.test_dataset.name_classes.keys())

if n_show == 0:
    print("No misclassified validation samples to display.")
else:
    plt.figure(figsize=(45, 45), tight_layout=True)
    for j, i in enumerate(choices):
        plt.subplot(1, n_show, j + 1)
        x, y = data.val_dataset[indexes[i]]
        plt.title(f"class {class_names[int(y.item())]}," + \
                 f" pred {class_names[int(pred_[indexes[i]])]}")
        # Channels-first tensor -> channels-last array; clip because undoing
        # the normalization can leave values slightly outside [0, 1].
        plt.imshow(unormalize(x).permute(1, 2, 0).numpy().clip(0.0, 1.0))
    plt.show()

# Get results


In [None]:
# The test dataset is only created by data.setup(stage="test"); build it here
# if needed so this cell does not depend on an earlier cell having done so.
if not hasattr(data, "test_dataset"):
    data.setup(stage="test")

pbar = tqdm(total=len(data.test_dataset),
           desc="Test Set Predict")

ae_model.eval()
ae_model.cuda()

pred_ = []
idx_ = []
with torch.no_grad():
    # In the test dataset the second element of each sample is the image id.
    for x, y in data.test_dataset:
        y_hat = ae_model(x.unsqueeze(0).cuda()).sigmoid()
        # Threshold the probability at 0.5 to get a hard 0/1 prediction.
        pred = (y_hat > 0.5).to(dtype=torch.float32)

        idx_.append(y.item())
        pred_.append(pred.item())

        pbar.update(1)
pbar.close()

pred_ = np.array(pred_)
idx_ = np.array(idx_)

# Save model

In [None]:
# Put the model in eval mode on the CPU, then export it to TorchScript in the
# Kaggle working directory.
ae_model.eval()
ae_model.cpu()
example_input = torch.randn(1, 3, 224, 224)
ae_model.to_torchscript(file_path="/kaggle/working/model.torch.pt", example_inputs=example_input)

# Save submissions

In [None]:
# Convert both arrays to 64-bit integers.
pred_ = pred_.astype(np.int64)
idx_ = idx_.astype(np.int64)

In [None]:
# Work on a copy so the loaded sample submission frame stays untouched.
submission = sample_submission.copy(deep=True)
submission.head(n=5)

In [None]:
# Index by "id" so each prediction is written to the row of its image id.
submission = submission.set_index("id", drop=True)
submission.loc[idx_, "label"] = pred_

In [None]:
# Turn the index back into a regular column and preview the result.
submission = submission.reset_index()
submission.head(n=5)

In [None]:
# Write the submission to the Kaggle working directory without the row index.
submission.to_csv(path_or_buf="/kaggle/working/submission.csv", index=False)