In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!unzip -q /kaggle/input/dogs-vs-cats/test1.zip -d .
!unzip -q /kaggle/input/dogs-vs-cats/train.zip -d .

In [None]:
# List the contents of the current working directory (shell command run via `!`).
!ls 

In [None]:
import torch
from pathlib import Path
import os
import torchvision
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torchvision import transforms
import numpy as np

In [None]:
class customDataset(torch.utils.data.Dataset):
    """Map-style dataset that reads images from disk on demand.

    Args:
        paths: Sequence of image file paths (``str`` or path-like objects).
        labels: Sequence of labels aligned index-for-index with ``paths``; it is
            converted to a tensor once, at construction time.
        transforms: Optional callable applied to each decoded image tensor.

    Raises:
        ValueError: If ``paths`` and ``labels`` differ in length.

    Indexing returns an ``(image, label)`` tuple.
    """

    def __init__(self, paths, labels, transforms = None):
        # Fail fast: a length mismatch would otherwise only surface as an
        # IndexError (or silently unused labels) somewhere mid-epoch.
        if len(paths) != len(labels):
            raise ValueError(
                f"paths and labels must have the same length, "
                f"got {len(paths)} and {len(labels)}"
            )
        self.paths = paths
        self.labels = torch.tensor(labels)
        self.transforms = transforms

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Decode image ``idx`` as a 3-channel tensor and return ``(image, label)``."""
        # Force RGB so grayscale or RGBA files still decode to three channels;
        # str() lets callers pass pathlib.Path objects as well as strings.
        image = torchvision.io.read_image(
            str(self.paths[idx]),
            mode = torchvision.io.ImageReadMode.RGB,
        )
        label = self.labels[idx]
        # Compare against None: a transform object is not required to be truthy.
        if self.transforms is not None:
            image = self.transforms(image)
        return image, label

In [None]:
# Collect every .jpg under ./train (recursively) as plain string paths.
# Path.glob does not guarantee an ordering, so sort the result to keep the
# path list (and everything derived from it) stable from run to run.
p = Path('train')
pList = sorted(str(path) for path in p.glob('**/*.jpg'))

In [None]:
# The label is the first dot-separated token of each file name
# (presumably names look like "cat.0.jpg" -- confirm against the dataset).
labels = [os.path.basename(path).split('.')[0] for path in pList]


In [None]:
import pandas as pd
# Encode the class-name strings as integers; each code is an index into `classes`.
classes = ['cat', 'dog']
labels = pd.Series(labels).map({'cat': 0, 'dog': 1}).to_numpy(int)

In [None]:
# Per-image preprocessing: tensor -> PIL image -> resize to 224x224 -> tensor.
trans = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
])
cds = customDataset(pList, labels, trans)

# 80/20 train/validation split derived from the dataset size, so the cell
# also works when the number of images is not exactly 25,000
# (for 25,000 images this is the same 20,000 / 5,000 split as before).
n_val = len(cds) // 5
n_train = len(cds) - n_val
train_ds, test_ds = torch.utils.data.random_split(
    cds,
    (n_train, n_val),
    # Fixed seed: the same samples land in each split on every run.
    generator = torch.Generator().manual_seed(42),
)

In [None]:
# Preview the first nine training samples in a 3x3 grid, titled with the class name.
plt.figure(figsize = (15, 15))
for i, (image, label) in zip(range(9), train_ds):
    ax = plt.subplot(3, 3, i + 1)
    # Move the channel axis last, the layout imshow draws colour images from.
    ax.imshow(image.permute(1, 2, 0))
    ax.set_title(classes[label])



In [None]:
# One loader per split, stored under the 'train' / 'val' keys.
# batch_size is left at 1: raise it only if the training loop computes
# accuracy per row (argmax over dim=1) rather than over the whole batch.
dataloader = {
    'train': DataLoader(train_ds, batch_size = 1, num_workers = 2, shuffle = True),
    # Evaluation metrics do not depend on sample order, so skip the shuffle.
    'val': DataLoader(test_ds, batch_size = 1, num_workers = 2, shuffle = False),
}

In [None]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
from torch import nn
import torch.nn.functional as F 

class Neural(nn.Module):
    """Small CNN that maps a batch of 3x224x224 images to two class logits.

    Five conv -> ReLU -> 2x2 max-pool stages halve the spatial size each time
    (224 -> 112 -> 56 -> 28 -> 14 -> 7), followed by a two-layer classifier head.
    The flattened size ``7*7*32`` is hard-wired, so inputs must be 224x224.
    """

    def __init__(self):
        super().__init__()
        self.seq = nn.Sequential(
            # input: 3 x 224 x 224
            nn.Conv2d(3, 16, kernel_size = 3, padding = 'same'),
            nn.ReLU(),
            nn.MaxPool2d(2),   # -> 16 x 112 x 112
            nn.Conv2d(16, 32, kernel_size = 3, padding = 'same'),
            nn.ReLU(),
            nn.MaxPool2d(2),   # -> 32 x 56 x 56
            nn.Conv2d(32, 32, kernel_size = 3, padding = 'same'),
            nn.ReLU(),
            nn.MaxPool2d(2),   # -> 32 x 28 x 28
            nn.Conv2d(32, 32, kernel_size = 3, padding = 'same'),
            nn.ReLU(),
            nn.MaxPool2d(2),   # -> 32 x 14 x 14
            nn.Conv2d(32, 32, kernel_size = 3, padding = 'same'),
            nn.ReLU(),
            nn.MaxPool2d(2),   # -> 32 x 7 x 7
            nn.Flatten(),
            nn.Linear(7*7*32, 112),
            # Without a non-linearity here the two Linear layers collapse into
            # a single affine map and the hidden layer adds no capacity.
            nn.ReLU(),
            nn.Linear(112, 2),
        )

    def forward(self, x):
        """Return raw (un-normalised) class logits of shape ``(batch, 2)``."""
        return self.seq(x)
    
    

In [None]:
# Ask PyTorch to release cached, currently-unused GPU memory before the model is built.
torch.cuda.empty_cache()

In [None]:
# Build the network and move its parameters onto the selected device.
model = Neural().to(device)

In [None]:
def _run_epoch(model, loss_func, loader, device, optimizer = None):
    """Run one pass over ``loader``: train if ``optimizer`` is given, else evaluate.

    Returns:
        ``(mean_loss, accuracy)`` as Python floats, averaged over the number of
        samples actually seen (both are 0.0 for an empty loader).
    """
    training = optimizer is not None
    model.train(training)          # train(False) is equivalent to eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    # Disable autograd while evaluating so no graph is built or retained.
    with torch.set_grad_enabled(training):
        for img, label in loader:
            img, label = img.to(device), label.to(device)
            pred = model(img)
            loss = loss_func(pred, label)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            batch_n = label.size(0)
            # Assumes loss_func returns the batch mean (the CrossEntropyLoss
            # default); weight by batch size to get a per-sample average.
            loss_sum += loss.item() * batch_n
            # argmax over the class dimension gives one prediction per sample,
            # so this is correct for any batch size, not just 1.
            n_correct += (pred.argmax(dim = 1) == label).sum().item()
            n_seen += batch_n

    denom = max(n_seen, 1)         # avoid dividing by zero on an empty loader
    return loss_sum / denom, n_correct / denom


def fit(model, loss_func, dataloader, optimizer, epochs = 10):
    """Train ``model`` and report train/validation loss and accuracy every epoch.

    Args:
        model: ``nn.Module`` producing ``(batch, n_classes)`` logits.
        loss_func: Callable ``loss_func(pred, label)`` returning a scalar tensor.
        dataloader: Mapping with ``'train'`` and ``'val'`` iterables that yield
            ``(img, label)`` batches.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of passes over the training data.

    Returns:
        None. Metrics are printed, one training line and one validation line
        per epoch.
    """
    # Run on whatever device the model already lives on instead of relying on
    # a notebook-level global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    for epoch in range(epochs):
        print(f"epoch {epoch}/{epochs}", '-'*20)
        train_loss, train_acc = _run_epoch(
            model, loss_func, dataloader['train'], device, optimizer
        )
        print(f"loss at {epoch}: {train_loss}, Acc:{train_acc}")
        val_loss, val_acc = _run_epoch(model, loss_func, dataloader['val'], device)
        print(f"Val: loss - {val_loss}, acc - {val_acc}")

In [None]:
from torch import optim
# Adam with its default hyperparameters, and cross-entropy on the model's logits.
optimizer = optim.Adam(model.parameters())
loss_func = nn.CrossEntropyLoss()

In [None]:
# Train and validate using fit()'s default number of epochs.
fit(model = model, loss_func = loss_func, dataloader = dataloader, optimizer = optimizer)

In [None]:
# `dataloader` is a plain dict, so it has no `.shape` attribute; show how many
# batches each split yields instead.
{split: len(loader) for split, loader in dataloader.items()}

In [None]:
 # The summary needs a CUDA device, so only request it when one is available.
 if torch.cuda.is_available():
     print(torch.cuda.memory_summary())

In [None]:
# Show the GPU status as reported by the `nvidia-smi` command-line tool.
!nvidia-smi