In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input tree and print the full path of every file found
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# In this notebook, I'm going to use pytorch
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import torchvision
from torchvision import transforms
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, ConcatDataset
import zipfile
from PIL import Image
from torch.utils.data import Subset
from sklearn.model_selection import train_test_split
from tqdm import tqdm

%matplotlib inline

In [None]:
# Directories (relative to the working directory) that are listed and read
# later in the notebook for the training and test images
train_dir = './train'
test_dir = './test1'

In [None]:
# Unpack the competition archives into the current working directory.
# Each archive is opened in a `with` block so its file handle is closed as
# soon as extraction finishes, and no ZipFile object is left bound to a name
# that a later cell reuses for a different kind of value.
for archive_path in (
    '/kaggle/input/dogs-vs-cats/train.zip',
    '/kaggle/input/dogs-vs-cats/test1.zip',
):
    with zipfile.ZipFile(archive_path) as archive:
        archive.extractall()

In [None]:
# File names found in the extracted train / test directories.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# notebook reproducible from run to run.
train_files = sorted(os.listdir(train_dir))
test_files = sorted(os.listdir(test_dir))

print(f"train data len {len(train_files)}")
print(f"test data len {len(test_files)}")

In [None]:
# Lets create a dataset class
class MyDogCatDataset(Dataset):
    """Dataset over a list of image file names stored in a single directory.

    In ``'train'`` mode every item is ``(image, label)``, where the label is
    1 if the file name contains ``'dog'`` and 0 otherwise. In any other mode
    every item is ``(image, file_name)``.

    Args:
        file_list: Sequence of file names, relative to ``dir``.
        dir: Directory that contains the files.
        mode: ``'train'`` to yield labels; any other value yields file names.
        transform: Optional callable applied to the RGB PIL image. If it
            returns an object with a ``.numpy()`` method (for example a
            tensor) that method is used; otherwise the result is converted
            with ``np.asarray``, which for a PIL image gives an
            (H, W, C) array.

    Attributes:
        label: Only set in ``'train'`` mode. Label of the first file, or
            ``None`` for an empty list. Kept for backward compatibility;
            ``__getitem__`` derives the label from each item's own name.
    """

    def __init__(self, file_list, dir, mode='train', transform=None):
        self.file_list = file_list
        self.dir = dir
        self.mode = mode
        self.transform = transform

        if self.mode == 'train':
            # Guard against an empty list instead of indexing file_list[0] blindly
            if len(self.file_list) > 0:
                self.label = self._label_for(self.file_list[0])
            else:
                self.label = None

    @staticmethod
    def _label_for(file_name):
        """Return 1 if ``file_name`` contains 'dog', else 0."""
        return 1 if 'dog' in file_name else 0

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        file_name = self.file_list[idx]
        # convert() reads the pixel data, so the file handle can be released
        # immediately; it also guarantees three channels for the network.
        with Image.open(os.path.join(self.dir, file_name)) as handle:
            img = handle.convert('RGB')
        if self.transform:
            img = self.transform(img)
        # A PIL image (no transform given) has no .numpy(); fall back to asarray
        if hasattr(img, 'numpy'):
            img = img.numpy()
        else:
            img = np.asarray(img)
        img = img.astype('float32')
        if self.mode == 'train':
            # Label each item from its own name rather than from the first file
            return img, self._label_for(file_name)
        return img, file_name

In [None]:
# Steps of the image pipeline, listed in the order they are applied;
# the final ToTensor() turns the PIL image into a tensor.
augmentation_steps = [
    transforms.Resize(256),
    transforms.ColorJitter(),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.Resize(128),
    transforms.ToTensor(),
]
data_transform = transforms.Compose(augmentation_steps)

In [None]:
# Split the training file names by the class word that appears in each name
cat_files = list(filter(lambda name: 'cat' in name, train_files))
dog_files = list(filter(lambda name: 'dog' in name, train_files))
print(len(cat_files))
print(len(dog_files))

In [None]:
# Wrap each class's file list in its own dataset; both read from the training
# directory and share the same transform pipeline.
cats = MyDogCatDataset(file_list=cat_files, dir=train_dir, transform=data_transform)
dogs = MyDogCatDataset(file_list=dog_files, dir=train_dir, transform=data_transform)

In [None]:
# Evaluation images go through a deterministic counterpart of the training
# pipeline (centre crop instead of random crop, no random flip), so repeated
# passes over the test set see identical inputs. Output size stays 128x128.
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.Resize(128),
    transforms.ToTensor()
])
# NOTE(review): mode is left at its default ('train') because later cells
# unpack (image, label) pairs from this dataset. The labels it yields are
# derived from file names, not from ground-truth annotations - confirm that
# this is intended before trusting any metric computed on it.
test_data = MyDogCatDataset(test_files, test_dir, transform=test_transform)
test_data

In [None]:
# Concatenate the cat and dog datasets into one dataset, then display it
catdogs = ConcatDataset([cats, dogs])
catdogs

In [None]:
# Wrap the combined dataset in a DataLoader so it is served in shuffled
# mini-batches of 100 images, loaded by 4 worker processes.
train_loader = DataLoader(
    dataset=catdogs,
    batch_size=100,
    shuffle=True,
    num_workers=4,
)
print(f"input data len: {len(catdogs)}")
print(len(train_loader))
train_loader


In [None]:
# Evaluation data is read in a fixed order: shuffling adds nothing when no
# gradient step depends on batch composition, and a stable order keeps the
# outputs aligned with test_files.
test_loader = DataLoader(test_data, batch_size=100, shuffle=False, num_workers=4)
test_loader

In [None]:
def image_convert(img, mean=None, std=None):
    """Convert a (C, H, W) image tensor into an (H, W, C) array for plotting.

    Args:
        img: Image tensor with the channel axis first.
        mean: Optional per-channel mean used when the image was normalised.
        std: Optional per-channel std used when the image was normalised.

    Returns:
        A numpy array with the channel axis last and values clipped to
        [0, 1], suitable for ``plt.imshow``.

    The normalisation is undone only when both ``mean`` and ``std`` are
    given. The default leaves the pixel values untouched, because the
    notebook's transform pipeline ends in ``ToTensor()`` without a
    ``Normalize`` step; unconditionally applying ``img * 0.5 + 0.5`` would
    squeeze every image into [0.5, 1].
    """
    img = img.detach().cpu().numpy()
    img = img.transpose(1, 2, 0)
    if mean is not None and std is not None:
        img = img * np.asarray(std) + np.asarray(mean)
    # clip() returns a new array, so the caller's tensor is never modified
    return img.clip(0, 1)

In [None]:
def plot_10(loader=None):
    """Show up to ten images from one batch, titled with their class name.

    Args:
        loader: DataLoader that yields ``(images, labels)`` batches with
            integer labels (0 = cat, 1 = dog). Defaults to ``train_loader``,
            whose labels come from the 'cat'/'dog' file names. The test
            dataset is not used by default because it is built in 'train'
            mode from unlabelled files, so its labels do not describe the
            images.
    """
    if loader is None:
        loader = train_loader
    images, labels = next(iter(loader))
    class_names = {0: 'cat', 1: 'dog'}

    plt.figure(figsize=(20, 10))
    # A batch may hold fewer than ten images
    for idx in range(min(10, len(images))):
        plt.subplot(2, 5, idx + 1)
        img = image_convert(images[idx])
        plt.imshow(img)
        print(f'shape of the image is : {img.shape}')
        plt.title(class_names[int(labels[idx])])
    plt.show()

In [None]:
# Visual sanity check: draw a grid of sample images with their titles
plot_10()


In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

In [None]:
# now we are going to create a model with training dataset
class ConvolutionalNetwork(nn.Module):
    """Small CNN: two 3x3 conv + 2x2 max-pool stages, then three linear layers.

    Args:
        num_classes: Number of output classes. Defaults to 10, the value the
            network was originally hard-coded to.
            NOTE(review): the notebook only uses labels 0 and 1, so
            ``num_classes=2`` looks sufficient - confirm before changing.
        input_size: Height/width of the square RGB input images. Defaults to
            128, which yields the original 30*30*16 flattened feature size.

    ``forward`` takes a batch shaped (N, 3, input_size, input_size) and
    returns log-probabilities shaped (N, num_classes).
    """

    def __init__(self, num_classes=10, input_size=128):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 3, 1)
        self.conv2 = nn.Conv2d(6, 16, 3, 1)
        # Two conv+pool stages shrink each spatial side; derive the size of
        # the flattened feature vector instead of hard-coding it.
        side = self._pooled_side(self._pooled_side(input_size))
        self.flat_features = side * side * 16
        self.fc1 = nn.Linear(self.flat_features, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    @staticmethod
    def _pooled_side(side):
        """Side length after an unpadded 3x3 conv and a 2x2, stride-2 max-pool."""
        return (side - 2) // 2

    def forward(self, X):
        X = F.relu(self.conv1(X))
        X = F.max_pool2d(X, 2, 2)
        X = F.relu(self.conv2(X))
        X = F.max_pool2d(X, 2, 2)
        # Flatten everything but the batch axis. Unlike view(-1, K), this can
        # never fold a wrong-sized input into a different batch size; fc1
        # raises a shape error instead.
        X = X.flatten(1)
        X = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        X = self.fc3(X)
        return F.log_softmax(X, dim=1)



In [None]:
# Seed PyTorch's RNG so the initial weights are reproducible, build the
# network, and move it to the GPU when one is available.
torch.manual_seed(42)
model = ConvolutionalNetwork()
use_gpu = torch.cuda.is_available()
if use_gpu:
    model.cuda()
model

In [None]:
def count_parameters(model):
    """Print the element count of each trainable parameter, then the total.

    Args:
        model: Object whose ``parameters()`` yields items exposing
            ``requires_grad`` and ``numel()``.
    """
    sizes = []
    for param in model.parameters():
        if param.requires_grad:
            sizes.append(param.numel())

    total = 0
    for size in sizes:
        print(f'{size:>6}')
        total += size
    print(f'______\n{total:>6}')

In [None]:
# Print the size of every trainable parameter tensor and their total
count_parameters(model)

In [None]:
# Loss function and optimizer.
# The network's forward() already ends in log_softmax, so the matching
# criterion is NLLLoss, which expects log-probabilities. CrossEntropyLoss
# expects raw logits and would apply log_softmax a second time.
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Number of batches the training loader yields per epoch
len(train_loader)

In [None]:
import time
start_time = time.time()

epochs = 3
# Per-epoch history, stored as plain Python numbers so it can be plotted
# directly regardless of which device the tensors lived on.
train_losses = []
test_losses = []
train_correct = []
test_correct = []

for i in range(epochs):
    trn_corr = 0
    tst_corr = 0
    seen = 0  # training samples processed so far in this epoch

    # Run the training batches
    model.train()
    for b, (X_train, y_train) in enumerate(train_loader, start=1):
        X_train, y_train = X_train.to(device), y_train.to(device)

        # Apply the model (images are passed unflattened)
        y_pred = model(X_train)
        loss = criterion(y_pred, y_train)

        # Tally the number of correct predictions
        predicted = y_pred.argmax(dim=1)
        trn_corr += (predicted == y_train).sum().item()
        seen += y_train.size(0)

        # Update parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print interim results; counts come from the data actually seen
        # rather than from hard-coded batch/dataset sizes.
        if b % 100 == 0:
            print(f'epoch: {i:2}  batch: {b:4} [{seen:6}/{len(train_loader.dataset)}]  '
                  f'loss: {loss.item():10.8f}  '
                  f'accuracy: {trn_corr*100/seen:7.3f}%')

    # Loss of the last training batch and number of correct predictions
    train_losses.append(loss.item())
    train_correct.append(trn_corr)

    # Run the testing batches
    # NOTE(review): test_loader wraps a dataset built in 'train' mode from the
    # test files, so y_test is a file-name-derived label, not a ground-truth
    # annotation. Treat the "validation" numbers below as placeholders until
    # a held-out labelled split is used - confirm.
    model.eval()
    with torch.no_grad():
        for X_test, y_test in test_loader:
            X_test, y_test = X_test.to(device), y_test.to(device)

            # Apply the model
            y_val = model(X_test)

            # Tally the number of correct predictions
            tst_corr += (y_val.argmax(dim=1) == y_test).sum().item()

        # Loss of the last evaluation batch, mirroring train_losses
        loss = criterion(y_val, y_test)
    test_losses.append(loss.item())
    test_correct.append(tst_corr)

print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed

In [None]:
# Plot the loss recorded at the end of each epoch.
# float() accepts plain numbers as well as 0-dim tensors on any device, so
# the plot works even if the history holds GPU tensors.
fig, ax = plt.subplots()
ax.plot([float(value) for value in train_losses], label='training loss')
ax.plot([float(value) for value in test_losses], label='validation loss')
ax.set_title('Loss at the end of each epoch')
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.legend();

In [None]:
# Plot accuracy in percent: correct predictions divided by the real dataset
# sizes instead of fixed divisors. float() also unwraps 0-dim tensors.
n_train = len(train_loader.dataset)
n_test = len(test_loader.dataset)

fig, ax = plt.subplots()
ax.plot([100 * float(t) / n_train for t in train_correct], label='training accuracy')
ax.plot([100 * float(t) / n_test for t in test_correct], label='validation accuracy')
ax.set_title('Accuracy at the end of each epoch')
ax.set_xlabel('epoch')
ax.set_ylabel('accuracy (%)')
ax.legend();