In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dogs-vs-cats/test1.zip
/kaggle/input/dogs-vs-cats/train.zip
/kaggle/input/dogs-vs-cats/sampleSubmission.csv


In [2]:
import torch
import zipfile
import glob
import os
import shutil
import random
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from PIL import Image
import matplotlib.pyplot as plt
from torchvision import transforms
import torch.optim as optim
import torchvision.models as models
import torch.nn as nn

In [3]:
train_path = '/kaggle/working/train/train'

In [4]:
with zipfile.ZipFile('../input/dogs-vs-cats/train.zip', 'r') as zip_ref:
    zip_ref.extractall('/kaggle/working/train')

In [5]:
paths = glob.glob('/kaggle/working/train/train/*.jpg')


In [6]:
all_data = []
#category={'dog':1,'cat':0}

In [7]:
for path in paths:
    file_name = path.split('/')[-1]
    category=-1
    if 'dog' in file_name:
        data = {'file':path,'label':1}
    else:
        data = {'file':path,'label':0}
    all_data.append(data)

In [8]:
all_data[:10]

[{'file': '/kaggle/working/train/train/cat.12473.jpg', 'label': 0},
 {'file': '/kaggle/working/train/train/dog.11204.jpg', 'label': 1},
 {'file': '/kaggle/working/train/train/cat.2021.jpg', 'label': 0},
 {'file': '/kaggle/working/train/train/dog.5313.jpg', 'label': 1},
 {'file': '/kaggle/working/train/train/cat.10448.jpg', 'label': 0},
 {'file': '/kaggle/working/train/train/cat.12101.jpg', 'label': 0},
 {'file': '/kaggle/working/train/train/dog.4673.jpg', 'label': 1},
 {'file': '/kaggle/working/train/train/cat.9518.jpg', 'label': 0},
 {'file': '/kaggle/working/train/train/dog.5875.jpg', 'label': 1},
 {'file': '/kaggle/working/train/train/cat.3597.jpg', 'label': 0}]

In [9]:
len(all_data)

25000

In [10]:
train_df = pd.DataFrame(data=all_data) 

In [11]:
train_df.head()

Unnamed: 0,file,label
0,/kaggle/working/train/train/cat.12473.jpg,0
1,/kaggle/working/train/train/dog.11204.jpg,1
2,/kaggle/working/train/train/cat.2021.jpg,0
3,/kaggle/working/train/train/dog.5313.jpg,1
4,/kaggle/working/train/train/cat.10448.jpg,0


In [12]:
train_df, val_df = train_test_split(train_df, test_size=0.2)

In [13]:
len(val_df)

5000

In [14]:
class DogCatDataset(Dataset):
    def __init__(self,df,tfms=None):
        self.df = df
        if tfms is None:
            self.transforms= transforms.Compose([
                                         transforms.Resize((224,224)),
                                         transforms.RandomRotation(20),
                                         transforms.RandomVerticalFlip(p=0.1),
                                         transforms.ToTensor(),
                                         transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
                                         ])
        else:
            self.transfroms = tfms
    def __len__(self):
        return len(self.df)
    def __getitem__(self,idx):
        label = self.df.iloc[idx][1]
        img = Image.open(self.df.iloc[idx][0])
        img = self.transforms(img)
        return img,label

In [15]:
train_ds = DogCatDataset(train_df)
val_ds = DogCatDataset(val_df)

In [16]:
train_dl = DataLoader(train_ds,batch_size=64,shuffle=True)
val_dl = DataLoader(val_ds,batch_size=64,shuffle=True)

In [17]:
model = models.resnet34()
            

In [18]:
class DogCatModel(nn.Module):
    def __init__(self,model):
        super(DogCatModel,self).__init__()
        self.model = model
        self.model.fc = nn.Linear(self.model.fc.in_features,2)
    def forward(self,image):
        return self.model(image)

In [19]:
dcModel = DogCatModel(model)

In [20]:
device = torch.device('cuda')

In [21]:
dcModel.to(device)

DogCatModel(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_r

In [22]:
optimizer = optim.Adam(params=model.parameters())

In [23]:
lossfunc = nn.CrossEntropyLoss()

In [None]:
epochs = 10
total=0
for epoch in range(epochs):
    accuracy = 0
    correct = 0
    val_correct = 0
    val_accuracy =0
    for index, (x,y) in enumerate(train_dl):
        x,y =x.to(device),y.to(device)
        prediction = dcModel(x)
        optimizer.zero_grad()
        loss = lossfunc(prediction,y)
        _, predicted = torch.max(prediction.data, 1)
        correct += (predicted == y).float().sum()
        loss.backward()
        optimizer.step()
    accuracy = 100 * correct / len(train_ds)
    with torch.no_grad():
        for index, (x,y) in enumerate(val_dl):
            x,y =x.to(device),y.to(device)
            val_prediction = dcModel(x)
            _, val_predicted = torch.max(val_prediction.data, 1)
            val_correct += (val_predicted == y).float().sum()
        val_accuracy = 100 * val_correct / len(val_ds)
    print("Epoch {} Train Accuracy = {} Validation Accuracy = {}".format(epoch,accuracy,val_accuracy))

Epoch 0 Train Accuracy = 60.34499740600586 Validation Accuracy = 61.03999710083008
Epoch 1 Train Accuracy = 65.30500030517578 Validation Accuracy = 67.25999450683594
Epoch 2 Train Accuracy = 70.70500183105469 Validation Accuracy = 71.29999542236328
Epoch 3 Train Accuracy = 73.97999572753906 Validation Accuracy = 74.63999938964844
Epoch 4 Train Accuracy = 77.12999725341797 Validation Accuracy = 79.29999542236328
Epoch 5 Train Accuracy = 78.9749984741211 Validation Accuracy = 77.6199951171875
Epoch 6 Train Accuracy = 81.36000061035156 Validation Accuracy = 83.37999725341797
Epoch 7 Train Accuracy = 83.8949966430664 Validation Accuracy = 85.54000091552734
