In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision as tv

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm.autonotebook import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class Dataset2class(torch.utils.data.Dataset):
    """Two-class image dataset backed by two directories of image files.

    Every file in ``path_dir1`` is labelled 0 and every file in ``path_dir2``
    is labelled 1. File names are sorted, and indices ``0 .. len(dir1) - 1``
    address the first directory while the remaining indices address the second.

    Each sample is read with OpenCV, converted from BGR to RGB, scaled to
    float32 in [0, 1], resized, and returned in channel-first layout.

    Args:
        path_dir1: directory holding the class-0 images.
        path_dir2: directory holding the class-1 images.
        img_size: ``(width, height)`` handed to ``cv2.resize``.
            Defaults to ``(112, 112)``.
    """

    def __init__(self, path_dir1:str, path_dir2:str, img_size=(112, 112)):
        super().__init__()

        self.path_dir1 = path_dir1
        self.path_dir2 = path_dir2
        self.img_size = tuple(img_size)

        # Sorted so that index -> file mapping is stable between runs.
        self.dir1_list = sorted(os.listdir(path_dir1))
        self.dir2_list = sorted(os.listdir(path_dir2))

    def __len__(self):
        """Total number of files across both directories."""
        return len(self.dir1_list) + len(self.dir2_list)

    def _locate(self, idx):
        """Map a flat index to ``(image_path, class_id)``.

        Negative indices count from the end of the combined dataset.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        total = len(self)
        position = int(idx)
        if position < 0:
            # Without this, negative indices could only ever hit directory 1.
            position += total
        if not 0 <= position < total:
            raise IndexError(f'index {idx} is out of range for dataset of size {total}')

        first_count = len(self.dir1_list)
        if position < first_count:
            return os.path.join(self.path_dir1, self.dir1_list[position]), 0
        return os.path.join(self.path_dir2, self.dir2_list[position - first_count]), 1

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            dict with ``'img'`` (float32 tensor, channels first) and
            ``'label'`` (tensor of shape ``(1,)`` holding 0 or 1).

        Raises:
            IndexError: if ``idx`` is out of range.
            OSError: if OpenCV cannot decode the file at the resolved path.
        """
        img_path, class_id = self._locate(idx)

        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise OSError(f'Could not read image file: {img_path}')

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32)
        img = img/255.0

        img = cv2.resize(img, self.img_size, interpolation=cv2.INTER_AREA)
        img = img.transpose((2, 0, 1))  # HWC -> CHW

        t_img = torch.from_numpy(img)
        t_class_id = torch.tensor([class_id])

        return {'img': t_img, 'label': t_class_id}
        

In [3]:
# Image folders, one per class, for each split.
train_dogs_path, train_cats_path = (
    './dataset/training_set/dogs/',
    './dataset/training_set/cats/',
)
test_dogs_path, test_cats_path = (
    './dataset/test_set/dogs/',
    './dataset/test_set/cats/',
)

# The first directory becomes class 0 (dogs), the second class 1 (cats).
train_ds_catsdogs = Dataset2class(path_dir1=train_dogs_path, path_dir2=train_cats_path)
test_ds_catsdogs = Dataset2class(path_dir1=test_dogs_path, path_dir2=test_cats_path)

In [5]:
# Report how many samples each split contains (both classes combined).
print(
    f'Train size: {len(train_ds_catsdogs)}',
    f'Test size: {len(test_ds_catsdogs)}',
    sep='\n',
)

Train size: 8000
Test size: 2000


In [7]:
batch_size = 4

# Training batches are reshuffled every epoch; the trailing partial batch is
# dropped so every training batch holds exactly `batch_size` samples.
train_loader = torch.utils.data.DataLoader(
    train_ds_catsdogs,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
    num_workers=1,
)

# Test batches keep dataset order and include every sample.
test_loader = torch.utils.data.DataLoader(
    test_ds_catsdogs,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
    num_workers=1,
)

In [8]:
class VGG13(nn.Module):
    """Reduced VGG-style convolutional classifier.

    Four stages of 3x3 convolutions (2, 2, 3 and 3 layers), each stage
    followed by 2x2 max pooling, then global average pooling and a
    two-layer fully connected head. The widest layers have 128 channels.

    Args:
        out_nc: number of output units produced by the final linear layer.
    """

    def __init__(self, out_nc):
        super().__init__()

        def conv3x3(c_in, c_out):
            # 3x3 convolution with padding 1 keeps the spatial size unchanged.
            return nn.Conv2d(c_in, c_out, kernel_size=3, padding=1)

        # Shared, parameter-free layers.
        self.act = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(2,2)

        # Stage 1
        self.conv1_1 = conv3x3(3, 64)
        self.conv1_2 = conv3x3(64, 64)

        # Stage 2
        self.conv2_1 = conv3x3(64, 128)
        self.conv2_2 = conv3x3(128, 128)

        # Stage 3
        self.conv3_1 = conv3x3(128, 128)
        self.conv3_2 = conv3x3(128, 128)
        self.conv3_3 = conv3x3(128, 128)

        # Stage 4
        self.conv4_1 = conv3x3(128, 128)
        self.conv4_2 = conv3x3(128, 128)
        self.conv4_3 = conv3x3(128, 128)

        # Classifier head: global pooling -> flatten -> two linear layers.
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.flat = nn.Flatten()
        self.fc1 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, out_nc)

    def forward(self, x):
        """Return raw (un-activated) outputs of shape ``(batch, out_nc)``."""
        stages = (
            (self.conv1_1, self.conv1_2),
            (self.conv2_1, self.conv2_2),
            (self.conv3_1, self.conv3_2, self.conv3_3),
            (self.conv4_1, self.conv4_2, self.conv4_3),
        )

        features = x
        for stage in stages:
            for conv in stage:
                features = self.act(conv(features))
            # Halve the spatial resolution at the end of every stage.
            features = self.maxpool(features)

        pooled = self.flat(self.avgpool(features))
        hidden = self.act(self.fc1(pooled))
        return self.fc3(hidden)

In [9]:
def count_parameters(model):
    """Return the number of scalar parameters in ``model`` that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

def accuracy(pred, label):
    """Fraction of elements whose thresholded prediction matches the label.

    ``pred`` holds raw logits; they are passed through a sigmoid and compared
    against 0.5. ``label`` is thresholded at 0.5 as well, and the mean of the
    element-wise agreement is returned as a NumPy scalar.
    """
    probabilities = pred.detach().cpu().sigmoid().numpy()
    targets = label.cpu().numpy()
    matches = (probabilities > 0.5) == (targets > 0.5)
    return matches.mean()

In [12]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [18]:
# Binary classifier with a single output logit, placed on the selected device.
model = VGG13(1) # ConvNet()
model = model.to(device)
# A bare expression in the middle of a cell is not displayed, so print the
# parameter count explicitly.
print(f'Trainable parameters: {count_parameters(model):,}')

epochs = 7


# BCEWithLogitsLoss applies the sigmoid internally, so the model emits raw logits.
loss_fn = nn.BCEWithLogitsLoss()#.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [19]:
# Train for `epochs` passes over the training set. After every epoch the mean
# per-batch loss and mean per-batch accuracy are printed.
for epoch in range(epochs):
    # Make sure the model is in training mode even if an evaluation pass
    # switched it to eval mode before this cell was (re-)run.
    model.train()
    loss_val = 0
    acc_val = 0
    for sample in (pbar := tqdm(train_loader)):
        img, label = sample['img'], sample['label']
        img = img.to(device)
        # BCEWithLogitsLoss needs floating-point targets on the same device as pred.
        label = label.float().to(device)
        optimizer.zero_grad()

        pred = model(img)

        loss = loss_fn(pred, label)

        loss.backward()
        loss_item = loss.item()
        loss_val += loss_item

        optimizer.step()

        acc_current = accuracy(pred, label)
        acc_val += acc_current

        # Show the metrics of the most recent batch on the progress bar.
        pbar.set_description(f'loss: {loss_item:.5f}\taccuracy: {acc_current:.3f}')
    # Epoch averages over the number of batches.
    print(loss_val/len(train_loader))
    print(acc_val/len(train_loader))

loss: 0.69970	accuracy: 0.250: 100%|████████| 2000/2000 [00:50<00:00, 39.61it/s]


0.6933949950933457
0.50075


loss: 0.69315	accuracy: 0.500: 100%|████████| 2000/2000 [00:26<00:00, 74.46it/s]


0.6932838300764561
0.496


loss: 0.69142	accuracy: 0.500: 100%|████████| 2000/2000 [00:26<00:00, 74.70it/s]


0.693299315303564
0.499625


loss: 0.66734	accuracy: 0.750: 100%|████████| 2000/2000 [00:27<00:00, 71.99it/s]


0.6892712494283915
0.54075


loss: 0.53541	accuracy: 1.000: 100%|████████| 2000/2000 [00:27<00:00, 71.74it/s]


0.6843551223278046
0.560125


loss: 0.61921	accuracy: 0.750: 100%|████████| 2000/2000 [00:27<00:00, 73.52it/s]


0.682359621167183
0.56225


loss: 0.69709	accuracy: 0.500: 100%|████████| 2000/2000 [00:26<00:00, 74.44it/s]

0.6765189296007157
0.58175





In [None]:
# Additional training epochs, continuing from the current model and optimizer
# state. Mirrors the first training loop: batches are moved to `device`, labels
# are cast to float for BCEWithLogitsLoss, and the progress bar is refreshed
# after every batch.
epochs = 7

for epoch in range(epochs):
    model.train()
    loss_val = 0
    acc_val = 0
    for sample in (pbar := tqdm(train_loader)):
        img, label = sample['img'], sample['label']
        # The model lives on `device`; its inputs must be there too.
        img = img.to(device)
        # Labels come out of the dataset as integers; the loss requires floats.
        label = label.float().to(device)
        optimizer.zero_grad()

        pred = model(img)

        loss = loss_fn(pred, label)

        loss.backward()
        loss_item = loss.item()
        loss_val += loss_item

        optimizer.step()

        acc_current = accuracy(pred, label)
        acc_val += acc_current

        # Must run inside the batch loop; once the loop has finished the bar
        # is exhausted and the description would never be shown during the run.
        pbar.set_description(f'loss: {loss_item:.5f}\taccuracy: {acc_current:.3f}')
    # Epoch averages over the number of batches.
    print(loss_val/len(train_loader))
    print(acc_val/len(train_loader))

In [35]:
# Evaluate the current model on the test set and print the mean per-batch
# loss and mean per-batch accuracy.
model.eval()  # inference mode for any mode-dependent layers
loss_val = 0
acc_val = 0
# No gradients are needed for evaluation.
with torch.no_grad():
    for sample in (pbar := tqdm(test_loader)):
        # Inputs have to live on the same device as the model.
        img = sample['img'].to(device)
        label = sample['label'].float().to(device)

        pred = model(img)

        loss = loss_fn(pred, label)
        loss_item = loss.item()
        loss_val += loss_item

        acc_current = accuracy(pred, label)
        acc_val += acc_current

        pbar.set_description(f'loss: {loss_item:.5f}\taccuracy: {acc_current:.3f}')
# Average over the number of *test* batches (not the training loader's).
# NOTE(review): a mean of per-batch means is exact only when every batch has
# the same size; the test loader keeps a possibly smaller final batch.
print(loss_val/len(test_loader))
print(acc_val/len(test_loader))

loss: 9.98807	accuracy: 0.000:   0%|           | 8/2000 [00:04<20:21,  1.63it/s]


KeyboardInterrupt: 