In [1]:
!pip install torchsummary



In [2]:
import subprocess
import shutil
import torch
import torchvision
from torchvision import transforms
from torchsummary import summary
import json
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [4]:
transform = transforms.Compose([
    # you can add other transformations in this list
    transforms.ToTensor(),
])

train_data = torchvision.datasets.ImageFolder(root='dataset/train', transform=transform)
test_data = torchvision.datasets.ImageFolder(root='dataset/test', transform=transform)


In [5]:
train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=64, shuffle=True, num_workers=8)
test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=64, shuffle=True, num_workers=4)

In [6]:
print(len(train_loader))

5836


In [7]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.bn0 = nn.BatchNorm2d(3)
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.pool = nn.AvgPool1d(4, 4)
        self.conv1_bn = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.pool = nn.AvgPool2d(4, 4)
        self.conv2_bn = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 64, 3)
        self.pool = nn.AvgPool2d(4, 4)
        self.conv3_bn = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, 3)
        self.pool = nn.AvgPool2d(4, 4)
        self.fc = nn.Linear(64, 4)

    def forward(self, x):
        x = self.bn0(x)
        x = self.conv1_bn(self.pool(F.relu(self.conv1(x))))
        x = self.conv2_bn(self.pool(F.relu(self.conv2(x))))
        x = self.conv3_bn(self.pool(F.relu(self.conv3(x))))
        x = self.pool(F.relu(self.conv4(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = self.fc(x)
        return x


model = Net()
model = model.to(device)

In [8]:
summary(model, (3, 640, 640))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
       BatchNorm2d-1          [-1, 3, 640, 640]               6
            Conv2d-2         [-1, 32, 638, 638]             896
         AvgPool2d-3         [-1, 32, 159, 159]               0
       BatchNorm2d-4         [-1, 32, 159, 159]              64
            Conv2d-5         [-1, 64, 157, 157]          18,496
         AvgPool2d-6           [-1, 64, 39, 39]               0
       BatchNorm2d-7           [-1, 64, 39, 39]             128
            Conv2d-8           [-1, 64, 37, 37]          36,928
         AvgPool2d-9             [-1, 64, 9, 9]               0
      BatchNorm2d-10             [-1, 64, 9, 9]             128
           Conv2d-11             [-1, 64, 7, 7]          36,928
        AvgPool2d-12             [-1, 64, 1, 1]               0
           Linear-13                    [-1, 4]             260
Total params: 93,834
Trainable params: 

In [9]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00005)

In [10]:
early_batches_acc = {0, 2, 4, 6, 8, 10, 15, 20, 30, 50,
                                75, 100, 150, 200, 300, 400, 500,
                               600, 700, 800}

In [11]:
test_accs = dict()

for epoch in range(20):  # loop over the dataset multiple times
    
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        if (i % 1000 == 1) or (epoch == 0 and (i in early_batches_acc)):
            correct = 0
            total = 0
            with torch.no_grad():
                for ix, data in enumerate(test_loader):
                    if ix > 100:
                        break
                    images, labels = data
                    images, labels = images.cuda(), labels.cuda()
                    # calculate outputs by running images through the network
                    outputs = model(images)
                    # the class with the highest energy is what we choose as prediction
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
            acc = correct / total
            test_accs[(epoch, i)] = acc
            print(f'Accuracy of the network on the test images: {100*acc} %')
            with open('accuracies_smallboi_v2.json', 'wb') as fp:
                pickle.dump(test_accs, fp)
        if epoch == 0 and (i % 6 == 0):
            torch.save(model.state_dict(), f'saved_models/smallboy_v2/epoch_{epoch}_batch_{i}.pth')
        
        if epoch == 1 and i % 30 == 0:
            torch.save(model.state_dict(), f'saved_models/smallboy_v2/epoch_{epoch}_batch_{i}.pth')

        if epoch > 1 and i % 150 == 0:
            torch.save(model.state_dict(), f'saved_models/smallboy_v2/epoch_{epoch}_batch_{i}.pth')

        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs, labels = inputs.cuda(), labels.cuda()
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        
        running_loss += loss.item()
        if i % 200 == 199:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 200:.3f}')
            running_loss = 0.0

print('Finished Training')

Accuracy of the network on the test images: 23.994430693069308 %
Accuracy of the network on the test images: 24.53589108910891 %
Accuracy of the network on the test images: 24.613242574257427 %
Accuracy of the network on the test images: 27.5990099009901 %
Accuracy of the network on the test images: 28.403465346534652 %
Accuracy of the network on the test images: 28.960396039603957 %
Accuracy of the network on the test images: 30.863242574257427 %
Accuracy of the network on the test images: 35.133044554455445 %
Accuracy of the network on the test images: 37.88675742574257 %
Accuracy of the network on the test images: 40.87252475247525 %
Accuracy of the network on the test images: 43.53341584158416 %
Accuracy of the network on the test images: 44.47710396039604 %
Accuracy of the network on the test images: 46.65841584158416 %
Accuracy of the network on the test images: 48.05074257425743 %
[1,   200] loss: 1.165
Accuracy of the network on the test images: 50.17017326732673 %
Accuracy of 

[7,  3400] loss: 0.130
[7,  3600] loss: 0.144
[7,  3800] loss: 0.130
[7,  4000] loss: 0.136
Accuracy of the network on the test images: 81.3118811881188 %
[7,  4200] loss: 0.132
[7,  4400] loss: 0.132
[7,  4600] loss: 0.141
[7,  4800] loss: 0.130
[7,  5000] loss: 0.131
Accuracy of the network on the test images: 82.17821782178217 %
[7,  5200] loss: 0.135
[7,  5400] loss: 0.121
[7,  5600] loss: 0.128
[7,  5800] loss: 0.131
Accuracy of the network on the test images: 79.90408415841584 %
[8,   200] loss: 0.128
[8,   400] loss: 0.127
[8,   600] loss: 0.128
[8,   800] loss: 0.123
[8,  1000] loss: 0.123
Accuracy of the network on the test images: 81.23452970297029 %
[8,  1200] loss: 0.132
[8,  1400] loss: 0.114
[8,  1600] loss: 0.120
[8,  1800] loss: 0.123
[8,  2000] loss: 0.132
Accuracy of the network on the test images: 80.86324257425743 %
[8,  2200] loss: 0.123
[8,  2400] loss: 0.130
[8,  2600] loss: 0.122
[8,  2800] loss: 0.120
[8,  3000] loss: 0.117
Accuracy of the network on the test i

[15,  1400] loss: 0.077
[15,  1600] loss: 0.070
[15,  1800] loss: 0.080
[15,  2000] loss: 0.082
Accuracy of the network on the test images: 82.47215346534654 %
[15,  2200] loss: 0.079
[15,  2400] loss: 0.075
[15,  2600] loss: 0.075
[15,  2800] loss: 0.066
[15,  3000] loss: 0.076
Accuracy of the network on the test images: 83.58601485148515 %
[15,  3200] loss: 0.077
[15,  3400] loss: 0.074
[15,  3600] loss: 0.072
[15,  3800] loss: 0.071
[15,  4000] loss: 0.072
Accuracy of the network on the test images: 82.8125 %
[15,  4200] loss: 0.071
[15,  4400] loss: 0.074
[15,  4600] loss: 0.071
[15,  4800] loss: 0.070
[15,  5000] loss: 0.077
Accuracy of the network on the test images: 82.73514851485149 %
[15,  5200] loss: 0.076
[15,  5400] loss: 0.079
[15,  5600] loss: 0.069
[15,  5800] loss: 0.073
Accuracy of the network on the test images: 82.20915841584159 %
[16,   200] loss: 0.070
[16,   400] loss: 0.067
[16,   600] loss: 0.069
[16,   800] loss: 0.073
[16,  1000] loss: 0.069
Accuracy of the ne

In [12]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
confusion_matrix = torch.zeros(4, 4)
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        images, labels = images.cuda(), labels.cuda()
        # calculate outputs by running images through the network
        outputs = model(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        for t, p in zip(labels.view(-1), predicted.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

print(f'Accuracy of the network on the test images: {100 * correct // total} %')
print(confusion_matrix.diag()/confusion_matrix.sum(1))

Accuracy of the network on the test images: 83 %
tensor([0.7920, 0.9208, 0.7620, 0.8711])


In [14]:
confusion_matrix

tensor([[50024.,  2037.,  5990.,  5107.],
        [ 1716., 57850.,  2077.,  1183.],
        [ 3702.,  3949., 56752., 10071.],
        [ 2378.,  1664.,  4250., 56060.]])