In [1]:
import subprocess
import shutil
import torch
import torchvision
from torchvision import transforms
from torchsummary import summary
import json
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [3]:
transform = transforms.Compose([
    # you can add other transformations in this list
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (1))
])

train_data = torchvision.datasets.ImageFolder(root='dataset/train', transform=transform)
test_data = torchvision.datasets.ImageFolder(root='dataset/test', transform=transform)


In [4]:
train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=64, shuffle=True, num_workers=8)
test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=64, shuffle=True, num_workers=4)

In [5]:
print(len(train_loader))

7451


In [6]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.pool = nn.AvgPool1d(4, 4)
        self.conv1_bn = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.pool = nn.AvgPool2d(4, 4)
        self.conv2_bn = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 64, 3)
        self.pool = nn.AvgPool2d(4, 4)
        self.conv3_bn = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, 3)
        self.pool = nn.AvgPool2d(4, 4)
        self.fc = nn.Linear(64, 4)

    def forward(self, x):
        x = self.conv1_bn(self.pool(F.relu(self.conv1(x))))
        x = self.conv2_bn(self.pool(F.relu(self.conv2(x))))
        x = self.conv3_bn(self.pool(F.relu(self.conv3(x))))
        x = self.pool(F.relu(self.conv4(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = self.fc(x)
        return x


model = Net()
model = model.to(device)

In [7]:
summary(model, (3, 640, 640))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 638, 638]             896
         AvgPool2d-2         [-1, 32, 159, 159]               0
       BatchNorm2d-3         [-1, 32, 159, 159]              64
            Conv2d-4         [-1, 64, 157, 157]          18,496
         AvgPool2d-5           [-1, 64, 39, 39]               0
       BatchNorm2d-6           [-1, 64, 39, 39]             128
            Conv2d-7           [-1, 64, 37, 37]          36,928
         AvgPool2d-8             [-1, 64, 9, 9]               0
       BatchNorm2d-9             [-1, 64, 9, 9]             128
           Conv2d-10             [-1, 64, 7, 7]          36,928
        AvgPool2d-11             [-1, 64, 1, 1]               0
           Linear-12                    [-1, 4]             260
Total params: 93,828
Trainable params: 93,828
Non-trainable params: 0
---------------------------------

In [8]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00005)

In [9]:
early_batches_acc = {0, 2, 4, 6, 8, 10, 15, 20, 30, 50,
                                75, 100, 150, 200, 300, 400, 500,
                               600, 700, 800}

the_very_start = list(range(0, 400, 2))


In [10]:
test_accs = dict()

for epoch in range(20):  # loop over the dataset multiple times
    
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        if (i % 1000 == 1) or (epoch == 0 and (i in early_batches_acc)):
            correct = 0
            total = 0
            with torch.no_grad():
                for ix, data in enumerate(test_loader):
                    if ix > 100:
                        break
                    images, labels = data
                    images, labels = images.cuda(), labels.cuda()
                    # calculate outputs by running images through the network
                    outputs = model(images)
                    # the class with the highest energy is what we choose as prediction
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
            acc = correct / total
            test_accs[(epoch, i)] = acc
            print(f'Accuracy of the network on the test images: {100*acc} %')
            with open('accuracies_smallboi.json', 'wb') as fp:
                pickle.dump(test_accs, fp)
        if epoch == 0 and (i % 6 == 0):
            torch.save(model.state_dict(), f'saved_models/smallboy/epoch_{epoch}_batch_{i}.pth')
        
        if epoch == 1 and i % 30 == 0:
            torch.save(model.state_dict(), f'saved_models/smallboy/epoch_{epoch}_batch_{i}.pth')

        if epoch > 1 and i % 150 == 0:
            torch.save(model.state_dict(), f'saved_models/smallboy/epoch_{epoch}_batch_{i}.pth')

        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs, labels = inputs.cuda(), labels.cuda()
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        
        running_loss += loss.item()
        if i % 200 == 199:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 200:.3f}')
            running_loss = 0.0

print('Finished Training')

Accuracy of the network on the test images: 27.382425742574256 %
Accuracy of the network on the test images: 27.289603960396043 %
Accuracy of the network on the test images: 28.79022277227723 %
Accuracy of the network on the test images: 29.455445544554454 %
Accuracy of the network on the test images: 33.678836633663366 %
Accuracy of the network on the test images: 36.86571782178218 %
Accuracy of the network on the test images: 38.319925742574256 %
Accuracy of the network on the test images: 41.90903465346535 %
Accuracy of the network on the test images: 43.254950495049506 %
Accuracy of the network on the test images: 44.87933168316832 %
Accuracy of the network on the test images: 46.72029702970298 %
Accuracy of the network on the test images: 49.33477722772277 %
Accuracy of the network on the test images: 51.51608910891089 %
Accuracy of the network on the test images: 53.58910891089109 %
[1,   200] loss: 1.219
Accuracy of the network on the test images: 55.63118811881188 %
Accuracy of

[6,   600] loss: 0.182
[6,   800] loss: 0.183
[6,  1000] loss: 0.178
Accuracy of the network on the test images: 91.41398514851485 %
[6,  1200] loss: 0.184
[6,  1400] loss: 0.176
[6,  1600] loss: 0.174
[6,  1800] loss: 0.185
[6,  2000] loss: 0.178
Accuracy of the network on the test images: 91.83168316831683 %
[6,  2200] loss: 0.172
[6,  2400] loss: 0.176
[6,  2600] loss: 0.174
[6,  2800] loss: 0.174
[6,  3000] loss: 0.173
Accuracy of the network on the test images: 91.24381188118812 %
[6,  3200] loss: 0.177
[6,  3400] loss: 0.183
[6,  3600] loss: 0.172
[6,  3800] loss: 0.177
[6,  4000] loss: 0.180
Accuracy of the network on the test images: 91.6924504950495 %
[6,  4200] loss: 0.168
[6,  4400] loss: 0.171
[6,  4600] loss: 0.175
[6,  4800] loss: 0.170
[6,  5000] loss: 0.173
Accuracy of the network on the test images: 91.44492574257426 %
[6,  5200] loss: 0.171
[6,  5400] loss: 0.172
[6,  5600] loss: 0.172
[6,  5800] loss: 0.174
[6,  6000] loss: 0.175
Accuracy of the network on the test i

[12,   200] loss: 0.118
[12,   400] loss: 0.110
[12,   600] loss: 0.113
[12,   800] loss: 0.119
[12,  1000] loss: 0.110
Accuracy of the network on the test images: 93.61076732673267 %
[12,  1200] loss: 0.116
[12,  1400] loss: 0.120
[12,  1600] loss: 0.108
[12,  1800] loss: 0.112
[12,  2000] loss: 0.117
Accuracy of the network on the test images: 94.07487623762376 %
[12,  2200] loss: 0.114
[12,  2400] loss: 0.111
[12,  2600] loss: 0.109
[12,  2800] loss: 0.111
[12,  3000] loss: 0.111
Accuracy of the network on the test images: 94.29146039603961 %
[12,  3200] loss: 0.116
[12,  3400] loss: 0.118
[12,  3600] loss: 0.110
[12,  3800] loss: 0.115
[12,  4000] loss: 0.108
Accuracy of the network on the test images: 94.2759900990099 %
[12,  4200] loss: 0.110
[12,  4400] loss: 0.106
[12,  4600] loss: 0.107
[12,  4800] loss: 0.111
[12,  5000] loss: 0.108
Accuracy of the network on the test images: 94.56992574257426 %
[12,  5200] loss: 0.115
[12,  5400] loss: 0.111
[12,  5600] loss: 0.112
[12,  580

Accuracy of the network on the test images: 95.90037128712872 %
[17,  7200] loss: 0.086
[17,  7400] loss: 0.090
Accuracy of the network on the test images: 95.3743811881188 %
[18,   200] loss: 0.086
[18,   400] loss: 0.080
[18,   600] loss: 0.079
[18,   800] loss: 0.086
[18,  1000] loss: 0.088
Accuracy of the network on the test images: 95.43626237623762 %
[18,  1200] loss: 0.080
[18,  1400] loss: 0.083
[18,  1600] loss: 0.090
[18,  1800] loss: 0.079
[18,  2000] loss: 0.085
Accuracy of the network on the test images: 95.63737623762376 %
[18,  2200] loss: 0.084
[18,  2400] loss: 0.093
[18,  2600] loss: 0.080
[18,  2800] loss: 0.081
[18,  3000] loss: 0.083
Accuracy of the network on the test images: 95.42079207920791 %
[18,  3200] loss: 0.089
[18,  3400] loss: 0.078
[18,  3600] loss: 0.088
[18,  3800] loss: 0.087
[18,  4000] loss: 0.090
Accuracy of the network on the test images: 95.17326732673267 %
[18,  4200] loss: 0.085
[18,  4400] loss: 0.081
[18,  4600] loss: 0.089
[18,  4800] loss:

In [14]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
confusion_matrix = torch.zeros(4, 4)
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        images, labels = images.cuda(), labels.cuda()
        # calculate outputs by running images through the network
        outputs = model(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        for t, p in zip(labels.view(-1), predicted.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

print(f'Accuracy of the network on the test images: {100 * correct // total} %')
print(confusion_matrix.diag()/confusion_matrix.sum(1))

Accuracy of the network on the test images: 95 %
tensor([0.9410, 0.9682, 0.9653, 0.9475])


In [2]:
!nvidia-smi

Tue Apr 12 11:04:08 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03    Driver Version: 510.47.03    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:84:00.0 Off |                    0 |
| N/A   47C    P0    31W / 250W |      0MiB / 16384MiB |      2%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------