In [1]:
import cv2
import glob
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

## ImageAugmentation

In [21]:
class ImageAugmentation:
    def __init__(self, path = "./"):
        self.dim = (64, 36)
        self.folder_list = glob.glob(path + "/*")
        self.folders = []
        for p in self.folder_list:
            self.folders.append(p.split("\\")[-1])
            
        
        
        for folder in self.folders:
            self.files = glob.glob(path + "/" + folder + "/*")
            
            for file in self.files:
                #resize
                file_name = file.split("\\")[-1]
                img = cv2.imread(path + "/" + folder + "/" + file_name)
                resized = cv2.resize(img, self.dim,interpolation=cv2.INTER_AREA)                
                cv2.imwrite(path + "/" + folder + "/" + file_name,resized)
                
                
                constraint = ["rx_", "ry_"]
                if file_name[:3] not in constraint and path.split('/')[-1] != 'test':
                    rows, cols, dim = resized.shape
                    My = np.float32([[-1,  0, cols],
                                    [0 ,  1, 0   ],
                                    [0 ,  0, 1   ]])
                    Mx = np.float32([[1,  0, 0],
                                    [0 ,  -1, rows],
                                    [0 ,  0, 1   ]])
                    #resized img reflection on y axis
                    reflected_img = cv2.warpPerspective(resized,My,(int(cols),int(rows)))
                    cv2.imwrite(path + "/" + folder + "/" + "ry_"+ file_name,reflected_img)
                    
                    #resized img reflection on x axis
                    reflected_img = cv2.warpPerspective(resized,Mx,(int(cols),int(rows)))
                    cv2.imwrite(path + "/" + folder + "/" + "rx_"+ file_name,reflected_img)
        

In [22]:
# resize i reflect 
ImageAugmentation("./data/train")
ImageAugmentation("./data/test")

a.jpg
b.jpg
c.jpg
d.jpg
e.jpg
f.jpg
g.jpg
h.jpg
i.jpg
j.jpg
carA.jpg
carB.jpg
carC.jpg
carD.jpg
carE.jpg
carF.jpg
carG.jpg
carH.jpg
carI.jpg
carJ.jpg
doggo1.jpg
doggo10.jpg
doggo2.jpg
doggo3.jpg
doggo4.jpg
doggo5.jpg
doggo6.jpg
doggo7.jpg
doggo8.jpg
doggo9.jpg
a.jpg
b.jpg
c.jpg
d.jpg
e.jpg
f.jpg
g.jpg
h.jpg
i.jpg
j.jpg


<__main__.ImageAugmentation at 0x1d3e24d6c40>

## Data loading

In [4]:
class CDataset(Dataset):
    def __init__(self, train = False, test = False):
        if(train):
            self.data_path = "./data/train/"
        if(test):
            self.data_path = "./data/test/"
        self.folder_list = glob.glob(self.data_path + "*")

        self.data = []
        for folder in self.folder_list:
            folder_name = folder.split("\\")[-1]
            for img_path in glob.glob(self.data_path + folder_name + "/*.jpg"):
                self.data.append([img_path, folder_name])
        self.class_map = {"Car": 0, "Truck": 1, "Bicycle": 2, "Dog": 3}
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, i):
        img_path, class_name = self.data[i]
        img = cv2.imread(img_path)
        class_id = self.class_map[class_name]
        img_tensor = torch.tensor(img, dtype=torch.float)
        img_tensor = img_tensor.permute(2,0,1)
        class_id = torch.tensor([class_id])
        return img_tensor, class_id

In [5]:
if __name__ == "__main__":
    batch_size = 5
    train_dataset = CDataset(train=True)
    data_loader_train = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    
    test_dataset = CDataset(test=True)
    data_loader_test = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
    #for imgs, labels in data_loader:
        #print("Batch of images has shape: ",imgs.shape)
        #print("Batch of labels has shape: ", labels.shape)

## CNN

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [7]:
if torch.cuda.is_available():
    device = torch.device("cuda:0") 
    print("Running on the GPU")
else:
    device = torch.device("cpu")
    print("Running on the CPU")

Running on the GPU


In [8]:
class CNN(nn.Module):

    def __init__(self):
        super(CNN, self).__init__()

        self.conv_layer = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            #nadodo
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            
            nn.MaxPool2d(kernel_size=2, stride=2),
        )


        self.fc_layer = nn.Sequential(
            nn.Dropout(p=0.4), 
            nn.Linear(18432, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.4), 
            nn.Linear(4096, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.4),
            nn.Linear(256, len(train_dataset.class_map)),
        )


    def forward(self, x): 
        x = self.conv_layer(x)
        x = torch.flatten(x, 1)
        x = self.fc_layer(x)
        return x
    


In [9]:
net = CNN()
def calc_input_dims():
        batch_data = torch.zeros((1, 3, 64, 36))
        
        batch_data = net.conv_layer(batch_data)
        
        return int(np.prod(batch_data.size()))
calc_input_dims()

18432

In [10]:
lossF = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001)

In [11]:
net.to(device)  

for epoch in range(50): 
    print("Starting epoch #" + str(epoch))
    running_loss = 0.0
    for i, data in enumerate(data_loader_train, 0):
        # inputs, labels = data
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = lossF(outputs, labels.flatten())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 0 and i != 0: 
            print('loss: %.3f' %
                  (running_loss))
            running_loss = 0.0

print('Finished Training')

Starting epoch #0
loss: 149.089
loss: 107.513
Starting epoch #1
loss: 93.811
loss: 82.826
Starting epoch #2
loss: 68.135
loss: 61.798
Starting epoch #3
loss: 53.231
loss: 46.527
Starting epoch #4
loss: 39.256
loss: 38.457
Starting epoch #5
loss: 36.864
loss: 30.405
Starting epoch #6
loss: 25.137
loss: 22.171
Starting epoch #7
loss: 23.401
loss: 17.006
Starting epoch #8
loss: 20.033
loss: 16.154
Starting epoch #9
loss: 15.243
loss: 18.558
Starting epoch #10
loss: 12.552
loss: 10.344
Starting epoch #11
loss: 14.622
loss: 7.929
Starting epoch #12
loss: 8.494
loss: 14.208
Starting epoch #13
loss: 4.964
loss: 7.131
Starting epoch #14
loss: 6.731
loss: 12.749
Starting epoch #15
loss: 3.561
loss: 7.896
Starting epoch #16
loss: 11.980
loss: 2.900
Starting epoch #17
loss: 3.891
loss: 4.467
Starting epoch #18
loss: 1.899
loss: 4.815
Starting epoch #19
loss: 4.529
loss: 1.450
Starting epoch #20
loss: 11.683
loss: 2.941
Starting epoch #21
loss: 8.448
loss: 9.034
Starting epoch #22
loss: 2.911
loss

In [14]:
net.to(torch.device("cpu")) 

CNN(
  (conv_layer): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): ReLU(inplace=True)
    (5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (f

In [22]:
correct = 0
total = 0

with torch.no_grad():
    for data in data_loader_test:
        total += len(data[0])
        images, labels = data
        outputs = net(images)
        
        for i in range(batch_size):
            try:
                if outputs[i][int(labels[i])] == torch.max(outputs[i]):
                    correct += 1
            except:
                pass
        
    print(correct, total)
    
print('Accuracy of the network test images: %d %%' % (
    round(100 * correct / total)))

33 40
Accuracy of the network test images: 82 %
