In [1]:
import cv2
import glob
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

## ImageAugmentation

In [2]:
class ImageAugmentation:
    def __init__(self, path):
        self.dim = (64, 36)
        self.folder_list = glob.glob(path + "/*")
        self.folders = []
        for p in self.folder_list:
            self.folders.append(p.split("\\")[-1])
        
        
        for folder in self.folders:
            self.file_names = []
            self.files = glob.glob(path + "/" + folder + "/*")
            for file in self.files:
                #resize
                self.file_names.append(file.split("\\")[-1])
                img = cv2.imread(path + "/" + folder + "/" + self.file_names[-1])
                resized = cv2.resize(img, self.dim,interpolation=cv2.INTER_AREA)                
                cv2.imwrite(path + "/" + folder + "/" + self.file_names[-1],resized)
                
                #resized img reflection
                if self.file_names[-1][:2] != "r_" and path.split('/')[-1] != 'test':
                    rows, cols, dim = resized.shape
                    M = np.float32([[-1,  0, cols],
                                    [0 ,  1, 0   ],
                                    [0 ,  0, 1   ]])
                    reflected_img = cv2.warpPerspective(resized,M,(int(cols),int(rows)))
                    cv2.imwrite(path + "/" + folder + "/" + "r_"+ self.file_names[-1],reflected_img)
                
            
                
        

In [3]:
ImageAugmentation("./data/train")
ImageAugmentation("./data/test")

<__main__.ImageAugmentation at 0x25246f68f40>

## Data loading

In [4]:
class CDataset(Dataset):
    def __init__(self, train = False, test = False):
        if(train):
            self.data_path = "./data/train/"
        if(test):
            self.data_path = "./data/test/"
        self.folder_list = glob.glob(self.data_path + "*")

        self.data = []
        for folder in self.folder_list:
            folder_name = folder.split("\\")[-1]
            for img_path in glob.glob(self.data_path + folder_name + "/*.jpg"):
                self.data.append([img_path, folder_name])
        self.class_map = {"Car": 0, "Truck": 1, "Bicycle": 2}
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, i):
        img_path, class_name = self.data[i]
        img = cv2.imread(img_path)
        class_id = self.class_map[class_name]
        img_tensor = torch.tensor(img, dtype=torch.float)
        img_tensor = img_tensor.permute(2,0,1)
        class_id = torch.tensor([class_id])
        return img_tensor, class_id

In [5]:
if __name__ == "__main__":
    batch_size = 4
    train_dataset = CDataset(train=True)
    data_loader_train = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    
    test_dataset = CDataset(test=True)
    data_loader_test = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
    
    #for imgs, labels in data_loader:
        #print("Batch of images has shape: ",imgs.shape)
        #print("Batch of labels has shape: ", labels.shape)

## CNN

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [7]:
if torch.cuda.is_available():
    device = torch.device("cuda:0") 
    print("Running on the GPU")
else:
    device = torch.device("cpu")
    print("Running on the CPU")

Running on the GPU


In [8]:
class CNN(nn.Module):

    def __init__(self):
        super(CNN, self).__init__()

        self.conv_layer = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            #nadodo
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            
            nn.MaxPool2d(kernel_size=2, stride=2),
        )


        self.fc_layer = nn.Sequential(
            nn.Dropout(p=0.4), 
            nn.Linear(18432, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.4), 
            nn.Linear(4096, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.4),
            nn.Linear(256, 3),
        )


    def forward(self, x): 
        x = self.conv_layer(x)
        x = torch.flatten(x, 1)
        x = self.fc_layer(x)
        return x
    
net = CNN()

In [9]:
def calc_input_dims():
        batch_data = torch.zeros((1, 3, 64, 36))
        
        batch_data = net.conv_layer(batch_data)
        
        return int(np.prod(batch_data.size()))
calc_input_dims()

18432

In [10]:
lossF = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001)

In [11]:
net.to(device)  

for epoch in range(50): 
    print("Starting epoch #" + str(epoch))
    running_loss = 0.0
    for i, data in enumerate(data_loader_train, 0):
        # inputs, labels = data
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = lossF(outputs, labels.flatten())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 0 and i != 0: 
            print('loss: %.3f' %
                  (running_loss))
            running_loss = 0.0

print('Finished Training')

Starting epoch #0
loss: 117.979
Starting epoch #1
loss: 81.774
Starting epoch #2
loss: 56.204
Starting epoch #3
loss: 42.297
Starting epoch #4
loss: 37.347
Starting epoch #5
loss: 25.996
Starting epoch #6
loss: 23.555
Starting epoch #7
loss: 15.925
Starting epoch #8
loss: 14.998
Starting epoch #9
loss: 15.971
Starting epoch #10
loss: 10.828
Starting epoch #11
loss: 13.053
Starting epoch #12
loss: 2.519
Starting epoch #13
loss: 5.320
Starting epoch #14
loss: 2.890
Starting epoch #15
loss: 5.113
Starting epoch #16
loss: 14.892
Starting epoch #17
loss: 10.840
Starting epoch #18
loss: 2.522
Starting epoch #19
loss: 2.821
Starting epoch #20
loss: 2.834
Starting epoch #21
loss: 0.519
Starting epoch #22
loss: 0.572
Starting epoch #23
loss: 10.394
Starting epoch #24
loss: 8.085
Starting epoch #25
loss: 4.133
Starting epoch #26
loss: 1.829
Starting epoch #27
loss: 1.358
Starting epoch #28
loss: 2.691
Starting epoch #29
loss: 1.123
Starting epoch #30
loss: 2.313
Starting epoch #31
loss: 4.854
St

In [12]:
net.to(torch.device("cpu")) 

CNN(
  (conv_layer): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): ReLU(inplace=True)
    (5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (f

In [13]:
correct = 0
total = 0

with torch.no_grad():
    for data in data_loader_test:
        images, labels = data
        outputs = net(images)
        
        for i in range(batch_size):
            try:
                if outputs[i][int(labels[i])] == torch.max(outputs[i]):
                    correct += 1
            except:
                pass
            total += 1
        
    print(correct, total)
    
print('Accuracy of the network test images: %d %%' % (
    round(100 * correct / total)))

28 32
Accuracy of the network test images: 88 %
