In [1]:
import cv2
import glob
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

## ImageAugmentation

In [2]:
class ImageAugmentation:
    def __init__(self, path = "./"):
        self.dim = (64, 36)
        self.folder_list = glob.glob(path + "/*")
        self.folders = []
        for p in self.folder_list:
            self.folders.append(p.split("\\")[-1])
            
        
        
        for folder in self.folders:
            self.files = glob.glob(path + "/" + folder + "/*")
            
            for file in self.files:
                #resize
                file_name = file.split("\\")[-1]
                img = cv2.imread(path + "/" + folder + "/" + file_name)
                resized = cv2.resize(img, self.dim,interpolation=cv2.INTER_AREA)                
                cv2.imwrite(path + "/" + folder + "/" + file_name,resized)
                
                
                constraint = ["rx_", "ry_"]
                if file_name[:3] not in constraint and path.split('/')[-1] != 'test':
                    rows, cols, dim = resized.shape
                    My = np.float32([[-1,  0, cols],
                                    [0 ,  1, 0   ],
                                    [0 ,  0, 1   ]])
                    Mx = np.float32([[1,  0, 0],
                                    [0 ,  -1, rows],
                                    [0 ,  0, 1   ]])
                    #resized img reflection on y axis
                    reflected_img = cv2.warpPerspective(resized,My,(int(cols),int(rows)))
                    cv2.imwrite(path + "/" + folder + "/" + "ry_"+ file_name,reflected_img)
                    
                    #resized img reflection on x axis
                    reflected_img = cv2.warpPerspective(resized,Mx,(int(cols),int(rows)))
                    cv2.imwrite(path + "/" + folder + "/" + "rx_"+ file_name,reflected_img)
        

In [3]:
# resize i reflect 
ImageAugmentation("./data/train")
ImageAugmentation("./data/test")

<__main__.ImageAugmentation at 0x1f908d09b20>

## Data loading

In [4]:
class CDataset(Dataset):
    def __init__(self, train = False, test = False):
        if(train):
            self.data_path = "./data/train/"
        if(test):
            self.data_path = "./data/test/"
        self.folder_list = glob.glob(self.data_path + "*")

        self.data = []
        for folder in self.folder_list:
            folder_name = folder.split("\\")[-1]
            for img_path in glob.glob(self.data_path + folder_name + "/*.jpg"):
                self.data.append([img_path, folder_name])
        self.class_map = {"Car": 0, "Truck": 1, "Bicycle": 2, "Dog": 3}
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, i):
        img_path, class_name = self.data[i]
        img = cv2.imread(img_path)
        class_id = self.class_map[class_name]
        img_tensor = torch.tensor(img, dtype=torch.float)
        img_tensor = img_tensor.permute(2,0,1)
        class_id = torch.tensor([class_id])
        return img_tensor, class_id

In [5]:
if __name__ == "__main__":
    batch_size = 5
    train_dataset = CDataset(train=True)
    data_loader_train = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    
    test_dataset = CDataset(test=True)
    data_loader_test = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
    #for imgs, labels in data_loader:
        #print("Batch of images has shape: ",imgs.shape)
        #print("Batch of labels has shape: ", labels.shape)

## CNN

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [7]:
if torch.cuda.is_available():
    device = torch.device("cuda:0") 
    print("Running on the GPU")
else:
    device = torch.device("cpu")
    print("Running on the CPU")

Running on the GPU


In [8]:
class CNN(nn.Module):

    def __init__(self):
        super(CNN, self).__init__()

        self.conv_layer = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1),
            nn.PReLU(),
            
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.PReLU(),
            
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.PReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.PReLU(),
            #nadodo
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.PReLU(),
            
            nn.MaxPool2d(kernel_size=2, stride=2),
        )


        self.fc_layer = nn.Sequential(
            nn.Dropout(p=0.6), 
            nn.Linear(18432, 4096),
            nn.PReLU(),
            nn.Dropout(p=0.6), 
            nn.Linear(4096, 256),
            nn.PReLU(),
            nn.Dropout(p=0.6),
            nn.Linear(256, len(train_dataset.class_map)),
        )


    def forward(self, x): 
        x = self.conv_layer(x)
        x = torch.flatten(x, 1)
        x = self.fc_layer(x)
        return x
    


In [9]:
net = CNN()
def calc_input_dims():
        batch_data = torch.zeros((1, 3, 64, 36))
        
        batch_data = net.conv_layer(batch_data)
        
        return int(np.prod(batch_data.size()))
calc_input_dims()

18432

In [10]:
lossF = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001)

In [11]:
net.to(device)  

for epoch in range(50): 
    print("Starting epoch #" + str(epoch))
    running_loss = 0.0
    for i, data in enumerate(data_loader_train, 0):
        # inputs, labels = data
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = lossF(outputs, labels.flatten())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 0 and i != 0: 
            print('loss: %.3f' %
                  (running_loss))
            running_loss = 0.0

print('Finished Training')

Starting epoch #0
loss: 176.911
loss: 145.359
Starting epoch #1
loss: 104.634
loss: 118.932
Starting epoch #2
loss: 92.775
loss: 75.690
Starting epoch #3
loss: 86.183
loss: 71.450
Starting epoch #4
loss: 64.733
loss: 57.356
Starting epoch #5
loss: 46.209
loss: 48.775
Starting epoch #6
loss: 54.409
loss: 58.421
Starting epoch #7
loss: 42.278
loss: 49.761
Starting epoch #8
loss: 29.930
loss: 35.085
Starting epoch #9
loss: 29.661
loss: 27.864
Starting epoch #10
loss: 26.450
loss: 34.967
Starting epoch #11
loss: 28.185
loss: 31.674
Starting epoch #12
loss: 21.269
loss: 23.779
Starting epoch #13
loss: 19.474
loss: 24.994
Starting epoch #14
loss: 23.893
loss: 20.869
Starting epoch #15
loss: 19.600
loss: 17.267
Starting epoch #16
loss: 22.658
loss: 15.682
Starting epoch #17
loss: 15.123
loss: 12.748
Starting epoch #18
loss: 9.914
loss: 11.638
Starting epoch #19
loss: 13.008
loss: 13.204
Starting epoch #20
loss: 8.934
loss: 8.208
Starting epoch #21
loss: 10.494
loss: 13.209
Starting epoch #22


In [12]:
net.to(torch.device("cpu")) 

CNN(
  (conv_layer): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): PReLU(num_parameters=1)
    (2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): PReLU(num_parameters=1)
    (5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): PReLU(num_parameters=1)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): PReLU(num_parameters=1)
    (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): PReLU(num_parameters=1)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, 

In [17]:
correct = 0
total = 0

with torch.no_grad():
    for data in data_loader_test:
        images, labels = data
        outputs = net(images)

        for i in range(batch_size):
            try:
                if outputs[i][int(labels[i])] == torch.max(outputs[i]):
                    correct += 1
                total+=1
            except:
                pass

    print(correct, total)

print('Accuracy on test images: %d %%' % (
    round(100 * correct / total)))

35 40
Accuracy on test images: 88 %


In [18]:
PATH = "./CNNmodule"
torch.save(net.state_dict(), PATH)

In [None]:
model = CNN()
model.load_state_dict(torch.load(PATH))
model.eval()