## Imports

In [1]:
#Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px # this is another plotting library for interactive plot

from sklearn.model_selection import train_test_split
from sklearn import metrics, manifold # we will use the metrics and manifold learning modules from scikit-learn
from pathlib import Path # to interact with file paths
from PIL import Image # to interact with images
from tqdm import tqdm # progress bar
from pprint import pprint # pretty print (useful for a more readable print of objects like lists or dictionaries)
from IPython.display import clear_output # to clear the output of the notebook

import torch
import torch.nn as nn
import torchvision
from torchvision.io import read_image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import cv2 as cv
import os
import shutil


# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# (To force CPU execution, assign torch.device("cpu") to `device` instead.)
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [2]:
# CONTROL
# Number of image channels used throughout the notebook. With 1, images are
# converted to grayscale before being fed to the network; the cells below
# also contain a code path for 3-channel images.
num_channels = 1

## Define the LaneKeeper network

In [3]:
#Model

class LaneKeeper(nn.Module):
    """Small CNN regressor that maps an image to `out_dim` values.

    The network is three strided conv + ReLU + max-pool stages (with batch
    normalisation after the first two) followed by a two-layer fully
    connected head. The head expects 4*2*64 flattened features, which is what
    the convolutional stack produces for a 320x240 (width x height) input.

    Args:
        out_dim: number of values produced per image.
        channels: number of channels of the input image.
    """

    def __init__(self, out_dim=4, channels=3):
        super().__init__()

        # Convolutional feature extractor. The trailing comments give the
        # spatial size (width, height) after each down-sampling layer for a
        # (320, 240) input.
        feature_layers = [
            nn.Conv2d(channels, 32, kernel_size=3, stride=2),  # -> (159, 119)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),             # -> (79, 59)
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 64, kernel_size=3, stride=2),        # -> (39, 29)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),             # -> (19, 14)
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 64, kernel_size=3, stride=2),        # -> (9, 6)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),             # -> (4, 2)
        ]
        self.conv = nn.Sequential(*feature_layers)

        self.flat = nn.Flatten()

        # Fully connected regression head.
        head_layers = [
            nn.Linear(in_features=4*2*64, out_features=512),
            nn.ReLU(True),
            nn.Linear(in_features=512, out_features=out_dim),
        ]
        self.lin = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the `(batch, out_dim)` predictions for a batch of images."""
        features = self.conv(x)
        flattened = self.flat(features)
        return self.lin(flattened)

# Build the network for the configured number of input channels and move it
# to the selected device.
lane_keeper = LaneKeeper(channels=num_channels, out_dim=4)
lane_keeper = lane_keeper.to(device)

In [4]:

# Sanity check: push one test image through the network and print the shapes.
test_img_path = 'tests/test_img.jpg'
img = cv.imread(test_img_path)
if img is None:
    # cv.imread reports failure by returning None instead of raising, which
    # would otherwise surface as a cryptic error inside cv.resize.
    raise FileNotFoundError(f'could not read image: {test_img_path}')
# resize to 320 x 240 (width x height), the size used by the rest of the notebook
img = cv.resize(img, (320, 240))
if num_channels == 1:
    img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    # cvtColor drops the channel axis; add it back so the array is (H, W, 1)
    img = np.expand_dims(img, axis=2)
# convert to a float tensor and reorder (H, W, C) -> (C, H, W)
img = torch.from_numpy(img).float()
img = img.permute(2,0,1)
# add the batch dimension
img = img.unsqueeze(0).to(device)
print(img.shape)

lane_keeper.eval()

# Inference
with torch.no_grad():
    output = lane_keeper(img)
    print(f'out shape: {output.shape}')

torch.Size([1, 1, 240, 320])
out shape: torch.Size([1, 4])


## Loading images and Labels

In [5]:
# #dataset
# class AuxImgDataset(Dataset):
#     def __init__(self, folder, feature_extractor, num_images):
#         self.folder = folder
#         self.feature_extractor = feature_extractor
#         self.num_images = num_images

#     def __len__(self):
#         return self.num_images

#     def __getitem__(self, idx):
#         img = read_image(os.path.join(self.folder, f'img_{idx+1}.png') )
#         #convert to tensor
#         img = img.float()
#         return img

# def analyze_class_labels(class_labels):
#     state_cnt = [0,0,0,0]
#     next_state_cnt = [0,0,0,0]
#     sign_cnt = [0,0,0,0,0,0,0]
#     for label in tqdm(class_labels):
#         #torch->numpy
#         label = label.numpy()
#         assert np.sum(label) == 3, f'{label}'
#         state_lab = label[0:4]
#         assert np.sum(state_lab) == 1, f'{state_lab}'
#         state_cnt[np.argmax(state_lab)] += 1
#         next_state_lab = label[4:8]
#         assert np.sum(next_state_lab) == 1, f'{next_state_lab}'
#         next_state_cnt[np.argmax(next_state_lab)] += 1
#         sign_lab = label[8:15]
#         assert np.sum(sign_lab) == 1, f'{sign_lab}'
#         sign_cnt[np.argmax(sign_lab)] += 1
#     print(f'State counts: {state_cnt}   , tot = {np.sum(state_cnt)}')
#     print(f'Road :        {state_cnt[0]},  {state_cnt[0]/np.sum(state_cnt):.2f}') 
#     print(f'Intersection: {state_cnt[1]},  {state_cnt[1]/np.sum(state_cnt):.2f}')
#     print(f'Roundabout:   {state_cnt[2]},  {state_cnt[2]/np.sum(state_cnt):.2f}')
#     print(f'Junction:     {state_cnt[3]},  {state_cnt[3]/np.sum(state_cnt):.2f}')
#     print(f'Next state counts: {next_state_cnt}')
#     print(f'Next Road :        {next_state_cnt[0]},  {next_state_cnt[0]/np.sum(next_state_cnt):.2f}')
#     print(f'Next Intersection: {next_state_cnt[1]},  {next_state_cnt[1]/np.sum(next_state_cnt):.2f}')
#     print(f'Next Roundabout:   {next_state_cnt[2]},  {next_state_cnt[2]/np.sum(next_state_cnt):.2f}')
#     print(f'Next Junction:     {next_state_cnt[3]},  {next_state_cnt[3]/np.sum(next_state_cnt):.2f}')
#     print(f'Sign counts: {sign_cnt}')

#     return state_cnt, next_state_cnt, sign_cnt

# def analyze_additional_inputs(additional_inputs):
#     action_cnt = [0,0,0,0]
#     for input in tqdm(additional_inputs):
#         input = input.numpy()
#         assert np.sum(input) == 1, f'{input}'
#         action_lab = input[0:4]
#         assert np.sum(action_lab) == 1, f'{action_lab}'
#         action_cnt[np.argmax(action_lab)] += 1
#     print(f'Action counts: {action_cnt},  tot = {np.sum(action_cnt)}')
#     print(f'Straight:    {action_cnt[0]},  {action_cnt[0]/np.sum(action_cnt):.2f}')
#     print(f'Left:        {action_cnt[1]},  {action_cnt[1]/np.sum(action_cnt):.2f}')
#     print(f'Right:       {action_cnt[2]},  {action_cnt[2]/np.sum(action_cnt):.2f}')
#     print(f'Continue:    {action_cnt[3]},  {action_cnt[3]/np.sum(action_cnt):.2f}')

#     return action_cnt


# class CsvDataset(Dataset):
#     def __init__(self, folder, feature_extractor, transform=None, max_load=1000, num_images=5000, equalize=False, skip_img_load=False):
#         self.transform = transform
#         self.folder = folder
#         self.data = []
#         self.tot_imgs = num_images

#         #classification labels
#         class_labels = []
#         with open(folder+'/classification_labels.csv', 'r') as f:
#             lines = f.read().split('\n')
#             lines = lines[0:-1] #remove footer
#             # #set maximumum number of images
#             self.max_load = min(max_load, len(lines))
#             for i in tqdm(range(self.max_load)):
#                 line = lines[i]
#                 sample = line.split(',')
#                 #convert to float
#                 label = np.array([float(s) for s in sample])
#                 #convert to tensor
#                 label = torch.from_numpy(label).float()
#                 class_labels.append(label)
        
        
#         #input data
#         input_data = []
#         with open(folder+'/input_data.csv', 'r') as f:
#             lines = f.read().split('\n')
#             lines = lines[0:-1] #remove footer
#             for i in tqdm(range(self.max_load)):
#                 line = lines[i]
#                 sample = line.split(',')
#                 label = np.array([float(s) for s in sample])
#                 label = torch.from_numpy(label).float()
#                 input_data.append(label)


#          #regression labels
#         regr_labels = []
#         with open(folder+'/regression_labels.csv', 'r') as f:
#             lines = f.read().split('\n')
#             lines = lines[0:-1] #remove footer
#             for i in tqdm(range(self.max_load)):
#                 line = lines[i]
#                 sample = line.split(',')
#                 reg_label = np.array([float(s) for s in sample])
#                 reg_label = torch.from_numpy(reg_label).float()
#                 regr_labels.append(reg_label)

#         if not skip_img_load:
#             #add noise to images
#             #clear the training folder
#             train_folder = 'training'
#             for file in os.listdir(train_folder):
#                 if file.endswith(".png"):
#                     os.remove(os.path.join(train_folder, file))
#             for i in tqdm(range(self.max_load)):
#                 img = cv.imread(self.folder+f'/img_{i+1}.png')
#                 #add noise 
#                 std = 3
#                 noisep = np.random.normal(0, std, img.shape)
#                 noisep = np.uint8(noisep)
#                 noisem = np.random.normal(0, std, img.shape)
#                 noisem = np.uint8(noisem)
#                 img = cv.subtract(img, noisem)
#                 img = cv.add(img, noisep)
#                 if np.random.uniform(0, 1) > 0.7:
#                     img = cv.blur(img, (5,5))
#                 img = cv.resize(img, (320, 240))
#                 cv.imwrite(train_folder+f'/img_{i+1}.png', img)

#         if equalize:
#             print('Before Equalization: ')
#             state_cnt, next_cnt, sign_cnt = analyze_class_labels(class_labels) #print useful info about the labels
#             action_cnt = analyze_additional_inputs(input_data) #print useful info about the inputs
            
#             #Equalize the number of samples per class
#             # state_multipliers = [1.0,1.0,1.0,1.0] #to equalize the number of samples per class
#             next_multipliers = [1.0,1.0,1.0,1.0] #to equalize the number of samples per class
#             # max_state = np.max(state_cnt)
#             max_next = np.max(next_cnt)
#             # for i in range(len(state_cnt)):
#             #     if state_cnt[i] > 1e-4:
#             #         state_multipliers[i] = max_state/state_cnt[i]
#             for i in range(len(next_cnt)):
#                 if next_cnt[i] > 1e-4:
#                     next_multipliers[i] = max_next/next_cnt[i]
#             new_next_samples = [int(next_multipliers[i]*next_cnt[i]) for i in range(len(state_cnt))]
#             print(f'new_next_samples: {new_next_samples}, tot = {np.sum(new_next_samples)}')

#             tot_samples = np.sum(new_next_samples)

#             #create the new samples
#             final_class_labels = []
#             final_input_data = []
#             final_regr_labels = []
#             finished = False
#             idx = 0
#             final_idx = 0
#             # state_cnt = [0,0,0,0] 
#             next_cnt = [0,0,0,0] #equalize only on this
#             while not all([next_cnt[i] >= new_next_samples[i] for i in range(len(next_cnt))]):
#                 #get the next state
#                 next = class_labels[idx][4:8]
#                 assert np.sum(next.numpy()) == 1, f'{next}'
#                 assert next.shape == (4,)
#                 next_idx = np.argmax(next)
#                 next_cnt[next_idx] += 1
#                 if next_cnt[next_idx] < new_next_samples[next_idx]:
#                     final_class_labels.append(class_labels[idx])
#                     final_input_data.append(input_data[idx])
#                     final_regr_labels.append(regr_labels[idx])
#                     shutil.copyfile(self.folder+f'/img_{idx+1}.png', f'training/img_{final_idx+1}.png')
#                     final_idx += 1

#                 idx = (idx+1) % len(class_labels) #increment index
#                 if idx % 50000 == 0:
#                     print(f'{idx}, {final_idx}')

#             print('After Equalization: ')
#             analyze_class_labels(final_class_labels) #print useful info about the labels
#             analyze_additional_inputs(final_input_data) #print useful info about the inputs

#             print(f'Final idx = {final_idx}')
#             tot_samples = final_idx
#             img_folder = 'training'
#         else:
#             img_folder = 'training'
#             tot_samples = min(self.max_load, len(class_labels))
#             final_class_labels = class_labels
#             final_input_data = input_data
#             final_regr_labels = regr_labels

#         #convert images into features
#         #imgs dataset
#         img_dataset = AuxImgDataset(img_folder, feature_extractor, tot_samples)
#         img_loader = DataLoader(img_dataset, batch_size=128, shuffle=False)
#         feature_extractor.eval()
#         features = []
#         with torch.no_grad():
#             for imgs in tqdm(img_loader):
#                 imgs = imgs.to(device)
#                 feat = feature_extractor(imgs)
#                 feat = feat.cpu()
#                 for j in range(feat.shape[0]):
#                     features.append(feat[j])
        
#         #add everything to self.data
#         for i in tqdm(range(tot_samples)):
#             #concatenate feature vector and additional input data (input for the linear layer)
#             input = torch.cat((features[i], final_input_data[i]), dim=0)
#             # print(f'input shape: {input.shape}')
#             #append to data
#             self.data.append((input, final_regr_labels[i], final_class_labels[i]))  

#     def __len__(self):
#         # The length of the dataset is simply the length of the self.data list
#         return len(self.data)

#     def __getitem__(self, idx):
#         # Our sample is the element idx of the list self.data
#         sample = self.data[idx]
#         return sample

In [6]:
# import cv2 as cv
# import numpy as np
# cv.namedWindow('img')
# for i in range(5000):
#     img = cv.imread(os.path.join('training_imgs', f'img_{i+1}.png'))
#     #add noise 
#     std = 100
#     std = np.random.randint(1, std)
#     img = cv.resize(img, (320, 240))
#     img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

#     noisem = np.random.randint(0, std, img.shape, dtype=np.uint8)
#     img = cv.subtract(img, noisem)
#     noisep = np.random.randint(0, std, img.shape, dtype=np.uint8)
#     img = cv.add(img, noisep)
#     if np.random.uniform(0, 1) > 0.7:
#         img = cv.blur(img, (5,5))


#     cv.imshow('img', img)
#     cv.waitKey(1)
# cv.destroyAllWindows()

In [7]:
class CsvDataset(Dataset):
    """In-memory dataset of noise-augmented images with regression labels.

    Images are read from `<folder>/img_<i+1>.png`, resized to 320x240,
    optionally converted to grayscale, perturbed with random noise/blur and
    stored in a single uint8 tensor. Labels are the first four values of the
    corresponding row of `<folder>/regression_labels.csv`.

    Args:
        folder: directory containing the images and `regression_labels.csv`.
        transform: optional callable applied to the float (C, H, W) image
            tensor in `__getitem__`.
        max_load: maximum number of samples to load.
        channels: 1 to load grayscale images, otherwise the images are kept
            as read by `cv.imread`.
        show_preview: when True (default) every augmented image is shown in
            an OpenCV window named 'img' while loading; the caller is
            expected to close it (e.g. with `cv.destroyAllWindows()`). Set to
            False on machines without a display.

    Raises:
        FileNotFoundError: if an image file cannot be read.
    """

    def __init__(self, folder, transform=None, max_load=1000, channels=3, show_preview=True):
        self.transform = transform
        self.folder = folder
        self.data = []
        self.channels = channels

        with open(folder+'/regression_labels.csv', 'r') as f:
            lines = f.read().split('\n')
        lines = lines[0:-1] #remove footer

        # never try to load more samples than there are label rows
        max_load = min(max_load, len(lines))
        self.all_imgs = torch.zeros((max_load, 240, 320, channels), dtype=torch.uint8)

        if show_preview:
            cv.namedWindow('img')
        for i in tqdm(range(max_load)):
            # image
            img_path = os.path.join(self.folder, f'img_{i+1}.png')
            img = cv.imread(img_path)
            if img is None:
                # cv.imread returns None instead of raising on failure
                raise FileNotFoundError(f'could not read image: {img_path}')
            img = cv.resize(img, (320, 240))
            if channels == 1:
                img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

            # Augmentation: subtract and then add uniform integer noise with
            # a random upper bound (saturating uint8 arithmetic), and blur
            # roughly 30% of the images.
            noise_bound = np.random.randint(1, 100)
            noisem = np.random.randint(0, noise_bound, img.shape, dtype=np.uint8)
            img = cv.subtract(img, noisem)
            noisep = np.random.randint(0, noise_bound, img.shape, dtype=np.uint8)
            img = cv.add(img, noisep)
            if np.random.uniform(0, 1) > 0.7:
                img = cv.blur(img, (5,5))

            if show_preview:
                cv.imshow('img', img)
                cv.waitKey(1)

            if channels == 1:
                # grayscale arrays are (H, W); restore the channel axis
                img = np.expand_dims(img, axis=2)
            self.all_imgs[i] = torch.from_numpy(img)

            # label
            sample = lines[i].split(',')
            reg_label = np.array([float(s) for s in sample], dtype=np.float32)
            reg_label = reg_label[0:4] #keep it very simple
            self.data.append(reg_label)

    def __len__(self):
        # The length of the dataset is simply the length of the self.data list
        return len(self.data)

    def __getitem__(self, idx):
        """Return `(image, label)`: a float (C, H, W) tensor and its label array."""
        img = self.all_imgs[idx]
        # stored as (H, W, C) uint8 -> (C, H, W) float
        img = img.permute(2, 0, 1).float()
        if self.transform is not None:
            img = self.transform(img)
        value = self.data[idx]
        return img, value

In [8]:
# Build the training set. This is slow because every image is read and
# augmented up front, but training afterwards only reads from memory.
train_dataset = CsvDataset(folder='training_imgs', channels=num_channels, max_load=80000)
# close the OpenCV window(s) opened while loading
cv.destroyAllWindows()

100%|██████████| 80000/80000 [15:17<00:00, 87.17it/s] 


In [9]:
# Shuffled mini-batches of 100 samples for training
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=100)

In [10]:
# Pull a single batch and print the image and label shapes as a smoke test
sample = next(iter(train_dataloader))
batch_imgs, batch_labels = sample
print(batch_imgs.shape)
print(batch_labels.shape)

torch.Size([100, 1, 240, 320])
torch.Size([100, 4])


## Training

In [11]:
# Training function
def train_epoch(model, dataloader, regr_loss_fn, optimizer, device=device):
    """Run one optimisation pass over `dataloader`.

    Model outputs and labels share the same column layout: columns 0-1 are
    the `err` values, column 2 is `dist` and column 3 is `curv`.

    Args:
        model: network to train; switched to training mode.
        dataloader: iterable yielding `(images, regression_labels)` batches.
        regr_loss_fn: loss applied separately to each group of columns.
        optimizer: optimizer stepping the model parameters.
        device: device the batches are moved to.

    Returns:
        Tuple `(err_loss, dist_loss, curv_loss)` holding the mean of the
        per-batch losses over the epoch.
    """
    model.train()

    # Loss weights: the curvature term is multiplied by 0, so it is tracked
    # but contributes nothing to the total loss.
    err_weight, dist_weight, curv_weight = 1, 1, 0

    # per-batch loss history
    history = {'err': [], 'dist': [], 'curv': []}

    for images, targets in tqdm(dataloader):
        images, targets = images.to(device), targets.to(device)

        optimizer.zero_grad()
        preds = model(images)

        # split predictions by column
        err_pred = preds[:, 0:2]
        dist_pred = preds[:, 2]
        curv_pred = preds[:, 3]

        # split labels by column
        err_target = targets[:, 0:2]
        curv_target = targets[:, 3]
        raw_dist = targets[:, 2].float()
        # Only distances below 0.8 are supervised: for the others the target
        # is replaced by the prediction itself, so prediction and target
        # coincide for those samples.
        dist_target = torch.where(raw_dist < 0.8, raw_dist, dist_pred).float()

        # weighted losses
        err_loss = err_weight*regr_loss_fn(err_pred, err_target)
        dist_loss = dist_weight*regr_loss_fn(dist_pred, dist_target)
        curv_loss = curv_weight*regr_loss_fn(curv_pred, curv_target)
        total_loss = err_loss + dist_loss + curv_loss

        # back-propagate and update the weights
        total_loss.backward()
        optimizer.step()

        # record the batch losses
        history['err'].append(err_loss.detach().cpu().numpy())
        history['dist'].append(dist_loss.detach().cpu().numpy())
        history['curv'].append(curv_loss.detach().cpu().numpy())

    # average over all batches of the epoch
    return np.mean(history['err']), np.mean(history['dist']), np.mean(history['curv'])

In [12]:
# #load models
# detector.load_state_dict(torch.load('detector.pt'))
# feature_extractor.load_state_dict(torch.load('feature_extractor.pt'))

# Hyper-parameters
lr = 0.001
epochs = 5
optimizer = torch.optim.Adam(lane_keeper.parameters(), lr=lr, weight_decay=3e-5)
regr_loss_fn = nn.MSELoss()

# Make sure the checkpoint directory exists before training starts;
# otherwise torch.save would only fail after a full epoch has been spent.
os.makedirs('models', exist_ok=True)

for epoch in range(epochs):
    err_loss, dist_loss, curv_loss = train_epoch(lane_keeper, train_dataloader, regr_loss_fn, optimizer, device)
    # replace the progress bar of the finished epoch with its summary
    clear_output(wait=True)
    print(f"Epoch {epoch+1}/{epochs}")
    print(f"err_loss: {err_loss}")
    print(f"dist_loss: {dist_loss}")
    print(f"curv_loss: {curv_loss}")
    # checkpoint after every epoch (the same file is overwritten)
    torch.save(lane_keeper.state_dict(), 'models/lane_keeper.pt')

Epoch 5/5
err_loss: 0.0020709962118417025
dist_loss: 0.00030605788924731314
curv_loss: 0.0


In [13]:
# #testing
# test_dataset = CsvDataset(folder='test_imgs')
# test_dataloader = DataLoader(test_dataset, batch_size=100, shuffle=True)

# #get accuracy
# train_class_loss, train_regr_loss = get_avg_loss(feature_extractor, detector, train_dataloader, class_loss_fn, regr_loss_fn, device)
# test_class_loss, test_regr_loss = get_avg_loss(feature_extractor, detector, test_dataloader, class_loss_fn, regr_loss_fn, device)

# print(f"Training classification loss: {train_class_loss}")
# print(f"Training regression loss: {train_regr_loss}\n")
# print(f"Testing classification loss: {test_class_loss}")
# print(f"Testing regression loss: {test_regr_loss}")

In [14]:
#save the model so that opencv can load it
# (these imports are kept local to this cell, as in the original notebook)
import torch
import torch.onnx
import torchvision
import torchvision.models as models
import sys

# The export is done on the CPU. NOTE: this rebinds the notebook-wide
# `device` variable, so cells run after this one will also use the CPU.
device = torch.device('cpu')
lane_keeper.to(device)

# map_location makes the checkpoint loadable even if it was saved from a GPU
# run and CUDA is not available in the current session.
lane_keeper.load_state_dict(torch.load('models/lane_keeper.pt', map_location=device))

onnx_lane_keeper_path = "models/lane_keeper.onnx"

# set the model to inference mode
lane_keeper.eval()

# Create a sample input in the shape this model expects.
# It is needed because the conversion runs one forward pass through the network.
dummy_input = torch.randn(1, num_channels, 240, 320)
torch.onnx.export(lane_keeper, dummy_input, onnx_lane_keeper_path, verbose=True)

# hide the verbose export log
clear_output(wait=False)

In [15]:
#test with opencv: run the exported ONNX model through OpenCV's DNN module
sample_image = "training_imgs/img_1.png"
images = [cv.imread(f"training_imgs/img_{i+1}.png") for i in range(100)]

# load the exported network
lk =  cv.dnn.readNetFromONNX(onnx_lane_keeper_path)

# mean value subtracted by blobFromImage (zero: no mean subtraction)
avg_col = (0,0,0) if num_channels == 3 else 0

for i in tqdm(range(100)):
    image = images[i]
    if image is None:
        # cv.imread returns None instead of raising when a file cannot be read
        raise FileNotFoundError(f"could not read image: training_imgs/img_{i+1}.png")
    image = cv.resize(image, (320, 240))
    if num_channels == 1:
        image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    # swapRB=False: the training dataset feeds cv.imread's channel order (BGR)
    # to the network unchanged, so the channels must not be swapped here.
    blob = cv.dnn.blobFromImage(image, 1.0, (320, 240), avg_col, swapRB=False, crop=False)
    # print(blob.shape)
    lk.setInput(blob)
    preds = lk.forward()

# only the predictions for the last image are reported
print(f"Predictions: {preds}")
print(f"Predictions shape: {preds.shape}")

100%|██████████| 100/100 [00:01<00:00, 60.95it/s]

Predictions: [[ 1.1264813e-02 -8.8612195e-03 -1.7551328e-01 -1.4779145e-39]]
Predictions shape: (1, 4)





In [16]:
# Visual check: replay the (augmented) training images in an OpenCV window
cv.namedWindow('img')
dataloader = DataLoader(train_dataset, batch_size=10000, shuffle=False)
try:
    for imgs, labels in tqdm(dataloader):
        #convert imgs to numpy
        imgs = imgs.cpu().numpy()
        # a dedicated index name avoids shadowing any outer loop variable
        for j in range(imgs.shape[0]):
            # first channel only
            img = imgs[j][0]
            #convert to uint8 so that imshow displays the raw 0-255 values
            img = img.astype(np.uint8)
            cv.imshow("img", img)
            cv.waitKey(1)
finally:
    # close the window even if the preview is interrupted
    cv.destroyAllWindows()

  0%|          | 0/8 [00:00<?, ?it/s]