## Imports

In [1]:
#Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px # this is another plotting library for interactive plot

from sklearn.model_selection import train_test_split
from sklearn import metrics, manifold # we will use the metrics and manifold learning modules from scikit-learn
from pathlib import Path # to interact with file paths
from PIL import Image # to interact with images
from tqdm import tqdm # progress bar
from pprint import pprint # pretty print (useful for a more readable print of objects like lists or dictionaries)
from IPython.display import clear_output # to clear the output of the notebook

import torch
import torch.nn as nn
import torchvision
from torchvision.io import read_image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import cv2 as cv
import os
import shutil


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# device = torch.device("cpu")

cuda


In [2]:
# CONTROL
num_channels = 1
SIZE = (32,32)
model_name = 'models/lane_keeper_small.pt'
onnx_lane_keeper_path = "models/lane_keeper_small.onnx"
max_load = 150_000

## Load Pretrained Net and create Detector 

In [15]:
# NETWORK ARCHITECTURE

class LaneKeeper(nn.Module):
    def __init__(self, out_dim=4, channels=1): 
        super().__init__()
        ### Convoluational layers
        self.conv = nn.Sequential( #in = (SIZE)
            nn.Conv2d(channels, 16, kernel_size=3, stride=1), #out = 30
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2), #out=15
            nn.BatchNorm2d(16),
            nn.Conv2d(16, 32, kernel_size=5, stride=2), #out = 6
            nn.ReLU(True),
            nn.Conv2d(32, 64, kernel_size=6, stride=2), #out = 1
            nn.ReLU(True),
        )
        self.flat = nn.Flatten()
        ### Linear sections
        self.lin = nn.Sequential(
            # First linear layer
            nn.Linear(in_features=1*1*64, out_features=32),
            nn.ReLU(True),
            nn.Linear(in_features=32, out_features=out_dim),
        )
        
    def forward(self, x):
        x = self.conv(x)
        x = self.flat(x)
        x = self.lin(x)
        return x

lane_keeper = LaneKeeper(out_dim=3,channels=num_channels).to(device)

In [4]:
# TEST NET INPUTS/OUTPUTS
#show the image with opencv
img = cv.imread('tests/test_img.jpg')
img = cv.resize(img, SIZE)
if num_channels == 1:
    img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    img = np.expand_dims(img, axis=2)
#convert to tensor
img = torch.from_numpy(img).float()
img = img.permute(2,0,1)
#add dimension
img = img.unsqueeze(0).to(device)
print(img.shape)

lane_keeper.eval()

# Inference
with torch.no_grad():
    output = lane_keeper(img)
    print(f'out shape: {output.shape}')

torch.Size([1, 1, 32, 32])
out shape: torch.Size([1, 3])


## Loading images and Labels

In [5]:
# IMG LOADER AND AUGMENTATION
import cv2 as cv
import numpy as np


def load_and_augment_img(i, folder='training_imgs'):
    img = cv.imread(os.path.join(folder, f'img_{i+1}.png'))

    #convert to gray
    img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    #create random ellipses to simulate light from the sun
    light = np.zeros(img.shape, dtype=np.uint8)
    #add ellipses
    for j in range(2):
        cent = (np.random.randint(0, img.shape[0]), np.random.randint(0, img.shape[1]))
        axes_length = (np.random.randint(10, 50), np.random.randint(50, 300))
        angle = np.random.randint(0, 360)
        light = cv.ellipse(light, cent, axes_length, angle, 0, 360, 255, -1)
    #create an image of random white and black pixels
    light = cv.blur(light, (100,100))
    noise = np.random.randint(0, 2, size=img.shape, dtype=np.uint8)*255
    light = cv.subtract(light, noise)
    light = 5 * light

    #add light to the image
    img = cv.add(img, light)

    # cv.imshow('light', light)
    # if cv.waitKey(0) == ord('q'):
    #     break

    #blur the image
    img = cv.blur(img, (9,9))

    # cut the top third of the image, let it 640x320
    img = img[int(img.shape[0]/3):,:]
    assert img.shape == (320,640), f'img shape cut = {img.shape}'
    #resize 
    img = cv.resize(img, SIZE)

    #add random tilt
    max_offset = 5
    offset = np.random.randint(-max_offset, max_offset)
    img = np.roll(img, offset, axis=0)
    if offset > 0:
        img[:offset, :] = np.random.randint(0,255)
    elif offset < 0:
        img[offset:, :] = np.random.randint(0,255)

    #reduce contrast
    const = np.random.uniform(0.1,1.2)
    if np.random.uniform() > 5:
        const = const*0.2
    img = 127*(1-const) + img*const
    img = img.astype(np.uint8)

    #add noise 
    std = 150
    std = np.random.randint(1, std)
    noisem = np.random.randint(0, std, img.shape, dtype=np.uint8)
    img = cv.subtract(img, noisem)
    noisep = np.random.randint(0, std, img.shape, dtype=np.uint8)
    img = cv.add(img, noisep)
    #blur 
    img = cv.blur(img, (np.random.randint(1,3),np.random.randint(1,3)))

    #add random brightness
    max_brightness = 50
    brightness = np.random.randint(-max_brightness, max_brightness)
    if brightness > 0:
        img = cv.add(img, brightness)
    elif brightness < 0:
        img = cv.subtract(img, -brightness)
    
    # invert color
    if np.random.uniform(0, 1) > 0.6:
        img = cv.bitwise_not(img)

    return img


cv.namedWindow('img', cv.WINDOW_NORMAL)
# cv.setWindowProperty('img', cv.WND_PROP_FULLSCREEN, cv.WINDOW_FULLSCREEN)

for i in range(500):
    img = load_and_augment_img(i)
    cv.imshow('img', img)
    key = cv.waitKey(100)
    if key == ord('q') or key == 27:
        break
cv.destroyAllWindows()

In [6]:
# DATASET CLASS
class CsvDataset(Dataset):
    def __init__(self, folder, transform=None, max_load=1000, channels=3):
        self.transform = transform
        self.folder = folder
        self.data = []
        self.channels = channels
    
        with open(folder+'/regression_labels.csv', 'r') as f:
            lines = f.read().split('\n')
            lines = lines[0:-1] #remove footer
            # Get x and y values from each line and append to self.data
            max_load = min(max_load, len(lines))
            self.all_imgs = torch.zeros((max_load, SIZE[1], SIZE[0], channels), dtype=torch.uint8)

            labels = []
            cv.namedWindow('img', cv.WINDOW_NORMAL)
            # cv.setWindowProperty('img', cv.WND_PROP_FULLSCREEN, cv.WINDOW_FULLSCREEN)
            for i in tqdm(range(max_load)):
                #img 
                img = load_and_augment_img(i)
                if i < 100:
                    cv.imshow('img', img)
                    cv.waitKey(1)
                    if i == 99:
                        cv.destroyAllWindows()
                
                #add a dimension to the image
                img = img[:, :,np.newaxis]
                self.all_imgs[i] = torch.from_numpy(img)
                
                #label
                line = lines[i]
                sample = line.split(',')
                #keep only info related to the lane, discard distance from stop line 
                sample = [sample[0], sample[1], sample[3]] #e2=lateral error, e3=yaw error point ahead, curvature
                reg_label = np.array([float(s) for s in sample], dtype=np.float32)
                reg_label = reg_label[0:4] #keep it very simple
                self.data.append(reg_label)  

    def __len__(self):
        # The length of the dataset is simply the length of the self.data list
        return len(self.data)

    def __getitem__(self, idx):
        # img = read_image(os.path.join(self.folder, f'img_{idx+1}.png'))
        # img = img.float()
        img = self.all_imgs[idx]
        img = img.permute(2, 0, 1).float()
        value = self.data[idx]
        return img, value

In [7]:
#create dataset #takes a long time but then training is faster
train_dataset = CsvDataset('training_imgs', max_load=max_load, channels=num_channels)
#split dataset into train and val
train_size = int(0.9*len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_size, val_size])

100%|██████████| 142527/142527 [16:48<00:00, 141.37it/s]


In [8]:
#data loader
train_dataloader = DataLoader(train_dataset, batch_size=1000, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=100, shuffle=True)

In [9]:
#test dataloader
sample = next(iter(train_dataloader))
print(sample[0].shape)
print(sample[1].shape)

torch.Size([1000, 1, 32, 32])
torch.Size([1000, 3])


## Training

In [10]:
# TRAINING FUNCTION
def train_epoch(model, dataloader, regr_loss_fn, optimizer, device=device):
    # Set the model to training mode
    model.train() #train
    # Initialize the loss
    err_losses2 = []
    err_losses3 = []
    curv_losses = []

    # Loop over the training batches
    for (input, regr_label) in tqdm(dataloader):
        # Move the input and target data to the selected device
        input, regr_label =input.to(device), regr_label.to(device)
        # Zero the gradients
        optimizer.zero_grad()
        # Compute the output
        output = model(input)

        #regression
        err2 = output[:, 0]
        err3 = output[:, 1]
        curv_out = output[:, 2]

        err2_label = regr_label[:, 0]
        err3_label = regr_label[:, 1]
        curv_label = regr_label[:, 2]

        # Compute the losses
        err_loss2 = 1.0*regr_loss_fn(err2, err2_label)
        err_loss3 = 1.0*regr_loss_fn(err3, err3_label)
        curv_loss = 1.0*regr_loss_fn(curv_out, curv_label)
        loss = err_loss3 + err_loss2 + curv_loss

        # Compute the gradients
        loss.backward()
        # Update the weights
        optimizer.step()

        #batch loss
        err_losses2.append(err_loss2.detach().cpu().numpy())
        err_losses3.append(err_loss3.detach().cpu().numpy())
        curv_losses.append(curv_loss.detach().cpu().numpy())

    # Return the average training loss
    err_loss2 = np.mean(err_losses2)
    err_loss3 = np.mean(err_losses3)
    curv_loss = np.mean(curv_losses)
    return err_loss2, err_loss3, curv_loss

In [16]:
# TRAINING 
#parameters
lr = 0.001
epochs = 9
optimizer = torch.optim.Adam(lane_keeper.parameters(), lr=lr, weight_decay=3e-5)
regr_loss_fn = nn.MSELoss()
for epoch in range(epochs):
    try:
    # if True:
        err_loss2, err_loss3, curv_loss = train_epoch(lane_keeper, train_dataloader, regr_loss_fn, optimizer, device)
        # clear_output(wait=True)
    except Exception as e:
        print(e)
        torch.cuda.empty_cache()
        continue
    print(f"Epoch {epoch+1}/{epochs}")
    print(f"lateral_err_loss2: {err_loss2}")
    print(f"yaw_err_loss3: {err_loss3}")
    print(f"curv_loss: {curv_loss}")
    torch.save(lane_keeper.state_dict(), model_name)

100%|██████████| 129/129 [00:13<00:00,  9.44it/s]


Epoch 1/9
lateral_err_loss2: 0.0025141031946986914
yaw_err_loss3: 0.024964775890111923
curv_loss: 0.0007118975045159459


100%|██████████| 129/129 [00:13<00:00,  9.51it/s]


Epoch 2/9
lateral_err_loss2: 0.0016623620176687837
yaw_err_loss3: 0.018703734502196312
curv_loss: 0.0003223623789381236


100%|██████████| 129/129 [00:13<00:00,  9.55it/s]


Epoch 3/9
lateral_err_loss2: 0.0015338852535933256
yaw_err_loss3: 0.01667984575033188
curv_loss: 0.00027623301139101386


100%|██████████| 129/129 [00:13<00:00,  9.60it/s]


Epoch 4/9
lateral_err_loss2: 0.0014529043110087514
yaw_err_loss3: 0.01545812003314495
curv_loss: 0.0002548804914113134


100%|██████████| 129/129 [00:13<00:00,  9.57it/s]


Epoch 5/9
lateral_err_loss2: 0.0014099818654358387
yaw_err_loss3: 0.014395018108189106
curv_loss: 0.00025119769270531833


100%|██████████| 129/129 [00:13<00:00,  9.55it/s]


Epoch 6/9
lateral_err_loss2: 0.0013662553392350674
yaw_err_loss3: 0.01361039187759161
curv_loss: 0.00024482220760546625


100%|██████████| 129/129 [00:15<00:00,  8.28it/s]


Epoch 7/9
lateral_err_loss2: 0.0013206459116190672
yaw_err_loss3: 0.012738094665110111
curv_loss: 0.0002436410286463797


100%|██████████| 129/129 [00:16<00:00,  8.05it/s]


Epoch 8/9
lateral_err_loss2: 0.0012876620749011636
yaw_err_loss3: 0.012085651978850365
curv_loss: 0.00022658352099824697


100%|██████████| 129/129 [00:13<00:00,  9.31it/s]

Epoch 9/9
lateral_err_loss2: 0.0012522588949650526
yaw_err_loss3: 0.011334914714097977
curv_loss: 0.0002158917486667633





In [17]:
# EVALUATE ON TEST SET (UNSEEN DATA)
lane_keeper.eval()
err_losses3 = []
err_losses2 = []
curv_losses = []
for (input, regr_label) in tqdm(val_dataloader):
    input, regr_label =input.to(device), regr_label.to(device)
    output = lane_keeper(input)

    regr_out = output
    err2 = regr_out[:, 0]
    err3 = regr_out[:, 1]
    curv_out = regr_out[:, 2]

    err2_label = regr_label[:, 0]
    err3_label = regr_label[:, 1]
    curv_label = regr_label[:, 2]

    err_loss3 = 1.0*regr_loss_fn(err3, err3_label)
    err_loss2 = 1.0*regr_loss_fn(err2, err2_label)
    curv_loss = 1.0*regr_loss_fn(curv_out, curv_label)
    loss = err_loss3 + err_loss2 + curv_loss

    err_losses2.append(err_loss2.detach().cpu().numpy())
    err_losses3.append(err_loss3.detach().cpu().numpy())
    curv_losses.append(curv_loss.detach().cpu().numpy())

print(f"lateral_err2_loss: {np.mean(err_losses2)}")
print(f"yaw_err3_loss: {np.mean(err_losses3)}")
print(f"curv_loss: {np.mean(curv_losses)}")

100%|██████████| 143/143 [00:00<00:00, 188.06it/s]

lateral_err2_loss: 0.0013410353567451239
yaw_err3_loss: 0.014180360361933708
curv_loss: 0.00021161524637136608





In [18]:
# CONVERT TO ONNX MODEL FOR OPENCV
lane_keeper.load_state_dict(torch.load(model_name))

#save the model so that opencv can load it
import torch
import torch.onnx
import torchvision
import torchvision.models as models
import sys

device = torch.device('cpu')
lane_keeper.to(device)

# set the model to inference mode
lane_keeper.eval()

# Create some sample input in the shape this model expects 
# This is needed because the convertion forward pass the network once 
dummy_input = torch.randn(1, num_channels, SIZE[1], SIZE[0])
torch.onnx.export(lane_keeper, dummy_input, onnx_lane_keeper_path, verbose=True)

clear_output(wait=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
lane_keeper.to(device)

LaneKeeper(
  (conv): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): Conv2d(16, 32, kernel_size=(5, 5), stride=(2, 2))
    (5): ReLU(inplace=True)
    (6): Conv2d(32, 64, kernel_size=(6, 6), stride=(2, 2))
    (7): ReLU(inplace=True)
  )
  (flat): Flatten(start_dim=1, end_dim=-1)
  (lin): Sequential(
    (0): Linear(in_features=64, out_features=32, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=32, out_features=3, bias=True)
  )
)

In [19]:
# TEST WITH OPENCV
sample_image = "training_imgs/img_1.png"
images = [cv.imread(f"training_imgs/img_{i+1}.png") for i in range(100)]
 
#The Magic:
lk =  cv.dnn.readNetFromONNX(onnx_lane_keeper_path) 

avg_col = (0,0,0) if num_channels == 3 else 0

for i in tqdm(range(100)):
    image = images[i]
    image = cv.resize(image, SIZE)
    if num_channels == 1:
        image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    blob = cv.dnn.blobFromImage(image, 1.0, SIZE, avg_col, swapRB=True, crop=False)
    # print(blob.shape)
    lk.setInput(blob)
    preds = lk.forward()
    # print(f"Predictions: {preds[0][2]}")

print(f"Predictions: {preds}")
print(f"Predictions shape: {preds.shape}")

100%|██████████| 100/100 [00:00<00:00, 4948.97it/s]

Predictions: [[ 0.01731549 -0.02110345 -0.01171526]]
Predictions shape: (1, 3)



