## Imports

In [1]:
#Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px # this is another plotting library for interactive plot

from sklearn.model_selection import train_test_split
from sklearn import metrics, manifold # we will use the metrics and manifold learning modules from scikit-learn
from pathlib import Path # to interact with file paths
from PIL import Image # to interact with images
from tqdm import tqdm # progress bar
from pprint import pprint # pretty print (useful for a more readable print of objects like lists or dictionaries)
from IPython.display import clear_output # to clear the output of the notebook

import torch
import torch.nn as nn
import torchvision
from torchvision.io import read_image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import cv2 as cv
import os
import shutil


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# device = torch.device("cpu")

cuda


In [2]:
# CONTROL
num_channels = 1
SIZE = (128,64)
model_name = 'models/lane_keeper_small.pt'
onnx_lane_keeper_path = "models/lane_keeper_small.onnx"
max_load = 150_000

## Load Pretrained Net and create Detector 

In [3]:
#Model

class LaneKeeper(nn.Module):
    def __init__(self, out_dim=4, channels=1): 
        super().__init__()
        ### Convoluational layers
        self.conv = nn.Sequential( #in = (SIZE)
            nn.Conv2d(channels, 32, kernel_size=3, stride=2), #out = (63,31)
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=3, stride=2), 
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 64, kernel_size=3, stride=2), #out = (31,15)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2), #out = (15,7)
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 64, kernel_size=3, stride=2), #out = (7,3)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),#out = (3,1)
        )
        self.flat = nn.Flatten()
        ### Linear sections
        self.lin = nn.Sequential(
            # First linear layer
            nn.Linear(in_features=3*1*64, out_features=256),
            nn.ReLU(True),
            nn.Linear(in_features=256, out_features=out_dim),
        )
        
    def forward(self, x):
        x = self.conv(x)
        x = self.flat(x)
        x = self.lin(x)
        return x

lane_keeper = LaneKeeper(out_dim=4,channels=num_channels).to(device)

In [4]:

#show the image with opencv
img = cv.imread('tests/test_img.jpg')
img = cv.resize(img, SIZE)
if num_channels == 1:
    img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    img = np.expand_dims(img, axis=2)
#convert to tensor
img = torch.from_numpy(img).float()
img = img.permute(2,0,1)
#add dimension
img = img.unsqueeze(0).to(device)
print(img.shape)

lane_keeper.eval()

# Inference
with torch.no_grad():
    output = lane_keeper(img)
    print(f'out shape: {output.shape}')

torch.Size([1, 1, 64, 128])
out shape: torch.Size([1, 4])


## Loading images and Labels

In [5]:
import cv2 as cv
import numpy as np

def load_and_augment_img(i, folder='training_imgs'):
    img = cv.imread(os.path.join(folder, f'img_{i+1}.png'))

    #convert to gray
    img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    #create random ellipses to simulate light from the sun
    light = np.zeros(img.shape, dtype=np.uint8)
    #add ellipses
    for j in range(2):
        cent = (np.random.randint(0, img.shape[0]), np.random.randint(0, img.shape[1]))
        axes_length = (np.random.randint(10, 50), np.random.randint(50, 300))
        angle = np.random.randint(0, 360)
        light = cv.ellipse(light, cent, axes_length, angle, 0, 360, 255, -1)
    #create an image of random white and black pixels
    light = cv.blur(light, (100,100))
    noise = np.random.randint(0, 2, size=img.shape, dtype=np.uint8)*255
    light = cv.subtract(light, noise)
    light = 5 * light

    #add light to the image
    img = cv.add(img, light)

    # cv.imshow('light', light)
    # if cv.waitKey(0) == ord('q'):
    #     break

    # cut the top third of the image, let it 640x320
    img = img[int(img.shape[0]/3):,:]
    assert img.shape == (320,640), f'img shape cut = {img.shape}'
    #resize 
    img = cv.resize(img, SIZE)

    #add random tilt
    max_offset = 5
    offset = np.random.randint(-max_offset, max_offset)
    img = np.roll(img, offset, axis=0)
    if offset > 0:
        img[:offset, :] = np.random.randint(0,255)
    elif offset < 0:
        img[offset:, :] = np.random.randint(0,255)

    #reduce contrast
    const = np.random.uniform(0.1,1.2)
    if np.random.uniform() > 5:
        const = const*0.2
    img = 127*(1-const) + img*const
    img = img.astype(np.uint8)

    #add noise 
    std = 150
    std = np.random.randint(1, std)
    noisem = np.random.randint(0, std, img.shape, dtype=np.uint8)
    img = cv.subtract(img, noisem)
    noisep = np.random.randint(0, std, img.shape, dtype=np.uint8)
    img = cv.add(img, noisep)
    #blur 
    img = cv.blur(img, (5,5))

    #add random brightness
    max_brightness = 50
    brightness = np.random.randint(-max_brightness, max_brightness)
    if brightness > 0:
        img = cv.add(img, brightness)
    elif brightness < 0:
        img = cv.subtract(img, -brightness)
    
    # invert color
    if np.random.uniform(0, 1) > 0.6:
        img = cv.bitwise_not(img)

    return img


cv.namedWindow('img', cv.WINDOW_NORMAL)
cv.setWindowProperty('img', cv.WND_PROP_FULLSCREEN, cv.WINDOW_FULLSCREEN)

for i in range(500):
    img = load_and_augment_img(i)
    cv.imshow('img', img)
    key = cv.waitKey(1)
    if key == ord('q') or key == 27:
        break
cv.destroyAllWindows()

In [6]:
class CsvDataset(Dataset):
    def __init__(self, folder, transform=None, max_load=1000, channels=3):
        self.transform = transform
        self.folder = folder
        self.data = []
        self.channels = channels
    
        with open(folder+'/regression_labels.csv', 'r') as f:
            lines = f.read().split('\n')
            lines = lines[0:-1] #remove footer
            # Get x and y values from each line and append to self.data
            max_load = min(max_load, len(lines))
            self.all_imgs = torch.zeros((max_load, SIZE[1], SIZE[0], channels), dtype=torch.uint8)

            labels = []
            cv.namedWindow('img', cv.WINDOW_NORMAL)
            cv.setWindowProperty('img', cv.WND_PROP_FULLSCREEN, cv.WINDOW_FULLSCREEN)
            for i in tqdm(range(max_load)):
                #img 
                img = load_and_augment_img(i)
                if i < 1000:
                    cv.imshow('img', img)
                    cv.waitKey(1)
                    if i == 999:
                        cv.destroyAllWindows()
                
                #add a dimension to the image
                img = img[:, :,np.newaxis]
                self.all_imgs[i] = torch.from_numpy(img)
                
                #label
                line = lines[i]
                sample = line.split(',')
                reg_label = np.array([float(s) for s in sample], dtype=np.float32)
                reg_label = reg_label[0:4] #keep it very simple
                self.data.append(reg_label)  

    def __len__(self):
        # The length of the dataset is simply the length of the self.data list
        return len(self.data)

    def __getitem__(self, idx):
        # img = read_image(os.path.join(self.folder, f'img_{idx+1}.png'))
        # img = img.float()
        img = self.all_imgs[idx]
        img = img.permute(2, 0, 1).float()
        value = self.data[idx]
        return img, value

In [7]:
#create dataset #takes a long time but then training is faster
train_dataset = CsvDataset('training_imgs', max_load=max_load, channels=num_channels)
cv.destroyAllWindows()

100%|██████████| 142527/142527 [20:20<00:00, 116.77it/s]


In [8]:
#data loader
train_dataloader = DataLoader(train_dataset, batch_size=100, shuffle=True)

In [9]:
#test dataloader
sample = next(iter(train_dataloader))
print(sample[0].shape)
print(sample[1].shape)

torch.Size([100, 1, 64, 128])
torch.Size([100, 4])


## Training

In [10]:
# Training function
def train_epoch(model, dataloader, regr_loss_fn, optimizer, device=device):
    # Set the model to training mode
    model.train() #train
    # Initialize the loss
    err_losses3 = []
    dist_losses = []
    curv_losses = []

    # Loop over the training batches
    for (input, regr_label) in tqdm(dataloader):
        # Move the input and target data to the selected device
        input, regr_label =input.to(device), regr_label.to(device)
        # Zero the gradients
        optimizer.zero_grad()
        # Compute the output
        output = model(input)

        #regression
        regr_out = output[:, 0:4]
        err2 = regr_out[:, 0]
        err3 = regr_out[:, 1]
        dist_out = regr_out[:, 2]
        curv_out = regr_out[:, 3]

        err2_label = regr_label[:, 0]
        err3_label = regr_label[:, 1]
        dist_label = regr_label[:, 2].float()
        dist_label = torch.where(dist_label < 0.2, torch.abs(1/(torch.abs(dist_label)+0.1))-10./3, torch.zeros_like(dist_out)).float() #consider loss only for small distances
        # print(dist_label)
        curv_label = regr_label[:, 3]

        # Compute the losses
        err_loss3 = 1.0*regr_loss_fn(err3, err3_label)
        err_loss2 = .8*regr_loss_fn(err2, err2_label)
        dist_loss = .5*regr_loss_fn(dist_out, dist_label) 
        curv_loss = .5*regr_loss_fn(curv_out, curv_label)
        loss = err_loss3 + err_loss2 + dist_loss + curv_loss

        # Compute the gradients
        loss.backward()
        # Update the weights
        optimizer.step()

        #batch loss
        err_losses3.append(err_loss3.detach().cpu().numpy())
        dist_losses.append(dist_loss.detach().cpu().numpy())
        curv_losses.append(curv_loss.detach().cpu().numpy())

    # Return the average training loss
    err_loss3 = np.mean(err_losses3)
    dist_loss = np.mean(dist_losses)
    curv_loss = np.mean(curv_losses)
    return err_loss3, dist_loss, curv_loss

In [11]:
# TRAINING 
#parameters
lr = 0.001
epochs = 4
optimizer = torch.optim.Adam(lane_keeper.parameters(), lr=lr, weight_decay=3e-5)
regr_loss_fn = nn.MSELoss()
for epoch in range(epochs):
    # try:
    if True:
        err_loss, dist_loss, curv_loss = train_epoch(lane_keeper, train_dataloader, regr_loss_fn, optimizer, device)
        # clear_output(wait=True)
    # except Exception as e:
    #     print(e)
    #     torch.cuda.empty_cache()
    #     continue
    print(f"Epoch {epoch+1}/{epochs}")
    print(f"err_loss: {err_loss}")
    print(f"dist_loss: {dist_loss}")
    print(f"curv_loss: {curv_loss}")
    torch.save(lane_keeper.state_dict(), model_name)

100%|██████████| 1426/1426 [01:48<00:00, 13.09it/s]


Epoch 1/4
err_loss: 0.01659850776195526
dist_loss: 0.2158520668745041
curv_loss: 0.0003874658141285181


100%|██████████| 1426/1426 [01:48<00:00, 13.15it/s]


Epoch 2/4
err_loss: 0.012646714225411415
dist_loss: 0.17322061955928802
curv_loss: 9.851963113760576e-05


100%|██████████| 1426/1426 [01:49<00:00, 13.05it/s]


Epoch 3/4
err_loss: 0.011448409408330917
dist_loss: 0.15254470705986023
curv_loss: 9.550261893309653e-05


100%|██████████| 1426/1426 [01:49<00:00, 13.06it/s]

Epoch 4/4
err_loss: 0.010769281536340714
dist_loss: 0.1381664127111435
curv_loss: 9.622463403502479e-05





In [12]:
lane_keeper.load_state_dict(torch.load(model_name))

#save the model so that opencv can load it
import torch
import torch.onnx
import torchvision
import torchvision.models as models
import sys

device = torch.device('cpu')
lane_keeper.to(device)

# set the model to inference mode
lane_keeper.eval()

# Create some sample input in the shape this model expects 
# This is needed because the convertion forward pass the network once 
dummy_input = torch.randn(1, num_channels, SIZE[1], SIZE[0])
torch.onnx.export(lane_keeper, dummy_input, onnx_lane_keeper_path, verbose=True)

clear_output(wait=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
lane_keeper.to(device)

LaneKeeper(
  (conv): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2))
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2))
    (8): ReLU()
    (9): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flat): Flatten(start_dim=1, end_dim=-1)
  (lin): Sequential(
    (0): Linear(in_features=192, out_features=256, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=256, out_features=4, bias=True)
  )
)

In [13]:
#test with opencv
sample_image = "training_imgs/img_1.png"
images = [cv.imread(f"training_imgs/img_{i+1}.png") for i in range(100)]
 
#The Magic:
lk =  cv.dnn.readNetFromONNX(onnx_lane_keeper_path) 

avg_col = (0,0,0) if num_channels == 3 else 0

for i in tqdm(range(100)):
    image = images[i]
    image = cv.resize(image, SIZE)
    if num_channels == 1:
        image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    blob = cv.dnn.blobFromImage(image, 1.0, SIZE, avg_col, swapRB=True, crop=False)
    # print(blob.shape)
    lk.setInput(blob)
    preds = lk.forward()
    print(f"Predictions: {preds[0][2]}")

print(f"Predictions: {preds}")
print(f"Predictions shape: {preds.shape}")

100%|██████████| 100/100 [00:00<00:00, 775.15it/s]

Predictions: 0.47976553440093994
Predictions: 0.4476059079170227
Predictions: 0.22929474711418152
Predictions: 0.9418675303459167
Predictions: 0.24700593948364258
Predictions: 0.36024123430252075
Predictions: 0.1485559493303299
Predictions: 0.20769353210926056
Predictions: 0.5378673672676086
Predictions: 0.6656802296638489
Predictions: 0.7005097270011902
Predictions: 0.5399004817008972
Predictions: 0.6779091954231262
Predictions: 0.9275228977203369
Predictions: 1.1031086444854736
Predictions: 1.1169837713241577
Predictions: 0.9599494338035583
Predictions: 1.2297452688217163
Predictions: 0.791668176651001
Predictions: 0.4857800602912903
Predictions: 0.4301396608352661
Predictions: 0.20934231579303741
Predictions: 0.7110320925712585
Predictions: 0.3723095655441284
Predictions: 0.20023608207702637
Predictions: 0.19524705410003662
Predictions: 0.007985636591911316
Predictions: 0.08303899317979813
Predictions: 0.10979293286800385
Predictions: 0.2018427550792694
Predictions: 0.45076358318328




In [14]:
# #get image and label
# cv.namedWindow('img')
# dataloader = DataLoader(train_dataset, batch_size=10000, shuffle=False)
# for i, (imgs, labels) in enumerate(tqdm(dataloader)):
#     #convert img to numpy
#     imgs = imgs.cpu().numpy()
#     for i in range(imgs.shape[0]):
#         img = imgs[i][0]
#         #convert to uint8d
#         img = img.astype(np.uint8)
#         cv.imshow("img", img)
#         cv.waitKey(1)

# cv.destroyAllWindows()