## Imports

In [1]:
#Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px # this is another plotting library for interactive plot

from sklearn.model_selection import train_test_split
from sklearn import metrics, manifold # we will use the metrics and manifold learning modules from scikit-learn
from pathlib import Path # to interact with file paths
from PIL import Image # to interact with images
from tqdm import tqdm # progress bar
from pprint import pprint # pretty print (useful for a more readable print of objects like lists or dictionaries)
from IPython.display import clear_output # to clear the output of the notebook

import torch
import torch.nn as nn
import torchvision
from torchvision.io import read_image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import cv2 as cv
import os
import shutil


# Detect whether CUDA is available and print the detected device (informational only).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# NOTE(review): the detected device is overridden right here, so the cells that
# follow run on the CPU even when the line above printed "cuda" -- presumably
# deliberate for the data-preparation cells; confirm. The training cell
# re-selects the device on its own.
device = torch.device("cpu")

cuda


In [2]:
# CONTROL
# Tunable settings shared by the cells below.
num_channels = 1  # classifier input channels (1 -> images are converted to grayscale)
IMG_SIZE = (480, 640)  # NOTE(review): not referenced by the visible cells -- presumably the raw frame size (rows, cols); confirm
SIZE = (32,32)  # size handed to cv.resize / cv.dnn.blobFromImage, i.e. the network input size
ROI = [30,260,-100,640] #[a,b,c,d] ==> [a:b, c:d]; only referenced by commented-out cropping code
model_name = 'models/trafficlight_classifier_small.pt'  # path of the saved PyTorch state_dict
onnx_path = "models/trafficlight_classifier_small.onnx"  # path of the ONNX export
max_load = 300  # upper bound on the number of generated training samples

In [3]:
# Load the example images.
# Files whose name starts with 'traffic_light' are class 0, every other .png is class 1.
examples_folder = 'traffic_light_imgs'
names = [f for f in os.listdir(examples_folder) if f.endswith('.png')]
classes = ['traffic_light','no_traffic_light']
example_imgs = [cv.imread(os.path.join(examples_folder, name)) for name in names]
example_labels = [0 if name.startswith('traffic_light') else 1 for name in names]
tot_examples = len(example_imgs)
tot_labels = len(classes)

# cv.imread signals an unreadable/corrupt file by returning None instead of
# raising; fail here with the offending names rather than with an opaque
# AttributeError further down.
unreadable = [name for name, loaded in zip(names, example_imgs) if loaded is None]
if unreadable:
    raise IOError(f'could not read example images in {examples_folder}: {unreadable}')

# The per-class sampling done later in the notebook needs at least one example
# of every class.
missing_classes = [classes[k] for k in range(tot_labels) if k not in example_labels]
if missing_classes:
    raise ValueError(f'no example images found in {examples_folder} for classes: {missing_classes}')

# Show every example with its class name drawn on top (Esc stops the preview).
cv.namedWindow('example', cv.WINDOW_NORMAL)
try:
    for i in range(tot_examples):
        img = example_imgs[i].copy()  # draw on a copy so the stored example stays clean
        #add label
        cv.putText(img, classes[example_labels[i]], (10,30), cv.FONT_HERSHEY_SIMPLEX, 1, 255, 2)
        cv.imshow('example', img)
        key = cv.waitKey(1)
        if key == 27:  # Esc
            break
finally:
    # close the window even when the preview loop raises
    cv.destroyAllWindows()

## Define the classifier network and smoke-test it

In [4]:
#Model
class TrafficLightClassifier(nn.Module):
    """Small CNN that maps an image batch to `out_dim` raw class scores.

    The linear head expects exactly 128 flattened features, i.e. a 128x1x1
    feature map, which is what the convolutional stack yields for 32x32 inputs.

    Args:
        out_dim: Number of output scores.
        channels: Number of channels of the input images.
    """

    def __init__(self, out_dim=4, channels=1):
        super().__init__()

        # Feature extractor; the spatial sizes in the comments assume a 32x32 input.
        feature_layers = [
            nn.Conv2d(in_channels=channels, out_channels=32, kernel_size=5, stride=1),  # 32 -> 28
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=2),  # 28 -> 12
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=1),  # 12 -> 11
            nn.BatchNorm2d(num_features=32),
            nn.Dropout(p=0.5),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2),  # 11 -> 5
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2),  # 5 -> 2
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 2 -> 1
            nn.Dropout(p=0.5),
        ]
        self.conv = nn.Sequential(*feature_layers)

        # (N, 128, 1, 1) -> (N, 128)
        self.flat = nn.Flatten()

        # Classification head.
        head_layers = [
            nn.Linear(in_features=128, out_features=256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=256, out_features=out_dim),
        ]
        self.lin = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the raw scores, shape (N, out_dim), for the image batch `x`."""
        return self.lin(self.flat(self.conv(x)))

# Build the network with one output per class and place it on the currently selected device.
classifier = TrafficLightClassifier(out_dim=tot_labels,channels=num_channels).to(device)

In [5]:
# Smoke test: push one image from disk through the network and print the tensor shapes.
frame = cv.resize(cv.imread('tests/test_img.jpg'), SIZE)
if num_channels == 1:
    # grayscale with an explicit trailing channel axis -> (H, W, 1)
    frame = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)[:, :, np.newaxis]

# HWC array -> float CHW tensor -> batch of one, on the selected device
img = torch.from_numpy(frame).float().permute(2, 0, 1).unsqueeze(0).to(device)
print(img.shape)

# Inference: evaluation mode, no gradient tracking
classifier.eval()
with torch.no_grad():
    output = classifier(img)
print(f'out shape: {output.shape}')

torch.Size([1, 1, 32, 32])
out shape: torch.Size([1, 2])


## Loading images and Labels

In [6]:
import cv2 as cv
import numpy as np
from numpy.random import randint

def load_and_augment_img(i, folder='training_imgs', example_index=0, example_imgs=example_imgs):
    """Return one randomly augmented grayscale image of the requested class.

    An example whose label equals `example_index` is drawn uniformly at random,
    converted to grayscale and perturbed with a small random rotation, a random
    shift, subtractive and additive uniform noise, a box blur and a global
    brightness offset. The result is resized to `SIZE`.

    Other augmentations that were experimented with (random shape overlays,
    compositing an example onto a background, sun-glare ellipses, colour
    inversion, histogram equalisation) are not applied.

    Args:
        i: Unused; kept so existing callers keep working.
        folder: Unused; kept so existing callers keep working.
        example_index: Class label to sample; compared against the
            module-level `example_labels`.
        example_imgs: BGR example images, index-aligned with `example_labels`.

    Returns:
        2-D uint8 array of shape (SIZE[1], SIZE[0]).

    Raises:
        ValueError: If no example carries the label `example_index`, or if the
            selected example is None (image that failed to load).
    """
    # Choose directly among the examples of the requested class. Drawing random
    # indices until the label matches would never terminate for a label that
    # has no example.
    candidates = [k for k in range(tot_examples) if example_labels[k] == example_index]
    if not candidates:
        raise ValueError(f'no example image with label {example_index}')
    img_index = candidates[randint(0, len(candidates))]

    source = example_imgs[img_index]
    if source is None:
        raise ValueError(f'example image {img_index} is None (failed to load)')

    #convert to gray (cvtColor allocates a new array, so the stored example is untouched)
    img = cv.cvtColor(source, cv.COLOR_BGR2GRAY)

    ## WHOLE IMAGE AUGMENTATION
    rows, cols = img.shape[0], img.shape[1]

    #random rotation around the image centre
    # (randint's upper bound is exclusive, so the angle lies in [-5, 4] degrees)
    angle = randint(-5,5)
    M = cv.getRotationMatrix2D((cols/2, rows/2), angle, 1)
    img = cv.warpAffine(img, M, (cols, rows))

    #random shift, each axis in [-10, 9] pixels
    shift_x = randint(-10,10)
    shift_y = randint(-10,10)
    M = np.float32([[1,0,shift_x],[0,1,shift_y]])
    img = cv.warpAffine(img, M, (cols, rows))

    #add noise: a random amplitude is drawn first, then one noise image is
    #subtracted and another added (cv.subtract / cv.add saturate instead of wrapping)
    std = 80
    std = randint(1, std)
    noisem = randint(0, std, img.shape, dtype=np.uint8)
    img = cv.subtract(img, noisem)
    noisep = randint(0, std, img.shape, dtype=np.uint8)
    img = cv.add(img, noisep)

    #blur with a random box kernel, each side in [3, 8]
    img = cv.blur(img, (randint(3,9),randint(3,9)))

    #add random brightness offset in [-50, 49]
    max_brightness = 50
    brightness = randint(-max_brightness, max_brightness)
    if brightness > 0:
        img = cv.add(img, brightness)
    elif brightness < 0:
        img = cv.subtract(img, -brightness)

    #resize to the network input size
    img = cv.resize(img, SIZE)

    return img

# Preview up to 500 augmented samples, alternating between the classes.
# Press 'q' or Esc to stop early.
cv.namedWindow('img', cv.WINDOW_NORMAL)

print(f'tot labels = {tot_labels}')

example_index = 0
try:
    for i in range(500):
        img = load_and_augment_img(i, example_index=example_index)
        cv.imshow('img', img)
        key = cv.waitKey(1)
        if key == ord('q') or key == 27:
            break
        # cycle through the class labels
        example_index = (example_index + 1) % tot_labels
finally:
    # close the preview window even if augmentation raises
    cv.destroyAllWindows()

tot labels = 2


In [7]:
class CsvDataset(Dataset):
    """In-memory dataset of augmented example images and their class index.

    All samples are generated once in the constructor with
    `load_and_augment_img`, cycling through the class labels, and stored in a
    single uint8 tensor. `<folder>/regression_labels.csv` is only used to
    bound the number of samples by its line count.

    Args:
        folder: Directory containing `regression_labels.csv`.
        example_imgs: Example images forwarded to `load_and_augment_img`.
        transform: Optional callable applied in `__getitem__` to the float
            (channels, H, W) image tensor.
        max_load: Upper bound on the number of generated samples.
        channels: Size of the stored channel axis; the single-channel image is
            broadcast over it.
    """

    def __init__(self, folder, example_imgs=example_imgs, transform=None, max_load=1000, channels=3):
        self.transform = transform
        self.folder = folder
        self.data = []
        self.channels = channels

        # Only the number of lines is needed, so read the file and close it
        # before the (slow) generation loop starts.
        with open(folder+'/regression_labels.csv', 'r') as f:
            lines = f.read().split('\n')
        lines = lines[0:-1] #remove footer
        max_load = min(max_load, len(lines))
        self.all_imgs = torch.zeros((max_load, SIZE[1], SIZE[0], channels), dtype=torch.uint8)

        # number of generated samples that are also shown in the preview window
        max_show = min(500, max_load)

        cv.namedWindow('img', cv.WINDOW_NORMAL)
        cv.setWindowProperty('img', cv.WND_PROP_FULLSCREEN, cv.WINDOW_FULLSCREEN)

        example_index = 0
        try:
            for i in tqdm(range(max_load)):
                #img
                img = load_and_augment_img(i, example_index=example_index, example_imgs=example_imgs)

                if i < max_show:
                    cv.imshow('img', img)
                    cv.waitKey(1)
                    if i == (max_show-1):
                        # last previewed sample: release the full-screen window early
                        cv.destroyAllWindows()

                #add a channel axis: (H, W) -> (H, W, 1)
                img = img[:, :,np.newaxis]
                self.all_imgs[i] = torch.from_numpy(img)

                #label (it is just the index of the example)
                self.data.append(example_index)
                example_index = (example_index + 1) % tot_labels
        finally:
            # also covers max_load == 0 and errors raised while generating samples
            cv.destroyAllWindows()

    def __len__(self):
        """Return the number of generated samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return `(image, label)`: a float (channels, H, W) tensor and its class index."""
        img = self.all_imgs[idx]
        # stored as (H, W, C) uint8 -> (C, H, W) float
        img = img.permute(2, 0, 1).float()
        if self.transform is not None:
            # the constructor accepts a transform, so honour it here
            img = self.transform(img)
        value = self.data[idx]
        return img, value

In [8]:
#create dataset #takes a long time but then training is faster
# Every sample is generated and augmented up front and kept in memory by the dataset.
train_dataset = CsvDataset('training_imgs', max_load=max_load, channels=num_channels)
# make sure the preview window opened by the dataset constructor is closed
cv.destroyAllWindows()

100%|██████████| 300/300 [00:04<00:00, 70.97it/s]


In [9]:
#data loader
# Shuffled mini-batches of 100 samples drawn from the in-memory dataset.
train_dataloader = DataLoader(train_dataset, batch_size=100, shuffle=True)

In [10]:
#test dataloader
# Pull a single batch and print the collated shapes:
# images are (batch, channels, H, W), labels are (batch,).
sample = next(iter(train_dataloader))
print(sample[0].shape)
print(sample[1].shape)
# Optional visual check of the batch (disabled):
# cv.namedWindow('img', cv.WINDOW_NORMAL)
# for i in range(100):
#     img = sample[0][i].permute(1, 2, 0).numpy().astype(np.uint8)
#     label = sample[1][i]
#     # print(img.shape)
#     cv.imshow(names[label], img)
#     k = cv.waitKey(0)
#     cv.destroyAllWindows()
#     if k == ord('q') or k == 27:
#         break

torch.Size([100, 1, 32, 32])
torch.Size([100])


## Training

In [11]:
# Training function
def train_epoch(model, dataloader, loss_fn, optimizer, device=device):
    """Run one optimisation pass over `dataloader` and return the mean batch loss.

    Args:
        model: Network to train; it is switched to training mode.
        dataloader: Iterable yielding `(images, labels)` batches, where
            `labels` holds integer class indices.
        loss_fn: Callable `loss_fn(output, target)`; `target` is the one-hot
            encoding of the labels with the same dtype as `output`.
        optimizer: Optimizer updating the parameters of `model`.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Mean of the per-batch loss values (NumPy scalar).
    """
    # Set the model to training mode
    model.train()
    # Per-batch loss values
    losses = []

    # Loop over the training batches
    for (images, label) in tqdm(dataloader):
        # Move the batch to the selected device
        images = images.to(device)
        label = torch.as_tensor(label, device=device).long()
        # Zero the gradients
        optimizer.zero_grad()
        # Compute the output
        output = model(images)

        # Convert the labels to one-hot vectors. The width is taken from the
        # model output, so the function does not depend on a notebook global,
        # and the target is created directly on `device`.
        target = torch.nn.functional.one_hot(label, num_classes=output.shape[1]).to(output.dtype)

        #Loss
        loss = loss_fn(output, target)

        # Compute the gradients
        loss.backward()
        # Update the weights
        optimizer.step()

        #batch loss
        losses.append(loss.detach().cpu().numpy())

    # Return the average training loss
    loss = np.mean(losses)
    return loss

In [12]:
# TRAINING
# Re-select the device here: the earlier cells pin it to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
classifier.to(device)
#parameters
lr = 0.001
epochs = 3
optimizer = torch.optim.Adam(classifier.parameters(), lr=lr, weight_decay=3e-5) #3e-5
loss_fn = torch.nn.CrossEntropyLoss()

# torch.save does not create missing directories, so make sure the target exists
os.makedirs(os.path.dirname(model_name) or '.', exist_ok=True)

for epoch in range(epochs):
    loss = train_epoch(classifier, train_dataloader, loss_fn, optimizer, device)
    # replace the progress bar of the finished epoch with a short summary
    clear_output(wait=True)
    print(f"Epoch {epoch+1}/{epochs}")
    print(f"loss: {loss}")
    # checkpoint after every epoch
    torch.save(classifier.state_dict(), model_name)

Epoch 3/3
loss: 0.5462525486946106


In [13]:
#save the model so that opencv can load it
import torch
import torch.onnx
import torchvision
import torchvision.models as models
import sys

device = torch.device('cpu')

# The checkpoint may have been written from a CUDA model; map_location makes
# loading work on CPU-only machines as well.
classifier.load_state_dict(torch.load(model_name, map_location=device))
classifier.to(device)

# set the model to inference mode
classifier.eval()

# Create some sample input in the shape this model expects
# This is needed because the conversion runs one forward pass through the network
dummy_input = torch.randn(1, num_channels, SIZE[1], SIZE[0])

# make sure the output directory exists before writing the ONNX file
os.makedirs(os.path.dirname(onnx_path) or '.', exist_ok=True)
torch.onnx.export(classifier, dummy_input, onnx_path, verbose=True)

# drop the verbose export log from the cell output
clear_output(wait=False)

In [18]:
#test with opencv
# Evaluate the exported ONNX model through OpenCV's DNN module on freshly
# augmented images. Press 'q' or Esc to stop early.
num_test_imgs = 1000
per_class = num_test_imgs // 2
images0 = [load_and_augment_img(0, example_index=0) for _ in range(per_class)]
images1 = [load_and_augment_img(0, example_index=1) for _ in range(per_class)]

# interleave the two classes: label 0, label 1, label 0, ...
images = []
labels = []
for img0, img1 in zip(images0, images1):
    images.extend((img0, img1))
    labels.extend((0, 1))

# the exported network was built for 32x32 inputs; check once, before any inference
assert SIZE == (32, 32)

#The Magic:
lk =  cv.dnn.readNetFromONNX(onnx_path)

cv.namedWindow('img', cv.WINDOW_NORMAL)

correct = 0
evaluated = 0  # number of images actually classified (the loop can be aborted)

try:
    for i, image in enumerate(images):
        image = cv.resize(image, SIZE)
        #add extra noise and a light blur on top of the augmentation
        std = 80
        std = randint(1, std)
        noisem = randint(0, std, image.shape, dtype=np.uint8)
        image = cv.subtract(image, noisem)
        noisep = randint(0, std, image.shape, dtype=np.uint8)
        image = cv.add(image, noisep)
        image = cv.blur(image, (3,3))

        blob = cv.dnn.blobFromImage(image, 1.0, SIZE, 0)
        lk.setInput(blob)
        preds = lk.forward()
        preds = preds[0]
        preds = preds.argmax()
        evaluated += 1

        #predicted label on the first text line, true label on the second
        cv.putText(image, classes[preds], (5, 10), cv.FONT_HERSHEY_SIMPLEX, 0.3, 0, 1)
        cv.putText(image, classes[labels[i]], (5, 20), cv.FONT_HERSHEY_SIMPLEX, 0.3, 0, 1)

        if preds == labels[i]:
            correct += 1

        cv.imshow("img", image)
        k = cv.waitKey(1)
        if k == ord('q') or k == 27:
            break
finally:
    cv.destroyAllWindows()

#accuracy over the images that were actually evaluated, so an aborted run or an
#odd num_test_imgs does not skew the figure
accuracy = (correct/evaluated)*100. if evaluated else float('nan')
print(f"Accuracy: {accuracy}%")

Accuracy: 78.0%


In [15]:
# #get image and label
# cv.namedWindow('img')
# dataloader = DataLoader(train_dataset, batch_size=10000, shuffle=False)
# for i, (imgs, labels) in enumerate(tqdm(dataloader)):
#     #convert img to numpy
#     imgs = imgs.cpu().numpy()
#     for i in range(imgs.shape[0]):
#         img = imgs[i][0]
#         #convert to uint8d
#         img = img.astype(np.uint8)
#         cv.imshow("img", img)
#         cv.waitKey(1)

# cv.destroyAllWindows()