## Imports

In [None]:
#Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px # this is another plotting library for interactive plot

from sklearn.model_selection import train_test_split
from sklearn import metrics, manifold # we will use the metrics and manifold learning modules from scikit-learn
from pathlib import Path # to interact with file paths
from PIL import Image # to interact with images
from tqdm import tqdm # progress bar
from pprint import pprint # pretty print (useful for a more readable print of objects like lists or dictionaries)
from IPython.display import clear_output # to clear the output of the notebook

import torch
import torch.nn as nn
import torchvision
from torchvision.io import read_image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import cv2 as cv
import os
import shutil


# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)
# device = torch.device("cpu")

## Load Pretrained Net and create Detector 

In [None]:
# Model
# Pretrained YOLOv5 checkpoint fetched through torch.hub (needs network access on first use).
# The alternatives that were tried are kept below for reference.
# model = torch.hub.load('ultralytics/yolov5', 'yolov5n', pretrained=True) #faster but less accurate
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True) 
# model = torch.hub.load('ultralytics/yolov5', 'yolov5n6', pretrained=True) 
# model = torch.hub.load('ultralytics/yolov3', 'yolov3') #bad 
model.to(device)

# Layer layout reference: https://github.com/ultralytics/yolov5/issues/1314

# NOTE(review): an earlier note here said the backbone is layers 0->9, and
# backbone_layers names exactly those ten layers, but the Sequential built
# below takes eleven modules (indices 0..10). Confirm which one is intended.
backbone_layers = ['model.' + str(layer_idx) for layer_idx in range(10)]

# Re-use the first eleven modules of the loaded network as a plain Sequential.
backbone = nn.Sequential(*[model.model.model.model[layer_idx] for layer_idx in range(11)])

class FeatureExtractor(nn.Module):
    """Turn an image batch into one flat feature vector per sample.

    The input goes through ``backbone``, then a 3x3 max-pool with stride 2,
    and is finally flattened from dimension 1 onwards.

    Args:
        backbone: module applied first to the input batch.
    """

    def __init__(self, backbone): 
        super().__init__()
        # The attribute names below end up in the state_dict keys.
        self.pretrained = backbone
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2)
        self.flatten = nn.Flatten(start_dim=1)

    def forward(self, x):
        """Return the flattened, pooled backbone output for batch ``x``."""
        return self.flatten(self.pool(self.pretrained(x)))

class Detector(nn.Module):
    """Two-layer fully connected head: Linear -> ReLU -> Linear.

    Args:
        add_inputs: number of extra values appended to the feature vector.
        regr_out: number of regression outputs.
        class_out: number of classification outputs.
        features: length of the feature vector coming from the extractor.

    The input width is ``features + add_inputs`` and the output width is
    ``regr_out + class_out`` (regression columns first is the convention used
    by the training code in this file).
    """

    def __init__(self, add_inputs=4, regr_out=22, class_out=13, features=76800): #(default for 640x320)
        super().__init__()
        in_width = features + add_inputs
        out_width = regr_out + class_out
        hidden_width = 1024
        # Kept as a Sequential named ``lin`` so checkpoint keys stay lin.0.* / lin.2.*
        self.lin = nn.Sequential(
            nn.Linear(in_width, hidden_width),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_width, out_width),
        )

    def forward(self, x):
        """Map a ``(batch, features + add_inputs)`` tensor to the raw outputs."""
        return self.lin(x)

# Feature extractor built on top of the YOLOv5 layers selected above
feature_extractor = FeatureExtractor(backbone)
# Detector head: 3072 extracted features + 4 additional inputs -> 8 regression + 15 classification outputs
detector = Detector(features=3072, add_inputs=4, regr_out=8, class_out=15) #v1=20480 v2=3072

# Freeze the backbone so the optimizer never updates the pretrained weights
for backbone_param in feature_extractor.pretrained.parameters():
    backbone_param.requires_grad = False

feature_extractor.to(device)
detector.to(device)

# #check
# for param_name, param in detector.named_parameters():
#     print('%s \t- requires_grad=%s' % (param_name, param.requires_grad))

In [None]:
# test backbone
# Smoke test: push one image through the feature extractor and the detector
# and print the intermediate tensor shapes.
img = cv.imread('tests/test_img.jpg')
if img is None:
    # cv.imread reports a missing/unreadable file by returning None, not by raising
    raise FileNotFoundError("could not read 'tests/test_img.jpg'")
# resize to 320 x 240 (cv.resize takes (width, height))
img = cv.resize(img, (320, 240))
# HWC array -> CHW float tensor
img = torch.from_numpy(img).float().permute(2, 0, 1)
# add the batch dimension
img = img.unsqueeze(0).to(device)
print(img.shape)

# Both networks are only evaluated here; the dataset code also runs the
# extractor in eval mode, so do the same for comparable shapes/outputs.
feature_extractor.eval()
detector.eval()

# Inference
with torch.no_grad():
    data = torch.zeros(1, 4).to(device)  # placeholder for the 4 additional inputs
    feat = feature_extractor(img)
    print(f'feat shape: {feat.shape}')
    # named det_input (not ``input``) so the builtin is not shadowed at module level
    det_input = torch.cat((feat, data), dim=1)
    print(f'input shape: {det_input.shape}')
    out = detector(det_input) 
    print(f'out shape: {out.shape}')
    # recorded feature sizes: (320, 240)->torch.Size([1, 20480]) v2-> torch.Size([1, 3076])
    #                         (640, 480)->torch.Size([1, 76800])





## Loading images and Labels

In [None]:
# #augmentation tests
# import cv2 as cv
# import numpy as np

# idx=0
# img = cv.imread(f'training/img_{idx+1}.png')
# #add noise with cv2
# std = 3
# noisep = np.random.normal(0, std, img.shape)
# noisep = np.uint8(noisep)
# noisem = np.random.normal(0, std, img.shape)
# noisem = np.uint8(noisem)
# img = cv.subtract(img, noisem)
# img = cv.add(img, noisep)
# img = cv.blur(img, (3,3))
# img = cv.resize(img, (320, 240))
# cv.imshow('img', img)
# cv.waitKey(0)
# cv.destroyAllWindows()

In [None]:
#dataset
class AuxImgDataset(Dataset):
    """Serve ``img_1.png`` ... ``img_<num_images>.png`` from ``folder`` as float tensors.

    Args:
        folder: directory containing the numbered PNG files.
        feature_extractor: stored on the instance; not used by this class itself.
        num_images: value reported by ``len()``.
    """

    def __init__(self, folder, feature_extractor, num_images):
        self.num_images = num_images
        self.feature_extractor = feature_extractor
        self.folder = folder

    def __len__(self):
        return self.num_images

    def __getitem__(self, idx):
        # Files are numbered from 1 while dataset indices start at 0.
        img_path = os.path.join(self.folder, f'img_{idx+1}.png')
        return read_image(img_path).float()

def analyze_class_labels(class_labels):
    """Count and print how often each class occurs in ``class_labels``.

    Each label is a tensor read as three one-hot groups: elements 0-3
    (state), 4-7 (next state) and 8-14 (sign). The whole label must sum to 3
    and every group to 1, otherwise an AssertionError is raised.

    Returns:
        ``(state_cnt, next_state_cnt, sign_cnt)`` as lists of ints.
    """
    state_cnt = [0] * 4
    next_state_cnt = [0] * 4
    sign_cnt = [0] * 7
    # (slice into the label, counter that slice feeds), in evaluation order
    groups = (
        (slice(0, 4), state_cnt),
        (slice(4, 8), next_state_cnt),
        (slice(8, 15), sign_cnt),
    )
    for label_tensor in tqdm(class_labels):
        #torch->numpy
        label_vec = label_tensor.numpy()
        assert np.sum(label_vec) == 3, f'{label_vec}'
        for span, counter in groups:
            one_hot = label_vec[span]
            assert np.sum(one_hot) == 1, f'{one_hot}'
            counter[np.argmax(one_hot)] += 1

    # Row captions are padded so the numbers line up in the printed report.
    captions = ('Road :        ', 'Intersection: ', 'Roundabout:   ', 'Junction:     ')
    print(f'State counts: {state_cnt}   , tot = {np.sum(state_cnt)}')
    for caption, cnt in zip(captions, state_cnt):
        print(f'{caption}{cnt},  {cnt/np.sum(state_cnt):.2f}')
    print(f'Next state counts: {next_state_cnt}')
    for caption, cnt in zip(captions, next_state_cnt):
        print(f'Next {caption}{cnt},  {cnt/np.sum(next_state_cnt):.2f}')
    print(f'Sign counts: {sign_cnt}')

    return state_cnt, next_state_cnt, sign_cnt

def analyze_additional_inputs(additional_inputs):
    """Count and print how often each action occurs in ``additional_inputs``.

    Every entry is a tensor that must sum to 1; its first four elements are
    read as a one-hot action vector (printed as Straight / Left / Right /
    Continue). An AssertionError is raised when either sum is not 1.

    Returns:
        ``action_cnt``, a list with the four counts.
    """
    action_cnt = [0] * 4
    for input_tensor in tqdm(additional_inputs):
        input_vec = input_tensor.numpy()
        assert np.sum(input_vec) == 1, f'{input_vec}'
        action_lab = input_vec[:4]
        assert np.sum(action_lab) == 1, f'{action_lab}'
        action_cnt[np.argmax(action_lab)] += 1

    print(f'Action counts: {action_cnt},  tot = {np.sum(action_cnt)}')
    captions = ('Straight:    ', 'Left:        ', 'Right:       ', 'Continue:    ')
    for caption, cnt in zip(captions, action_cnt):
        print(f'{caption}{cnt},  {cnt/np.sum(action_cnt):.2f}')

    return action_cnt


class CsvDataset(Dataset):
    """Pre-computed training set for the detector head.

    Construction does all the heavy work once:

    1. read ``classification_labels.csv``, ``input_data.csv`` and
       ``regression_labels.csv`` from ``folder`` (one sample per line,
       comma-separated floats);
    2. unless ``skip_img_load`` is set, wipe the PNGs in the ``training``
       folder and regenerate them from ``folder`` with noise, occasional blur
       and a resize to 320x240;
    3. if ``equalize`` is set, re-balance the samples on the "next state"
       one-hot group (label elements 4-7) by cycling over the data;
    4. run every image of the ``training`` folder through
       ``feature_extractor`` and cache
       ``(features ++ input_data, regression_label, class_label)`` tuples in
       ``self.data``.

    Args:
        folder: directory holding the three CSV files and ``img_<n>.png``
            (numbered from 1).
        feature_extractor: module that maps an image batch to one flat
            feature vector per image.
        transform: stored on the instance; never applied by this class.
        max_load: maximum number of samples to read.
        num_images: stored as ``self.tot_imgs``; not used otherwise.
        equalize: re-balance the samples as described above.
        skip_img_load: reuse whatever PNGs are already in ``training``.

    Raises:
        ValueError: a CSV file has fewer rows than the classification file.
        FileNotFoundError: a source image cannot be read during augmentation.
    """

    # Folder the images are written to (augmentation / equalization) and read
    # back from for feature extraction.
    _TRAIN_FOLDER = 'training'

    def __init__(self, folder, feature_extractor, transform=None, max_load=1000, num_images=5000, equalize=False, skip_img_load=False):
        self.transform = transform
        self.folder = folder
        self.data = []
        self.tot_imgs = num_images

        class_labels = self._read_rows(folder+'/classification_labels.csv', max_load)
        # The classification file decides how many samples are used everywhere else.
        self.max_load = len(class_labels)
        input_data = self._read_rows(folder+'/input_data.csv', self.max_load, exact=True)
        regr_labels = self._read_rows(folder+'/regression_labels.csv', self.max_load, exact=True)

        if not skip_img_load:
            self._write_noisy_images()

        if equalize:
            class_labels, input_data, regr_labels = self._equalize(class_labels, input_data, regr_labels)

        tot_samples = len(class_labels)
        features = self._extract_features(self._TRAIN_FOLDER, feature_extractor, tot_samples)

        #add everything to self.data
        for i in tqdm(range(tot_samples)):
            # detector input = feature vector followed by the additional inputs
            det_input = torch.cat((features[i], input_data[i]), dim=0)
            self.data.append((det_input, regr_labels[i], class_labels[i]))

    @staticmethod
    def _read_rows(path, limit, exact=False):
        """Parse at most ``limit`` comma-separated float rows of ``path`` into float32 tensors."""
        with open(path, 'r') as f:
            lines = f.read().split('\n')
        lines = lines[0:-1]  # drop the element that follows the final newline
        if exact and len(lines) < limit:
            # Fail with a readable message instead of an IndexError deep in the loop.
            raise ValueError(f'{path} has {len(lines)} rows, expected at least {limit}')
        count = max(0, min(limit, len(lines)))
        rows = []
        for line in tqdm(lines[:count]):
            values = np.array([float(s) for s in line.split(',')])
            rows.append(torch.from_numpy(values).float())
        return rows

    @staticmethod
    def _uint8_noise(shape, std):
        """Return the non-negative part of zero-mean Gaussian noise as uint8."""
        # Clip before casting: converting negative floats straight to uint8 is
        # an out-of-range cast whose result is platform dependent (it typically
        # wraps to ~255 and saturates the image in cv.add / cv.subtract).
        return np.clip(np.random.normal(0, std, shape), 0, 255).astype(np.uint8)

    def _write_noisy_images(self, std=3):
        """Regenerate the training PNGs from ``self.folder`` with noise, random blur and resize."""
        train_folder = self._TRAIN_FOLDER
        # NOTE: this deletes every PNG in the training folder, so ``self.folder``
        # must be a different directory.
        for file in os.listdir(train_folder):
            if file.endswith(".png"):
                os.remove(os.path.join(train_folder, file))
        for i in tqdm(range(self.max_load)):
            src = self.folder+f'/img_{i+1}.png'
            img = cv.imread(src)
            if img is None:
                # cv.imread returns None for a missing/unreadable file
                raise FileNotFoundError(f'could not read image {src}')
            # One noise field is added and an independent one subtracted;
            # both cv operations saturate at the uint8 range.
            noisep = self._uint8_noise(img.shape, std)
            noisem = self._uint8_noise(img.shape, std)
            img = cv.subtract(img, noisem)
            img = cv.add(img, noisep)
            if np.random.uniform(0, 1) > 0.7:
                img = cv.blur(img, (5,5))
            img = cv.resize(img, (320, 240))
            cv.imwrite(train_folder+f'/img_{i+1}.png', img)

    def _equalize(self, class_labels, input_data, regr_labels):
        """Re-balance the samples on the next-state class and copy their images.

        Cycles over the samples until every next-state class that occurs at
        all has as many samples as the most frequent one. Returns the three
        re-balanced lists ``(class_labels, input_data, regr_labels)``.
        """
        print('Before Equalization: ')
        _, next_cnt, _ = analyze_class_labels(class_labels) #print useful info about the labels
        analyze_additional_inputs(input_data) #print useful info about the inputs

        # Every class that is present is brought up to the size of the largest
        # one; absent classes stay at 0 so the loop below can terminate.
        max_next = int(np.max(next_cnt))
        new_next_samples = [max_next if cnt > 0 else 0 for cnt in next_cnt]
        print(f'new_next_samples: {new_next_samples}, tot = {np.sum(new_next_samples)}')

        final_class_labels = []
        final_input_data = []
        final_regr_labels = []
        idx = 0
        final_idx = 0
        seen = [0] * len(new_next_samples)  # samples visited so far, per next-state class
        while not all(s >= target for s, target in zip(seen, new_next_samples)):
            next_state = class_labels[idx][4:8]
            assert np.sum(next_state.numpy()) == 1, f'{next_state}'
            assert next_state.shape == (4,)
            next_idx = np.argmax(next_state)
            seen[next_idx] += 1
            # ``<=`` because the counter was already incremented for this
            # sample; ``<`` would keep one sample too few for every class.
            if seen[next_idx] <= new_next_samples[next_idx]:
                final_class_labels.append(class_labels[idx])
                final_input_data.append(input_data[idx])
                final_regr_labels.append(regr_labels[idx])
                # NOTE(review): this copies the untouched source image, so it
                # replaces any augmented/resized file of the same name written
                # earlier - confirm that is intended.
                shutil.copyfile(self.folder+f'/img_{idx+1}.png', f'{self._TRAIN_FOLDER}/img_{final_idx+1}.png')
                final_idx += 1

            idx = (idx+1) % len(class_labels) #wrap around to oversample rare classes
            if idx % 50000 == 0:
                print(f'{idx}, {final_idx}')

        print('After Equalization: ')
        analyze_class_labels(final_class_labels) #print useful info about the labels
        analyze_additional_inputs(final_input_data) #print useful info about the inputs

        print(f'Final idx = {final_idx}')
        return final_class_labels, final_input_data, final_regr_labels

    @staticmethod
    def _extract_features(img_folder, feature_extractor, num_samples):
        """Return one CPU feature tensor per image ``img_1.png`` .. ``img_<num_samples>.png``."""
        img_dataset = AuxImgDataset(img_folder, feature_extractor, num_samples)
        # shuffle=False keeps features aligned with the label lists
        img_loader = DataLoader(img_dataset, batch_size=128, shuffle=False)
        feature_extractor.eval()
        features = []
        with torch.no_grad():
            for imgs in tqdm(img_loader):
                feat = feature_extractor(imgs.to(device)).cpu()
                features.extend(feat.unbind(0))
        return features

    def __len__(self):
        # The length of the dataset is simply the length of the self.data list
        return len(self.data)

    def __getitem__(self, idx):
        # Our sample is the element idx of the list self.data
        return self.data[idx]

In [None]:
# class CsvDataset(Dataset):
#     def __init__(self, folder, feature_extractor, transform=None):
#         self.transform = transform
#         self.folder = folder
#         self.data = []
#         class_labels = []
#         with open(folder+'/classification_labels.csv', 'r') as f:
#             lines = f.read().split('\n')
#             lines = lines[0:-1] #remove footer
#             # Get x and y values from each line and append to self.data
#             labels = []
#             for i in tqdm(range(len(lines))):
#                 line = lines[i]
#                 sample = line.split(',')
#                 #convert to float
#                 label = np.array([float(s) for s in sample])
#                 #convert to tensor
#                 label = torch.from_numpy(label).float()
#                 # img = img.unsqueeze(0)
#                 class_labels.append(label)

#         input_data = []
#         with open(folder+'/input_data.csv', 'r') as f:
#             lines = f.read().split('\n')
#             lines = lines[0:-1] #remove footer
#             # Get x and y values from each line and append to self.data
#             labels = []
#             for i in tqdm(range(len(lines))):
#                 line = lines[i]
#                 sample = line.split(',')
#                 #convert to float
#                 label = np.array([float(s) for s in sample])
#                 #convert to tensor
#                 label = torch.from_numpy(label).float()
#                 # img = img.unsqueeze(0)
#                 input_data.append(label)

#         #load labels and convert images in features
#         feature_extractor.eval()

#         with open(folder+'/regression_labels.csv', 'r') as f:
#             lines = f.read().split('\n')
#             lines = lines[0:-1] #remove footer
#             # Get x and y values from each line and append to self.data
#             labels = []
#             for i in tqdm(range(len(lines))):
#                 line = lines[i]
#                 sample = line.split(',')
#                 #convert to float
#                 reg_label = np.array([float(s) for s in sample])
#                 #convert to tensor
#                 reg_label = torch.from_numpy(reg_label).float()
#                 img = cv.imread(folder+f'/img_{i+1}.png')
#                 img = cv.resize(img, (320, 240))
#                 img = torch.from_numpy(img).float().permute(2, 0, 1)
#                 img = img.unsqueeze(0).to(device)
#                 #create feature vector 
#                 feat = feature_extractor(img)
#                 # print(f'feat shape: {feat.shape}')
#                 feat = feat[0].cpu()
#                 #concatenate feature vector and additional input data (input for the linear layer)
#                 input = torch.cat((feat, input_data[i]), dim=0)
#                 # print(f'input shape: {input.shape}')
#                 #append to data
#                 self.data.append((input, reg_label, class_labels[i]))  

#     def __len__(self):
#         # The length of the dataset is simply the length of the self.data list
#         return len(self.data)

#     def __getitem__(self, idx):
#         # Our sample is the element idx of the list self.data
#         sample = self.data[idx]
#         return sample

In [None]:
# Build the dataset. Slow, because every image is pushed through the feature
# extractor up front, but it makes the training epochs themselves fast.
# skip_img_load=True reuses the PNGs that are already in the 'training' folder.
train_dataset = CsvDataset(
    'training_imgs',
    feature_extractor,
    max_load=10000,
    num_images=200000,
    equalize=False,
    skip_img_load=True,
)

In [None]:
# Data loader: batches of up to 10000 cached samples, reshuffled every epoch
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=10000,
    shuffle=True,
)

In [None]:
# Sanity check: fetch one batch and print the shape of its three parts
# (detector input, regression labels, classification labels).
sample = next(iter(train_dataloader))
for part_idx in (0, 1, 2):
    print(sample[part_idx].shape)


## Training

In [None]:
# Training function
def train_epoch(det, dataloader, class_loss_fn, regr_loss_fn, optimizer, device=device):
    """Train ``det`` for one pass over ``dataloader``.

    Each batch is a triple ``(input, regr_label, class_label)``. The detector
    output is read column-wise as: 0-1 err, 2 dist, 3 curv, 4-7 bb, 8-11
    state, 12-15 next state, 16-22 sign; the labels use the same order within
    their own tensors. The optimised loss is the weighted sum
    ``50*err + 0.1*dist + 0.1*curv`` plus the remaining terms with weight 0.

    Returns:
        ``(class_loss, err_loss, dist_loss, curv_loss, bb_loss)``: the mean
        over all batches of each (already weighted) term.
    """
    det.train() #train detector

    # per-batch values of every tracked loss term
    class_hist, err_hist, dist_hist, curv_hist, bb_hist = [], [], [], [], []

    for features, regr_label, class_label in tqdm(dataloader):
        features = features.to(device)
        regr_label = regr_label.to(device)
        class_label = class_label.to(device)

        optimizer.zero_grad()
        output = det(features)

        # regression part of the output (8 columns)
        err_out = output[:, 0:2]
        dist_out = output[:, 2]
        curv_out = output[:, 3]
        bb_out = output[:, 4:8]
        # classification part of the output (4 + 4 + 7 columns)
        state_out = output[:, 8:12]
        next_out = output[:, 12:16]
        sign_out = output[:, 16:23]

        err_label = regr_label[:, 0:2]
        raw_dist_label = regr_label[:, 2]
        # Where the dist label is not below 0.8 the prediction itself becomes
        # the target, so those rows contribute nothing to the dist loss.
        dist_label = torch.where(raw_dist_label < 0.8, raw_dist_label, dist_out)
        curv_label = regr_label[:, 3]
        bb_label = regr_label[:, 4:8]

        state_label = class_label[:, 0:4]
        next_label = class_label[:, 4:8]
        sign_label = class_label[:, 8:15]

        # Weighted loss terms (weight 0.0 keeps a term computed but inactive)
        err_loss = 50.0*regr_loss_fn(err_out, err_label)
        dist_loss = 0.1*regr_loss_fn(dist_out, dist_label) 
        curv_loss = 0.1*regr_loss_fn(curv_out, curv_label)
        bb_loss = 0.0*regr_loss_fn(bb_out, bb_label)

        state_loss = 0.0*class_loss_fn(state_out, state_label)
        next_loss = 0.0*class_loss_fn(next_out, next_label)
        assert sign_out.shape == sign_label.shape, f'{sign_out.shape} != {sign_label.shape}'
        sign_loss = 0.0*class_loss_fn(sign_out, sign_label)
        loss = err_loss + dist_loss + curv_loss + bb_loss + state_loss + next_loss + sign_loss

        loss.backward()
        optimizer.step()

        # record this batch
        class_hist.append((state_loss + next_loss + sign_loss).detach().cpu().numpy())
        err_hist.append(err_loss.detach().cpu().numpy())
        dist_hist.append(dist_loss.detach().cpu().numpy())
        curv_hist.append(curv_loss.detach().cpu().numpy())
        bb_hist.append(bb_loss.detach().cpu().numpy())

    # averages over the epoch
    return (
        np.mean(class_hist),
        np.mean(err_hist),
        np.mean(dist_hist),
        np.mean(curv_hist),
        np.mean(bb_hist),
    )

def get_avg_loss(ext, det, dataloader, class_loss_fn, regr_loss_fn, device):
    """Evaluate ``det`` on ``dataloader`` without updating any weights.

    Uses the same output/label layout as ``train_epoch``: output columns 0-7
    are regression values, 8-11 state, 12-15 next state and 16-22 sign; class
    labels are split 0-3 / 4-7 / 8-14. Unlike ``train_epoch`` the terms are
    neither weighted nor masked, so the numbers are not directly comparable
    with the training losses.

    Args:
        ext: feature extractor; only switched to eval mode here because the
            batches already contain extracted features.
        det: detector to evaluate.
        dataloader: yields ``(input, regr_label, class_label)`` batches.
        class_loss_fn: loss applied to each classification group.
        regr_loss_fn: loss applied to the regression columns.
        device: device the batches are moved to.

    Returns:
        ``(class_loss, regr_loss)``, each the mean over all batches.
    """
    ext.eval()
    det.eval()
    class_losses = []
    regr_losses = []
    # No optimizer is involved: gradients are disabled for the whole loop.
    with torch.no_grad():
        for (det_input, regr_label, class_label) in tqdm(dataloader):
            # Move the input and target data to the selected device
            det_input, regr_label, class_label = det_input.to(device), regr_label.to(device), class_label.to(device)
            output = det(det_input)

            #regression: 8 values
            #classification: 4 states, 4 next states, 7 signs
            regr_out = output[:, 0:8]
            state_out = output[:, 8:12]
            next_out = output[:, 12:16]
            sign_out = output[:, 16:23]

            state_label = class_label[:, 0:4]
            next_label = class_label[:, 4:8]
            sign_label = class_label[:, 8:15]

            # Compute the losses
            regr_loss = regr_loss_fn(regr_out, regr_label[:, 0:8])
            state_loss = class_loss_fn(state_out, state_label)
            next_loss = class_loss_fn(next_out, next_label)
            sign_loss = class_loss_fn(sign_out, sign_label)
            class_loss = state_loss + next_loss + sign_loss

            class_losses.append(class_loss.detach().cpu().numpy())
            regr_losses.append(regr_loss.detach().cpu().numpy())
    # Average over the batches
    class_loss = np.mean(class_losses)
    regr_loss = np.mean(regr_losses)
    return class_loss, regr_loss

In [None]:
# #load models
# detector.load_state_dict(torch.load('detector.pt'))
# feature_extractor.load_state_dict(torch.load('feature_extractor.pt'))

#parameters
lr = 0.001
epochs = 150
# Only the detector is optimised; the backbone parameters are frozen.
optimizer = torch.optim.Adam(detector.parameters(), lr=lr, weight_decay=0.0)
regr_loss_fn = nn.MSELoss()
class_loss_fn = nn.CrossEntropyLoss()

# Create the checkpoint folder up front so the save after the first epoch
# cannot fail and throw the epoch away.
os.makedirs('models', exist_ok=True)

for epoch in range(epochs):
    train_loss_c, err_loss, dist_loss, curv_loss, bb_loss = train_epoch(detector, train_dataloader, class_loss_fn, regr_loss_fn, optimizer, device)
    # wipe the progress bar before printing this epoch's summary
    clear_output(wait=True)
    print(f"Epoch {epoch+1}/{epochs}")
    print(f"err_loss: {err_loss}")
    print(f"dist_loss: {dist_loss}")
    print(f"curv_loss: {curv_loss}")
    print(f"bb_loss: {bb_loss}")
    print(f"Classification loss: {train_loss_c}")
    # checkpoint after every epoch so an interrupted run keeps its progress
    torch.save(detector.state_dict(), 'models/detector.pt')
    torch.save(feature_extractor.state_dict(), 'models/feature_extractor.pt')

In [None]:
# #testing
# test_dataset = CsvDataset(folder='test_imgs')
# test_dataloader = DataLoader(test_dataset, batch_size=100, shuffle=True)

# #get accuracy
# train_class_loss, train_regr_loss = get_avg_loss(feature_extractor, detector, train_dataloader, class_loss_fn, regr_loss_fn, device)
# test_class_loss, test_regr_loss = get_avg_loss(feature_extractor, detector, test_dataloader, class_loss_fn, regr_loss_fn, device)

# print(f"Training classification loss: {train_class_loss}")
# print(f"Training regression loss: {train_regr_loss}\n")
# print(f"Testing classification loss: {test_class_loss}")
# print(f"Testing regression loss: {test_regr_loss}")

In [None]:
# Reload the checkpoints written by the training loop. map_location='cpu'
# lets this work on a machine without CUDA even if they were saved from the GPU.
detector.load_state_dict(torch.load('models/detector.pt', map_location='cpu'))
feature_extractor.load_state_dict(torch.load('models/feature_extractor.pt', map_location='cpu'))

# #save pytorch model
# torch.save(detector.state_dict(), 'detector.pt')
# torch.save(feature_extractor.state_dict(), 'feature_extractor.pt')

#save the model so that opencv can load it
import torch
import torch.onnx
import torchvision
import torchvision.models as models
import sys

# The export is traced on the CPU. Note that this rebinds the module-level
# ``device`` for everything executed afterwards.
device = torch.device('cpu')
detector.to(device)
feature_extractor.to(device)
 
onnx_detector_path = "models/detector.onnx"
onnx_feature_extractor_path = "models/feature_extractor.onnx"

# set the model to inference mode
detector.eval()
feature_extractor.eval()
 
# Create some sample input in the shape each model expects.
# This is needed because the conversion runs one forward pass through the network.
dummy_input = torch.randn(1, 3, 240, 320)
# Width taken from the detector's first linear layer (features + additional
# inputs, e.g. 3072 + 4 = 3076) so it follows the detector configuration.
dummy_input2 = torch.randn(1, detector.lin[0].in_features)
torch.onnx.export(feature_extractor, dummy_input, onnx_feature_extractor_path, verbose=True)
torch.onnx.export(detector, dummy_input2, onnx_detector_path, verbose=True)

clear_output(wait=False)



In [None]:
#test with opencv
# Smoke test of the exported ONNX models through OpenCV's dnn module.
sample_image = "training_imgs/img_1.png"
images = [cv.imread(f"training_imgs/img_{i+1}.png") for i in range(100)]
# cv.imread returns None for files it cannot read; fail early with the
# offending indices instead of an opaque error inside blobFromImage.
unreadable = [n + 1 for n, loaded in enumerate(images) if loaded is None]
if unreadable:
    raise FileNotFoundError(f"could not read training_imgs/img_<n>.png for n in {unreadable}")
 
# Load both exported networks
det =  cv.dnn.readNetFromONNX(onnx_detector_path) 
ext = cv.dnn.readNetFromONNX(onnx_feature_extractor_path)

for image in tqdm(images):
    # 320x240 blob, no scaling or mean subtraction, BGR -> RGB
    blob = cv.dnn.blobFromImage(image, 1.0, (320, 240),(0, 0, 0), swapRB=True, crop=False)
    ext.setInput(blob)
    features = ext.forward()
    # Dummy additional inputs. Created with the features' dtype so the
    # concatenation is not silently promoted to float64.
    action_vec = np.ones((1, 4), dtype=features.dtype)
    # named det_input (not ``input``) so the builtin is not shadowed
    det_input = np.concatenate((features, action_vec), axis=1)
    det.setInput(det_input)
    preds = det.forward()

# Only the predictions for the last image are shown
print(f"Predictions: {preds}")
print(f"Predictions shape: {preds.shape}")