## Imports

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px # this is another plotting library for interactive plot

from sklearn.model_selection import train_test_split
from sklearn import metrics, manifold # we will use the metrics and manifold learning modules from scikit-learn
from pathlib import Path # to interact with file paths
from PIL import Image # to interact with images
from tqdm import tqdm # progress bar
from pprint import pprint # pretty print (useful for a more readable print of objects like lists or dictionaries)

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import cv2 as cv

# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# device = torch.device("cpu")  # uncomment to force CPU execution

## Load Pretrained Net and create Detector 

In [None]:
# Model: pretrained YOLOv5s fetched through torch.hub.
# Other variants that were tried:
# model = torch.hub.load('ultralytics/yolov5', 'yolov5n', pretrained=True) #faster but less accurate
# model = torch.hub.load('ultralytics/yolov5', 'yolov5n6', pretrained=True)
# model = torch.hub.load('ultralytics/yolov3', 'yolov3') #bad
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
model.to(device)

# Analyze network (uncomment any of the snippets below to inspect it)

# print(model)

# for param_name, param in model.named_parameters():
#     print(param_name)

# for i, (k, v) in enumerate(model.named_parameters()):
#     print(f'{i} - {k}')

# Reference: https://github.com/ultralytics/yolov5/issues/1314
# The original note here says the backbone is layers 0->9, while the
# Sequential built below takes layers 0..10 (eleven modules).
# NOTE(review): `backbone_layers` is not used anywhere in the visible code and
# only names model.0 .. model.8 -- confirm which range is intended.
backbone_layers = [f'model.{x}' for x in range(9)]

# Reuse the first eleven layer modules of the loaded network as the feature
# extractor; the modules (and their weights) are shared with `model`.
yolo_layers = model.model.model.model
backbone = nn.Sequential(*(yolo_layers[idx] for idx in range(11)))

# print(backbone)

class Detector(nn.Module):
    """Fully connected head stacked on top of a feature-extracting backbone.

    The input is passed through ``backbone``, flattened per sample and fed to
    two linear layers (hidden size 512, ReLU in between).

    Args:
        backbone: module that produces the features.
        outputs: number of values predicted for each sample.
        features: length of the flattened backbone output for one sample.
            The original note marks the default 76800 as the value for
            640x320 input -- NOTE(review): confirm, another cell of this file
            reports 76800 for (640, 480).
    """

    def __init__(self, backbone, outputs, features=76800):
        super().__init__()

        # Pretrained feature extractor. The attribute names used in this class
        # end up in the state_dict keys, so they must stay stable.
        self.pretrained = backbone

        # Collapse everything except the batch dimension
        self.flatten = nn.Flatten(start_dim=1)

        # Linear section
        head_layers = [
            nn.Linear(in_features=features, out_features=512),
            nn.ReLU(inplace=True),
            # nn.Dropout(p=0.5),
            nn.Linear(in_features=512, out_features=outputs),
        ]
        self.lin = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the head's predictions for the batch ``x``."""
        feature_maps = self.pretrained(x)
        flat = self.flatten(feature_maps)
        return self.lin(flat)

# Build the detector: two outputs per image, 20480 flattened backbone features
detector = Detector(
    backbone=backbone,
    outputs=2,
    features=20480,
)

# Freeze the backbone so the optimizer only updates the linear head
for param in detector.pretrained.parameters():
    param.requires_grad = False

# Move the whole detector to the selected device
detector.to(device)

# #check
# for param_name, param in detector.named_parameters():
#     print('%s \t- requires_grad=%s' % (param_name, param.requires_grad))

In [None]:
# Smoke-test the detector on a single image
# Load the image with OpenCV
img = cv.imread('tests/test_img.jpg')
if img is None:
    # cv.imread does not raise on a missing/unreadable file, it returns None;
    # fail here with a clear message instead of inside cv.resize.
    raise FileNotFoundError("could not read image 'tests/test_img.jpg'")
# Resize to 320 x 240 (width x height)
img = cv.resize(img, (320, 240))
# Convert the HxWxC array to a float CxHxW tensor
img = torch.from_numpy(img).float().permute(2, 0, 1)
# Add the batch dimension and move to the selected device
img = img.unsqueeze(0).to(device)

detector.eval()

# Inference
with torch.no_grad():
    out = detector(img)
    print(out.shape)
    # Flattened feature sizes recorded by the original author:
    #   (320, 240) -> 20480
    #   (640, 480) -> 76800
    # NOTE(review): with the linear head applied the printed shape is
    # presumably (1, 2), not the flattened size -- confirm.





## Loading images and Labels

In [None]:
#dataset
class CsvDataset(Dataset):
    """In-memory dataset of images paired with two-value labels.

    ``folder`` must contain ``labels.csv`` with one ``x,y`` pair per line and
    the images ``img_1.png``, ``img_2.png``, ... where image ``k`` belongs to
    line ``k`` of the CSV. Every image is read, resized and converted to a
    float tensor eagerly in the constructor.

    Args:
        folder: directory holding ``labels.csv`` and the images (``str`` or
            path-like).
        transform: optional callable applied to the ``(image, label)`` tuple
            each time a sample is fetched.
        img_size: ``(width, height)`` every image is resized to.

    Raises:
        FileNotFoundError: if ``labels.csv`` is missing or an image cannot be
            read.
        ValueError: if a label field cannot be parsed as a float.
    """

    def __init__(self, folder, transform=None, img_size=(320, 240)):
        self.transform = transform
        self.data = []
        folder = Path(folder)

        # Read the labels first so the CSV is closed before the (slow) image
        # loading starts.
        with open(folder / 'labels.csv', 'r') as f:
            labels = self._parse_labels(f.read())

        for idx, (x, y) in enumerate(tqdm(labels), start=1):
            label = torch.tensor([x, y], dtype=torch.float32)
            img_path = folder / f'img_{idx}.png'
            img = cv.imread(str(img_path))
            if img is None:
                # cv.imread returns None instead of raising when the file is
                # missing or unreadable.
                raise FileNotFoundError(f'could not read image {img_path}')
            img = cv.resize(img, img_size)
            # HxWxC array -> float CxHxW tensor (channel order is kept as
            # delivered by cv.imread)
            img = torch.from_numpy(img).float().permute(2, 0, 1)
            self.data.append((img, label))

    @staticmethod
    def _parse_labels(text):
        """Parse the CSV text into a list of ``(x, y)`` float tuples.

        Only trailing blank lines are discarded, so a file with or without a
        final newline yields the same samples and line ``k`` keeps matching
        image ``k``.
        """
        rows = text.splitlines()
        while rows and not rows[-1].strip():
            rows.pop()
        labels = []
        for row in rows:
            fields = row.split(',')
            labels.append((float(fields[0]), float(fields[1])))
        return labels

    def __len__(self):
        # One entry of self.data per sample
        return len(self.data)

    def __getitem__(self, idx):
        # A sample is the (image, label) tuple stored at position idx
        sample = self.data[idx]
        if self.transform is not None:
            sample = self.transform(sample)
        return sample

# Load every training image/label pair into memory
train_dataset = CsvDataset(folder='training_imgs')

# Serve the training set in reshuffled mini-batches of 128 samples
train_dataloader = DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=128,
)


## Training

In [None]:
# Training function
def train_epoch(model, dataloader, loss_fn, optimizer, device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: network to optimize; it is switched to training mode.
        dataloader: iterable yielding ``(data, label)`` batches.
        loss_fn: callable ``loss_fn(output, label)`` returning a scalar loss.
        optimizer: optimizer holding the parameters to update.
        device: device the batches are moved to before the forward pass.

    Returns:
        The mean of the per-batch losses (each batch counts equally,
        whatever its size).

    Raises:
        ValueError: if the model output and the label batch differ in shape.
    """
    # Set the model to training mode
    model.train()
    batch_losses = []
    for data, label in dataloader:
        # Move the input and target data to the selected device
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        output = model(data)
        # Explicit check instead of `assert`: asserts are stripped under -O and
        # a mismatch could then be silently broadcast by the loss function.
        if output.shape != label.shape:
            raise ValueError(
                f"output shape {tuple(output.shape)} does not match "
                f"label shape {tuple(label.shape)}"
            )
        loss = loss_fn(output, label)
        # Backpropagate and update the weights
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())
    # float32 array keeps the return dtype of the previous implementation
    train_loss = np.mean(np.asarray(batch_losses, dtype=np.float32))
    # print(f"Training loss: {train_loss}")
    return train_loss

def get_avg_loss(net, dataloader, loss_fn, device):
    """Evaluate ``net`` on ``dataloader`` and return the average batch loss.

    The network is switched to evaluation mode (and left in it) and gradients
    are disabled while the batches are processed.

    Args:
        net: network to evaluate.
        dataloader: iterable yielding ``(data, label)`` batches.
        loss_fn: callable ``loss_fn(output, label)`` returning the loss.
        device: device the batches are moved to before the forward pass.

    Returns:
        The mean of the per-batch losses (only the loss; no accuracy is
        computed).

    Raises:
        ValueError: if the network output and the label batch differ in shape.
    """
    net.eval()
    losses = []
    with torch.no_grad():
        for data, label in tqdm(dataloader):
            # Move the input and target data to the selected device
            data, label = data.to(device), label.to(device)
            output = net(data)
            # Explicit check instead of `assert`: asserts are stripped under
            # -O and a mismatch could then be silently broadcast by the loss.
            if output.shape != label.shape:
                raise ValueError(
                    f"output shape {tuple(output.shape)} does not match "
                    f"label shape {tuple(label.shape)}"
                )
            loss = loss_fn(output, label)
            losses.append(loss.detach().cpu().numpy())
    # Average over the batches
    test_loss = np.mean(losses)
    return test_loss

In [None]:
# Resume from a previous checkpoint when one exists; on a first run there is
# no 'detector.pt' yet, so training starts from the freshly built detector.
checkpoint_path = Path('detector.pt')
if checkpoint_path.exists():
    # map_location lets a checkpoint saved from a GPU run load on a CPU-only
    # machine (and vice versa).
    detector.load_state_dict(torch.load(checkpoint_path, map_location=device))
else:
    print(f"No checkpoint found at {checkpoint_path}, training from scratch")

# Training hyper-parameters
lr = 0.001
epochs = 10
# Parameters with requires_grad=False never receive a gradient, so the
# optimizer leaves them untouched.
optimizer = torch.optim.Adam(detector.parameters(), lr=lr)
loss_fn = nn.MSELoss()

for epoch in range(epochs):
    train_loss = train_epoch(detector, train_dataloader, loss_fn, optimizer, device)
    print(f"Epoch {epoch+1}/{epochs}")
    print(f"Training loss: {train_loss}")

In [None]:
# Evaluate on the held-out test set
test_dataset = CsvDataset(folder='test_imgs')
# No shuffling for evaluation: the reported value is a mean of per-batch
# losses, so a fixed batch composition keeps it reproducible between runs.
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Average loss on both splits (no accuracy is computed)
train_loss = get_avg_loss(detector, train_dataloader, loss_fn, device)
test_loss = get_avg_loss(detector, test_dataloader, loss_fn, device)

print(f"Training loss: {train_loss}")
print(f"Testing loss: {test_loss}")

In [None]:
# Sanity check: shape of the image tensor stored for the first test sample
first_test_image = test_dataset.data[0][0]
print(first_test_image.shape)

In [None]:
# Save the PyTorch weights and export the model so that OpenCV can load it
import torch
import torch.onnx
import torchvision
import torchvision.models as models
import sys

# Move the detector to the CPU *before* saving, so the checkpoint holds CPU
# tensors and can be loaded on machines without a GPU.
# NOTE: this rebinds the notebook-wide `device` to the CPU for all later cells.
device = torch.device('cpu')
detector.to(device)

# Save the PyTorch weights
torch.save(detector.state_dict(), 'detector.pt')

onnx_model_path = "model_test.onnx"

# Set the model to inference mode
detector.eval()

# Create a sample input in the shape this model expects (batch, channels,
# height, width). It is needed because the conversion runs one forward pass
# through the network.
dummy_input = torch.randn(1, 3, 240, 320)
torch.onnx.export(detector, dummy_input, onnx_model_path, verbose=True)


In [None]:
# Run the exported ONNX model through OpenCV's DNN module
sample_image = "training_imgs/img_1.png"
images = []
for i in range(100):
    image_path = f"training_imgs/img_{i+1}.png"
    image = cv.imread(image_path)
    if image is None:
        # cv.imread returns None instead of raising on a missing/unreadable file
        raise FileNotFoundError(f"could not read image {image_path}")
    images.append(image)

# Load the network exported by the previous cell
net = cv.dnn.readNetFromONNX(onnx_model_path)

for image in tqdm(images):
    # swapRB=False: the training tensors are built from cv.imread output
    # without any channel swap, so the network must see the same channel
    # order here. Scale 1.0 and zero mean match the un-normalized training
    # input as well.
    blob = cv.dnn.blobFromImage(image, 1.0, (320, 240), (0, 0, 0), swapRB=False, crop=False)
    net.setInput(blob)
    preds = net.forward()

# Only the prediction for the last processed image is printed
print ("Predictions: ", preds)