APPLICATION OF SIMPLE MACHINE LEARNING REGRESSION METHODS AND BASIC NEURAL NETWORKS
TO A DISTANCE ESTIMATION PROBLEM ON THE KITTI OBJECT DETECTION DATASET.

THE TRAINING DATA INCLUDE A SUBSET OF THE INFORMATION (FEATURES) PROVIDED IN THE LABEL
TXT FILES, CONVERTED TO A TABULAR REPRESENTATION.

THE TRAINING IS PERFORMED INITIALLY WITH MULTIPLE FEATURES AS INPUT TO THE MODELS.

THE LABEL IS TAKEN AS THE 'zloc' VARIABLE REPRESENTING THE DISTANCE OF THE OBJECT
FROM THE CAMERA.

In [61]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline
sns.set_style('darkgrid')

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, SGDRegressor, ElasticNet, Lasso, Ridge
#from sklearn.svm import SVR, LinearSVR, NuSVR
#from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, HistGradientBoostingRegressor, ExtraTreesRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

import cv2
from PIL import Image

In [94]:
# Image parameters
# Frame width, height and channel count in pixels (presumably the nominal
# KITTI frame size - TODO confirm against the images on disk).
w, h, d = 1242, 375, 3

# Drawing colours as 3-tuples. Assuming OpenCV's default (blue, green, red)
# channel order, cR is red, cG is green, cB is blue and cK is black.
cR, cG, cB = (0, 0, 255), (0, 255, 0), (255, 0, 0)
cK = (0, 0, 0)

# IMPORT DATA

In [None]:
# IMPORT DATA
import cv2
from PIL import Image

# Load the annotation table (one row per annotated object)
df = pd.read_csv('annotations.csv')
#print(df.info()) # Display the information

# Pick one annotation row at random. The upper bound is taken from the table
# itself, so every row is reachable and the index can never run past the end
# regardless of how many rows the CSV holds.
idx = np.random.randint(len(df))
# Column 0 is compared against df['filename'] below, so it is presumably the
# filename column.
file = df.iloc[idx,0]

# All annotation rows that share the sampled row's file
odf_file = df[df['filename']==file]

#im = Image.open('images/training/image_2/'+file)
#im.show()

# The string below is a disabled code snippet, kept for manual inspection of
# the bounding boxes on the image.
''' Check Images on bounding boxes
im = cv2.imread('images/training/image_2/'+file.replace('.txt','.png'))
res = im.copy()
for id in range(len(odf_file)):
    bbox = odf_file.iloc[id,5:9].to_numpy()
    cv2.rectangle(res, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), cR, 2)

cv2.imshow("Distance", res)
cv2.waitKey(0)
cv2.destroyAllWindows()
'''
odf_file

In [None]:
# ADD ADDITIONAL FEATURES
# Bounding-box area as a fraction of the full image area (h*w). Computed
# first, while the corner columns still hold their original values.
bbox_width = df['xmax'] - df['xmin']
bbox_height = df['ymax'] - df['ymin']
df['size'] = bbox_width*bbox_height/(h*w)

# Products of the object-dimension columns
df['volume'] = df['height']*df['width']*df['length']
df['area'] = df['height']*df['width']

# Rescale the box corners by the image extents: x columns by w, y columns by h
for column, extent in (('xmin', w), ('xmax', w), ('ymin', h), ('ymax', h)):
    df[column] = df[column]/extent

df

In [None]:
# FILTER THE DATA IF A CERTAIN CLASS IS TO BE FILTERED AND CERTAIN ATTRIBUTES TO BE CORRECTED.
# SKIP THIS TO TRAIN WITH ALL CLASSES
# Three row conditions, combined into one mask:
is_car = df['class']=='Car'          # keep the 'Car' class only
low_occlusion = df['occluded']<=1    # keep occlusion codes 0 and 1
positive_depth = df['zloc']>0        # keep strictly positive zloc values
df_class = df[is_car & low_occlusion & positive_depth]
df_class.info() # Display Info - Total number of examples to train with

In [None]:
# Preview the first rows of the filtered table
df_class.head()

In [67]:
#sc = StandardScaler()
#sc.fit_transform(df_class[['height','width','length']])

# Columns that are min-max scaled together (the target 'zloc' is among them)
scaled_columns = ['height','width','length','xloc','yloc','zloc','size','volume','area']

# NOTE(review): the scaler is fitted on every row of df_class, i.e. before any
# train/test split is made - confirm that this is intended.
mm = MinMaxScaler()
nuval = mm.fit_transform(df_class[scaled_columns])

In [None]:
# NORMALIZE THE LAST COLUMNS
print('Maximum values before and after normalization')
# Largest absolute value per column before scaling
for column in ('height','width','length','xloc'):
    print(df_class[column].abs().max())
print(df_class['yloc'].abs().max(),'\n')

#print(df_class['zloc'].abs().min())
#print(df_class['zloc'].abs().max(),'\n')

# Extremes of |zloc| in the original units; kept so that scaled values can be
# mapped back later.
zloc_abs = df_class['zloc'].abs()
zlocmax = zloc_abs.max()
zlocmin = zloc_abs.min()

# create dataset for training
# Drop the columns that are not used as features, then overwrite the numeric
# columns with their scaled counterparts held in nuval.
ds_class = df_class.drop(columns=['class','truncated','occluded'])
ds_class[['height','width','length','xloc','yloc','zloc','size','volume','area']] = nuval

# Largest absolute value per column after scaling
for column in ('height','width','length','xloc','yloc'):
    print(ds_class[column].abs().max())

# REMOVE FEATURES IF NECESSARY
# ds_class.drop(columns=['observation angle','rot_y'],inplace=True)

In [None]:
# FINAL DATASET FOR TRAINING
# Preview the first rows of the training table
ds_class.head()


In [None]:
# Scatter of the scaled distance against the scaled bounding-box size,
# coloured by the scaled object volume.
fig = plt.figure(figsize=(12,8))
# Pass the table through the `data` keyword and refer to columns by name:
# this does not depend on the positional-argument order of scatterplot, which
# differs between seaborn releases.
sns.scatterplot(data=ds_class,x='size',y='zloc',hue='volume')
fig.suptitle('Object distance vs bbox size for different object volumes')

# TRAINING

In [None]:
# DATASET PREPARATION
# Features are every column except the target; the target is the scaled zloc.
X = ds_class.drop(columns='zloc')
y = ds_class['zloc']

# 70/30 split with a fixed seed so the partition is reproducible
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)
# print(X_test)

# Keep the file names and the pixel-space boxes of the test rows for display.
Id = X_test['filename']
# Take an explicit copy, selected by column name: the boxes are modified below
# and must neither alias X_test nor depend on the column positions.
Bboxes = X_test[['xmin','ymin','xmax','ymax']].copy()
# Undo the earlier division by the image extents (x by w, y by h)
Bboxes[['xmin','xmax']] = Bboxes[['xmin','xmax']]*w
Bboxes[['ymin','ymax']] = Bboxes[['ymin','ymax']]*h

# The file name is an identifier, not a feature. Rebind instead of dropping in
# place, since the split outputs may be derived from X.
X_train = X_train.drop(columns=['filename'])
X_test = X_test.drop(columns=['filename'])

print(Bboxes)
print(Id)

In [None]:
# LINEAR REGRESSORS
LinReg = LinearRegression()
SGDReg = SGDRegressor(
    learning_rate='adaptive', penalty='l2', alpha=0.0001, l1_ratio=0.0001,
    max_iter=100000, tol=1e-6,
)
RReg = Ridge(alpha=0.001, solver='lsqr')
LS = Lasso(alpha=0.0001, max_iter=10000, tol=1e-6)
ENet = ElasticNet(alpha=0.00001, l1_ratio=0.001, max_iter=100000, tol=1e-6)

# Fit every linear model on the full feature set, in creation order
for regressor in (LinReg, SGDReg, RReg, LS, ENet):
    regressor.fit(X=X_train, y=y_train)

# MLP
# Two hidden layers (100 and 20 units) with a fixed seed
NN = MLPRegressor(
    hidden_layer_sizes=(100,20), activation='relu', learning_rate='adaptive',
    max_iter=10000, tol=1e-6, random_state=1,
)
NN.fit(X=X_train, y=y_train)
cfs = NN.coefs_
# Shapes of the first three weight matrices
for layer_weights in cfs[:3]:
    print(layer_weights.shape)

# EVALUATION

In [None]:
# PREDICT WITH THE FITTED MODELS
# Absolute value of every model's prediction on the test features
y_lr, y_sg, y_rr, y_ls, y_en, y_nn = (
    abs(fitted.predict(X=X_test))
    for fitted in (LinReg, SGDReg, RReg, LS, ENet, NN)
)

# Mean squared error of each prediction vector against the scaled labels
e_lr, e_sg, e_rr, e_ls, e_en, e_nn = (
    mean_squared_error(y_true=y_test, y_pred=predicted)
    for predicted in (y_lr, y_sg, y_rr, y_ls, y_en, y_nn)
)

# One-row summary table, one column per model
e = np.array([[e_lr,e_sg,e_rr,e_ls,e_en,e_nn]])
model_names = ['LinReg','LinSGD','Ridge','Lasso','ElasticNet','MLP']
edf = pd.DataFrame(e,index=['MSE'],columns=model_names)

print('RESULTS')
print('-----------------------------------------------------------------')
print(edf,'\n')

# Map the MLP prediction and the label back from the scaled range using the
# stored zloc extremes, then append the pixel-space boxes column-wise.
prediction_m = y_nn.reshape(-1,1)*(zlocmax-zlocmin)+zlocmin
label_m = y_test.to_numpy().reshape(-1,1)*(zlocmax-zlocmin)+zlocmin
y_o = np.concatenate([prediction_m, label_m, Bboxes.values],axis=1)
print(y_o.shape)

# CREATE AND JOIN OUTPUT DATAFRAMES
tmpdf = pd.DataFrame(y_o,columns=['prediction (meters)','label (meters)','xmin','ymin','xmax','ymax'])
odf = pd.DataFrame(Id.values,columns=['filename']).join(tmpdf)
odf



In [None]:
# DISPLAY EXAMPLES
# Sample one row of the result table. The bound comes from the table itself,
# so the index is always valid whatever the size of the test split.
idx = np.random.randint(len(odf))
file = odf.iloc[idx,0]

# Every result row that belongs to the sampled file
odf_file = odf[odf['filename']==file]
print(odf_file)

#im = Image.open('images/training/image_2/'+file)
#im.show()
image_path = 'images/training/image_2/'+file.replace('.txt','.png')
im = cv2.imread(image_path)
if im is None:
    # cv2.imread reports an unreadable or missing file by returning None
    raise FileNotFoundError('Could not read image: '+image_path)
res = im.copy()
# Columns 1-2 hold prediction and label in meters, columns 3-6 the box corners
for pred_m, lab_m, x1, y1, x2, y2 in odf_file.iloc[:,1:7].to_numpy():
    pred = str(round(pred_m))
    lab = str(round(lab_m))
    top_left = (int(x1), int(y1))
    cv2.rectangle(res, top_left, (int(x2), int(y2)), cR, 1)
    # Annotate the box with "prediction|label"
    cv2.putText(res, pred+'|'+lab, top_left,cv2.FONT_HERSHEY_PLAIN, 1, cR ,2)

# Show the annotated image until a key is pressed
cv2.imshow("Distance", res)
cv2.waitKey(0)
cv2.destroyAllWindows()

# TRAIN WITH REDUCED DATASET (ONLY BOUNDING BOX INFORMATION)

In [None]:
# DATASET PREPARATION
# Same features/target and the same seeded 70/30 split as the full experiment
X = ds_class.drop(columns='zloc')
y = ds_class['zloc']

Xr_train,Xr_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)
# print(X_test)

# Keep the file names and the pixel-space boxes of the test rows for display.
Id = Xr_test['filename']
# Take an explicit copy, selected by column name: the boxes are modified below
# and must neither alias Xr_test nor depend on the column positions.
Bboxes = Xr_test[['xmin','ymin','xmax','ymax']].copy()
# Undo the earlier division by the image extents (x by w, y by h)
Bboxes[['xmin','xmax']] = Bboxes[['xmin','xmax']]*w
Bboxes[['ymin','ymax']] = Bboxes[['ymin','ymax']]*h

# Everything that is not bounding-box information is removed from the features.
# Rebind instead of dropping in place, since the split outputs may be derived
# from X.
non_bbox_columns = ['filename','volume','area','height','width','length','xloc','yloc','rot_y','observation angle']
Xr_train = Xr_train.drop(columns=non_bbox_columns)
Xr_test = Xr_test.drop(columns=non_bbox_columns)

Xr_train.head()

In [None]:
# LINEAR REGRESSORS
rLinReg = LinearRegression()
rSGDReg = SGDRegressor(
    loss='huber', learning_rate='adaptive', penalty='l2', alpha=0.0001,
    l1_ratio=0.0001, max_iter=100000, tol=1e-6,
)
rRReg = Ridge(alpha=0.001, solver='lsqr')
rLS = Lasso(alpha=0.0001, max_iter=10000, tol=1e-6)
rENet = ElasticNet(alpha=0.00001, l1_ratio=0.001, max_iter=100000, tol=1e-6)

# Fit every linear model on the reduced feature set, in creation order
for regressor in (rLinReg, rSGDReg, rRReg, rLS, rENet):
    regressor.fit(X=Xr_train, y=y_train)

# MLP
# Four hidden layers (100, 80, 60 and 30 units) with a fixed seed
rNN = MLPRegressor(
    hidden_layer_sizes=(100,80,60,30), activation='relu',
    learning_rate='adaptive', max_iter=10000, tol=1e-6, random_state=1,
)
rNN.fit(X=Xr_train, y=y_train)
rcfs = rNN.coefs_
# Shape of every weight matrix of the fitted network
for layer_weights in rcfs:
    print(layer_weights.shape)

In [None]:
# PREDICT WITH THE FITTED MODELS
# Absolute value of every reduced model's prediction on the test features
ry_lr = abs(rLinReg.predict(X=Xr_test))
ry_sg = abs(rSGDReg.predict(X=Xr_test))
ry_rr = abs(rRReg.predict(X=Xr_test))
ry_ls = abs(rLS.predict(X=Xr_test))
ry_en = abs(rENet.predict(X=Xr_test))
ry_nn = abs(rNN.predict(X=Xr_test))

# Mean squared error of each prediction vector against the scaled labels
re_lr = mean_squared_error(y_true=y_test,y_pred=ry_lr)
re_sg = mean_squared_error(y_true=y_test,y_pred=ry_sg)
re_rr = mean_squared_error(y_true=y_test,y_pred=ry_rr)
re_ls = mean_squared_error(y_true=y_test,y_pred=ry_ls)
re_en = mean_squared_error(y_true=y_test,y_pred=ry_en)
re_nn = mean_squared_error(y_true=y_test,y_pred=ry_nn)

# One-row summary table, one column per model
re = np.array([[re_lr,re_sg,re_rr,re_ls,re_en,re_nn]])
redf = pd.DataFrame(re,index=['MSE'],columns=['LinReg','LinSGD','Ridge','Lasso','ElasticNet','MLP'])

print('RESULTS')
print('-----------------------------------------------------------------')
print(redf,'\n')

# Map the MLP prediction and the label back from the scaled range using the
# stored zloc extremes, then append the pixel-space boxes column-wise.
ry_o = np.concatenate([ry_nn.reshape(-1,1)*(zlocmax-zlocmin)+zlocmin, y_test.to_numpy().reshape(-1,1)*(zlocmax-zlocmin)+zlocmin, Bboxes.values],axis=1)
# Report the shape of the array built in this cell
print(ry_o.shape)

# CREATE AND JOIN OUTPUT DATAFRAMES
rodf = pd.DataFrame(Id.values,columns=['filename'])
tmpdf = pd.DataFrame(ry_o,columns=['prediction (meters)','label (meters)','xmin','ymin','xmax','ymax'])

rodf = rodf.join(tmpdf)
rodf

In [None]:
# DISPLAY EXAMPLES
# Sample one row of the result table. The bound comes from the table itself,
# so the index is always valid whatever the size of the test split.
idx = np.random.randint(len(rodf))
file = rodf.iloc[idx,0]

# Every result row that belongs to the sampled file
odf_file = rodf[rodf['filename']==file]
print(odf_file)

#im = Image.open('images/training/image_2/'+file)
#im.show()
image_path = 'images/training/image_2/'+file.replace('.txt','.png')
im = cv2.imread(image_path)
if im is None:
    # cv2.imread reports an unreadable or missing file by returning None
    raise FileNotFoundError('Could not read image: '+image_path)
res = im.copy()
# Columns 1-2 hold prediction and label in meters, columns 3-6 the box corners
for pred_m, lab_m, x1, y1, x2, y2 in odf_file.iloc[:,1:7].to_numpy():
    pred = str(round(pred_m))
    lab = str(round(lab_m))
    top_left = (int(x1), int(y1))
    cv2.rectangle(res, top_left, (int(x2), int(y2)), cR, 1)
    # Annotate the box with "prediction|label"
    cv2.putText(res, pred+'|'+lab, top_left,cv2.FONT_HERSHEY_DUPLEX, 1, cR ,2)

# Show the annotated image until a key is pressed
cv2.imshow("Distance", res)
cv2.waitKey(0)
cv2.destroyAllWindows()

# USING KERAS

In [130]:
import setuptools.dist # import setuptuls before distutils, avoids distutils error
import tensorflow as tf
import keras
from keras import Input, ops
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization

# Define custom functions if necessary
def custom_lr_scheduler(epoch,lr):
    """Return the learning rate to use for the given epoch.

    For epochs below 100 the supplied rate is returned unchanged; from epoch
    100 onwards it is divided by ``1 + epoch/1000000``.

    Args:
        epoch: Index of the current epoch.
        lr: Learning rate supplied by the caller.

    Returns:
        The (possibly reduced) learning rate.
    """
    return lr if epoch < 100 else lr/(1+epoch/1000000)
    
def custom_loss(label,pred):
    """Root-mean-square error between label and prediction.

    The squared differences are averaged over the last axis before the square
    root is taken, so that axis is reduced in the result.

    Args:
        label: Ground-truth values.
        pred: Predicted values, broadcastable against ``label``.

    Returns:
        The square root of the mean squared difference along the last axis.
    """
    squared_error = ops.square(label - pred)
    mean_squared_error_last_axis = ops.mean(squared_error, axis=-1)
    return ops.sqrt(mean_squared_error_last_axis)

In [None]:
# BUILD AND TRAIN MODEL
# Fully connected stack 5 -> 100 -> 5 -> 2 -> 1; every hidden Dense layer uses
# ReLU and is followed by batch normalization, the output layer is linear.
modelk = Sequential([
    Input(shape=(5,)),
    Dense(100, activation='relu'),
    BatchNormalization(),
    Dense(5, activation='relu'),
    BatchNormalization(),
    Dense(2, activation='relu'),
    BatchNormalization(),
    Dense(1),
])

# Learning-rate callback driven by custom_lr_scheduler, Adam starting at 5e-3
ad_lr = keras.callbacks.LearningRateScheduler(custom_lr_scheduler)
opt = Adam(learning_rate=5e-3)
modelk.summary()
# Huber loss, 2000 epochs with batches of 512 samples
modelk.compile(loss='huber', optimizer=opt)
modelk.fit(Xr_train, y_train, epochs=2000, batch_size=512,verbose=1, callbacks=[ad_lr])

In [None]:
# EVALUATE MODEL
print(Xr_test)
# Absolute value of the network output on the test features, and its MSE
# against the scaled labels
ry_nnk = abs(modelk.predict(Xr_test))
re_nnk = mean_squared_error(y_true=y_test,y_pred=ry_nnk)
print(re_nnk)

# Map prediction and label back from the scaled range using the stored zloc
# extremes, then append the pixel-space boxes column-wise.
prediction_m = ry_nnk*(zlocmax-zlocmin)+zlocmin
label_m = y_test.to_numpy().reshape(-1,1)*(zlocmax-zlocmin)+zlocmin
ry_ok = np.concatenate([prediction_m,label_m,Bboxes.values],axis=1)
print(ry_ok.shape)

# CREATE AND JOIN OUTPUT DATAFRAMES
tmpdf = pd.DataFrame(ry_ok,columns=['prediction (meters)','label (meters)','xmin','ymin','xmax','ymax'])
rodfk = pd.DataFrame(Id.values,columns=['filename']).join(tmpdf)
rodfk

In [None]:
# DISPLAY EXAMPLES
# Sample one row of the result table. The bound comes from the table itself,
# so the index is always valid whatever the size of the test split.
idx = np.random.randint(len(rodfk))
file = rodfk.iloc[idx,0]

# Every result row that belongs to the sampled file
odf_file = rodfk[rodfk['filename']==file]
print(odf_file)

#im = Image.open('images/training/image_2/'+file)
#im.show()
image_path = 'images/training/image_2/'+file.replace('.txt','.png')
im = cv2.imread(image_path)
if im is None:
    # cv2.imread reports an unreadable or missing file by returning None
    raise FileNotFoundError('Could not read image: '+image_path)
res = im.copy()
# Columns 1-2 hold prediction and label in meters, columns 3-6 the box corners
for pred_m, lab_m, x1, y1, x2, y2 in odf_file.iloc[:,1:7].to_numpy():
    pred = str(round(pred_m))
    lab = str(round(lab_m))
    top_left = (int(x1), int(y1))
    cv2.rectangle(res, top_left, (int(x2), int(y2)), cR, 1)
    # Annotate the box with "prediction|label"
    cv2.putText(res, pred+'|'+lab, top_left,cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, cR ,2)

# Show the annotated image until a key is pressed
cv2.imshow("Distance", res)
cv2.waitKey(0)
cv2.destroyAllWindows()

# USING PYTORCH

In [192]:
import torch
import torch.optim as optim
from torch.nn import Sequential,ReLU,SiLU,BatchNorm1d,Linear,HuberLoss,SmoothL1Loss,MSELoss
import torch.utils.data as data_utils
from tqdm import trange, tqdm

# Convert datatables to torch tensors and load with DataLoader
# Features and targets as float32 arrays; the targets become a column vector
train_features = Xr_train.values.astype('float32')
train_targets = y_train.values.astype('float32').reshape(-1,1)
train = data_utils.TensorDataset(torch.tensor(train_features), torch.tensor(train_targets))
# Shuffled mini-batches of 100 samples
# NOTE(review): if the model trained on this loader uses batch normalization,
# a trailing batch holding a single sample may fail in training mode -
# consider drop_last=True; confirm against the training set size.
train_loader = data_utils.DataLoader(train, batch_size=100, shuffle=True)
Xr_test_torch = torch.tensor(Xr_test.values.astype('float32'))
# y_test_torch = torch.tensor(y_test.values.astype('float32').reshape(-1,1))

In [None]:
# BUILD AND TRAIN MODELS
# Fully connected stack 5 -> 100 -> 5 -> 2 -> 1; every hidden Linear layer is
# followed by ReLU and batch normalization, the output layer is linear.
modelt = Sequential(
    Linear(5,100),
    ReLU(),
    BatchNorm1d(100),
    Linear(100,5),
    ReLU(),
    BatchNorm1d(5),
    Linear(5,2),
    ReLU(),
    BatchNorm1d(2),
    Linear(2,1)
)
print(modelt,'\n')

# Huber loss, Adam with weight decay, learning rate multiplied by 0.99 per epoch
loss_fn = HuberLoss()
opt = optim.Adam(modelt.parameters(),lr=5e-3,betas=(0.9,0.999),eps=1e-16,weight_decay=0.0005)
lr_scheduler = optim.lr_scheduler.ExponentialLR(opt,gamma=0.99)
modelt.train()
epochs = trange(1000)
# Sum of the per-epoch mean losses, kept as a plain Python float so that no
# autograd graph is retained between epochs.
total_loss = 0.0
for epoch in epochs:
    epoch_loss = 0.0
    for X_batch,y_batch in train_loader:
        opt.zero_grad()
        y_pred = modelt(X_batch)
        loss = loss_fn(y_pred,y_batch)
        loss.backward()
        opt.step()
        # .item() detaches the scalar from the graph before accumulating
        epoch_loss += loss.item()
    lr_scheduler.step()
    # Average over every mini-batch of the epoch, not only the last one
    total_loss += epoch_loss/max(len(train_loader),1)
    # Progress bar shows the running average of the epoch losses so far
    epochs.set_description('Loss: ' + str(total_loss/(epoch+1)),refresh=True)

In [None]:
# EVALUATE MODEL
# Switch the network to evaluation mode
modelt.eval()
# Inference needs no gradients: disabling autograd avoids building a graph
# for the whole test set.
with torch.no_grad():
    ry_nnt = abs(modelt(Xr_test_torch).numpy())
# MSE of the absolute predictions against the scaled labels
re_nnt = mean_squared_error(y_true=y_test,y_pred=ry_nnt)
print(re_nnt)

# Map prediction and label back from the scaled range using the stored zloc
# extremes, then append the pixel-space boxes column-wise.
ry_ot = np.concatenate([ry_nnt*(zlocmax-zlocmin)+zlocmin,y_test.to_numpy().reshape(-1,1)*(zlocmax-zlocmin)+zlocmin,Bboxes.values],axis=1)
print(ry_ot.shape)

# CREATE AND JOIN OUTPUT DATAFRAMES
rodft = pd.DataFrame(Id.values,columns=['filename'])
tmpdf = pd.DataFrame(ry_ot,columns=['prediction (meters)','label (meters)','xmin','ymin','xmax','ymax'])

rodft = rodft.join(tmpdf)
rodft

In [None]:
# DISPLAY EXAMPLES
# Sample one row of the result table. The bound comes from the table itself,
# so the index is always valid whatever the size of the test split.
idx = np.random.randint(len(rodft))
file = rodft.iloc[idx,0]

# Every result row that belongs to the sampled file
odf_file = rodft[rodft['filename']==file]
print(odf_file)

#im = Image.open('images/training/image_2/'+file)
#im.show()
image_path = 'images/training/image_2/'+file.replace('.txt','.png')
im = cv2.imread(image_path)
if im is None:
    # cv2.imread reports an unreadable or missing file by returning None
    raise FileNotFoundError('Could not read image: '+image_path)
res = im.copy()
# Columns 1-2 hold prediction and label in meters, columns 3-6 the box corners
for pred_m, lab_m, x1, y1, x2, y2 in odf_file.iloc[:,1:7].to_numpy():
    pred = str(round(pred_m))
    lab = str(round(lab_m))
    top_left = (int(x1), int(y1))
    cv2.rectangle(res, top_left, (int(x2), int(y2)), cR, 1)
    # Annotate the box with "prediction|label"
    cv2.putText(res, pred+'|'+lab, top_left,cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, cR ,2)

# Show the annotated image until a key is pressed
cv2.imshow("Distance", res)
cv2.waitKey(0)
cv2.destroyAllWindows()