In [1]:
%matplotlib inline
import numpy as np
import cv2
from velocity import VeloEval
import copy
import math
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
from sklearn import preprocessing, datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import math
import os


In [2]:
dataset_path = 'benchmark_velocity_train/clips/'
folder_path = os.listdir(dataset_path)

annotations = [os.path.join(dataset_path, x, 'annotation.json') for x in folder_path]
annotations.remove('benchmark_velocity_train/clips/.DS_Store/annotation.json')
annotations.sort() #sort it so that we can read images in order
gt = VeloEval.load_annotation(annotations)

images_training_paths = []
for x in folder_path:
    image_path = "benchmark_velocity_train/clips/"+x+"/imgs/040.jpg"
    images_training_paths.append(image_path)
images_training_paths.remove('benchmark_velocity_train/clips/.DS_Store/imgs/040.jpg')
images_training_paths.sort()
len(images_training_paths)

Finished loading 1074 annotations.


1074

In [3]:
dataset_path_t = 'benchmark_velocity_test/clips/'
folder_path_t = os.listdir(dataset_path_t)

annotations_t = [os.path.join('.','gt.json')]
#annotations_t.remove('benchmark_velocity_test/clips/.DS_Store/annotation.json')
#annotations_t.sort() #sort it so that we can read images in order
gt_t = VeloEval.load_json_file(annotations_t)[0]
len(gt_t)

images_testing_paths = []
for x in folder_path_t:
    image_path = "benchmark_velocity_test/clips/"+x+"/imgs/040.jpg"
    images_testing_paths.append(image_path)
images_testing_paths.remove('benchmark_velocity_test/clips/.DS_Store/imgs/040.jpg')
images_testing_paths.sort()
len(images_testing_paths)

269

In [4]:
def get_v(flag, gt):
    '''This function takes a single gt and return corresponding values.
    The reason why this function exists is that training and testing json files differ in format'''
    if flag == 0:
        #testing data
        y1 = gt['bbox']['top']
        x2 = gt['bbox']['right']
        x1 = gt['bbox']['left']
        y2 = gt['bbox']['bottom']
        z0 = gt['position'][0]
        x0 = gt['position'][1]
    else:
        #training
        y1 = gt['bbox'][0][0]
        x1 = gt['bbox'][0][1]
        y2 = gt['bbox'][0][2]
        x2 = gt['bbox'][0][3]
        z0 = gt['position'][0]
        x0 = gt['position'][1]
    return (x1,x2,y1,y2,z0,x0)
        
        

In [5]:
def predict_distance_6(gt,flag = 1):
    '''Input is a image and the ground truth of this image that includes the position and bbox of cars.
    There could be more than one cars. The cdf of training data and corresponding weights bins
    Output is the difference between the ground truth position and the predicted position'''
    p = 670 #this is a value tested among a range of values
    good_z0 = 0
    good_x0 = 0
    
    diff_list = []
    diff_x0_list = []
    fx = 714.1526

    for i in range(len(gt)):
        if flag == 0: #testing data
            x1,x2,y1,y2,z0,x0 = get_v(0,gt[i])
        else:#training data
            x1,x2,y1,y2,z0,x0 = get_v(1,gt[i])


        pixel_w = x2 - x1
        pixel_h = y2 - y1
        w = -0.030522707866872622 * y1 + 0.019645624118997243 * pixel_w -0.026543674714586008* pixel_h + -1.3394041753397261e-05 * pixel_w * pixel_h+ 12.566680966429356
        '''(-0.030522707866872622,
             0.019645624118997243,
             -0.026543674714586008,
             -1.3394041753397261e-05,
             12.566680966429356)'''

        pred_z0 = fx * w / pixel_w
        pred_x0 = (x1 - p) * pred_z0/fx
        diff_x0 = x0 - pred_x0
        diff_z0 = z0-pred_z0

        if abs(diff_z0)/z0 < 0.10:
            good_z0 +=1
        if abs(diff_x0)/x0 < 0.10:
            good_x0 +=1



        diff_x0_list.append(abs(diff_x0))
        diff_list.append(abs(diff_z0))


    return (diff_list,diff_x0_list,good_z0, good_x0)
       

## Performance training set

In [6]:
diff = []
diff_x0 = []
count_z0 = 0
count_x0 = 0
for i in range(len(images_training_paths)):
    gt_temp = gt[i]
    cars_info = predict_distance_6(gt_temp,flag=1)

    diff.extend(cars_info[0])
    diff_x0.extend(cars_info[1])
    count_z0 += cars_info[2]
    count_x0 += cars_info[3]

print("EXPR {0}/10 Train_set: AVG Error Z0 {1:.2f}, Distance_within_10%_diff_rate: {2:.2f}%, AVG Error X0 {3:.2f}, X0_within_10%_rate:{4:.2f}%".format(2,np.average(diff),100 * count_z0/len(diff),np.average(diff_x0),100 * count_x0/len(diff_x0)))



EXPR 2/10 Train_set: AVG Error Z0 2.46, Distance_within_10%_diff_rate: 75.94%, AVG Error X0 0.48, X0_within_10%_rate:74.06%


## Performance on training set

In [7]:
diff = []
diff_x0 = []
count_z0 = 0
count_x0 = 0
for i in range(len(images_testing_paths)):
    gt_temp = gt_t[i]
    cars_info = predict_distance_6(gt_temp,flag=0)
    diff.extend(cars_info[0])
    diff_x0.extend(cars_info[1])
    count_z0 += cars_info[2]
    count_x0 += cars_info[3]

print("EXPR {0}/10 Test_set: AVG Error Z0 {1:.2f}, Distance_within_10%_rate: {2:.2f}%, AVG Error X0 {3:.2f}, X0_within_10%_rate:{4:.2f}%".format(2,np.average(diff),100 * count_z0/len(diff),np.average(diff_x0),100 * count_x0/len(diff_x0)))



EXPR 2/10 Test_set: AVG Error Z0 2.55, Distance_within_10%_rate: 75.20%, AVG Error X0 0.51, X0_within_10%_rate:74.13%


## Below are the functions we will submit, they can be found in t1_position.py file

In [9]:
def get_v_submission(flag, gt):
    '''This function takes a single gt and return corresponding values.
    The reason why this function exists is that training and testing json files differ in format'''
    if flag == 0:
        #testing data
        y1 = gt['bbox']['top']
        x2 = gt['bbox']['right']
        x1 = gt['bbox']['left']
        y2 = gt['bbox']['bottom']
        
    else:
        #training
        y1 = gt['bbox'][0][0]
        x1 = gt['bbox'][0][1]
        y2 = gt['bbox'][0][2]
        x2 = gt['bbox'][0][3]
        
    return (x1,x2,y1,y2)

In [10]:
def predict_distance(gt,p = 670,flag = 1):
    '''Input is a list of ground truth of an image that includes the bbox of cars detected.
    There could be more than one cars. 
    Output is the prediction of positions for all cars detected.
    Performance:
    This prediction has average error around 2.46-2.55 meters on position[0]:distance.
    This prediction has average error around 0.48-0.51 meters on position[1]: x0
        '''

    fx = 714.1526
    positions = []

    for i in range(len(gt)):
        if flag == 0: #testing data
            x1,x2,y1,y2= get_v_submission(0,gt[i])
        else:#training data
            x1,x2,y1,y2 = get_v_submission(1,gt[i])


        pixel_w = x2 - x1
        pixel_h = y2 - y1

        w = -0.030522707866872622 * y1 + 0.019645624118997243 * pixel_w -0.026543674714586008* pixel_h + -1.3394041753397261e-05 * pixel_w * pixel_h+ 12.566680966429356
        #(-0.030522707866872622,
             #0.019645624118997243,
             #-0.026543674714586008,
             #-1.3394041753397261e-05,
             #12.566680966429356) 
             #This is the result of a linear regression model'''

        pred_z0 = fx * w / pixel_w
        pred_x0 = (x1 - p) * pred_z0/fx

        positions.append([pred_z0,pred_x0])


    return (positions)
       
        
        

In [11]:
#t1_position.predict_distance(gt,p = 670,flag = 1)
diff = []
diff_x0 = []
count_z0 = 0
count_x0 = 0
for i in range(len(images_training_paths)):
    gt_temp = gt[i]
    gt_positions = []
    for gt_p in gt_temp:
        gt_positions.append(gt_p['position'])
    
    pred_positions = predict_distance(gt_temp)
    print(f"GT: {gt_positions}, Pred: {pred_positions}")
    
    
#print("EXPR {0}/10 Train_set: AVG Error Z0 {1:.2f}, Distance_within_10%_diff_rate: {2:.2f}%, AVG Error X0 {3:.2f}, X0_within_10%_rate:{4:.2f}%".format(2,np.average(diff),100 * count_z0/len(diff),np.average(diff_x0),100 * count_x0/len(diff_x0)))



GT: [[21.30970213, 2.7243980956]], Pred: [[20.127436829659576, 3.231648234784138]]
GT: [[36.67238106, -0.731694659]], Pred: [[38.58832491257313, -1.0479521644444825]]
GT: [[36.0571946939, -0.8063057612]], Pred: [[40.935833863198354, -1.249387407591628]]
GT: [[5.4738858347, 2.6635025732]], Pred: [[6.725094959278355, 2.3953972877930942]]
GT: [[53.49782052, -2.861662541]], Pred: [[48.868000694123815, -3.2832345059614676]]
GT: [[52.075702877, -1.6121741087]], Pred: [[47.99056292762573, -1.9363729300865615]]
GT: [[33.9717392151, -3.2567796834]], Pred: [[29.199727390958458, -3.345668631724989]]
GT: [[21.1030705572, -3.6098391469], [26.470583249, -0.3631040734]], Pred: [[19.417517586423678, -3.6537358022995945], [26.035222950268356, -0.35375373013665545]]
GT: [[40.6098923529, -4.0093172889]], Pred: [[42.42805662936841, -4.474004830081862]]
GT: [[42.7824521349, -2.3771608397]], Pred: [[44.501070706124224, -3.0194627595087686]]
GT: [[34.159769045, -0.2996495368]], Pred: [[36.87165367852275, -0.