# System setup

In [None]:
# Import fundamental dependencies (which are installed by default or by cloning taxibot repo)
import os
import sys
import time
from pprint import pprint
import argparse

import userSettings
import setup

#%% System setup

# Load user settings
# NOTE: this rebinds the name `userSettings` from the imported module to the
# value returned by its main(); the module itself is no longer reachable under
# this name afterwards. The result is indexed like a dict below
# ('homePath', 'labels', 'pretrainedModelName').
userSettings = userSettings.main()

# installPackages is called with firstInstallation=False and returns `files`
# and `paths`. `paths` is indexed with 'home' and 'trainedModels' further down;
# `files` is overwritten by the checkpoint scan later in this notebook.
files, paths = setup.installPackages(userSettings['homePath'], userSettings['labels'], firstInstallation=False)
                                     
import cv2 as cv
import numpy as np
import cameraSetup as cameraSetup

# Intrinsics of the left and right camera. Every generate/test/calibrate flag
# is False here (presumably existing calibration results are reused - confirm
# in cameraSetup).
cameraSetup.getIntrinsicsLeftCamera(0, paths, singleLGenImages=False, singleLTestCombinations=False, singleLCalibration=False, debuggingMode=False)
cameraSetup.getIntrinsicsRightCamera(0, paths, singleRGenImages=False, singleRTestCombinations=False, singleRCalibration=False, debuggingMode=False)


# Stereo calibration result: Trns[0][0] is multiplied with the focal length
# further down to convert a disparity into a depth.
Trns = cameraSetup.calibrateStereoSetup(0, 0, paths, stereoGenImages=False, stereoTestCombinations=False, stereoCalibration=False, debuggingMode=False)
# Rectification maps (used with cv.remap), the matrix Q (used with
# cv.reprojectImageTo3D) and the rectified left camera matrix (its [0][0]
# entry is used in the depth formula).
leftMapX, leftMapY, rightMapX, rightMapY, Q, rectifiedCameraMatrixL = cameraSetup.getRectificationMap(0, 0, paths, newRectificationMapping=False, debuggingMode=False)

In [None]:
# TensorFlow and the TF Object Detection API are imported in their own cell,
# after the setup step (presumably because setup.installPackages makes them
# importable - confirm). The prints bracket the import so its duration is visible.
print('Importing modules for object detection')
import tensorflow as tf
import object_detection
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder
from object_detection.utils import config_util
print('Finished importing modules')
# Continue working from the project home directory
os.chdir(paths['home'])

# Determine latest checkpoint
# Checkpoint files carry their number after a dash in the part of the file name
# before the first dot (e.g. 'ckpt-3.index'). The model directory also contains
# files without such a number (pipeline.config, label_map.pbtxt, ...). Those
# are skipped rather than ending the scan: stopping at the first non-matching
# name would ignore every checkpoint listed after it.
path_model = paths['trainedModels']+'/'+userSettings['pretrainedModelName']
files = os.listdir(path_model)
last_checkpoint = 0
for file in files:
    try:
        checkpoint_number = int(file.split('.')[0].split('-')[1])
    except (IndexError, ValueError):
        # No '-<number>' part in the name: not a checkpoint file
        continue
    if checkpoint_number > last_checkpoint:
        last_checkpoint = checkpoint_number
last_checkpoint = 'ckpt-'+str(last_checkpoint)
print('Found latest checkpoint: '+last_checkpoint)

# Load pipeline config and build a detection model
# pipeline.config is read from the trained model directory
path_pipeline = path_model+'/pipeline.config'
configs = config_util.get_configs_from_pipeline_file(path_pipeline)
# Build the model described by the 'model' section, in inference mode (is_training=False)
detection_model = model_builder.build(model_config=configs['model'], is_training=False)

# Restore checkpoint
# last_checkpoint is the 'ckpt-<N>' name determined by the scan of the model
# directory. expect_partial() is called on the restore result (presumably to
# silence warnings about checkpoint values not needed for inference - confirm).
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(path_model, str(last_checkpoint))).expect_partial()

def detect_fn(image):
    """Run the module-level detection model on ``image``.

    The input is passed through the model's ``preprocess``, ``predict`` and
    ``postprocess`` steps in that order; the shapes returned by ``preprocess``
    are forwarded to both later steps.

    Args:
        image: input tensor for the detector (callers in this notebook pass a
            float32 tensor with a leading batch axis).

    Returns:
        The detections produced by ``detection_model.postprocess``.
    """
    preprocessed, true_shapes = detection_model.preprocess(image)
    raw_predictions = detection_model.predict(preprocessed, true_shapes)
    return detection_model.postprocess(raw_predictions, true_shapes)


# Category index built from the label map stored next to the trained model;
# it is handed to the visualisation helper when drawing detections.
path_labelmap = path_model+'/label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(path_labelmap)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import time


# Test detection

## Set detection parameters

In [None]:
# Collects one row per evaluated method:
# [algorithm, ground truth, computed depth, disparity, computation time, blockSize]
# Kept in its own cell so that re-running the parameter/evaluation cells appends
# to the same list instead of resetting it.
testResults = []

In [None]:
# Disparity search range; numDisparities is derived from the two bounds
# (80 here - the OpenCV stereo matchers require a multiple of 16).
minDisparity = 192
maxDisparity = 272
numDisparities = maxDisparity-minDisparity
# Matcher settings shared by the SGBM and BM sections below
blockSize = 5
uniquenessRatio = 15

# Ground-truth distance, kept as a string because it is concatenated into the
# test image file names ('..._<distance>cm.png') and stored in the result rows.
groundTruthDistance = str(40)

## Object detection

In [None]:
# Load and display the stereo test image pair for the selected ground-truth distance
pathFrameL = userSettings['homePath']+'/3_Evaluation/imagesPrinted/stereoAlgTestPrintedCamL_'+groundTruthDistance+'cm.png'
pathFrameR = userSettings['homePath']+'/3_Evaluation/imagesPrinted/stereoAlgTestPrintedCamR_'+groundTruthDistance+'cm.png'
frameL = cv.imread(pathFrameL)
frameR = cv.imread(pathFrameR)

# cv.imread does not raise for a missing or unreadable file, it returns None.
# Fail here with the offending path instead of with an opaque cvtColor error.
if frameL is None:
    raise FileNotFoundError('Could not read left test image: '+pathFrameL)
if frameR is None:
    raise FileNotFoundError('Could not read right test image: '+pathFrameR)

# Images are loaded as BGR; convert to RGB for matplotlib
f, axarr = plt.subplots(1,2, figsize=(15, 15))
axarr[0].imshow(cv.cvtColor(frameL, cv.COLOR_BGR2RGB))
axarr[1].imshow(cv.cvtColor(frameR, cv.COLOR_BGR2RGB))

# Box centres found in the left image, filled by the left detection cell and
# read by every depth computation below
bboxCentreCoords = {}

### Detect object in left image

In [None]:
# Rectify the left frame and run the object detector on it
prev_frame_time = time.time()
# borderMode is passed by keyword: the fifth positional parameter of cv.remap
# is the output array (dst), not the border mode.
Left_rectified = cv.remap(frameL, leftMapX, leftMapY, cv.INTER_LINEAR, borderMode=cv.BORDER_CONSTANT)
height, width, channels = Left_rectified.shape

image_np = np.array(Left_rectified)
# A leading batch axis is added before handing the image to the detector
input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
detections = detect_fn(input_tensor)

# Drop the batch axis and keep only the first num_detections entries of each output
num_detections = int(detections.pop('num_detections'))
detections = {key: value[0, :num_detections].numpy()
            for key, value in detections.items()}
detections['num_detections'] = num_detections
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
label_id_offset = 1
image_np_with_detections = image_np.copy()

# The visualisation helper returns the annotated image and the found boxes
# (aryFound); at most two boxes with a score of at least 0.8 are drawn.
image_np_with_detections, aryFound = viz_utils.visualize_boxes_and_labels_on_image_array(
            image_np_with_detections,
            detections['detection_boxes'],
            detections['detection_classes']+label_id_offset,
            detections['detection_scores'],
            category_index,
            use_normalized_coordinates=True,
            max_boxes_to_draw=2,
            min_score_thresh=.8,
            agnostic_mode=False)


# Always start from an empty list so that a run without detections cannot
# leave the list undefined or reuse coordinates from an earlier run.
coordsLeftImg = []

if aryFound != 0:

    keys = aryFound.keys()

    for key in keys:

        # NOTE(review): assumes aryFound[key] holds normalized
        # (xmin, xmax, ymin, ymax) - confirm against the project's viz_utils.
        xmin = aryFound[key][0]*width
        xmax = aryFound[key][1]*width
        ymin = aryFound[key][2]*height
        ymax = aryFound[key][3]*height

        # Location (centre of object)
        x_centre = xmin+0.5*(xmax-xmin)
        x_centre = int(x_centre)
        y_centre = ymin+0.5*(ymax-ymin)
        y_centre = int(y_centre)

        # Store bbox centre coordinates in left image to be used for depth
        bboxCentreCoords[key] = [x_centre, y_centre]

        # Store coordinates for disparity by object detection
        coordsLeftImg.append(x_centre)

        # Mark point
        text = ('x. u = {:.2f}pixels'.format(x_centre))
        cv.putText(image_np_with_detections, text, (x_centre, y_centre), cv.FONT_HERSHEY_PLAIN, 2, (0, 255, 255), 2, cv.LINE_AA)



new_frame_time = time.time()
timePassed = (new_frame_time-prev_frame_time)
print('Computation time: {:.4f} seconds'.format(timePassed))

plt.figure(figsize=(5, 5))
plt.imshow(cv.cvtColor(image_np_with_detections, cv.COLOR_BGR2RGB))
plt.show()

### Detect in right image

In [None]:
# Rectify the right frame and run the object detector on it
# borderMode is passed by keyword: the fifth positional parameter of cv.remap
# is the output array (dst), not the border mode.
Right_rectified = cv.remap(frameR, rightMapX, rightMapY, cv.INTER_LINEAR, borderMode=cv.BORDER_CONSTANT)
height, width, channels = Right_rectified.shape

# Detect object on right rectified camera stream
image_np = np.array(Right_rectified)
# A leading batch axis is added before handing the image to the detector
input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
detections = detect_fn(input_tensor)

# Drop the batch axis and keep only the first num_detections entries of each output
num_detections = int(detections.pop('num_detections'))
detections = {key: value[0, :num_detections].numpy()
        for key, value in detections.items()}
detections['num_detections'] = num_detections
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
label_id_offset = 1
image_np_with_detectionsR = image_np.copy()

# The visualisation helper returns the annotated image and the found boxes
# (aryFoundR); at most two boxes with a score of at least 0.8 are drawn.
image_np_with_detectionsR, aryFoundR = viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_detectionsR,
        detections['detection_boxes'],
        detections['detection_classes']+label_id_offset,
        detections['detection_scores'],
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=2,
        min_score_thresh=.8,
        agnostic_mode=False)


# Always start from an empty list so that a run without detections cannot
# leave the list undefined or reuse coordinates from an earlier run.
coordsRightImg = []

if aryFoundR != 0:

    keys = aryFoundR.keys()

    for key in keys:

        # NOTE(review): assumes aryFoundR[key] holds normalized
        # (xmin, xmax, ymin, ymax) - confirm against the project's viz_utils.
        xmin = aryFoundR[key][0]*width
        xmax = aryFoundR[key][1]*width
        ymin = aryFoundR[key][2]*height
        ymax = aryFoundR[key][3]*height

        # Location (centre of object) as seen from the right rectified camera
        x_centre = xmin+0.5*(xmax-xmin)
        x_centre = int(x_centre)
        y_centre = ymin+0.5*(ymax-ymin)
        y_centre = int(y_centre)

        # Store coordinates for disparity by object detection
        coordsRightImg.append(x_centre)

        # Mark point
        text = ('x. u = {:.2f}pixels'.format(x_centre))
        cv.putText(image_np_with_detectionsR, text, (x_centre, y_centre), cv.FONT_HERSHEY_PLAIN, 2, (0, 255, 255), 2, cv.LINE_AA)

# prev_frame_time is the timestamp taken at the start of the left detection
# cell, so this is the elapsed time since left detection began.
new_frame_time = time.time()
timePassed = (new_frame_time-prev_frame_time)
print('Computation time: {:.4f} seconds'.format(timePassed))

plt.figure(figsize=(5, 5))
plt.imshow(cv.cvtColor(image_np_with_detectionsR, cv.COLOR_BGR2RGB))
plt.axis('off')
plt.show()

### Object detection result

In [None]:
# x-coordinates of the box centres detected in the left image, in ascending order
print(sorted(coordsLeftImg))

In [None]:
# Check if all bounding box centre coordinates of the left image are correct
print('Coordinates to be checked for in all functions: '+str(bboxCentreCoords))

# Check disparity via difference between image coords
# A disparity needs at least one detection in each image; fail with a clear
# message instead of an IndexError when one of the lists is empty.
if not coordsLeftImg or not coordsRightImg:
    raise RuntimeError('No detection in the left and/or right image - cannot compute disparity from image coordinates')

if (len(coordsLeftImg) >= 2) and (len(coordsRightImg) >= 2):
    # Two engines in both images: pair them by ascending x-coordinate
    engineL = sorted(coordsLeftImg)[0] - sorted(coordsRightImg)[0]
    engineR = sorted(coordsLeftImg)[1] - sorted(coordsRightImg)[1]
    print('Disparity from img coords left engine: {:.2f}pixels'.format(engineL))
    print('Disparity from img coords right engine: {:.2f}pixels'.format(engineR))
    meanDisparity = 0.5*(engineL+engineR)
else:
    # Fewer than two detections in at least one image: use the first of each
    engine = coordsLeftImg[0] - coordsRightImg[0]
    #print('Disparity from img coords right engine: {:.2f}pixels'.format(engine))
    meanDisparity = engine

# Add to global results
# timePassed is the value left behind by the right-image detection cell
localResults = ['ObjectDetection', groundTruthDistance, '-', meanDisparity, timePassed, blockSize]
testResults.append(localResults.copy())

print('Coordinates of engine(s) in left image: {} pixels'.format(coordsLeftImg))
print('Coordinates of engine(s) in right image: {} pixels'.format(coordsRightImg))
print('Result: '+str(localResults))

## Compute depth

In [None]:
# Replace the rectified colour frames by grayscale versions for the stereo
# matchers below. Only frames that still have a channel axis are converted, so
# re-running this cell does not fail on images that are already gray.
if Left_rectified.ndim == 3:
    Left_rectified = cv.cvtColor(Left_rectified, cv.COLOR_BGR2GRAY)
if Right_rectified.ndim == 3:
    Right_rectified = cv.cvtColor(Right_rectified, cv.COLOR_BGR2GRAY)

### SGBM

In [None]:
def getAverageDisp(disparityMap, y, x, margin):
    """Return the mean disparity in a square window around pixel (x, y).

    The window spans rows ``y-margin`` to ``y+margin-1`` and columns
    ``x-margin`` to ``x+margin-1`` and is clipped to the bounds of the map, so
    a centre close to the border averages over the part of the window that
    lies inside the image.

    Args:
        disparityMap: 2-D NumPy array of disparities, indexed as [row, column].
        y: row index of the window centre.
        x: column index of the window centre.
        margin: half window size in pixels.

    Returns:
        The mean of all values inside the clipped window.

    Raises:
        ValueError: if the clipped window contains no pixels (e.g. margin is 0
            or the centre lies outside the map).
    """
    # Negative slice bounds would be interpreted relative to the end of the
    # array, so the lower bounds are clamped; slicing clips the upper bounds.
    ymin = max(y-margin, 0)
    ymax = max(y+margin, 0)
    xmin = max(x-margin, 0)
    xmax = max(x+margin, 0)
    img_piece = disparityMap[ymin:ymax, xmin:xmax]

    if img_piece.size == 0:
        raise ValueError('Averaging window around ({}, {}) with margin {} is empty'.format(x, y, margin))

    return np.mean(img_piece)

In [None]:
# WLS filter settings
sigma = 1.5
lmbda = 8000.0

# SGBM matcher for the left view, configured from the detection parameters
left_matcherSGBM = cv.StereoSGBM_create(
    minDisparity=minDisparity,
    numDisparities=numDisparities,
    blockSize=blockSize,
    uniquenessRatio=uniquenessRatio,
)

# Right-view matcher and WLS filter, both derived from the left matcher
right_matcherSGBM = cv.ximgproc.createRightMatcher(left_matcherSGBM)
wls_filterSGBM = cv.ximgproc.createDisparityWLSFilter(left_matcherSGBM)
wls_filterSGBM.setLambda(lmbda)
wls_filterSGBM.setSigmaColor(sigma)

# First (untimed) pass through matching and filtering; the timed cells below
# compute these maps again
left_dispSGBM = left_matcherSGBM.compute(Left_rectified, Right_rectified).astype(np.float32) / 16
right_dispSGBM = right_matcherSGBM.compute(Right_rectified, Left_rectified).astype(np.float32) / 16
filtered_dispSGBM = wls_filterSGBM.filter(
    left_dispSGBM,
    Left_rectified,
    disparity_map_right=right_dispSGBM,
)


In [None]:
# SGBM, unfiltered left disparity map: matching plus per-detection depth lookup
prev_frame_time = time.time()
# Second timestamp, read by the following point-cloud cell
leftdisptime = time.time()

left_dispSGBM = left_matcherSGBM.compute(Left_rectified, Right_rectified).astype(np.float32) / 16

# Look up the disparity at every detected box centre, convert it to a depth and
# average both quantities over all detections
meanDepth = meanDisp = 0
for key, (x_centre, y_centre) in bboxCentreCoords.items():
    disparity = left_dispSGBM[y_centre, x_centre]
    depth = -Trns[0][0] * rectifiedCameraMatrixL[0][0] / disparity
    print(f'Depth {depth:.2f} cm')
    meanDepth += depth
    meanDisp += disparity

meanDepth /= len(bboxCentreCoords)
meanDisp /= len(bboxCentreCoords)

print(f'Mean depth {meanDepth:.2f} cm')
print(f'Mean disparity {meanDisp:.2f} pixels')

new_frame_time = time.time()
timePassed = new_frame_time - prev_frame_time
print(f'Computation time: {timePassed:.4f} seconds')

# Result row: [algorithm, ground truth, computed depth, disparity, time, blockSize]
localResults = ['SGBM_LeftDispMap', groundTruthDistance, meanDepth, meanDisp, timePassed, blockSize]
testResults.append(list(localResults))

# Show the disparity map, stretched to 0..255 and colour-coded
dispForColor = left_dispSGBM.copy()
dispForColor = cv.normalize(src=dispForColor, dst=dispForColor, alpha=255, beta=0, norm_type=cv.NORM_MINMAX)
disp8 = np.uint8(dispForColor)
colored_disp = cv.applyColorMap(disp8, cv.COLORMAP_JET)
plt.figure(figsize=(5, 5))
plt.imshow(cv.cvtColor(colored_disp, cv.COLOR_BGR2RGB))
plt.show()

In [None]:
## SGBM, point cloud reprojected from the unfiltered left disparity map
prev_frame_time = time.time()

points_3DSGBM_left = cv.reprojectImageTo3D(left_dispSGBM, Q)

# Depth is the third component of the reprojected point at each box centre;
# the mean over all detections is reported
meanDepth = 0
for key, (x_centre, y_centre) in bboxCentreCoords.items():
    depth = points_3DSGBM_left[y_centre, x_centre, 2]
    print(f'Depth {depth:.2f} cm')
    meanDepth += depth

meanDepth /= len(bboxCentreCoords)
print(f'Mean depth {meanDepth:.2f} cm')

# Elapsed time is measured from leftdisptime, the timestamp taken at the start
# of the left-disparity cell, not from prev_frame_time above
new_frame_time = time.time()
timePassed = new_frame_time - leftdisptime
print(f'Computation time: {timePassed:.4f} seconds')

# Result row: [algorithm, ground truth, computed depth, disparity, time, blockSize]
localResults = ['SGBM_PCL_LeftDispMap', groundTruthDistance, meanDepth, '-', timePassed, blockSize]
testResults.append(list(localResults))

In [None]:
## SGBM, WLS-filtered disparity map: left/right matching, filtering and depth lookup
prev_frame_time = time.time()
# Second timestamp, read by the following point-cloud cell
FilteredDisptime = time.time()

left_dispSGBM = left_matcherSGBM.compute(Left_rectified, Right_rectified).astype(np.float32) / 16
right_dispSGBM = right_matcherSGBM.compute(Right_rectified, Left_rectified).astype(np.float32) / 16
filtered_dispSGBM = wls_filterSGBM.filter(
    left_dispSGBM,
    Left_rectified,
    disparity_map_right=right_dispSGBM,
)

# Look up the filtered disparity at every detected box centre, convert it to a
# depth and average both quantities over all detections
meanDepth = meanDisp = 0
for key, (x_centre, y_centre) in bboxCentreCoords.items():
    disparity = filtered_dispSGBM[y_centre, x_centre]
    depth = -Trns[0][0] * rectifiedCameraMatrixL[0][0] / disparity
    print(f'Depth {depth:.2f} cm')
    meanDepth += depth
    meanDisp += disparity

meanDepth /= len(bboxCentreCoords)
print(f'Mean depth {meanDepth:.2f} cm')

meanDisp /= len(bboxCentreCoords)
print(f'Mean disparity {meanDisp:.2f} pixels')

new_frame_time = time.time()
timePassed = new_frame_time - prev_frame_time
print(f'Computation time: {timePassed:.4f} seconds')

# Result row: [algorithm, ground truth, computed depth, disparity, time, blockSize]
localResults = ['SGBM_FilteredLeftDispMap', groundTruthDistance, meanDepth, meanDisp, timePassed, blockSize]
testResults.append(list(localResults))

# Show the filtered disparity map, stretched to 0..255 and colour-coded
dispForColor = filtered_dispSGBM.copy()
dispForColor = cv.normalize(src=dispForColor, dst=dispForColor, alpha=255, beta=0, norm_type=cv.NORM_MINMAX)
disp8 = np.uint8(dispForColor)
colored_disp = cv.applyColorMap(disp8, cv.COLORMAP_JET)
plt.figure(figsize=(5, 5))
plt.imshow(cv.cvtColor(colored_disp, cv.COLOR_BGR2RGB))
plt.show()

In [None]:
## SGBM, point cloud reprojected from the WLS-filtered disparity map
prev_frame_time = time.time()

points_3DSGBM_filtered = cv.reprojectImageTo3D(filtered_dispSGBM, Q)

# Depth is the third component of the reprojected point at each box centre;
# the mean over all detections is reported
meanDepth = 0
for key, (x_centre, y_centre) in bboxCentreCoords.items():
    depth = points_3DSGBM_filtered[y_centre, x_centre, 2]
    print(f'Depth {depth:.2f} cm')
    meanDepth += depth

meanDepth /= len(bboxCentreCoords)
print(f'Mean depth {meanDepth:.2f} cm')

# Elapsed time is measured from FilteredDisptime, the timestamp taken at the
# start of the filtered-disparity cell, not from prev_frame_time above
new_frame_time = time.time()
timePassed = new_frame_time - FilteredDisptime
print(f'Computation time: {timePassed:.4f} seconds')

# Result row: [algorithm, ground truth, computed depth, disparity, time, blockSize]
localResults = ['SGBM_PCL_FilteredLeftDispMap', groundTruthDistance, meanDepth, '-', timePassed, blockSize]
testResults.append(list(localResults))

### BM

In [None]:
## WLS filter settings
sigma = 1.5
lmbda = 8000.0

## Block matcher for the left view, configured from the detection parameters
left_matcherBM = cv.StereoBM_create(
    numDisparities=numDisparities,
    blockSize=blockSize,
)
left_matcherBM.setMinDisparity(minDisparity)
left_matcherBM.setUniquenessRatio(uniquenessRatio)

## Right-view matcher and WLS filter, both derived from the left matcher
right_matcherBM = cv.ximgproc.createRightMatcher(left_matcherBM)
wls_filterBM = cv.ximgproc.createDisparityWLSFilter(left_matcherBM)
wls_filterBM.setLambda(lmbda)
wls_filterBM.setSigmaColor(sigma)

In [None]:
## BM, unfiltered left disparity map: matching plus per-detection depth lookup
prev_frame_time = time.time()
# Second timestamp, read by the following point-cloud cell
leftdisptime = time.time()

left_dispBM = left_matcherBM.compute(Left_rectified, Right_rectified).astype(np.float32) / 16

# Look up the disparity at every detected box centre, convert it to a depth and
# average both quantities over all detections
meanDepth = meanDisp = 0
for key, (x_centre, y_centre) in bboxCentreCoords.items():
    disparity = left_dispBM[y_centre, x_centre]
    depth = -Trns[0][0] * rectifiedCameraMatrixL[0][0] / disparity
    print(f'Depth {depth:.2f} cm')
    meanDepth += depth
    meanDisp += disparity

meanDepth /= len(bboxCentreCoords)
print(f'Mean depth {meanDepth:.2f} cm')

meanDisp /= len(bboxCentreCoords)
print(f'Mean disp {meanDisp:.2f} pixels')

new_frame_time = time.time()
timePassed = new_frame_time - prev_frame_time
print(f'Computation time: {timePassed:.4f} seconds')

# Show the disparity map, stretched to 0..255 and colour-coded
dispForColor = left_dispBM.copy()
dispForColor = cv.normalize(src=dispForColor, dst=dispForColor, alpha=255, beta=0, norm_type=cv.NORM_MINMAX)
disp8 = np.uint8(dispForColor)
colored_disp = cv.applyColorMap(disp8, cv.COLORMAP_JET)
plt.figure(figsize=(5, 5))
plt.imshow(cv.cvtColor(colored_disp, cv.COLOR_BGR2RGB))
plt.show()

# Result row: [algorithm, ground truth, computed depth, disparity, time, blockSize]
localResults = ['BM_LeftDispMap', groundTruthDistance, meanDepth, meanDisp, timePassed, blockSize]
testResults.append(list(localResults))

In [None]:
## BM, point cloud reprojected from the unfiltered left disparity map
prev_frame_time = time.time()

points_3DBM_left = cv.reprojectImageTo3D(left_dispBM, Q)

# Depth is the third component of the reprojected point at each box centre;
# the mean over all detections is reported
meanDepth = 0
for key, (x_centre, y_centre) in bboxCentreCoords.items():
    depth = points_3DBM_left[y_centre, x_centre, 2]
    print(f'Depth {depth:.2f} cm')
    meanDepth += depth

meanDepth /= len(bboxCentreCoords)
print(f'Mean depth {meanDepth:.2f} cm')

# Elapsed time is measured from leftdisptime, the timestamp taken at the start
# of the left-disparity cell, not from prev_frame_time above
new_frame_time = time.time()
timePassed = new_frame_time - leftdisptime
print(f'Computation time: {timePassed:.4f} seconds')

# Result row: [algorithm, ground truth, computed depth, disparity, time, blockSize]
localResults = ['BM_PCL_LeftDispMap', groundTruthDistance, meanDepth, '-', timePassed, blockSize]
testResults.append(list(localResults))

In [None]:
## BM, WLS-filtered disparity map: left/right matching, filtering and depth lookup
prev_frame_time = time.time()
# Second timestamp, read by the following point-cloud cell
FilteredDisptime = time.time()

left_dispBM = left_matcherBM.compute(Left_rectified, Right_rectified).astype(np.float32) / 16
right_dispBM = right_matcherBM.compute(Right_rectified, Left_rectified).astype(np.float32) / 16
filtered_dispBM = wls_filterBM.filter(
    left_dispBM,
    Left_rectified,
    disparity_map_right=right_dispBM,
)


# Look up the filtered disparity at every detected box centre, convert it to a
# depth and average both quantities over all detections
meanDepth = meanDisp = 0
for key, (x_centre, y_centre) in bboxCentreCoords.items():
    disparity = filtered_dispBM[y_centre, x_centre]
    depth = -Trns[0][0] * rectifiedCameraMatrixL[0][0] / disparity
    print(f'Depth {depth:.2f} cm')
    print('Disparity: '+str(disparity))
    meanDepth += depth
    meanDisp += disparity

meanDepth /= len(bboxCentreCoords)
print(f'Mean depth {meanDepth:.2f} cm')

meanDisp /= len(bboxCentreCoords)
print(f'Mean disp {meanDisp:.2f} pixels')

new_frame_time = time.time()
timePassed = new_frame_time - prev_frame_time
print(f'Computation time: {timePassed:.4f} seconds')

# Show the filtered disparity map, stretched to 0..255 and colour-coded
dispForColor = filtered_dispBM.copy()
dispForColor = cv.normalize(src=dispForColor, dst=dispForColor, alpha=255, beta=0, norm_type=cv.NORM_MINMAX)
disp8 = np.uint8(dispForColor)
colored_disp = cv.applyColorMap(disp8, cv.COLORMAP_JET)
plt.figure(figsize=(5, 5))
plt.imshow(cv.cvtColor(colored_disp, cv.COLOR_BGR2RGB))
plt.show()

# Result row: [algorithm, ground truth, computed depth, disparity, time, blockSize]
localResults = ['BM_FilteredLeftDispMap', groundTruthDistance, meanDepth, meanDisp, timePassed, blockSize]
testResults.append(list(localResults))

In [None]:
## BM, point cloud reprojected from the WLS-filtered disparity map
prev_frame_time = time.time()

points_3DBM_filtered = cv.reprojectImageTo3D(filtered_dispBM, Q)

# Depth is the third component of the reprojected point at each box centre;
# the mean over all detections is reported
meanDepth = 0
for key, (x_centre, y_centre) in bboxCentreCoords.items():
    depth = points_3DBM_filtered[y_centre, x_centre, 2]
    print(f'Depth {depth:.2f} cm')
    meanDepth += depth

meanDepth /= len(bboxCentreCoords)
print(f'Mean depth {meanDepth:.2f} cm')

# Elapsed time is measured from FilteredDisptime, the timestamp taken at the
# start of the filtered-disparity cell, not from prev_frame_time above
new_frame_time = time.time()
timePassed = new_frame_time - FilteredDisptime
print(f'Computation time: {timePassed:.4f} seconds')

# Result row: [algorithm, ground truth, computed depth, disparity, time, blockSize]
localResults = ['BM_PCL_FilteredLeftDispMap', groundTruthDistance, meanDepth, '-', timePassed, blockSize]
testResults.append(list(localResults))

# Result 

In [None]:
# Pretty-print every result row collected so far
from pprint import pprint
pprint(testResults)

In [None]:
# Run this at the end after checking multiple parameters
import csv
# Column names matching the layout of the rows stored in testResults
header = ['Algorithm', 'Actual distance', 'Computed distance', 'Disparity', 'Computation time', 'blocksize']

# Output error vis to csv
# The export is disabled; uncomment to write the header followed by one line
# per result row. The file is opened in 'w' mode, so an existing file is overwritten.
# NOTE: when enabling it, also pass newline='' to open(), as the csv module
# documentation recommends for files handed to csv.writer.
#with open(userSettings['homePath']+'/3_Evaluation/stereoAlgoResults.csv', 'w', encoding='UTF8') as f:
#    writer = csv.writer(f)
#    writer.writerow(header)
#    for element in testResults:
#        writer.writerow(element)