In [1]:
import os

import cv2
import numpy as np
import pyrealsense2 as rs
import torch
import torch.nn as nn
import torchvision.transforms as transforms

from StereoNet_single import StereoNet

In [2]:
def getDisparityMap(left, right, algo = "bm", *, max_disp = 128, wsize = None, lmbda = 8000.0, sigma = 1.5):
    """Compute a raw and a WLS-filtered disparity map for a stereo pair with OpenCV.

    A left matcher (StereoBM or StereoSGBM) and its companion right matcher are
    run on the pair, then the left disparity is refined by the ximgproc
    disparity WLS filter, using ``left`` as the guide image and the right
    disparity for the left-right consistency check.

    Parameters
    ----------
    left, right : array
        Left / right views. The notebook passes the single-channel RealSense IR
        frames, so no grayscale conversion is performed here.
    algo : str
        Stereo matching method: ``"bm"`` or ``"sgbm"``.
    max_disp : int, keyword-only
        Number of disparities searched. OpenCV requires a positive multiple of 16.
    wsize : int or None, keyword-only
        Matching block size. ``None`` selects 17 for BM and 3 for SGBM.
    lmbda, sigma : float, keyword-only
        ``setLambda`` / ``setSigmaColor`` values of the WLS filter.

    Returns
    -------
    (raw_disparity, filtered_disparity)
        The left matcher output and the WLS-filtered result. Per the OpenCV
        docs both are 16-bit fixed-point maps (true disparity * 16).

    Raises
    ------
    ValueError
        If ``algo`` is neither ``"bm"`` nor ``"sgbm"``.
    """
    # Validate first so a typo fails with a clear message instead of an
    # AttributeError on an unset filter further down.
    if algo not in ("bm", "sgbm"):
        raise ValueError('algo must be "bm" or "sgbm", got %r' % (algo,))

    if wsize is None:
        wsize = 3 if algo == "sgbm" else 17  # default window sizes on full-sized views

    # TODO: optional half-resolution matching (as in OpenCV's disparity_filtering
    # sample) is not implemented; matching always runs on the full-sized views.

    if algo == "bm":
        left_matcher = cv2.StereoBM_create(max_disp, wsize)
    else:
        # Keyword arguments are required here: the first positional parameter of
        # StereoSGBM_create is minDisparity, not numDisparities.
        left_matcher = cv2.StereoSGBM_create(minDisparity=0, numDisparities=max_disp, blockSize=wsize)
        left_matcher.setP1(24*wsize*wsize)
        left_matcher.setP2(96*wsize*wsize)
        left_matcher.setPreFilterCap(63)
        left_matcher.setMode(cv2.StereoSGBM_MODE_SGBM_3WAY)

    # The filter and the right matcher are both derived from the configured left matcher.
    wls_filter = cv2.ximgproc.createDisparityWLSFilter(left_matcher)
    right_matcher = cv2.ximgproc.createRightMatcher(left_matcher)

    ## Matching (use the function arguments, not notebook-level globals)
    left_disp = left_matcher.compute(left, right)
    right_disp = right_matcher.compute(right, left)

    ## Filtering
    wls_filter.setLambda(lmbda)
    wls_filter.setSigmaColor(sigma)
    filtered_disp = wls_filter.filter(left_disp, left, disparity_map_right=right_disp)

    return left_disp, filtered_disp

In [3]:
# Camera parameters used by the disparity -> depth conversion below.
focal_length = 643.33 #942.8       # lens focal length (presumably in pixels; 942.8 was an earlier candidate - TODO confirm)
baseline = 55   #49.75  distance in mm between the two cameras
units = 0.512     # depth units, adjusted for the output to fit in one byte

def convertDisparityMapToDepthMap(disparityMap):
    """Convert a disparity map into an 8-bit depth map.

    Depth is ``(focal_length * baseline) / (units * disparity)`` for every
    pixel with a strictly positive disparity; all other pixels stay 0.

    Parameters
    ----------
    disparityMap : array-like
        Disparity values of any shape.
        NOTE(review): OpenCV's StereoBM/StereoSGBM return fixed-point
        disparities (true value * 16); nothing here divides by 16, so callers
        passing those maps unscaled get proportionally smaller depths - confirm
        this is intended.

    Returns
    -------
    depth : numpy.ndarray of uint8, same shape as the input
        Depth values, saturated at 255 so that far pixels do not wrap around
        when stored in one byte.
    valid_pixels : numpy.ndarray of bool, same shape as the input
        True where the disparity was > 0.
    """
    disparityMap = np.asarray(disparityMap)
    valid_pixels = disparityMap > 0
    depth = np.zeros(disparityMap.shape, dtype=np.uint8)

    # Compute in float64, then saturate explicitly: assigning out-of-range
    # floats straight into a uint8 array wraps around instead of clamping.
    valid_disparity = disparityMap[valid_pixels].astype(np.float64)
    depth_values = (focal_length * baseline) / (units * valid_disparity)
    depth[valid_pixels] = np.clip(depth_values, 0, 255).astype(np.uint8)

    return depth, valid_pixels

In [4]:
# Build the StereoNet network (k=3, r=3, maxdisp=192), wrap it in
# nn.DataParallel and put it in evaluation mode.
# NOTE(review): no checkpoint is loaded in this cell; unless weights are
# restored elsewhere the network runs with its initial parameters - confirm.
stereoNetModel = nn.DataParallel(StereoNet(k=3, r=3, maxdisp=192))
stereoNetModel.eval();

def getStereoNet(left, right):
    """Run the notebook-level ``stereoNetModel`` on a stereo pair.

    Each single-channel view is replicated into three channels, converted to a
    float tensor (normalisation uses mean 0 / std 1, i.e. leaves values as they
    are), batched and fed through the model without gradient tracking.

    NOTE(review): torchvision's ToTensor only rescales uint8 input; these float
    arrays presumably reach the model in their original intensity range -
    confirm that matches how the network was trained.

    Parameters
    ----------
    left, right : 2-D arrays
        Left / right views (the notebook passes the 480x640 IR frames).

    Returns
    -------
    (sn_depth_map, sn_disparity_refined)
        The uint8 depth map produced by ``convertDisparityMapToDepthMap`` from
        the second model output, and that second output itself as a NumPy array.
    """
    rows = left.shape[0] # 480
    cols = right.shape[1] # 640

    def _as_three_channel(gray):
        # Copy the single-channel view into every slot of a (rows, cols, 3) float array.
        stacked = np.zeros((rows, cols, 3))
        for channel in range(3):
            stacked[:, :, channel] = gray
        return stacked.astype(float)

    norm_params = {'mean': [0.0, 0.0, 0.0], 'std': [1.0, 1.0, 1.0]}
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(**norm_params)
    ])

    # Add the batch dimension the model is called with.
    batch_left = preprocess(_as_three_channel(left)).float().unsqueeze(0)
    batch_right = preprocess(_as_three_channel(right)).float().unsqueeze(0)

    with torch.no_grad():
        outputs = stereoNetModel(batch_left, batch_right)

    # First output is extracted but not used further; the second is what gets returned.
    sn_disparity = outputs[0].squeeze(0).cpu().numpy()
    sn_disparity_refined = outputs[1].squeeze(0).cpu().numpy()

    # convert to depth map (the validity mask is not needed by callers)
    sn_depth_map, _ = convertDisparityMapToDepthMap(sn_disparity_refined)

    return sn_depth_map, sn_disparity_refined

In [None]:
# Stream depth, color and both IR views from the RealSense camera, compute
# StereoBM and StereoNet depth estimates for every frame set, and display them.
# Keys (with an OpenCV window focused): 'q' or Esc quits, 't' saves a snapshot.

def _colorize_bm_depth(depth_map, valid_pixels):
    """Histogram-equalise an 8-bit depth map, apply the JET colormap and black out invalid pixels."""
    colored = cv2.applyColorMap(cv2.equalizeHist(depth_map), cv2.COLORMAP_JET)
    colored[~valid_pixels] = 0
    return colored

# Configure depth and color streams
pipeline = rs.pipeline()
config = rs.config()

res_x = 640
res_y = 480

config.enable_stream(rs.stream.depth, res_x, res_y, rs.format.z16, 30)
config.enable_stream(rs.stream.color, res_x, res_y, rs.format.bgr8, 30)

# Infrared imagers: index 1 is used as the left view, index 2 as the right view
config.enable_stream(rs.stream.infrared, 1, res_x, res_y, rs.format.y8, 30)
config.enable_stream(rs.stream.infrared, 2, res_x, res_y, rs.format.y8, 30)

# Start streaming
pipeline_profile = pipeline.start(config)

# Everything after a successful start runs inside the try block so the pipeline
# is always stopped, even if sensor or filter configuration fails.
try:
    ## To set laser on/off or set laser power
    device = pipeline_profile.get_device()
    depth_sensor = device.query_sensors()[0]
    laser_pwr = depth_sensor.get_option(rs.option.laser_power)
    print("laser power = ", laser_pwr)
    laser_range = depth_sensor.get_option_range(rs.option.laser_power)
    print("laser power range = " , laser_range.min , "~", laser_range.max)
    depth_sensor.set_option(rs.option.laser_power, 0)  # laser power set to 0

    ## RealSense post-processing filters for the depth stream
    spatial = rs.spatial_filter()
    spatial.set_option(rs.option.filter_magnitude, 3)
    spatial.set_option(rs.option.filter_smooth_alpha, 0.5)
    spatial.set_option(rs.option.filter_smooth_delta, 20)
    spatial.set_option(rs.option.holes_fill, 0)
    temporal = rs.temporal_filter()
    hole_filling = rs.hole_filling_filter()
    hole_filling.set_option(rs.option.holes_fill, 1)

    # Snapshot output directory; created up front because cv2.imwrite does not
    # create missing directories and only reports failure via its return value.
    directory = r'./outputs/'
    os.makedirs(directory, exist_ok=True)
    img_count = 1

    # When False the loop only computes the maps (no windows, no key handling).
    is_visual_on = True
    # Extra gain applied to the BM disparity maps saved in snapshots.
    intensity_scale_factor = 6

    while True:
        # Wait for a coherent set of frames: depth, color and both IR views
        frames = pipeline.wait_for_frames()
        depth_frame = frames.get_depth_frame()
        color_frame = frames.get_color_frame()
        ir1_frame = frames.get_infrared_frame(1) # Left IR camera
        ir2_frame = frames.get_infrared_frame(2) # Right IR camera
        if not depth_frame or not color_frame or not ir1_frame or not ir2_frame:
            continue

        # Convert images to numpy arrays
        depth_image = np.asanyarray(depth_frame.get_data()) # uint16
        color_image = np.asanyarray(color_frame.get_data()) # uint8
        ir1_image = np.asanyarray(ir1_frame.get_data())
        ir2_image = np.asanyarray(ir2_frame.get_data())

        left = ir1_image
        right = ir2_image

        rs_depth = depth_image # (image of 16-bit per pixel)
        rs_depth_scaled = cv2.convertScaleAbs(depth_image, alpha=0.03)  # (image converted to 8-bit per pixel)

        ## Apply filters to real-sense depth
        filtered_depth_frame = spatial.process(depth_frame)
        filtered_depth_frame = temporal.process(filtered_depth_frame)
        filtered_depth_frame = hole_filling.process(filtered_depth_frame)

        filtered_rs_depth = np.asanyarray(filtered_depth_frame.get_data()) # uint16
        filtered_rs_depth_scaled = cv2.convertScaleAbs(filtered_rs_depth, alpha=0.03)  # (image converted to 8-bit per pixel)

        ## StereoBM Disparity and Depth Maps
        raw_disparity, filtered_disparity = getDisparityMap(left, right, "bm")

        raw_depth_map, raw_valid_pixels = convertDisparityMapToDepthMap(raw_disparity)
        filtered_depth_map, filtered_valid_pixels = convertDisparityMapToDepthMap(filtered_disparity)

        ## Use StereoNet to estimate depth
        sn_depth_map, sn_disparity = getStereoNet(left, right)

        #### VISUALISATION ####
        if not is_visual_on:
            continue

        ## Visualize IR frames
        ir_images = np.hstack((ir1_image, ir2_image))
        cv2.namedWindow('IRSense', cv2.WINDOW_AUTOSIZE)
        cv2.imshow('IRSense', ir_images)

        ## Visualize RealSense depth: raw (left) and filtered + equalised (right)
        depth_colormap = cv2.applyColorMap(rs_depth_scaled, cv2.COLORMAP_JET)
        filtered_depth_colormap = cv2.applyColorMap(cv2.equalizeHist(filtered_rs_depth_scaled), cv2.COLORMAP_JET)
        both_depths2 = np.hstack((depth_colormap, filtered_depth_colormap))
        cv2.namedWindow('RS_DepthMap', cv2.WINDOW_AUTOSIZE)
        cv2.imshow('RS_DepthMap', both_depths2)

        ## Visualize StereoNet disparity (left) and depth (right)
        # Clip before the uint8 cast so large disparities saturate instead of wrapping.
        sn_disparity_vis = np.clip(sn_disparity * 2, 0, 255).astype(np.uint8)
        o1 = cv2.applyColorMap(sn_disparity_vis, cv2.COLORMAP_JET)
        o2 = cv2.applyColorMap(sn_depth_map, cv2.COLORMAP_JET)
        stn_images = np.hstack((o1, o2))
        cv2.namedWindow('StereoNet', cv2.WINDOW_AUTOSIZE)
        cv2.imshow('StereoNet', stn_images)

        # Mask to the low byte so every key comparison below behaves the same
        # regardless of platform-specific high bits in the key code.
        key = cv2.waitKey(1) & 0xFF
        # Press esc or 'q' to close the image windows and stop
        if key == ord('q') or key == 27:
            break
        # Press 't' to save a snapshot of the current frame set
        if key == ord('t'):
            # The StereoBM images are not displayed, so build them only when saving.
            # The gain is folded into alpha to avoid overflowing the 16-bit disparity values.
            raw_disparity_scaled = cv2.convertScaleAbs(raw_disparity, alpha=0.03 * intensity_scale_factor)
            filtered_disparity_scaled = cv2.convertScaleAbs(filtered_disparity, alpha=0.03 * intensity_scale_factor)
            bm_depth_colormap = _colorize_bm_depth(raw_depth_map, raw_valid_pixels)
            bm_filtered_depth_colormap = _colorize_bm_depth(filtered_depth_map, filtered_valid_pixels)

            snapshot = {
                '_color_image.jpg': color_image,
                '_left.jpg': left,
                '_right.jpg': right,
                '_rs_depth.jpg': depth_colormap,
                '_rs_filtered_depth.jpg': filtered_depth_colormap,
                '_bm_depth.jpg': bm_depth_colormap,
                '_bm_filtered_depth.jpg': bm_filtered_depth_colormap,
                '_bm_disparity.jpg': raw_disparity_scaled,
                '_bm_filtered_disparity.jpg': filtered_disparity_scaled,
            }
            for suffix, image in snapshot.items():
                path = directory + str(img_count) + suffix
                if not cv2.imwrite(path, image):
                    print("failed to write", path)
            img_count = img_count+1

finally:
    # Stop streaming, then close any OpenCV windows no matter how the loop ended
    try:
        pipeline.stop()
    finally:
        cv2.destroyAllWindows()

laser power =  150.0
laser power range =  0.0 ~ 360.0
