In [1]:
import torch, os
import torch.nn as nn
import torchvision.transforms as T
from PIL import Image
import matplotlib.pyplot as plt
### Pick the compute device: the first CUDA GPU when one is available, the CPU otherwise,
### and report the choice together with the number of visible GPUs.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print('GPU')
else:
    device = torch.device("cpu")
    print('GPU not Detected - CPU Selected')
print(f"GPUs Count: {torch.cuda.device_count()}")

import cv2
import numpy as np
import open3d as o3d

GPU
GPUs Count: 1
Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
########################################### Select Image Pair

IMAGE_PAIR = "CONES"     ### Choices:     CONES     DOLLS     MOEBIUS     ROCKS     CLOTH

########################################################################################################

### Folder holding the stereo pairs; every pair is stored as "<stem>1.png" (left) / "<stem>2.png" (right).
STEREO_DIR = "./Stereo_3D_Reconstruction"

### Per-pair settings:
###   stem    - file name prefix of the pair
###   cluster - nb_neighbors passed to the statistical outlier removal of the point cloud
###   focal   - (focal_length_1, focal_length_2) placed on the diagonal of K
###   center  - (cx, cy) placed in the last column of K
###   nndr    - ratio-test threshold used when filtering descriptor matches
PAIR_SETTINGS = {
    "CONES":   dict(stem="cones",   cluster=20,  focal=(360.0, 360.0),   center=(225, 188), nndr=0.9),
    "DOLLS":   dict(stem="dolls",   cluster=300, focal=(1512.0, 1512.0), center=(695, 555), nndr=0.85),
    "MOEBIUS": dict(stem="moebius", cluster=100, focal=(1512.0, 1512.0), center=(695, 555), nndr=0.77),
    "ROCKS":   dict(stem="rocks",   cluster=30,  focal=(1512.0, 1512.0), center=(638, 555), nndr=0.9),
    "CLOTH":   dict(stem="cloth",   cluster=300, focal=(1050.0, 1050.0), center=(641, 555), nndr=0.9),
}

### Fail here with a clear message instead of a NameError further down the notebook.
if IMAGE_PAIR not in PAIR_SETTINGS:
    raise ValueError(f"Unknown IMAGE_PAIR {IMAGE_PAIR!r}; expected one of {sorted(PAIR_SETTINGS)}")

pair_settings = PAIR_SETTINGS[IMAGE_PAIR]
left_image_path = f"{STEREO_DIR}/{pair_settings['stem']}1.png"
right_image_path = f"{STEREO_DIR}/{pair_settings['stem']}2.png"

### cv2.imread returns None (it does not raise) when a file is missing or unreadable.
img_left = cv2.imread(left_image_path)
img_right = cv2.imread(right_image_path)
if img_left is None:
    raise FileNotFoundError(f"Could not read left image: {left_image_path}")
if img_right is None:
    raise FileNotFoundError(f"Could not read right image: {right_image_path}")

### RGB copy of the left image (cv2.imread delivers BGR); used later to colour the point cloud.
img_left_color = cv2.cvtColor(img_left, cv2.COLOR_BGR2RGB)

point_cloud_cluster = pair_settings["cluster"]
focal_length_1, focal_length_2 = pair_settings["focal"]
cx, cy = pair_settings["center"]
NNDR_THRESHOLD = pair_settings["nndr"]

########################################### Camera Intrinsic Matrix
K = np.array([[focal_length_1, 0, cx],
              [0, focal_length_2, cy],
              [0, 0, 1]])

In [3]:
########################################### Import ESPNet Model Architecture
### NOTE(review): the checkpoint below is loaded as a whole pickled object, so these classes are
### presumably imported so that unpickling can resolve them -- confirm before trimming the list.
from espnet_model import ESPNet, ESPNet_Encoder, InputProjectionA, DilatedParllelResidualBlockB, DownSamplerB, CDilated, C, CB, BR, CBR

### Load DEEP DETECT Model Trained for 3D Reconstruction
### The checkpoint is mapped onto `device` (not a hard-coded 'cuda') so the CPU fallback selected
### at the top of the notebook keeps working. No throw-away ESPNet() instance is built first: the
### loaded object replaces it anyway.
### NOTE(review): weights_only=False unpickles arbitrary Python objects -- only load trusted files.
model = torch.load("DEEP_DETECT_Best_Model_3DReconstruction.pth", weights_only=False, map_location=device)

### Inference only: move to the selected device and switch to evaluation mode.
model = model.to(device).eval()

### NOTE(review): nndr_threshold and my_threshold are not referenced by any cell shown here
### (matching uses NNDR_THRESHOLD from the image-pair cell); kept in case other code reads them.
nndr_threshold = 0.99
my_threshold = 1.0
model_threshold = 0.5   ### Model's Prediction Threshold (Tau)

In [4]:
### Native (height, width) of both images; the predicted masks are mapped back to these sizes.
(height_1, width_1), (height_2, width_2) = img_left.shape[:2], img_right.shape[:2]

### Resize both to 480x480 pixels
img1, img2 = (
    cv2.resize(frame, (480, 480), interpolation=cv2.INTER_CUBIC)
    for frame in (img_left, img_right)
)

### NOTE(review): the arrays come from cv2.imread (BGR order) and are wrapped without a colour
### conversion -- confirm this matches the channel order the model was trained with.
img_1, img_2 = Image.fromarray(img1), Image.fromarray(img2)

### ToTensor followed by unsqueeze(0) yields one batched tensor per image: [1, 3, 480, 480]
transform = T.Compose([T.ToTensor()])
input_tensor_1 = transform(img_1).unsqueeze(0).to(device)
input_tensor_2 = transform(img_2).unsqueeze(0).to(device)

### Forward passes without gradient tracking; sigmoid turns the logits into probabilities.
with torch.no_grad():
    pred_1 = torch.sigmoid(model(input_tensor_1))
    pred_2 = torch.sigmoid(model(input_tensor_2))

### Probability maps back at native resolution (cv2.resize takes the size as (width, height)) ...
mask_1 = cv2.resize(pred_1.cpu().squeeze().numpy(), (width_1, height_1), interpolation=cv2.INTER_CUBIC)
mask_2 = cv2.resize(pred_2.cpu().squeeze().numpy(), (width_2, height_2), interpolation=cv2.INTER_CUBIC)

### ... then binarised with the model threshold: 1 where probability > model_threshold, else 0.
mask_1 = np.greater(mask_1, model_threshold).astype(np.uint8)
mask_2 = np.greater(mask_2, model_threshold).astype(np.uint8)

def mask_to_keypoints(mask, size=3):
    """Create one cv2.KeyPoint for every element of `mask` that equals 1.

    Args:
        mask: 2-D array; entries equal to 1 mark keypoint locations.
        size: value passed as the size argument of each cv2.KeyPoint.

    Returns:
        List of cv2.KeyPoint whose x is the column index and y the row index
        of a marked pixel, in the order np.nonzero reports them.
    """
    rows, cols = np.nonzero(mask == 1)
    keypoints = []
    for col, row in zip(cols.tolist(), rows.tolist()):
        keypoints.append(cv2.KeyPoint(float(col), float(row), size))
    return keypoints

### Every positive mask pixel becomes a keypoint candidate.
kp1_list, kp2_list = mask_to_keypoints(mask_1), mask_to_keypoints(mask_2)

### SIFT acts purely as a descriptor here: it describes the supplied keypoints, it does not detect any.
sift = cv2.SIFT_create()
kp1, des1 = sift.compute(img_left, kp1_list)
kp2, des2 = sift.compute(img_right, kp2_list)

print(f"Image 1: {len(kp1)} keypoints \nImage 2: {len(kp2)} keypoints")

Image 1: 107834 keypoints 
Image 2: 106248 keypoints


In [5]:
########################################### Feature Matching (FLANN)
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=10)
search_params = dict(checks=100)

### k=2 matching needs descriptors on both sides, and at least two candidates in the second image.
if des1 is None or des2 is None or len(des1) == 0 or len(des2) < 2:
    raise ValueError("Not enough descriptors to run k=2 nearest-neighbour matching.")

flann = cv2.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(des1, des2, k=2)

### NNDR based Matching (Lowe's Ratio Test)
### Keep a match only when its best neighbour is clearly closer than the second best.
good_matches = []
for candidates in matches:
    if len(candidates) < 2:
        ### A query can come back with fewer than two neighbours; the ratio is undefined then.
        continue
    best, second_best = candidates
    if best.distance < NNDR_THRESHOLD * second_best.distance:
        good_matches.append(best)

### Matched pixel coordinates as (N, 2) arrays; reshape keeps that shape even when N == 0.
pts1 = np.array([kp1[match.queryIdx].pt for match in good_matches], dtype=np.float64).reshape(-1, 2)
pts2 = np.array([kp2[match.trainIdx].pt for match in good_matches], dtype=np.float64).reshape(-1, 2)
print(f"Quantity of Matches: {len(good_matches)}")

Quantity of Matches: 55820


In [6]:
########################################### Estimate Essential Matrix
### The estimator needs at least five correspondences; stop with a clear message otherwise.
if len(pts1) < 5 or len(pts1) != len(pts2):
    raise ValueError(f"Need >= 5 paired matches to estimate the essential matrix, got {len(pts1)} / {len(pts2)}.")

E, mask = cv2.findEssentialMat(pts1, pts2, K, method=cv2.RANSAC, prob=0.99999, threshold=0.5, maxIters=100000)

### findEssentialMat can fail (None) or return several 3x3 candidates stacked vertically;
### either case would break the mask handling / recoverPose below.
if E is None or mask is None:
    raise RuntimeError("cv2.findEssentialMat did not return an essential matrix for these matches.")
if E.shape[0] > 3:
    E = E[:3]  ### keep the first candidate

### Keep only the RANSAC inliers (non-zero mask entries).
### NOTE: pts1 / pts2 are overwritten, so re-run the matching cell before re-running this one.
inlier_mask = mask.ravel().astype(bool)
pts1 = pts1[inlier_mask]
pts2 = pts2[inlier_mask]

print(f"Good Matches after Outlier Rejection: {pts1.shape[0]}")

### Recovering Camera Pose
### Named results instead of `_`, which would shadow IPython's last-output variable.
num_pose_inliers, R, t, pose_mask = cv2.recoverPose(E, pts1, pts2, K)

### Projection matrices
### First camera sits at the origin; the second uses the recovered rotation and translation.
P1 = K @ np.hstack((np.eye(3), np.zeros((3, 1))))
P2 = K @ np.hstack((R, t))

### Triangulation
### Despite the "_h" suffix these are plain 2xN pixel coordinates (one column per match).
pts1_h = pts1.T
pts2_h = pts2.T

### Result is 4xN homogeneous; dividing by the last row gives Euclidean points, stored as (N, 3).
### A zero last coordinate produces non-finite entries in points_3d.
points_4d = cv2.triangulatePoints(P1, P2, pts1_h, pts2_h)
points_3d = points_4d[:3] / points_4d[3]
points_3d = points_3d.T

Good Matches after Outlier Rejection: 49979


### **Colored 3D Reconstruction**

In [7]:
### Function for Projecting Points
def project_points(points_3d, K):
    """Project 3-D points to 2-D with the 3x3 matrix K (perspective divide by the third row).

    Args:
        points_3d: array-like of shape (N, 3); a single point of shape (3,) is also accepted.
        K: array-like 3x3 matrix applied to every point.

    Returns:
        ndarray of shape (N, 2): the first two components of K @ p divided by the third.
        Points whose third component is zero give non-finite values (no exception raised).
    """
    points = np.asarray(points_3d, dtype=float).reshape(-1, 3)
    camera = np.asarray(K, dtype=float)
    # K acts on the 3-vector directly, so no homogeneous fourth coordinate is required.
    proj = (camera @ points.T).T
    # Zero depth is reported through inf/nan in the result; silence the numpy warnings only.
    with np.errstate(divide="ignore", invalid="ignore"):
        return proj[:, :2] / proj[:, 2:3]

In [8]:
h, w = img_left_color.shape[:2]
projected_pts = project_points(points_3d, K)

### Nearest pixel of every projected point (np.rint rounds halves to even, like Python's round()).
### The comparisons are False for NaN / inf, so non-finite projections are dropped here
### instead of raising during an int() conversion.
with np.errstate(invalid="ignore"):
    pixel_u = np.rint(projected_pts[:, 0])
    pixel_v = np.rint(projected_pts[:, 1])
    in_view = (pixel_u >= 0) & (pixel_u < w) & (pixel_v >= 0) & (pixel_v < h)

if not in_view.any():
    raise ValueError("No triangulated point projects inside the left image; nothing to colour.")

### Keep the points that land inside the image and look up their colour, normalised to [0, 1].
valid_points = np.asarray(points_3d)[in_view]
valid_colors = img_left_color[pixel_v[in_view].astype(np.intp), pixel_u[in_view].astype(np.intp)] / 255.0

pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(valid_points)
pcd.colors = o3d.utility.Vector3dVector(valid_colors)

### Remove Outliers
pcd, _ = pcd.remove_statistical_outlier(nb_neighbors=point_cloud_cluster, std_ratio=0.5)

### Front View
o3d.visualization.draw_geometries([pcd], zoom=0.1, front=[0.0, 0.0, -1.0], lookat=[0.0, 0.0, 0.0], up=[0.0, -1.0, 0.0])

### Create Visualizer
vis = o3d.visualization.Visualizer()
if not vis.create_window(visible=True, width=1280, height=720):
    raise RuntimeError("Open3D could not create a rendering window (is a display available?).")

try:
    vis.add_geometry(pcd)

    # Access Camera and Set Camera Parameters (same view as the interactive window above)
    ctr = vis.get_view_control()
    ctr.set_zoom(0.1)
    ctr.set_front([0.0, 0.0, -1.0])
    ctr.set_lookat([0.0, 0.0, 0.0])
    ctr.set_up([0.0, -1.0, 0.0])
    vis.poll_events()
    vis.update_renderer()

    ### Save 2D Snapshot
    vis.capture_screen_image("point_cloud_DEEP_DETECT.png")
finally:
    vis.destroy_window() # Cleanup, even when one of the steps above raises