In [2]:
import open3d as o3d
import pyrealsense2 as rs


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:

# === pyrealsense2 === #

pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)

color_path = 'V00P00A00C00_rgb.avi'
depth_path = 'V00P00A00C00_depth.avi'
colorwriter = cv2.VideoWriter(color_path, cv2.VideoWriter_fourcc(*'XVID'), 30, (640,480), 1)
depthwriter = cv2.VideoWriter(depth_path, cv2.VideoWriter_fourcc(*'XVID'), 30, (640,480), 1)

pipeline.start(config)
try:
    while True:     
        
        frames = pipeline.wait_for_frames()        
        depth_frame = frames.get_depth_frame()
        color_frame = frames.get_color_frame()
        if not depth_frame or not color_frame:
            continue
        
        #convert images to numpy arrays
        depth_image = np.asanyarray(depth_frame.get_data())
        color_image = np.asanyarray(color_frame.get_data())
        depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)
        
        colorwriter.write(color_image)
        depthwriter.write(depth_colormap)
        
        cv2.imshow('Stream', color_image)
        #cv2.imshow('Stream', depth_colormap)
        
        if cv2.waitKey(1) == ord("q"):
            break
finally:
    colorwriter.release()
    depthwriter.release()
    pipeline.stop()

In [None]:

# === open3d === #

import os
import open3d.ml as _ml3d
#import open3d.ml.torch as ml3d

cfg_file = "ml3d/configs/pointpillars_kitti.yml"
cfg = _ml3d.utils.Config.load_from_file(cfg_file)

model = ml3d.models.PointPillars(**cfg.model)
cfg.dataset['dataset_path'] = "/path/to/your/dataset"
dataset = ml3d.datasets.KITTI(cfg.dataset.pop('dataset_path', None), **cfg.dataset)
pipeline = ml3d.pipelines.ObjectDetection(model, dataset=dataset, device="gpu", **cfg.pipeline)

... 
# run inference on a single example.
result = pipeline.run_inference(data)

In [8]:
import cv2 as cv
import os
from IPython.display import display
from PIL import Image
import time
import numpy as np
import yolo

config_path = "/usr/local/lib/python3.8/dist-packages/yolo/yolov3.cfg" # the YOLO net weights file
weights_path = "/usr/local/lib/python3.8/dist-packages/yolo/yolov3.weights" # weights_path = "weights/yolov3-tiny.weights"
labels_path = "/usr/local/lib/python3.8/dist-packages/yolo/data/coco.names"
img_path2 = "/home/physine/Documents/FYP/images/original.jpg" # crowded sence
img_path = "/home/physine/Documents/FYP/images/single_biker.jpg" # single biker

WHITE = (255, 255, 255)
#img = None
#img0 = None
#outputs = None

# Load names of classes and get random colors
classes = open(labels_path).read().strip().split('\n')
np.random.seed(42)
colors = np.random.randint(0, 255, size=(len(classes), 3), dtype='uint8')

# Give the configuration and weight files for the model and load the network.
net = cv.dnn.readNetFromDarknet(config_path, weights_path)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
# net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

# determine the output layer
ln = net.getLayerNames()
#ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
ln = [ln[i-1] for i in net.getUnconnectedOutLayers()]


def load_image(path):
    #global img, img0, outputs, ln

    img0 = cv.imread(path)
    img = img0.copy()
    
    blob = cv.dnn.blobFromImage(img, 1/255.0, (416, 416), swapRB=True, crop=False)

    net.setInput(blob)
    #t0 = time.time()
    output = net.forward(ln)
    #t = time.time() - t0

    # combine the 3 output groups into 1 (10647, 85)
    # large objects (507, 85)
    # medium objects (2028, 85)
    # small objects (8112, 85)
    ##output = np.vstack(output) # (10647, 85)
    return output
    #post_process(img, outputs, 0.5)
    #cv.imshow('window',  img)
    #cv.displayOverlay('window', f'forward propagation time={t:.3}')
    #cv.waitKey(0)

def post_process(img, outputs, conf):
    H, W = img.shape[:2]

    boxes = []
    confidences = []
    classIDs = []

    for output in outputs:
        scores = output[5:]
        classID = np.argmax(scores)
        confidence = scores[classID]
        if confidence > conf:
            x, y, w, h = output[:4] * np.array([W, H, W, H])
            p0 = int(x - w//2), int(y - h//2)
            p1 = int(x + w//2), int(y + h//2)
            boxes.append([*p0, int(w), int(h)])
            confidences.append(float(confidence))
            classIDs.append(classID)
            #cv.rectangle(img, p0, p1, WHITE, 1)

    indices = cv.dnn.NMSBoxes(boxes, confidences, conf, conf-0.1)
    if len(indices) > 0:
        for i in indices.flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])
            color = [int(c) for c in colors[classIDs[i]]]
            cv.rectangle(img, (x, y), (x + w, y + h), color, 2)
            text = "{}: {:.4f}".format(classes[classIDs[i]], confidences[i])
            cv.putText(img, text, (x, y - 5), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

def trackbar(x):
    global img, img0
    conf = x/100
    img0 = cv.imread(img_path)
    img = img0.copy()
            
    post_process(img, outputs, conf)
    cv.displayOverlay('window', f'confidence level={conf}')
    cv.imshow('window', img)

cv.namedWindow('window')
#cv.createTrackbar('confidence', 'window', 50, 100, trackbar)

print("img 1")
output = load_image(img_path)
#print(output.shape)
scores = output[5:]
print(scores)

print("img 2")
output = load_image(img_path2)
#print(output)

# load_image('images/zoo.jpg')
# load_image('images/kitchen.jpg')
# load_image('images/airport.jpg')
# load_image('images/tennis.jpg')
# load_image('images/wine.jpg')
# load_image('images/bicycle.jpg')

cv.destroyAllWindows()

# -----------------------------------

# # test img
# display(Image.open(img_path))

# # load the net
# net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)

# # print the 254 components
# ln = net.getLayerNames()
# #print(len(ln), ln)

# # create a blob
# img = cv2.imread(img_path)
# resized = cv2.resize(img, dsize=(416, 416))
# blob = cv2.dnn.blobFromImage(img, 1/255.0, (416, 416), swapRB=True, crop=False)
# r = blob[0, 0, :, :]
# # It has the following parameters:
# # - the image to transform
# # - the scale factor (1/255 to scale the pixel values to [0..1])
# # - the size, here a 416x416 square image
# # - the mean value (default=0)
# # - the option swapBR=True (since OpenCV uses BGR)

# cv2.imshow('blob', r)
# text = f'Blob shape={blob.shape}'
# cv2.displayOverlay('blob', text)
# cv2.waitKey(1)

# net.setInput(blob)
# t0 = time.time()
# outputs = net.forward(ln)
# t = time.time()

# cv2.displayOverlay('window', f'forward propagation time={t-t0}')
# cv2.imshow('window',  img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# net.setInput(blob)
# outputs = net.forward(ln)



print("ok")

img 1
()
img 2
ok
