In [1]:
# Mount Google Drive at /content/drive; later cells copy esmini.zip and the xvfb
# helper script from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
!cp -r /content/drive/MyDrive/esmini.zip /content/esmini.zip
!unzip /content/esmini.zip

In [8]:
!apt-get install -y xvfb x11-utils
!pip install ultralytics

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libfontenc1 libxfont2 libxkbfile1 libxtst6 libxxf86dga1 x11-xkb-utils xfonts-base
  xfonts-encodings xfonts-utils xserver-common
Suggested packages:
  mesa-utils
The following NEW packages will be installed:
  libfontenc1 libxfont2 libxkbfile1 libxtst6 libxxf86dga1 x11-utils x11-xkb-utils xfonts-base
  xfonts-encodings xfonts-utils xserver-common xvfb
0 upgraded, 12 newly installed, 0 to remove and 49 not upgraded.
Need to get 8,046 kB of archives.
After this operation, 12.8 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 libfontenc1 amd64 1:1.1.4-1build3 [14.7 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxfont2 amd64 1:2.0.5-1build1 [94.5 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxkbfile1 amd64 1:1.1.0-1build3 [71.8 kB]
Get:4 http://archive.ubuntu.

In [9]:
import sys, os
!wget -q https://raw.githubusercontent.com/yandexdataschool/Practical_RL/master/xvfb -O ../xvfb
if type(os.environ.get("DISPLAY")) is not str or len(os.environ.get("DISPLAY")) == 0:
    !bash ../xvfb start
    os.environ['DISPLAY'] = ':1'
!cp -r /content/drive/MyDrive/xvfb /content/xvfb
!chmod 755 /content/xvfb

Starting virtual X frame buffer: Xvfb.


In [10]:
!/content/xvfb start
%cd /content/esmini
!chmod 755 bin/esmini
%cd /content
!git clone https://github.com/patrickw16/sim_loop.git

Starting virtual X frame buffer: Xvfb.
/content/esmini
/content
Cloning into 'sim_loop'...
remote: Enumerating objects: 309, done.[K
remote: Counting objects: 100% (97/97), done.[K
remote: Compressing objects: 100% (91/91), done.[K
remote: Total 309 (delta 8), reused 87 (delta 4), pack-reused 212 (from 1)[K
Receiving objects: 100% (309/309), 76.27 MiB | 34.23 MiB/s, done.
Resolving deltas: 100% (9/9), done.


In [None]:
import ctypes as ct
import sys
import os
import cv2
import numpy as np

from ultralytics import YOLO
from google.colab.patches import cv2_imshow
from PIL import Image
from IPython.display import display


def calculate_distance_threshold(distances, dt, j, ego_deceleration):
    """
    Calculate the distance below which braking should be triggered.

    The closing speed is estimated from the latest distance sample and the most
    recent earlier non-zero sample, and converted into a stopping distance
    ``v**2 / (2 * ego_deceleration)``.

    Args:
        distances (list): Distance samples; zeros are treated as "no measurement".
        dt (float): The time step.
        j (int): The current (time) index. It is compared against list indices
            of ``distances`` to obtain the elapsed time.
        ego_deceleration (float): The ego deceleration.

    Returns:
        float: The calculated distance threshold, or the default of 500 when no
        usable speed estimate exists (fewer than two non-zero samples, no
        earlier non-zero sample, non-positive elapsed time, or zero speed).
    """
    default_threshold = 500

    if not distances or len(distances) < 2:
        return default_threshold

    non_zero_count = sum(1 for item in distances if item != 0)
    if non_zero_count < 2:
        return default_threshold

    # Indices of earlier (all but the latest) samples that carry a measurement.
    not_zero_indices = [index for index, value in enumerate(distances[:-1]) if value != 0]
    if not not_zero_indices:
        return default_threshold

    previous_index = max(not_zero_indices)
    time_delta = (j - previous_index) * dt
    # Several samples can be appended within one time step, in which case j may
    # not be ahead of the previous sample's index. Without this guard the
    # division below raises ZeroDivisionError (or uses a negative time span).
    if time_delta <= 0:
        return default_threshold

    distances_delta = distances[-1] - distances[previous_index]
    delta_v = distances_delta / time_delta

    if delta_v == 0:
        return default_threshold
    return np.square(delta_v) / (2 * ego_deceleration)



# Scenario file to run and root folder of the unpacked esmini distribution.
xosc_path = "/content/sim_loop/scenarios/cut-in.xosc"
lib_path = "/content/esmini"

# esmini shared library location, keyed by the value of sys.platform.
lib_paths = dict(
    linux=os.path.join(lib_path, "bin/libesminiLib.so"),
    linux2=os.path.join(lib_path, "bin/libesminiLib.so"),
    darwin=os.path.join(lib_path, "bin/libesminiLib.dylib"),
    win32=os.path.join(lib_path, "esminiLib.dll"),
)

# Load the library for the current platform (KeyError on any other platform).
se = ct.CDLL(lib_paths[sys.platform])

# ctypes mirrors of the esmini structs used below (speed / lane change / lane offset actions, image)
class SESpeedActionStruct(ct.Structure):
    """Argument block passed by reference to ``se.SE_InjectSpeedAction``.

    Fields, in declaration order:
        id               -- id of object to perform action
        speed            -- float speed value (units not visible here; presumably m/s)
        transition_shape -- 0 = cubic, 1 = linear, 2 = sinusoidal, 3 = step
        transition_dim   -- 0 = distance, 1 = rate, 2 = time
        transition_value -- float value; presumably interpreted per transition_dim
    """

    _fields_ = [
        ("id", ct.c_int),
        ("speed", ct.c_float),
        ("transition_shape", ct.c_int),
        ("transition_dim", ct.c_int),
        ("transition_value", ct.c_float),
    ]

class SELaneChangeActionStruct(ct.Structure):
    """Argument block describing a lane change action.

    An instance is created further down but not passed to any esmini call in
    the visible code.

    Fields, in declaration order:
        id               -- id of object to perform action
        mode             -- 0 = absolute, 1 = relative (own vehicle)
        target           -- target lane id (absolute or relative)
        transition_shape -- 0 = cubic, 1 = linear, 2 = sinusoidal, 3 = step
        transition_dim   -- 0 = distance, 1 = rate, 2 = time
        transition_value -- float value; presumably interpreted per transition_dim
    """

    _fields_ = [
        ("id", ct.c_int),
        ("mode", ct.c_int),
        ("target", ct.c_int),
        ("transition_shape", ct.c_int),
        ("transition_dim", ct.c_int),
        ("transition_value", ct.c_float),
    ]

class SELaneOffsetActionStruct(ct.Structure):
    """Argument block describing a lane offset action.

    An instance is created further down but not passed to any esmini call in
    the visible code.

    Fields, in declaration order:
        id               -- id of object to perform action
        offset           -- float lateral offset (units not visible here)
        max_lateral_acc  -- float limit on lateral acceleration (units not visible here)
        transition_shape -- 0 = cubic, 1 = linear, 2 = sinusoidal, 3 = step
    """

    _fields_ = [
        ("id", ct.c_int),
        ("offset", ct.c_float),
        ("max_lateral_acc", ct.c_float),
        ("transition_shape", ct.c_int),
    ]

class SEImage(ct.Structure):
    """Image descriptor filled in by ``se.SE_FetchImage``.

    Fields, in declaration order:
        width       -- image width in pixels
        height      -- image height in pixels
        pixelSize   -- bytes per pixel (the loop treats the buffer as
                       height x width x pixelSize bytes)
        pixelFormat -- integer format code; meaning not visible in this file
        data        -- pointer to the raw unsigned-byte pixel buffer
    """

    _fields_ = [
        ("width", ct.c_int),
        ("height", ct.c_int),
        ("pixelSize", ct.c_int),
        ("pixelFormat", ct.c_int),
        ("data", ct.POINTER(ct.c_ubyte)),
    ]

# Declare argument / return types for the esmini calls used below. ctypes
# treats arguments and return values as C int unless told otherwise, which
# matters in particular for the float and bool parameters.

# Camera and image capture
se.SE_SetCameraMode.argtypes = [ct.c_int]
se.SE_SaveImagesToFile.argtypes = [ct.c_int]
se.SE_SaveImagesToRAM.argtypes = [ct.c_bool]
se.SE_FetchImage.argtypes = [ct.c_void_p]
se.SE_FetchImage.restype = ct.c_int

# Collision handling
se.SE_CollisionDetection.argtypes = [ct.c_bool]
se.SE_GetObjectNumberOfCollisions.argtypes = [ct.c_int]

# Time stepping
se.SE_StepDT.argtypes = [ct.c_float]
se.SE_GetSimulationTime.restype = ct.c_float

# Screenshot options:
#   SE_SaveImagesToFile(int nrOfFrames) --> for testing purposes
#   SE_SaveImagesToRAM(bool state)      --> keeps frames in memory for SE_FetchImage
#
# Custom camera in front of vehicle, e.g. sensor mount position:
#   ./bin/esmini --window 60 60 800 400 --osc ./resources/xosc/slow-lead-vehicle.xosc --custom_camera 3,0,0.6,0,0

# Instantiate the structs that are handed to esmini by reference.
lane_offset_action = SELaneOffsetActionStruct()
lane_change_action = SELaneChangeActionStruct()
speed_action = SESpeedActionStruct()
img = SEImage()

# Initialize esmini with the scenario. The third argument (3) is the viewer
# mode: no viewer, but image generated. Argument order per esminiLib.hpp is
# presumably (oscFilename, disable_ctrls, use_viewer, threads, record) -- confirm
# against the esmini version in use.
init_status = se.SE_Init(xosc_path.encode(), 0, 3, 0, 1)
if init_status != 0:
    # Fail loudly here; otherwise every later call operates on an
    # uninitialized simulation and the loop just produces nothing useful.
    raise RuntimeError(
        f"esmini SE_Init failed with code {init_status} for scenario {xosc_path}"
    )

se.SE_SetCameraMode(5)  # first person view

# Keep rendered frames in memory so the loop can read them via SE_FetchImage
# (alternative for debugging: se.SE_SaveImagesToFile(3)).
se.SE_SaveImagesToRAM(True)
# Enable collision bookkeeping, queried via SE_GetObjectNumberOfCollisions.
se.SE_CollisionDetection(True)

# Detector weights shipped with the cloned sim_loop repository.
model = YOLO("/content/sim_loop/best.pt")

# Constants for the width-based range estimate used in the loop:
#   distance = KNOWN_WIDTH * FOCAL_LENGTH / bounding-box width in pixels
KNOWN_WIDTH = 2.0  # Example object (car) width in meters
FOCAL_LENGTH = 45 * 800 / 36  # Example focal length in pixels

# Simulation parameters
dt = 0.1                 # step size handed to SE_StepDT
ego_deceleration = 5.0   # deceleration used for the stopping-distance threshold

# Mutable loop state
j = 0                       # number of completed simulation steps
delta_v = 0
distances = []              # estimated distances, one entry per accepted detection
flag_braking = False        # set once a detection demands braking
flag_speed_action = False   # set once the brake action has been injected
# Closed-loop run: fetch the rendered camera frame, (optionally) detect objects,
# estimate their distance from the bounding-box width, and inject a one-time
# brake action into esmini when an object is closer than the stopping-distance
# threshold. Stops on esmini's quit flag, after 17 s of simulation time, or on
# the first ego collision.
while se.SE_GetQuitFlag() == 0 and se.SE_GetSimulationTime() < 17.0:
    flag = se.SE_FetchImage(ct.byref(img))
    coll_ego = se.SE_GetObjectNumberOfCollisions(0)
    if coll_ego > 0:
        # The builtin-looking `exit` inside a Jupyter kernel is IPython's
        # autocall object and does not interrupt the running cell. sys.exit
        # raises SystemExit, which stops the cell (and still gives exit
        # status -1 when this code is run as a plain script).
        sys.exit(-1)
    if not flag:  # SE_FetchImage returned 0, i.e. a frame is available
        total_bytes = img.pixelSize * img.width * img.height
        img_data = np.ctypeslib.as_array(img.data, shape=(total_bytes,))
        img_array = img_data.reshape((img.height, img.width, img.pixelSize))

        img_array = np.flip(img_array, 0)  # flip y axis
        #img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB) # change BGR to RGB

        # NOTE(review): inference is disabled here, so no detection can ever
        # trigger braking. Re-enable with `results = model(img_array)`.
        results = []

        # Iterate through the results and calculate distances
        for r in results:
            for box in r.boxes:
                conf = float(box.conf[0])
                if conf < 0.3:
                    continue
                # Plain floats keep `distances` free of tensor objects.
                left_px, _, right_px, _ = (float(v) for v in box.xyxy[0])
                # Calculate the width of the bounding box in pixels
                box_width = right_px - left_px
                if box_width <= 0:
                    continue  # degenerate box; would divide by zero below
                ego_box_left_edge = 800 / 2 - box_width / 2
                object_right_edge = right_px
                # Calculate the distance
                distance = (KNOWN_WIDTH * FOCAL_LENGTH) / box_width
                # list.append returns None: assigning its result back to
                # `distances` destroyed the list after the first detection.
                distances.append(distance)
                distance_threshold = calculate_distance_threshold(distances, dt, j, ego_deceleration)
                if object_right_edge > ego_box_left_edge and distance < distance_threshold:
                    flag_braking = True

        if flag_braking and not flag_speed_action:
            print("Injecting speed action - brake")
            speed_action.id               = 0
            speed_action.speed            = 0.0
            speed_action.transition_shape = 0
            speed_action.transition_dim   = 1
            speed_action.transition_value = 7.0
            se.SE_InjectSpeedAction(ct.byref(speed_action))
            flag_speed_action = True  # inject the brake action only once

    se.SE_StepDT(dt)
    j += 1

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.

0: 320x640 (no detections), 46.9ms
Speed: 34.8ms preprocess, 46.9ms inference, 123.4ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 10.6ms
Speed: 2.9ms preprocess, 10.6ms inference, 0.6ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 13.4ms
Speed: 3.8ms preprocess, 13.4ms inference, 0.6ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 12.1ms
Speed: 2.5ms preprocess, 12.1ms inference, 0.5ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 20.2ms
Speed: 2.5ms preprocess, 20.2ms inference, 0.9ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detec