In [1]:
import pandas as pd
import os
import raillabel
import numpy as np

This Jupyter notebook creates the .csv files "person_frames.csv" and "pedestrian_density_per_distance.csv", which are required for the Person Instances Pasting augmentation. <br>
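**person_frames.csv** lists the frames of the train set that contain at least one person instance: each row holds the scene, the frame and a `presence` flag, and frames without any person are dropped before saving. <br>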
**pedestrian_density_per_distance.csv** contains, for each person instance, the distance from the origin, the number of points constituting the instance, the average intensity, and the scene it comes from (scene name and train/val/test split).


### Create 'person_frames.csv'

In [2]:
# Root folder of the OSDaR23 dataset (one sub-folder per scene).
OSDaR_path = "data/OSDaR23_dataset"

# Annotation classes considered by this notebook.
class_labels = [
    'person', 'crowd', 'train', 'wagons', 'bicycle', 'group_of_bicycles',
    'motorcycle', 'road_vehicle', 'animal', 'group_of_animals', 'wheelchair',
    'drag_shoe', 'track', 'transition', 'switch', 'catenary_pole',
    'signal_pole', 'signal', 'signal_bridge', 'buffer_stop', 'flame', 'smoke',
]

# Suffixes appended to every class name to form the column names.
suffixes = ['_nb_points', '_nb_annotations']

# One column per (class, suffix) pair, grouped by class:
# 'person_nb_points', 'person_nb_annotations', 'crowd_nb_points', ...
df_column_names = [
    f"{label}{suffix}" for label in class_labels for suffix in suffixes
]

In [None]:
# Scenes that make up the training set; only these are scanned for persons.
train_set = [
    "1_calibration_1.2",
    "3_fire_site_3.1",
    "3_fire_site_3.3",
    "4_station_pedestrian_bridge_4.3",
    "5_station_bergedorf_5.1",
    "6_station_klein_flottbek_6.2",
    "8_station_altona_8.1",
    "8_station_altona_8.2",
    "9_station_ruebenkamp_9.1",
    "12_vegetation_steady_12.1",
    "14_signals_station_14.1",
    "15_construction_vehicle_15.1",
    "20_vegetation_squirrel_20.1",
    "21_station_wedel_21.1",
    "21_station_wedel_21.2",
]

# Parallel lists with one entry per frame of every processed scene:
# the scene name, the frame id, and whether the frame holds a 'person' seg3d annotation.
person_scene_list = []
person_frame_list = []
person_bool = []

for folder_name in train_set:  # Iterate through each scene
    folder_path = os.path.join(OSDaR_path, folder_name)

    if not os.path.isdir(folder_path):
        # Report the missing scene instead of dropping it silently.
        print('Scene folder not found, skipping:', folder_path)
        continue

    json_files = [f for f in os.listdir(folder_path) if f.endswith('.json')]

    if len(json_files) > 1:
        print('More than one json file was found for scene:', folder_name, 'which is unexpected behaviour. Exiting loop')
        break
    if not json_files:
        # Without this guard json_files[0] would raise an IndexError.
        print('No json file was found for scene:', folder_name, '- skipping it')
        continue

    print(json_files)
    json_file_path = os.path.join(folder_path, json_files[0])
    scene = raillabel.load(json_file_path)

    # Keep only the 3D segmentation annotations of persons.
    person_filtered = raillabel.filter(scene, include_annotation_types=['seg3d'], include_object_types=["person"])

    for frame, frame_content in person_filtered.frames.items():
        person_scene_list.append(folder_name)
        person_frame_list.append(frame)
        # True as soon as the frame keeps at least one annotation after filtering.
        person_bool.append(len(frame_content.annotations) >= 1)

In [4]:
# Assemble one row per collected frame from the three parallel lists.
df = pd.DataFrame(
    {
        "scene": person_scene_list,
        "frame": person_frame_list,
        "presence": person_bool,
    }
)

# Frame ids are stored as zero-padded strings of width 3 (e.g. 37 -> "037").
frame_as_text = df["frame"].astype(str)
df["frame"] = frame_as_text.str.zfill(3)

# Only the frames flagged as containing a person are exported.
has_person = df["presence"].eq(True)
person_filtered_frames = df[has_person]

person_filtered_frames.to_csv("/workspaces/baseline/exp/person_frames.csv")

### Create 'pedestrian_density_per_distance.csv'

In [None]:
# Scene names assigned to each split.
train = [
    "1_calibration_1.2",
    "3_fire_site_3.1",
    "3_fire_site_3.3",
    "4_station_pedestrian_bridge_4.3",
    "5_station_bergedorf_5.1",
    "6_station_klein_flottbek_6.2",
    "8_station_altona_8.1",
    "8_station_altona_8.2",
    "9_station_ruebenkamp_9.1",
    "12_vegetation_steady_12.1",
    "14_signals_station_14.1",
    "15_construction_vehicle_15.1",
    "20_vegetation_squirrel_20.1",
    "21_station_wedel_21.1",
    "21_station_wedel_21.2",
]
val = [
    "2_station_berliner_tor_2.1",
    "3_fire_site_3.4",
    "4_station_pedestrian_bridge_4.2",
    "4_station_pedestrian_bridge_4.5",
    "6_station_klein_flottbek_6.1",
    "7_approach_underground_station_7.2",
    "9_station_ruebenkamp_9.3",
    "9_station_ruebenkamp_9.4",
    "9_station_ruebenkamp_9.5",
    "9_station_ruebenkamp_9.7",
    "11_main_station_11.1",
    "14_signals_station_14.2",
    "14_signals_station_14.3",
    "18_vegetation_switch_18.1",
    "21_station_wedel_21.3",
]
test = [
    "1_calibration_1.1",
    "3_fire_site_3.2",
    "4_station_pedestrian_bridge_4.1",
    "4_station_pedestrian_bridge_4.4",
    "5_station_bergedorf_5.2",
    "7_approach_underground_station_7.1",
    "7_approach_underground_station_7.3",
    "8_station_altona_8.3",
    "9_station_ruebenkamp_9.2",
    "9_station_ruebenkamp_9.6",
    "10_station_suelldorf_10.1",
    "13_station_ohlsdorf_13.1",
    "16_under_bridge_16.1",
    "17_signal_bridge_17.1",
    "19_vegetation_curve_19.1",
]

# Built once so the per-folder membership test does not rebuild the list.
known_scenes = set(train + val + test)


def _person_instance_stats(coord, strength, pts_idx):
    """Summarise one person instance of a point cloud.

    Args:
        coord: (N, 3) array with the x, y, z coordinates of the scan.
        strength: (N, 1) array with the intensity of every point.
        pts_idx: indices of the points belonging to the instance (non-empty).

    Returns:
        Tuple ``(distance, nb_points, mean_intensity)`` where ``distance`` is
        the norm of the centre of the instance's x/y bounding box.
    """
    instance_xy = coord[pts_idx][:, 0:2]
    xy_min = instance_xy.min(axis=0)
    xy_max = instance_xy.max(axis=0)
    center_xy = xy_min + (xy_max - xy_min) / 2
    return np.linalg.norm(center_xy), len(pts_idx), strength[pts_idx].mean()


# Parallel lists with one entry per person instance.
person_instance_distance = []
person_instance_nb_points = []
person_instance_mean_intensity = []
scenes_name_list = []

scene_progress = 0
# Sorted so the row order of the resulting table does not depend on the file system.
for folder_scene_name in sorted(os.listdir(OSDaR_path)):  # Iterate through each scene
    if folder_scene_name not in known_scenes:
        # Entry of the dataset folder that is not part of any split, skip
        continue

    scene_name = folder_scene_name
    folder_scene_path = os.path.join(OSDaR_path, folder_scene_name)
    lidar_folder_scene_path = os.path.join(folder_scene_path, "lidar")
    label_scene_path = os.path.join(folder_scene_path, scene_name + "_labels.json")

    if not os.path.exists(label_scene_path):
        # Without this guard `scene` would be undefined or, worse, still hold
        # the annotations of the previously processed scene.
        print("No label file found for scene:", scene_name, "- skipping it")
        continue
    if not os.path.isdir(lidar_folder_scene_path):
        print("No lidar folder found for scene:", scene_name, "- skipping it")
        continue

    print("Scene progress =", scene_progress, ". Scene name:", folder_scene_name)

    scene = raillabel.load(label_scene_path)  # Load json annotations for scene
    # The filter does not depend on the frame, so it is applied once per scene
    # and the frames are looked up individually below.
    person_scene = raillabel.filter(scene, include_annotation_types=['seg3d'], include_object_types=["person"])

    for lidar_frame_name in sorted(os.listdir(lidar_folder_scene_path)):
        # Returns the frame number as int '037'-> 37
        frame_nb = int(lidar_frame_name.split('_')[0])

        if frame_nb not in person_scene.frames:  # This point cloud has no frame annotation
            print("Skipped frame")
            continue

        frame_annotations = person_scene.frames[frame_nb].annotations
        if len(frame_annotations) == 0:
            print("No pededestrian in that frame, skip")
            continue  # There is no person annotation in this pcd, skip

        lidar_frame_path = os.path.join(lidar_folder_scene_path, lidar_frame_name)
        with open(lidar_frame_path, "r") as pcd:
            # The first 11 lines are skipped; ndmin=2 keeps a single-point cloud 2-D.
            scan = np.loadtxt(pcd, skiprows=11, usecols=(0, 1, 2, 3), ndmin=2)
        coord = scan[:, :3]     # The x,y,z coordinates of the points
        strength = scan[:, -1].reshape([-1, 1])  # The intensity of the points

        for annotation in frame_annotations.values():
            pts_idx = annotation.point_ids  # Points index for the object
            if len(pts_idx) == 0:
                # An instance without points has no bounding box or mean intensity.
                continue

            center_distance, nb_points, mean_intensity = _person_instance_stats(coord, strength, pts_idx)
            person_instance_distance.append(center_distance)
            person_instance_nb_points.append(nb_points)
            person_instance_mean_intensity.append(mean_intensity)
            scenes_name_list.append(scene_name)

    scene_progress += 1

# Scene name -> split lookup table.
split_df = pd.DataFrame({"scene_name": train+val+test,
                         "split": ["train"]*len(train)+["val"]*len(val)+["test"]*len(test)})

pedestrian_df = pd.DataFrame({"dist":person_instance_distance,
                                "nb_points":person_instance_nb_points,
                                "mean_intensity":person_instance_mean_intensity,
                                "scene_name": scenes_name_list})

# Attach the split of its scene to every person instance.
pedestrian_df = pedestrian_df.merge(split_df, how="left", on="scene_name")

In [None]:
# Write the per-instance statistics table to disk.
pedestrian_csv_path = "/workspaces/baseline/exp/csv_stats/pedestrian_density_per_distance.csv"
pedestrian_df.to_csv(pedestrian_csv_path)