__Dependencies__

In [1]:
import copy  # copy big/deep objects by value
import datetime  # datetime operations
import itertools  # operate with iterators
import json  # read/write from/into json format
import os  # OS operations (read/write files/folders)
import warnings  # hide warnings

# process parallelization
from multiprocessing import Lock, Manager, Pool, RawArray, cpu_count

import matplotlib.pyplot as plt  # mother of plots for Python
import matplotlib.ticker as ticker  # matplotlib ticker utils
import numpy as np  # array/matrix operations (e.g. linear algebra)
import pandas as pd  # operate with dataframes
import seaborn as sns  # matplotlib plotting nice with shortcuts
from IPython.display import display  # print nicely
from tqdm.notebook import tqdm  # mother of progressbars for Python

# from matplotlib.ticker import FormatStrFormatter  # tick formatter

__Options and constants__

In [2]:
# silence every warning category for the rest of the notebook
warnings.filterwarnings("ignore")
# set default float display format to 3 decimals
pd.options.display.float_format = "{:.3f}".format

STYLE = "darkgrid"
sns.set_style(STYLE)  # set seaborn plotting style

# folder with the raw raycast CSV files (read by process_hits)
PATH_RAYC = "./data/raw/CsvData"
# root folder for the processed output
PATH_PROC = "./data/processed"

# hit origin structure (parsed/new): column names of the per-frame table
# holding tracker origin, rotation, nose direction and axis angles
HIT_ORIGIN = [
    "frame",
    "originX",
    "originY",
    "originZ",
    "rotationX",
    "rotationY",
    "rotationZ",
    "rotationW",
    "directionX",
    "directionY",
    "directionZ",
    "angleX",
    "angleY",
    "angleZ",
]

# hit information structure (all objects): column names of the per-hit table
HIT_INFO = [
    "frame",
    "name",
    "group",
    "distance",
    "objectX",
    "objectY",
    "objectZ",
    "hitX",
    "hitY",
    "hitZ",
]

# raw group name -> group name used after processing (applied to the "group"
# column of the hits table in process_hits); several raw names may map to the
# same target group
GRP_REPLACE = {
    "ADV_linux(Clone)": "Inside Car",
    "CarsManager": "Dynamic Cars",
    "InsideCar": "Inside Car",
    "Parking Cars": "Static Cars",
    "Path_ MS Wissenschaft": "Roads",
    "PedestriansManager": "Dynamic Pedestrians",
    "WestRide Buildings": "Buildings",
    "WestRide Street Prob": "Street Objects",
    "WestRide Trees/Rocks/Nature": "Nature",
    "Westdrive Crosswalks": "Crosswalks",
    "Westdrive Static NPCs": "Static Pedestrians",
}

# x, y, and z axis direction vectors (unit vectors used as the reference for
# the nose vector angles)
X_DIR = np.array([1, 0, 0])
Y_DIR = np.array([0, 1, 0])
Z_DIR = np.array([0, 0, 1])

# progress bar customized format (passed to tqdm as bar_format); the line
# break and the indentation inside the literal are part of the string
B_FORMAT = """📄 {n_fmt} of {total_fmt} {desc} processed: {bar} 
            {percentage:3.0f}% ⏱️{elapsed} ⏳{remaining} ⚙️{rate_fmt}{postfix}"""

CORES = cpu_count()  # number of cpu threads for multiprocessing
print(f"Total CPU threads: {CORES}")

Total CPU threads: 16


__Helper functions__

In [3]:
def pbar_fork_hack(l=None):
    """
    Pool initializer that lets forked workers show progress bars in
    IPython front ends such as Jupyter Notebooks.

    Works around the message:
    [IPKernelApp] WARNING | WARNING: attempted to send message from fork

    Usage: hand this function to the pool through its initializer
    parameter, e.g.:

    pool = Pool(processes=N_CORES, initializer=pbar_fork_hack)

    Source:
     - https://github.com/ipython/ipython/issues/11049#issue-306086846
     - https://github.com/tqdm/tqdm/issues/485#issuecomment-473338308

    Parameters:
        l (object, optional): When truthy, it is published as the
            module-level global `lock`; otherwise `lock` is left untouched.
    """
    # `global` applies to the whole function body, wherever it is declared
    global lock
    if l:
        lock = l
    # emit a single flushed blank from inside the worker process
    print(" ", end="", flush=True)


def angle(a, b):
    """
    Angle (in degrees) between two vectors.

    Given two 3D vectors (a and b):
    a = [xa, ya, za] , b = [xb, yb, zb]

    And the basic geometric formula for the dot product:
    a · b = |a| * |b| * cos(α) => α = arccos[(a · b) / (|a| * |b|)]

    In other words:
    angle = arccos[(xa * xb + ya * yb + za * zb) / (√(xa² + ya² + za²) * √(xb² + yb² + zb²))]

    Source: https://www.omnicalculator.com/math/angle-between-two-vectors

    Parameters:
        a (numpy.array): First vector
        b (numpy.array): Second vector
    Returns:
        float: Angle between vectors a and b in degrees, within [0, 180].
            NaN when either vector has zero magnitude (the angle is
            undefined in that case).
    """
    # a · b
    dotp = np.dot(a, b)

    # |a| * |b|
    m_prod = np.linalg.norm(a) * np.linalg.norm(b)

    # cos(α) = (a · b) / (|a| * |b|)
    cos_an = dotp / m_prod

    # Floating-point rounding can push the ratio marginally outside [-1, 1]
    # for (anti)parallel vectors, where arccos would return NaN; clamp it to
    # the valid domain first. A NaN ratio (zero-length vector) stays NaN.
    cos_an = np.clip(cos_an, -1.0, 1.0)

    # α = arccos[(a · b) / (|a| * |b|)], converted from radians to degrees
    return np.degrees(np.arccos(cos_an))


def parse_cordinates(cords):
    """
    Turn a coordinate string such as "(x, y, z, w...)" into its N numbers.

    Parameters:
        cords (string): Coordinates expressed as (x, y, z, w...)
    Returns:
        list: Parsed coordinates, N float values.
    """
    # drop every opening/closing parenthesis in one pass
    bare = cords.translate(str.maketrans("", "", "()"))
    # comma-separated pieces -> floats (float() tolerates surrounding spaces)
    return list(map(float, bare.split(",")))

__Read participants cleaned data__

In [4]:
# load the cleaned participants table and key it by the "id" column
# (replaces the default positional index)
parts = pd.read_csv("./participants_clean.csv").set_index("id")
parts

Unnamed: 0_level_0,date,expo,side,condition,questionnaire,nulls_%
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4f583872ffed48249874b00f3d389cfc,2019-05-14 13:21:00,MSW,right,TaxiDriver,No,1.610
33d68ad55ef544fab9fd08108e3913ca,2019-05-14 16:44:00,MSW,right,AVAS,No,0.037
8598a83e2af441b8bcd0ae5d84beb875,2019-05-14 16:51:00,MSW,right,RadioTalk,No,0.037
37d7e67934974217830bb429bba7fd76,2019-05-14 16:54:00,MSW,left,AVAS,No,0.936
42bac596059749b5b8e8e83ae61de9b4,2019-05-14 16:56:00,MSW,right,TaxiDriver,No,0.225
...,...,...,...,...,...,...
cfe9482181f74f80b88cd4b1c048ab94,2019-12-30 15:29:00,BMBF,right,AVAS,No,0.300
611d35a7c3e940cc82495e53c2a8532d,2020-01-03 16:12:00,BMBF,right,TaxiDriver,No,0.674
3b6fda285d9e412eb081986b2f22a2e3,2020-01-03 16:13:00,BMBF,left,AVAS,No,1.984
18ffb0abdc8642098c479380bfa533d1,2020-01-03 16:15:00,BMBF,left,RadioTalk,Yes,0.150


Participant data example to process

In [5]:
# take the first participant as a worked example
selected = parts.iloc[0]
# a row picked with iloc is a Series named after its index label (the uid),
# so the name can be read directly without building an intermediate Index
uid = selected.name

# raw raycast file of the participant, indexed by frame number
file = f"Path_ MS Wissenschaft-{selected.condition}-{uid}.csv"
df = pd.read_csv(f"{PATH_RAYC}/{file}")
df = df.set_index("frameNumber")
df

Unnamed: 0_level_0,trackerPosition,trackerRotation,noseVector,hitObjectNames,hitObjectPositions,hitPositionOnObjects,hitObjectGroups
frameNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
400,"(-193.8, 102.3, -131.8)","(0.0, 0.7, 0.1, 0.7)","(1.0, 0.2, 0.1)","Start, tree_2 (10), Sky_side","(-166.9, 99.11023, -132), (74.1333, 114.3709, ...","(-177.7123, 104.7654, -130.4461), (73.2329, 14...","Start, WestRide Trees/Rocks/Nature, Sky"
401,"(-193.8, 102.3, -131.8)","(0.0, 0.7, 0.1, 0.7)","(1.0, 0.2, 0.1)","Start, tree_2 (10), Sky_side","(-166.9, 99.11023, -132), (74.1333, 114.3709, ...","(-177.7123, 104.8219, -130.527), (73.2329, 144...","Start, WestRide Trees/Rocks/Nature, Sky"
402,"(-193.8, 102.3, -131.8)","(0.0, 0.7, 0.1, 0.7)","(1.0, 0.2, 0.1)","Start, tree_2 (10), Sky_side","(-166.9, 99.11023, -132), (74.1333, 114.3709, ...","(-177.7123, 104.7937, -130.5624), (73.77478, 1...","Start, WestRide Trees/Rocks/Nature, Sky"
403,"(-193.7, 102.3, -131.8)","(0.0, 0.7, 0.1, 0.7)","(1.0, 0.2, 0.1)","Start, Sky_side","(-166.9, 99.11023, -132), (775.8169, 268.5637,...","(-177.7123, 104.7296, -130.5805), (775.8169, 2...","Start, Sky"
404,"(-193.7, 102.3, -131.8)","(0.0, 0.7, 0.1, 0.7)","(1.0, 0.2, 0.1)","Start, Sky_side","(-166.9, 99.11023, -132), (775.8169, 268.5637,...","(-177.7123, 104.6884, -130.6121), (775.8169, 2...","Start, Sky"
...,...,...,...,...,...,...,...
3066,"(-41.4, 102.2, -199.3)","(0.0, 0.8, 0.0, -0.7)","(-1.0, 0.0, -0.1)","Sky_side, 3841_combined_LB_building-2 [Legacy...","(-769.1831, 268.5637, -11.36621), (-89.89999, ...","(-769.1831, 101.5359, -305.9646), (-71.96298, ...","Sky, WestRide Buildings"
3067,"(-41.7, 102.2, -199.3)","(0.0, 0.8, 0.0, -0.7)","(-1.0, 0.0, -0.1)","Sky_side, 3841_combined_LB_building-2 [Legacy...","(-769.1831, 268.5637, -11.36621), (-89.89999, ...","(-769.1831, 101.6931, -300.4803), (-71.96298, ...","Sky, WestRide Buildings"
3068,"(-41.9, 102.2, -199.3)","(0.0, 0.8, 0.0, -0.7)","(-1.0, 0.0, -0.1)","Sky_side, 3841_combined_LB_building-2 [Legacy...","(-769.1831, 268.5637, -11.36621), (-89.89999, ...","(-769.1831, 101.9324, -297.968), (-71.96298, 1...","Sky, WestRide Buildings"
3069,"(-42.1, 102.2, -199.3)","(0.0, 0.8, 0.0, -0.7)","(-1.0, 0.0, -0.1)","Sky_side, 3841_combined_LB_building-2 [Legacy...","(-769.1831, 268.5637, -11.36621), (-89.89999, ...","(-769.1831, 101.8583, -293.1292), (-71.96298, ...","Sky, WestRide Buildings"


In [6]:
def process_hits(uid):
    """
    Process the raw raycasted hit data of one participant into a hit origin
    table and a hit information table:

    - Disentangle origin, rotation and direction coordinates
    - Disentangle object and group names
    - Calculate direction (nose vector) angles and hit distances
    - Order object hits within the same frame by distance
    - Rename/rearrange object groups
    - Fix objects belonging to the wrong group
    - Store as processed hit origin and hit info

    Output files (the folders are created when missing):

    - {PATH_PROC}/origins/{uid}.csv: one row per frame (HIT_ORIGIN columns)
    - {PATH_PROC}/hits/{uid}.csv: one row per hit object, or one empty row
      for a frame without hits (HIT_INFO columns)

    Parameters:
        uid (str): Participant UID to process.
    Returns:
        None: The results are written to disk.
    """

    cond = parts.loc[uid].condition
    file = f"Path_ MS Wissenschaft-{cond}-{uid}.csv"
    # keep_default_na=False keeps the literal "null" marker (frame without
    # hits) as a string instead of converting it to NaN
    df_part = pd.read_csv(f"{PATH_RAYC}/{file}", keep_default_na=False)

    # generate the new dict (=> dataFrame) structure
    orgs = {c: [] for c in HIT_ORIGIN}
    hits = {c: [] for c in HIT_INFO}

    # df indices (all frames)
    # NOTE(review): the index is the default positional one, so "frame" holds
    # the CSV row position and not the "frameNumber" column — confirm that
    # this is what downstream code expects.
    frames = df_part.index.tolist()

    for f in frames:  # iterate over frames

        cur = df_part.loc[f]  # current row (frame)
        # parse origin, rotation and nose vector (direction)
        origX, origY, origZ = parse_cordinates(cur.trackerPosition)
        rotX, rotY, rotZ, rotW = parse_cordinates(cur.trackerRotation)
        directX, directY, directZ = parse_cordinates(cur.noseVector)
        orig = np.array([origX, origY, origZ])
        noseVector = np.array([directX, directY, directZ])

        # store hit origin info
        orgs["frame"].append(f)
        orgs["originX"].append(origX)
        orgs["originY"].append(origY)
        orgs["originZ"].append(origZ)
        orgs["rotationX"].append(rotX)
        orgs["rotationY"].append(rotY)
        orgs["rotationZ"].append(rotZ)
        orgs["rotationW"].append(rotW)
        orgs["directionX"].append(directX)
        orgs["directionY"].append(directY)
        orgs["directionZ"].append(directZ)
        # store angles between the nose vector and the X-Y-Z axes
        orgs["angleX"].append(angle(noseVector, X_DIR))
        orgs["angleY"].append(angle(noseVector, Y_DIR))
        orgs["angleZ"].append(angle(noseVector, Z_DIR))

        # get hit names to check hits in frame
        names = cur.hitObjectNames
        groups = cur.hitObjectGroups
        hit_pos = cur.hitPositionOnObjects
        obj_pos = cur.hitObjectPositions
        if names == "null":  # no hit
            names = [None]
            groups = [None]
        elif "," not in names:  # single hit
            # encapsulate single values on lists
            names = [names]
            groups = [groups]
            obj_pos = [obj_pos]
            hit_pos = [hit_pos]
        else:  # more than 1 hit on the same frame
            # parse object names (also remove left and right whitespaces)
            names = [n.strip() for n in names.split(", ")]
            # parse object groups (also remove left and right whitespaces)
            groups = [g.strip() for g in groups.split(", ")]
            # parse object and hit positions (3D coordinate groups)
            obj_pos = obj_pos.split("),")
            hit_pos = hit_pos.split("),")

        for i, name in enumerate(names):  # for each object
            # indexed on purpose: a names/groups length mismatch must raise
            group = groups[i]
            if name:
                # parse object and hit position coordinates
                objX, objY, objZ = parse_cordinates(obj_pos[i])
                hitX, hitY, hitZ = parse_cordinates(hit_pos[i])
                # hit distance: origin to hit point
                dist = np.linalg.norm(np.array([hitX, hitY, hitZ]) - orig)
            else:
                # no object: reset everything so that values of a previous
                # object/frame never leak into this row
                objX = objY = objZ = None
                hitX = hitY = hitZ = None
                dist = None
            # store frame number
            hits["frame"].append(f)
            # store object name, group, and distance
            hits["name"].append(name)
            hits["group"].append(group)
            hits["distance"].append(dist)
            # store object position and hit position coordinates
            hits["objectX"].append(objX)
            hits["objectY"].append(objY)
            hits["objectZ"].append(objZ)
            hits["hitX"].append(hitX)
            hits["hitY"].append(hitY)
            hits["hitZ"].append(hitZ)

    # dict to df
    hits = pd.DataFrame(hits)
    orgs = pd.DataFrame(orgs)
    orgs = orgs.set_index("frame")

    # rename/rearrange group names
    hits["group"] = hits["group"].replace(GRP_REPLACE)

    # Specific objects in wrong group fixes (car objects inside events and
    # street objects). Assigning through .loc writes into `hits` itself;
    # `hits[mask].group = ...` would only modify a temporary copy.
    # dynamic car on "Events" -> "Dynamic Cars"
    car_event = (hits["group"] == "Events") & (
        hits["name"] == "U_SUV03(Clone)"
    )
    hits.loc[car_event, "group"] = "Dynamic Cars"
    # static car on "Street Objects" -> "Static Cars"
    str_obj = hits["group"] == "Street Objects"
    car_in = hits["name"].str.contains("car (", regex=False, na=False)
    hits.loc[str_obj & car_in, "group"] = "Static Cars"

    # ensure all hit objects are ordered by distance within a frame: one sort
    # on (frame, distance) keeps frames in ascending order and places missing
    # distances (no hit) last
    hits = hits.sort_values(by=["frame", "distance"])

    # store into CSV
    os.makedirs(f"{PATH_PROC}/origins", exist_ok=True)
    os.makedirs(f"{PATH_PROC}/hits", exist_ok=True)
    orgs.to_csv(f"{PATH_PROC}/origins/{uid}.csv")
    hits.to_csv(f"{PATH_PROC}/hits/{uid}.csv", index=False)


# participants uids to process
uids = parts.index.tolist()

# initialize a multiprocessing pool with the available cpu cores; the context
# manager terminates the workers if anything below raises (e.g. a worker
# exception re-raised while iterating), so a failed run leaves no orphan
# processes behind
with Pool(processes=CORES) as pool:
    # participants progress bar
    parts_progress = tqdm(
        iterable=pool.imap_unordered(func=process_hits, iterable=uids),
        total=len(uids),
        desc="📂 participants",
        dynamic_ncols=True,
        bar_format=B_FORMAT,
    )

    # loop necessary for displaying properly the progressbar with
    # multiprocessing
    # source: https://stackoverflow.com/a/40133278
    for _ in parts_progress:
        pass

    # close pool instance, no more work to submit
    pool.close()
    # wait for the worker processes to terminate
    pool.join()

📄 0 of 24356 📂 participants processed:                                                                        …