In [9]:
import pandas as pd
import numpy as np
import os

In [12]:
# Location of the input CSV (df_all.csv). Defaults to the original local path;
# set the PEDESTRIAN_CSV environment variable to point at another copy without
# editing the notebook.
path = os.environ.get(
    'PEDESTRIAN_CSV',
    '/Users/anzhunie/Desktop/Pedestrian_Training/df_all.csv',
)

In [11]:
# Load the CSV at `path` into `df`. Note that label_crowd_radius() takes a file
# path and reads the CSV again itself, so this frame is not an input to it.
df = pd.read_csv(path)

In [30]:
def label_crowd_radius(file_path, crowd_radius=2.58, mask_after_entry=True):
    """Label every sample relative to the crowd radius and mask post-entry rows.

    The CSV at ``file_path`` is loaded, every object-dtype (text) column is
    coerced to numeric (unparseable entries become NaN), and a
    ``Crowd_Radius_Label`` column is added. Rows are processed per
    ``(ID, Trajectory)`` group, in file order. Rows whose ``ID`` or
    ``Trajectory`` is NaN belong to no group: they are neither labelled nor
    masked.

    Labels assigned within a group:

    * ``'Outside'``    -- ``Distance > crowd_radius``
    * ``'Inside-Out'`` -- ``Distance <= crowd_radius`` on a row at or before
      the group's last ``'Outside'`` row
    * ``'Inside'``     -- ``Distance <= crowd_radius`` after the last
      ``'Outside'`` row (or when the group has no ``'Outside'`` row)
    * NaN              -- ``Distance`` is NaN

    Masking (only when ``mask_after_entry`` is true): within each group, every
    row from the first non-``'Outside'`` row onward has all columns except
    ``'Time'`` set to NaN. A group whose first row is not ``'Outside'`` is
    therefore masked entirely, and a group that is ``'Outside'`` throughout is
    left untouched. As a consequence, rows that survive masking always carry
    the label ``'Outside'``; pass ``mask_after_entry=False`` to obtain the
    fully labelled, unmasked frame.

    Parameters
    ----------
    file_path : str or path-like
        CSV file to read. Must provide ``ID``, ``Trajectory`` and ``Distance``
        columns.
    crowd_radius : float, default 2.58
        Distance threshold separating ``'Outside'`` (strictly greater) from
        the inside labels (less than or equal).
    mask_after_entry : bool, default True
        Apply the masking step described above.

    Returns
    -------
    pandas.DataFrame
        The loaded data with the ``Crowd_Radius_Label`` column added.
    """
    df = pd.read_csv(file_path)

    # Text (object-dtype) columns are forced to numeric; anything that cannot
    # be parsed becomes NaN. The guard skips the no-op case of an all-numeric file.
    text_columns = df.select_dtypes(include=['object']).columns
    if len(text_columns) > 0:
        df[text_columns] = df[text_columns].apply(pd.to_numeric, errors='coerce')

    # Labels are collected in an object-dtype Series so that writing strings
    # never has to upcast a float64 column in place (an incompatible-dtype
    # assignment that recent pandas versions deprecate).
    labels = pd.Series(np.nan, index=df.index, dtype=object)
    rows_to_mask = pd.Series(False, index=df.index)

    # groupby drops rows with a NaN key, matching the "exclude NaN IDs" rule,
    # and visits every group once instead of re-filtering the whole frame.
    for _, section in df.groupby(['ID', 'Trajectory'], sort=False):
        distance = section['Distance']
        inside = (distance <= crowd_radius).to_numpy()
        outside = (distance > crowd_radius).to_numpy()

        section_labels = np.full(len(section), np.nan, dtype=object)
        section_labels[inside] = 'Inside'
        section_labels[outside] = 'Outside'

        # Inside samples seen no later than the last outside sample mean the
        # trajectory left the radius again afterwards: relabel as 'Inside-Out'.
        outside_positions = np.flatnonzero(outside)
        if outside_positions.size:
            up_to_last_outside = np.arange(len(section)) <= outside_positions[-1]
            section_labels[inside & up_to_last_outside] = 'Inside-Out'

        labels.loc[section.index] = section_labels

        # Everything from the first non-'Outside' row (inside, or NaN distance)
        # to the end of the group is scheduled for masking. When that row is
        # the first row of the group, the whole group is masked.
        non_outside_positions = np.flatnonzero(~outside)
        if non_outside_positions.size:
            first_non_outside = int(non_outside_positions[0])
            rows_to_mask.loc[section.index[first_non_outside:]] = True

    df['Crowd_Radius_Label'] = labels

    if mask_after_entry:
        # Series.mask upcasts int/bool columns as needed, unlike assigning NaN
        # through .loc, which is an incompatible-dtype write for those columns.
        cols_to_mask = [col for col in df.columns if col != 'Time']
        for col in cols_to_mask:
            df[col] = df[col].mask(rows_to_mask)

    return df  # Return the modified DataFrame with labels


In [31]:
# Label and mask the data stored at `path`. Rows masked by label_crowd_radius()
# keep only their 'Time' value, so they display as NaN in every other column.
df_all = label_crowd_radius(path)
df_all

Unnamed: 0,Time,ID,Positionx,Positionz,Positiony,Yaw,Up,Right,Down,Left,Trajectory,Distance,Speed,Direction,Speed Change,Direction Change,Cluster,Crowd_Radius_Label
0,0.0,,,,,,,,,,,,,,,,,
1,0.5,,,,,,,,,,,,,,,,,
2,1.0,,,,,,,,,,,,,,,,,
3,1.5,,,,,,,,,,,,,,,,,
4,2.0,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74438,669.0,,,,,,,,,,,,,,,,,
74439,669.5,,,,,,,,,,,,,,,,,
74440,670.0,,,,,,,,,,,,,,,,,
74441,670.5,,,,,,,,,,,,,,,,,


In [33]:
# A row counts as retained when none of its columns is NaN; tally such rows
# for every (ID, Trajectory) pair. Rows with a NaN key fall outside all groups.
is_complete_row = df_all.notna().all(axis=1)
retained_rows = is_complete_row.groupby([df_all['ID'], df_all['Trajectory']]).sum()

# Show the per-group counts
print(retained_rows)

ID     Trajectory
40.0   2.0           13
       3.0           22
       4.0           19
       5.0           12
       6.0           15
                     ..
283.0  18.0           8
287.0  1.0           16
       2.0           20
       3.0            5
       4.0            7
Length: 1259, dtype: int64
