In [8]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv("/Users/anzhunie/Desktop/Pedestrian_Training/Prediction/dataset_with_cluster_masked.csv")
df = df.dropna(subset=['ID'])  

In [3]:
df

Unnamed: 0,Time,ID,Positionx,Positionz,Positiony,Yaw,Up,Right,Down,Left,Trajectory,Distance,Speed,Direction,Cluster,Speed Change,Direction Change,exp_num,Crowd_Radius_Label
32,16.0,78.0,-4.75,0.0,-8.80,0.861796,0.0,1.0,0.0,0.0,2.0,10.00,,,2.0,,,1.0,Outside
33,16.5,78.0,-4.28,0.0,-8.76,0.869519,0.0,0.0,1.0,0.0,2.0,9.75,0.94,0.08,2.0,,,1.0,Outside
34,17.0,78.0,-4.30,0.0,-8.61,0.935340,0.0,0.0,0.0,1.0,2.0,9.62,0.30,1.70,2.0,-0.64,1.62,1.0,Outside
35,17.5,78.0,-3.74,0.0,-8.67,1.005221,0.0,0.0,0.0,0.0,2.0,9.44,1.13,-0.11,2.0,0.83,-1.81,1.0,Outside
36,18.0,78.0,-3.57,0.0,-8.48,1.042301,0.0,0.0,0.0,0.0,2.0,9.20,0.51,0.84,2.0,-0.62,0.95,1.0,Outside
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74414,657.0,170.0,-0.70,0.0,-5.42,-4.574143,0.0,0.0,0.0,0.0,27.0,5.47,0.22,1.75,1.0,-2.00,1.75,3.0,Outside
74415,657.5,170.0,-0.50,0.0,-4.20,1.407027,0.0,0.0,0.0,0.0,27.0,4.23,2.47,1.41,1.0,2.25,-0.34,3.0,Outside
74416,658.0,170.0,-0.46,0.0,-3.84,1.252944,0.0,0.0,0.0,0.0,27.0,3.87,0.72,1.46,1.0,-1.75,0.05,3.0,Outside
74417,658.5,170.0,-0.44,0.0,-3.77,0.672055,0.0,0.0,0.0,0.0,27.0,3.80,0.15,1.29,1.0,-0.57,-0.17,3.0,Outside


In [4]:
def get_neighbors_tensor(df, time_seq, target_id, traj_id, exp_num, max_neighbors=5):
    neighbor_features = ['Positionx', 'Positiony', 'Distance', 
                         'Speed', 'Speed Change', 'Direction', 'Direction Change']

    T = len(time_seq)
    F = len(neighbor_features)
    N = max_neighbors

    neighbors_tensor = torch.zeros(T, N, F)
    neighbor_mask = torch.zeros(T, N)

    df = df.copy()
    df['Time_rounded'] = df['Time'].round(2)

    for t_idx, t in enumerate(time_seq):
        t_round = round(t, 2)

        # Frame of current time and experiment
        frame = df[(df['Time_rounded'] == t_round) & (df['exp_num'] == exp_num)]
        frame = frame.dropna(subset=neighbor_features + ['ID', 'Trajectory'])

        # Get target agent's position
        target_row = df[
            (df['Time_rounded'] == t_round) & 
            (df['ID'] == target_id) &
            (df['Trajectory'] == traj_id) &
            (df['exp_num'] == exp_num)
        ]
        if target_row.empty:
            continue

        target_x = target_row.iloc[0]['Positionx']
        target_y = target_row.iloc[0]['Positiony']

        # Exclude target and sort neighbors
        frame = frame[frame['ID'] != target_id]
        frame.loc[:, 'dist_to_target'] = np.sqrt(
            (frame['Positionx'] - target_x)**2 +
            (frame['Positiony'] - target_y)**2
        )
        frame = frame.sort_values(by='dist_to_target').head(N)

        for n_idx, (_, row) in enumerate(frame.iterrows()):
            neighbor_id = row['ID']
            neighbor_traj = row['Trajectory']

            neighbor_segment = df[
                (df['ID'] == neighbor_id) &
                (df['Trajectory'] == neighbor_traj) &
                (df['exp_num'] == exp_num) &
                (df['Time_rounded'].isin([round(x, 2) for x in time_seq]))
            ]
            step = neighbor_segment[neighbor_segment['Time_rounded'] == t_round]
            if not step.empty:
                neighbors_tensor[t_idx, n_idx] = torch.tensor(
                    step[neighbor_features].values[0], dtype=torch.float32
                )
                neighbor_mask[t_idx, n_idx] = 1.0

    return neighbors_tensor, neighbor_mask
  

In [5]:
def get_drop_count(feature_list):
    drop_1 = {'Speed', 'Direction'}
    drop_2 = {'Speed Change', 'Direction Change'}

    count = 0
    if any(f in feature_list for f in drop_1):
        count = 1
    if any(f in feature_list for f in drop_2):
        count = 2
    return count


In [6]:
traj_length = 15
grouped = df.groupby(['ID', 'Trajectory','exp_num'])
trajectory_features = ['Positionx', 'Positiony', 'Distance', 
                       'Speed', 'Speed Change', 'Direction', 'Direction Change']

In [9]:
drop_count = get_drop_count(trajectory_features)

data = []

for (pid, traj_id, exp_num), group in grouped:
    group = group.sort_values(by='Time')
    if group.shape[0] < traj_length + drop_count:
        continue

    cluster = int(group['Cluster'].iloc[0])
    traj_np = group[trajectory_features].values
    time_seq = group['Time'].tolist()

    for start in range(drop_count, len(traj_np) - traj_length + 1 + drop_count):
        segment = traj_np[start:start + traj_length]
        time_segment = time_seq[start:start + traj_length]
        traj_tensor = torch.tensor(segment, dtype=torch.float32)

        neighbors_tensor, neighbor_mask = get_neighbors_tensor(
            df, time_segment, pid, traj_id, exp_num
        )

        sample = {
            'trajectory': traj_tensor,
            'neighbors': neighbors_tensor,
            'neighbor_mask': neighbor_mask,
            'cluster': cluster,
            'id': int(pid),
            'exp_num': int(exp_num)
        }

        data.append(sample)


In [10]:
# Save to .pt
torch.save(data, "/Users/anzhunie/Desktop/Pedestrian_Training/Prediction/social_lstm_with_neighbors_full.pt")

In [11]:
# train and test
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Save to .pt
torch.save(train_data, "/Users/anzhunie/Desktop/Pedestrian_Training/Prediction/train_social_lstm_full.pt")
torch.save(test_data, "/Users/anzhunie/Desktop/Pedestrian_Training/Prediction/test_social_lstm_full.pt")

print(f"Train: {len(train_data)} samples, Test: {len(test_data)} samples.")


Train: 5725 samples, Test: 1432 samples.


In [12]:
from collections import Counter
labels = [d['cluster'] for d in data]
print(Counter(labels))


Counter({1: 4443, 2: 2714})
