In [3]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split

In [4]:
# Read the clustered dataset and discard rows whose 'ID' is missing,
# then display the resulting frame as the cell output.
df = pd.read_csv("./dataset_with_cluster_masked.csv").dropna(subset=['ID'])
df

Unnamed: 0,Time,ID,Positionx,Positionz,Positiony,Yaw,Up,Right,Down,Left,Trajectory,Distance,Speed,Direction,Cluster,Speed Change,Direction Change,exp_num,Crowd_Radius_Label
32,16.0,78.0,-4.75,0.0,-8.80,0.861796,0.0,1.0,0.0,0.0,2.0,10.00,,,2.0,,,1.0,Outside
33,16.5,78.0,-4.28,0.0,-8.76,0.869519,0.0,0.0,1.0,0.0,2.0,9.75,0.94,0.08,2.0,,,1.0,Outside
34,17.0,78.0,-4.30,0.0,-8.61,0.935340,0.0,0.0,0.0,1.0,2.0,9.62,0.30,1.70,2.0,-0.64,1.62,1.0,Outside
35,17.5,78.0,-3.74,0.0,-8.67,1.005221,0.0,0.0,0.0,0.0,2.0,9.44,1.13,-0.11,2.0,0.83,-1.81,1.0,Outside
36,18.0,78.0,-3.57,0.0,-8.48,1.042301,0.0,0.0,0.0,0.0,2.0,9.20,0.51,0.84,2.0,-0.62,0.95,1.0,Outside
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74414,657.0,170.0,-0.70,0.0,-5.42,-4.574143,0.0,0.0,0.0,0.0,27.0,5.47,0.22,1.75,1.0,-2.00,1.75,3.0,Outside
74415,657.5,170.0,-0.50,0.0,-4.20,1.407027,0.0,0.0,0.0,0.0,27.0,4.23,2.47,1.41,1.0,2.25,-0.34,3.0,Outside
74416,658.0,170.0,-0.46,0.0,-3.84,1.252944,0.0,0.0,0.0,0.0,27.0,3.87,0.72,1.46,1.0,-1.75,0.05,3.0,Outside
74417,658.5,170.0,-0.44,0.0,-3.77,0.672055,0.0,0.0,0.0,0.0,27.0,3.80,0.15,1.29,1.0,-0.57,-0.17,3.0,Outside


In [5]:
# Columns extracted for every trajectory step. A larger candidate set, kept for reference:
# ['Positionx', 'Positiony', 'Distance', 'Speed', 'Speed Change', 'Direction', 'Direction Change']
trajectory_features = ['Positionx', 'Positiony', 'Direction']

# Number of consecutive rows that make up one trajectory segment.
traj_length = 15

# Keep only rows in which every selected feature and every grouping/label column is present.
df = df.dropna(
    subset=[*trajectory_features, 'ID', 'Trajectory', 'Time', 'Cluster', 'exp_num']
)

In [6]:
import torch
import numpy as np

def get_neighbors_tensor(df, time_seq, target_id, traj_id, exp_num, features):
    """Collect, for each timestep, the feature rows of everyone except the target.

    A "neighbor" at time ``t`` is any row of ``df`` that
      * belongs to experiment ``exp_num``,
      * has ``Time`` equal to ``t`` after rounding both to 2 decimals,
      * has an ``ID`` different from ``target_id``, and
      * has no missing value in ``features``, ``ID`` or ``Trajectory``
        (feature values that cannot be parsed as numbers count as missing).

    If the target itself (``target_id`` on trajectory ``traj_id``) has no row at
    a timestep, that timestep yields empty tensors.

    Args:
        df: DataFrame with at least the columns ``Time``, ``ID``, ``Trajectory``,
            ``exp_num`` and every name in ``features``. It is not modified.
        time_seq: Iterable of timestamps to look up.
        target_id: ID of the agent whose neighbors are wanted.
        traj_id: Trajectory identifier of the target.
        exp_num: Experiment identifier restricting the search.
        features: Sequence of column names to extract for each neighbor.

    Returns:
        ``(neighbors_tensor, neighbor_mask)``: two lists with one entry per
        element of ``time_seq``. ``neighbors_tensor[i]`` is a float32 tensor of
        shape ``[N_i, len(features)]``; ``neighbor_mask[i]`` is a float32 tensor
        of ones with shape ``[N_i]``. ``N_i`` may be 0.
    """
    neighbor_features = list(features)
    n_features = len(neighbor_features)
    required_cols = neighbor_features + ['ID', 'Trajectory']

    # Rows from other experiments can never match, so copy and clean only the
    # requested experiment instead of the whole frame on every call.
    exp_df = df.loc[df['exp_num'] == exp_num].copy()
    for col in neighbor_features:
        exp_df[col] = pd.to_numeric(exp_df[col], errors='coerce')
    exp_df['Time_rounded'] = exp_df['Time'].round(2)

    neighbors_tensor = []
    neighbor_mask = []

    for t in time_seq:
        at_t = exp_df[exp_df['Time_rounded'] == round(t, 2)]

        # The target must exist at this timestep for neighbors to be meaningful.
        target_present = (
            (at_t['ID'] == target_id) & (at_t['Trajectory'] == traj_id)
        ).any()

        if target_present:
            others = at_t[at_t['ID'] != target_id].dropna(subset=required_cols)
        else:
            others = at_t.iloc[0:0]

        if others.empty:
            neighbors_tensor.append(torch.zeros(0, n_features))
            neighbor_mask.append(torch.zeros(0))
            continue

        # Values are already numeric and NaN-free, so convert the block at once
        # rather than building one tensor per row.
        values = others[neighbor_features].to_numpy(dtype='float32')
        neighbors_tensor.append(torch.tensor(values, dtype=torch.float32))  # shape: [N, F]
        neighbor_mask.append(torch.ones(len(others)))                       # shape: [N]

    return neighbors_tensor, neighbor_mask


In [7]:
# Build fixed-length trajectory samples, each paired with the per-timestep
# features of the other agents present in the same experiment.

# `df` comes out of earlier dropna() calls; take an explicit copy before adding
# a column so pandas does not raise SettingWithCopyWarning.
df = df.copy()
df['Time_rounded'] = df['Time'].round(2)

data = []
grouped = df.groupby(['ID', 'Trajectory', 'exp_num'])

for (pid, traj_id, exp_num), group in grouped:
    group = group.sort_values(by='Time')
    traj_np = group[trajectory_features].values
    time_seq = group['Time'].tolist()

    # Only whole segments are kept; the trailing len % traj_length rows of each
    # trajectory are discarded by the floor division.
    num_segments = len(traj_np) // traj_length

    for seg_idx in range(num_segments):
        start = seg_idx * traj_length
        end = start + traj_length

        traj_tensor = torch.tensor(traj_np[start:end], dtype=torch.float32)

        neighbors_tensor, neighbor_mask = get_neighbors_tensor(
            df, time_seq[start:end], pid, traj_id, exp_num, features=trajectory_features
        )

        data.append({
            'trajectory': traj_tensor,
            'neighbors': neighbors_tensor,
            'neighbor_mask': neighbor_mask,
            # The label is taken from the first row of the (time-sorted) group.
            'cluster': int(group['Cluster'].iloc[0]),
            'id': int(pid),
            'exp_num': int(exp_num)
        })


# Nothing is written to disk in this cell; the torch.save call lives in a later cell.
print(f"Built {len(data)} segments for social_lstm_with_neighbors_dir.pt")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Time_rounded'] = df['Time'].round(2)


Saved 811 segments to social_lstm_with_neighbors_dir.pt


In [8]:
# Persist the full list of samples as a single .pt file.
torch.save(obj=data, f="./social_lstm_with_neighbors_dir.pt")

In [9]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
# NOTE(review): the split is made per segment, so segments cut from the same
# trajectory can land on both sides -- confirm this is intended.
train_data, test_data = train_test_split(
    data,
    random_state=42,
    test_size=0.2,
)

# Write each partition to its own .pt file.
torch.save(obj=train_data, f="./train_social_lstm_dir.pt")
torch.save(obj=test_data, f="./test_social_lstm_dir.pt")

print(f"Train: {len(train_data)} samples, Test: {len(test_data)} samples.")


Train: 648 samples, Test: 163 samples.


In [10]:
from collections import Counter

# Tally how many samples carry each cluster label.
labels = [sample['cluster'] for sample in data]
print(Counter(labels))


Counter({1: 517, 2: 294})
