In [11]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split

In [12]:
df = pd.read_csv("/Users/anzhunie/Desktop/local_Pedestrian_Training/entire_dataset_with_cluster_masked.csv")
df = df.dropna(subset=['ID'])  

In [None]:
traj_length = 15
grouped = df.groupby(['ID', 'Trajectory'])
trajectory_features = ['Positionx', 'Positiony', 'Distance', 
                       'Speed', 'Speed Change', 'Direction', 'Direction Change']

In [14]:
data = []
total_segments = 0

for (pid, traj_id), group in grouped:
    group = group.sort_values(by='Time')

    if group.shape[0] < traj_length:
        continue
    

    num_segments = len(group) - traj_length + 1
    total_segments += num_segments
    print(f"ID {pid}, Trajectory {traj_id}: {num_segments} segments")

print(f"TOTAL SEGMENTS GENERATED: {total_segments}")


ID 40.0, Trajectory 3.0: 9 segments
ID 40.0, Trajectory 4.0: 6 segments
ID 40.0, Trajectory 6.0: 2 segments
ID 40.0, Trajectory 8.0: 6 segments
ID 40.0, Trajectory 9.0: 29 segments
ID 40.0, Trajectory 16.0: 11 segments
ID 41.0, Trajectory 3.0: 11 segments
ID 41.0, Trajectory 6.0: 3 segments
ID 41.0, Trajectory 7.0: 8 segments
ID 41.0, Trajectory 8.0: 5 segments
ID 41.0, Trajectory 9.0: 1 segments
ID 41.0, Trajectory 10.0: 8 segments
ID 41.0, Trajectory 11.0: 3 segments
ID 41.0, Trajectory 12.0: 6 segments
ID 41.0, Trajectory 13.0: 5 segments
ID 41.0, Trajectory 14.0: 1 segments
ID 41.0, Trajectory 15.0: 2 segments
ID 41.0, Trajectory 16.0: 2 segments
ID 41.0, Trajectory 17.0: 1 segments
ID 41.0, Trajectory 20.0: 2 segments
ID 41.0, Trajectory 21.0: 2 segments
ID 41.0, Trajectory 22.0: 2 segments
ID 41.0, Trajectory 24.0: 2 segments
ID 41.0, Trajectory 25.0: 6 segments
ID 41.0, Trajectory 26.0: 2 segments
ID 43.0, Trajectory 3.0: 3 segments
ID 43.0, Trajectory 4.0: 1 segments
ID 43.0, T

In [15]:

data = []
total_segments = 0

for (pid, traj_id), group in grouped:
    group = group.sort_values(by='Time')

    if group.shape[0] < traj_length:
        continue

    cluster = int(group['Cluster'].iloc[0])

    traj_np = group[trajectory_features].values
    up_np    = group['Up'].values
    right_np = group['Right'].values
    down_np  = group['Down'].values
    left_np  = group['Left'].values

    for start in range(len(traj_np) - traj_length + 1):
        segment = traj_np[start:start + traj_length]
        up_seg    = up_np[start:start + traj_length]
        right_seg = right_np[start:start + traj_length]
        down_seg  = down_np[start:start + traj_length]
        left_seg  = left_np[start:start + traj_length]

        sample = {
            'trajectory': torch.tensor(segment, dtype=torch.float32),  
            'up':    torch.tensor(up_seg, dtype=torch.float32),        
            'right': torch.tensor(right_seg, dtype=torch.float32),
            'down':  torch.tensor(down_seg, dtype=torch.float32),
            'left':  torch.tensor(left_seg, dtype=torch.float32),
            'cluster': cluster,
            'id': int(pid)
        }

        data.append(sample)


torch.save(data, "social_lstm_structured.pt")
print(f"Saved {len(data)} trajectory segments.")

# train and test
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# 保存为两个 pt 文件
torch.save(train_data, "train_social_lstm.pt")
torch.save(test_data, "test_social_lstm.pt")

print(f"Train: {len(train_data)} samples, Test: {len(test_data)} samples.")


Saved 7385 trajectory segments.
Train: 5908 samples, Test: 1477 samples.


In [16]:
from collections import Counter
labels = [d['cluster'] for d in data]
print(Counter(labels))


Counter({1: 4610, 2: 2775})


In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

def evaluate_classifier(model, test_data):
    y_true = []
    y_pred = []

    model.eval()
    with torch.no_grad():
        for sample in test_data:
            traj = sample['trajectory'].unsqueeze(0)  # [1, 15, D]
            neighbors = torch.stack([
                sample['up'], sample['right'],
                sample['down'], sample['left']
            ], dim=-1).unsqueeze(0)  # [1, 15, 4]

            label = sample['cluster']
            logits = model(traj, neighbors)
            pred = logits.argmax(dim=1).item()

            y_true.append(label)
            y_pred.append(pred)

    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Classification Report:")
    print(classification_report(y_true, y_pred, digits=3))
    print(" Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
