In [3]:
import os
import pandas as pd
import numpy as np
import torch
import tsl

from torch_geometric.data import Data
from tsl.data import SpatioTemporalDataset
from tsl.datasets import DatetimeDataset

In [4]:
# Possible extensions
#       - sinusoidal time encoding for both daily and weekly patterns
#       - one-hot encoding for the day of the week
#       - links encoding the distance between nodes, or distance attributes on the movement links (this may already be captured by the node coordinates)

# Still to implement
#       - morphological node attributes
#       - weather node attributes
#       - better grouping of positions into nodes


In [26]:
# Import the node table and the position-link table from CSV.
# The data root can be overridden with the THESIS_BASE_DIR environment variable so the
# notebook is not tied to one machine; when it is unset the original location is used.
base_dir = os.environ.get('THESIS_BASE_DIR', 'D:/Desktop/MSc Thesis - Copy/')

nodes_df = pd.read_csv(os.path.join(base_dir, 'shapefiles/_network_parts_1/nodes.csv'), header=0)
positions_links_df = pd.read_csv(os.path.join(base_dir, 'shapefiles/_network_parts_1/positions_links.csv'), header=0)




In [27]:
# Prepare both frames for tensor conversion.

# Remove the geometry column from the node table. A non in-place drop with
# errors='ignore' lets this cell be re-run without a KeyError once the column is gone.
nodes_df = nodes_df.drop(columns=['geometry'], errors='ignore')
# Encode boolean node attributes as 0/1 integers.
for col in nodes_df.select_dtypes(include=['bool']).columns:
    nodes_df[col] = nodes_df[col].astype(int)

# Convert ISO-8601 time stamps to Unix seconds. The guard skips the parse when the
# column is already numeric (i.e. the cell was run before), so re-running is safe.
# The explicit 'int64' avoids the platform-dependent width of plain `int`.
# NOTE(review): the division by 10**9 assumes nanosecond resolution — confirm for the installed pandas version.
if not pd.api.types.is_numeric_dtype(positions_links_df["merged_datetime"]):
    positions_links_df["merged_datetime"] = (
        pd.to_datetime(positions_links_df['merged_datetime'], format='ISO8601').astype('int64') // 10**9
    )
# Float copy of the Unix timestamp in a separate column.
positions_links_df["timestamp"] = positions_links_df["merged_datetime"].astype(float)
#positions_links_df.set_index('merged_datetime', inplace=True)                                                                           # setting time as index

# Cast the id columns to integers. The result must be assigned back:
# astype returns a new Series and leaves the frame unchanged otherwise.
for id_col in ('id_participant', 'node_id', 'prev_node_id'):
    positions_links_df[id_col] = positions_links_df[id_col].astype(int)

In [28]:
# Edge list with one column per position-link row: row 0 holds prev_node_id,
# row 1 holds node_id.
source_nodes = positions_links_df["prev_node_id"].values
destination_nodes = positions_links_df["node_id"].values
edge_index = torch.tensor(np.stack((source_nodes, destination_nodes)), dtype=torch.long)

# Edge attributes with one row per edge: [id_participant, speed], stored as float.
participant_ids = positions_links_df['id_participant'].values
speeds = positions_links_df['speed'].values
edge_attr = torch.tensor(np.column_stack((participant_ids, speeds)), dtype=torch.float)

In [29]:

# Node feature matrix: every column of nodes_df except node_id, converted to a float tensor.
node_feature_frame = nodes_df.drop('node_id', axis=1)
node_features = torch.tensor(node_feature_frame.values, dtype=torch.float)


In [30]:
# Build the target as a three-dimensional (position-link row, node, feature) tensor.
# For each row, the slice of the node referenced by node_id receives that node's
# feature vector; every other entry stays zero.
n_steps = len(positions_links_df)
visited_nodes = positions_links_df["node_id"].to_numpy(dtype=np.int64)
node_feature_array = node_features.numpy()

# Allocate directly in float32 (the final tensor dtype) instead of float64 to halve peak memory.
target = np.zeros((n_steps, len(nodes_df), node_feature_array.shape[1]), dtype=np.float32)
# One vectorised assignment replaces the row-by-row iterrows loop. Rows are addressed by
# position (np.arange), so the result does not depend on the frame's index labels.
# NOTE(review): node_id is used as a row position into node_features, which assumes
# node ids match the row order of nodes_df — confirm.
target[np.arange(n_steps), visited_nodes, :] = node_feature_array[visited_nodes]

target = torch.from_numpy(target)



In [32]:
# Sanity check: shape of the edge list and how many distinct node ids it references.
n_unique_edge_nodes = torch.unique(edge_index).numel()
print("Edge index shape:", edge_index.shape)
print("Unique nodes in edge_index:", n_unique_edge_nodes)

Edge index shape: torch.Size([2, 13489])
Unique nodes in edge_index: 3823


In [34]:
# Restrict the dataset to the nodes that actually occur in the movement graph.
# Slicing the first K nodes (K = number of unique ids in edge_index) only matches the
# graph when those ids are exactly 0..K-1. Selecting the ids themselves and relabelling
# the edge endpoints keeps target and connectivity consistent in every case, and gives
# the same values as the slice when the ids are already contiguous.
active_nodes = torch.unique(edge_index)                              # sorted unique node ids
edge_index_active = torch.searchsorted(active_nodes, edge_index)     # id -> position in active_nodes

# NOTE(review): advanced indexing copies the selected part of target — watch memory for large graphs.
# NOTE(review): index is the integer Unix-second column; confirm that is the index type the dataset expects.
test_dataset_st = SpatioTemporalDataset(
    target=target[:, active_nodes, :],                               # (time, nodes, features)
    index=positions_links_df["merged_datetime"],
    connectivity=(edge_index_active, torch.ones(edge_index_active.shape[1])),  # unit weight per edge
    window=12,
    horizon=1
)

In [35]:
# Show the dataset summary (sample, node and channel counts).
print(test_dataset_st)

SpatioTemporalDataset(n_samples=13477, n_nodes=3823, n_channels=5)


In [None]:
# number of steps to predict
k = 5

# Columns whose name starts with target_prefix.
# The filter previously tested the bound method `column.startswith` without calling it,
# which is always truthy. The method is now called with an explicit prefix. The empty
# default matches every string column name, so the result is unchanged until a prefix is set.
target_prefix = ""  # TODO: set to the prefix that identifies the target columns
target_columns = [column for column in positions_links_df.columns if column.startswith(target_prefix)]

In [10]:
from torch_geometric.utils import dense_to_sparse

# Scatter the first edge attribute into a dense (n_nodes x n_nodes) matrix, then convert
# that matrix back into sparse (edge_index, edge_attr) form for the dataset connectivity.
n_nodes_total = len(nodes_df)
adj_matrix = torch.zeros(n_nodes_total, n_nodes_total)
edge_sources, edge_destinations = edge_index[0], edge_index[1]
# NOTE(review): column 0 of edge_attr is id_participant where edge_attr is built in this
# notebook — confirm that it is really meant to act as the edge weight.
adj_matrix[edge_sources, edge_destinations] = edge_attr[:, 0]

# Convert to sparse format
sparse_edge_index, sparse_edge_attr = dense_to_sparse(adj_matrix)

# Covariates handed to the dataset: two per-row columns plus the node feature matrix.
# NOTE(review): this cell needs node_features to exist already; the recorded run of this
# cell ended in a NameError for that name.
dataset_st_covariates = dict(
    participant_id=positions_links_df["id_participant"].values,
    speed=positions_links_df["speed"].values,
    node_features=node_features.numpy(),
)

# Spatio-temporal dataset using node_id as target and the sparse graph as connectivity.
dataset_st = SpatioTemporalDataset(
    target=positions_links_df["node_id"].values,
    index=positions_links_df["merged_datetime"],
    connectivity=(sparse_edge_index, sparse_edge_attr),
    covariates=dataset_st_covariates,
    window=12,
    horizon=1
)

NameError: name 'node_features' is not defined

In [None]:
# Alternative graph tensors. Running this cell rebinds node_features, edge_index and
# edge_attr, replacing any values those names held before.

# Node coordinates as an integer tensor.
# NOTE(review): dtype=torch.long drops any fractional part of node_x / node_y — confirm
# that integer coordinates are intended.
node_features = torch.tensor(nodes_df[['node_x', 'node_y']].values, dtype=torch.long)

# Stack the endpoints into a single (2, n_edges) ndarray first. Passing a Python list of
# ndarrays to torch.tensor takes a slow element-wise path that PyTorch warns about.
edge_index = torch.tensor(
    np.vstack([
        positions_links_df['prev_node_id'].values,
        positions_links_df['node_id'].values
    ]),
    dtype=torch.long
)

# Edge attributes with one row per edge: [id_participant, timestamp, speed], stored as float.
edge_attr = torch.tensor(
    np.vstack([
        positions_links_df['id_participant'].values,
        positions_links_df['timestamp'].values,
        positions_links_df['speed'].values
    ]).T,
    dtype=torch.float
)


In [None]:
# Add None to the class-level similarity_options collection
# (presumably so that similarity_score=None below is accepted — confirm).
DatetimeDataset.similarity_options.add(None)

# Integer node_id frame indexed by (merged_datetime, node_id).
# NOTE(review): this rebinds the notebook-level name `target`, and the frame built here
# is not the one passed to DatetimeDataset below — confirm which target is intended.
node_id_frame = positions_links_df[['node_id']].astype(int)
datetime_node_index = pd.MultiIndex.from_frame(positions_links_df[['merged_datetime', 'node_id']])
target = node_id_frame.set_axis(datetime_node_index, axis=0)

dataset_ts_kwargs = dict(
    target=positions_links_df[['node_id']],
    covariates={},                  # node weather values and the sinusoidal time encoding can go here
    freq=None,
    sort_index=True,
    force_synchronization=True,     # might need to look into this
    similarity_score=None,
)
dataset_ts = DatetimeDataset(**dataset_ts_kwargs)


In [None]:
# Print a quick overview of dataset_ts: summary, node count, then three labelled previews.
print(dataset_ts)
print("Number of nodes in dataset:", dataset_ts.n_nodes)

# Each preview is (heading, callable producing the value). The value is only computed
# after its heading has been printed.
previews = (
    ("\nTarget Preview:", lambda: dataset_ts.target.head()),
    ("\nCovariates Keys:", lambda: dataset_ts.covariates.keys()),
    ("\nFirst Few Time Steps:", lambda: dataset_ts.index[:5]),
)
for heading, produce in previews:
    print(heading)
    print(produce())

In [None]:
# Rebind `target` to the single-column node_id frame and print its index head,
# column labels and shape.
target = positions_links_df.loc[:, ['node_id']]

# Expected: timestamps in the index, ['node_id'] as columns, shape (num_time_steps, num_features).
# NOTE(review): the set_index call on merged_datetime is commented out earlier in this
# notebook, so the index shown here is presumably the frame's default index, not timestamps — confirm.
for summary in (target.index[:5], target.columns, target.shape):
    print(summary)

In [None]:
# Compare the dataset's target shape with the number of distinct node ids in the raw frame.
print(dataset_ts.target.shape)
n_unique_node_ids = positions_links_df["node_id"].nunique()
print("Unique nodes in dataset:", n_unique_node_ids)


