In [1]:
import numpy as np
import torch

def load_array(filename, task):
    datapoint = np.load(filename)
    if task == 'task 1':
        initial_state = datapoint['initial_state']
        terminal_state = datapoint['terminal_state']
        return initial_state, terminal_state
    elif task == 'task 2' or task == 'task 3':
        whole_trajectory = datapoint['trajectory']
        # change shape: (num_bodies, attributes, time) ->  num_bodies, time, attributes
        whole_trajectory = np.swapaxes(whole_trajectory, 1, 2)
        initial_state = whole_trajectory[:, 0]
        target = whole_trajectory[:, 1:, 1:]  # drop the first timepoint (second dim) and mass (last dim) for the prediction task
        return initial_state, target
    else:
        raise NotImplementedError("'task' argument should be 'task 1', 'task 2' or 'task 3'!")

In [2]:
#### Create adjacency matrix

# Define distance metrics
def euclidean_distance(x, y):
    return torch.sqrt(torch.sum((x - y)**2))

def inverse_distance(x, y):
    return 1 / euclidean_distance(x, y)

# Create adjacency matrix function
def create_adjacency_matrix(data, distance_metric):
    n = data.shape[0]
    adjacency_matrix = torch.zeros((n, n))
    for i in range(n):
        for j in range(n):
            if i != j:  # we don't calculate the distance of the object to itself
                # we extract the position [x, y] for both objects i and j
                position_i = data[i, 1:3]
                position_j = data[j, 1:3]
                adjacency_matrix[i, j] = distance_metric(position_i, position_j)
    return adjacency_matrix

# Validate input
def validate_input(X, adjacency_matrix):
    # X should be a 2D tensor
    assert X.dim() == 2, f"X must be 2D, but got shape {X.shape}"

    # The number of nodes should be the same in X and the adjacency matrix
    assert X.shape[0] == adjacency_matrix.shape[0] == adjacency_matrix.shape[1], \
        f"Mismatch in number of nodes: got {X.shape[0]} nodes in X, but {adjacency_matrix.shape[0]} nodes in adjacency matrix"

    # The adjacency matrix should be square
    assert adjacency_matrix.shape[0] == adjacency_matrix.shape[1], \
        f"Adjacency matrix must be square, but got shape {adjacency_matrix.shape}"

    print("All checks passed.")


In [3]:
"""
This cell gives an example of loading a datapoint with numpy for task 2 / 3.

The arrays returned by the function are structures as follows:
initial_state: shape (n_bodies, [mass, x, y, v_x, v_y])
remaining_trajectory: shape (n_bodies, time, [x, y, v_x, v_y])

Note that for this task, you are asked to evaluate performance only with regard to the predictions of the positions (x and y).
If you use the velocity of the remaining trajectory for training,
this use should be purely auxiliary for the goal of predicting the positions [x,y] over time. 
While testing performance of your model on the test set, you do not have access to v_x and v_y of the remaining trajectory.

"""

example = load_array('data/task 2_3/train/trajectory_0.npz', task='task 2')

initial_state, remaining_trajectory = example
print(f'shape of initial state (model input): {initial_state.shape}')
print(f'shape of terminal state (to be predicted by model): {remaining_trajectory.shape}')

body_idx = 2
time_idx = 30
print(f'The y-coordinate of the body with index {body_idx} at time with index {time_idx} in remaining_trajectory was {remaining_trajectory[body_idx, time_idx, 1]}')

test_example = load_array('data/task 2_3/test/trajectory_900.npz', task='task 3')
test_initial_state, test_remaining_trajectory = test_example
print(f'the shape of the input of a test data example is {test_initial_state.shape}')
print(f'the shape of the target of a test data example is {test_remaining_trajectory.shape}')
print(f'values of the test data example at time {time_idx}:\n {test_remaining_trajectory[:, time_idx]}')
print('note: velocity values are unobserved (NaNs) in the test data!')

shape of initial state (model input): (8, 5)
shape of terminal state (to be predicted by model): (8, 49, 4)
The y-coordinate of the body with index 2 at time with index 30 in remaining_trajectory was -0.3861544940435097
the shape of the input of a test data example is (8, 5)
the shape of the target of a test data example is (8, 49, 4)
values of the test data example at time 30:
 [[-5.85725792 -5.394571           nan         nan]
 [-6.03781257 -5.72445953         nan         nan]
 [-0.90623054 -6.93416278         nan         nan]
 [ 2.83149339 -7.50100819         nan         nan]
 [-2.85586881  1.77667501         nan         nan]
 [ 4.04424526  4.00563603         nan         nan]
 [-5.24887713 -4.83081005         nan         nan]
 [-5.81391023 -5.1109838          nan         nan]]
note: velocity values are unobserved (NaNs) in the test data!


In [27]:
import torch
import torch.nn as nn
from torch_geometric.nn import SAGEConv

class GraphSAGELSTM(nn.Module):
    def __init__(self, num_features, hidden_channels, num_classes, num_timesteps):
        super(GraphSAGELSTM, self).__init__()
        self.lstm = nn.LSTM(num_features, hidden_channels, batch_first=True)
        self.conv1 = SAGEConv(num_features, hidden_channels*2)
        self.conv2 = SAGEConv(hidden_channels*2, hidden_channels)
        self.conv3 = SAGEConv(hidden_channels, num_classes)
        self.dropout = torch.nn.Dropout(p=0.3)
        self.num_timesteps = num_timesteps

    def forward(self, x):
        x, edge_index, batch = x.x, x.edge_index, x.batch

        # 1st GraphSAGE layer
        x = self.conv1(x, edge_index)
        x = torch.relu(x)

        # 2nd GraphSAGE layer
        x = self.conv2(x, edge_index)
        x = torch.relu(x)

        # Reshape to (num_nodes, num_timesteps, hidden_channels)
        x = x.view(batch.shape[0], self.num_timesteps, x.size(1))

        # LSTM layer
        x, _ = self.lstm(x)

        # Reshape back to (num_nodes * num_timesteps, hidden_channels)
        x = x.view(-1, x.size(2))

        # Linear layer
        x = self.linear(x)

        return x


In [5]:
# DataLoader

from torch_geometric.data import Dataset, Data, DataLoader

class MyDataset(Dataset):
    def __init__(self, root, filenames, transform=None, pre_transform=None):
        self.filenames = filenames
        super(MyDataset, self).__init__(root, transform, pre_transform)

    @property
    def raw_file_names(self):
        return self.filenames

    def len(self):
        return len(self.filenames)

    def get(self, idx):
        X, y = load_array(self.filenames[idx], task='task 2')
        X = torch.tensor(X, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.float32)

        adjacency_matrix = create_adjacency_matrix(X, inverse_distance)
        edge_index = adjacency_matrix.nonzero().t()
        #edge_attr = idx/10.0 #Storing the timestamp as attribute of the edge

        data = Data(x=X, y=y, edge_index=edge_index)
        #print(f"X shape: {X.shape}, y shape: {y.shape}, edge_index shape: {edge_index.shape}")
        return data

filenames = [f'data/task 2_3/train/trajectory_{i}.npz' for i in range(900)]
dataset = MyDataset(root='data/task 2/train', filenames=filenames)

# Prepare for validation data set

val_filenames = [f'data/task 2_3/test/trajectory_{i}.npz' for i in range(901, 1000)]
val_dataset = MyDataset(root='data/task 2/test', filenames=val_filenames)
val_dataloader = DataLoader(val_dataset, batch_size=32)

dataloader = DataLoader(dataset, batch_size=32)



In [16]:
val_dataloader.dataset[0].x.shape[0]


9

In [28]:
device="cuda" if torch.cuda.is_available() else "cpu"
#device = "mps" #(I've no mps support on my pc)
hidden_channels = 16
model = GraphSAGELSTM(num_features=5, hidden_channels=hidden_channels, num_classes=2, num_timesteps=50).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.MSELoss()  # we use Mean Squared Error loss for regression tasks


for epoch in range(100):  # run for 100 epochs
    # Training
    model.train()
    for batch in dataloader:
        batch = batch.to(device)  # move batch to the device
        optimizer.zero_grad()  # set gradients to zero
        out = model(batch)  # forward pass
        loss = criterion(out, batch.y)  # compute loss
        loss.backward()  # backward pass (compute gradients)
        optimizer.step()  # update model parameters

    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in val_dataloader:
            batch = batch.to(device)
            out = model(batch)
            val_loss += criterion(out, batch.y).item() * batch.num_graphs

    val_loss /= len(val_dataset)  # compute average validation loss

    print(f'Epoch: {epoch+1}, Training Loss: {loss.item()}, Test Loss: {val_loss}')



RuntimeError: shape '[222, 50, 16]' is invalid for input of size 3552

In [None]:
# Calculate static and linear baselines (formula on ANS)
# THIS IS CORRECT! DO NOT CHANGE!

def static_baseline(X):
    return X[:, 1:3]  # initial x,y coordinates

def linear_baseline(X):
    return X[:, 1:3] + X[:, 3:5] * 5  # initial x,y coordinates plus velocity times time

def compute_baseline_loss(baseline_fn, dataloader):
    total_loss = 0
    criterion = torch.nn.MSELoss()  # we use Mean Squared Error loss for regression tasks

    for batch in dataloader:
        batch = batch.to(device)
        predictions = baseline_fn(batch.x).to(device)
        total_loss += criterion(predictions, batch.y).item() * batch.num_graphs

    return total_loss / len(dataloader.dataset)

train_loss_static = compute_baseline_loss(static_baseline, dataloader)
train_loss_linear = compute_baseline_loss(linear_baseline, dataloader)

val_loss_static = compute_baseline_loss(static_baseline, val_dataloader)
val_loss_linear = compute_baseline_loss(linear_baseline, val_dataloader)


 # now print out with filler spaces to make it easier to read
print(f'Training Loss   - Static Baseline: {train_loss_static:0.4f}, Linear Baseline: {train_loss_linear:0.4f}')
print(f'Validation Loss - Static Baseline: {val_loss_static:0.4f}, Linear Baseline: {val_loss_linear:0.4f}')


  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (2) must match the size of tensor b (4) at non-singleton dimension 2