# SHANTIH MLP

# CODE TO START NORMALIZING TEST AND RUN ALL

In [26]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
import os

In [27]:
train_file = np.load('./cse-251-b-2025/train.npz')

train_data = train_file['data']
print("train_data's shape", train_data.shape)
test_file = np.load('./cse-251-b-2025/test_input.npz')

test_data = test_file['data']
print("test_data's shape", test_data.shape)

train_data's shape (10000, 50, 110, 6)
test_data's shape (2100, 50, 50, 6)


### Test data

In [28]:
test_data = test_file['data']
test_x = test_data[:, 0, :50, :2]
initial_test_x = test_x[:, 0:1, :].copy()
test_x -= initial_test_x

print(test_x.shape)

(2100, 50, 2)


### Code for autoregressive stuff

In [29]:
# CHANGE THESE FOR THE NUMBER OF TIMESTEPS TO USE AND THE NUMBER OF TIMESTEPS AHEAD TO PREDICT
num_features = 50
num_labels = 10

def do_autoregressive(train_data, num_features, num_labels):
    ar_train_x, ar_train_y = [], []

    for s in range(train_data.shape[0]):
        for p in range(num_features, 110 - num_labels):
            train_x, train_y = train_data[s, 0, p-num_features:p, :2], train_data[s, 0, p:p+num_labels, :2] 
            initial_train_x, = train_x[0:1, :].copy()
            train_x -= initial_train_x
            train_y -= initial_train_x
            ar_train_x.append(train_x)
            ar_train_y.append(train_y)
    ar_train_x = np.stack(ar_train_x, axis=0)
    ar_train_y = np.stack(ar_train_y, axis=0)
    return ar_train_x, ar_train_y

### Training and validation sets

In [None]:
#SET TO FALSE IF YOU DONT WANT AUTOREGRESSIVE
#auto_regressive = False
auto_regressive = True

train_x, train_y = train_data[:, 0, :50, :2], train_data[:, 0, 50:, :2]
initial_train_x, initial_train_y = train_x[:, 0:1, :].copy(), train_y[:, 0:1, :].copy()
train_x -= initial_train_x
train_y -= initial_train_x

print(train_x.shape)

if auto_regressive:
    train_x, train_y = do_autoregressive(train_data, num_features, num_labels)
    print(train_x.shape, train_y.shape)

ratio_validation = 0.2
perm = torch.randperm(train_x.shape[0])
idx = int(ratio_validation * train_x.shape[0])

new_train_x = train_x[perm[idx:]]
new_train_y = train_y[perm[idx:]]

val_x = train_x[perm[:idx]]
val_y = train_y[perm[:idx]]

print(train_x.shape, train_y.shape)
print(new_train_x.shape, new_train_y.shape)
print(val_x.shape, val_y.shape)

(10000, 50, 2)
(10000, 50, 2) (10000, 60, 2)
(8000, 50, 2) (8000, 60, 2)
(2000, 50, 2) (2000, 60, 2)


### Code for adding closest agent feature (in progress)

In [31]:
def dist(x1, y1, x2, y2):
    return Math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)

def add_closest_agent(x):
    #x is the features.
    #I want to take them from (num_scenes, num_agents, num_timesteps, num_dimensions) to (num_scenes, num_timesteps, num_dimensions * 2) by adding the dimensions of the closest agent

    new_x = []
    for s in x.shape[0]:
        each_scene = []
        for t in x.shape[2]:
            ca = 1
            for a in x.shape[1]:
                if dist(x[s][0][t][0], x[s][0][t][1], x[s][a][t][0], x[s][a][t][1]) < dist(x[s][0][t][0], x[s][0][t][1], x[s][ca][t][0], x[s][ca][t][1]):
                    ca = a
            each_scene.append([x[s][0][t][0], x[s][0][t][1], x[s][ca][t][0], x[s][ca][t][1]])
        new_x.append(each_scene)

    return new_x

### Play with the number of layers and sizes. My computer takes too long

In [32]:
class ComplexMLP(nn.Module):
    def __init__(self, input_features, output_features):
        super(ComplexMLP, self).__init__()

        # Define the layers
        self.input_features = input_features
        self.output_features = output_features
        self.flatten = nn.Flatten()
        self.mlp = nn.Sequential(
            nn.Linear(input_features, 1024),
            nn.ReLU(),
            #nn.Dropout(0.1),

            nn.Linear(1024, 512),
            nn.ReLU(),
            #nn.Dropout(0.1),

            nn.Linear(512, 256),
            nn.ReLU(),
            #nn.Dropout(0.1),

            nn.Linear(256, output_features)
        )

    def forward(self, x):
        x = self.flatten(x)
        x = self.mlp(x)
        return x

In [33]:
def predict(model, x):
    model.eval()
    with torch.no_grad():
        x_tensor = torch.FloatTensor(x).reshape((-1, input_features))
        predictions = model(x_tensor).reshape((-1, int(output_features / 2), 2))
        return predictions.numpy()


In [34]:
def evaluate_model(model, x, y, dataset):
    pred_y = predict(model, x)
        
    mse = ((pred_y - y) ** 2).mean()
    print("Model MSE evaluated on", dataset, ":", mse.item())
    return mse

In [35]:
# Example of how to prepare data and train the model

def new_train_model(model, criterion, optimizer, train_x, train_y, val_x, val_y, batch_size=64, epochs=10):
    # Convert numpy arrays to PyTorch tensors
    X_train_tensor = torch.FloatTensor(train_x).reshape((-1, model.input_features))
    y_train_tensor = torch.FloatTensor(train_y).reshape((-1, model.output_features))
    print(X_train_tensor.shape)
    print(y_train_tensor.shape)

    # Create dataset and dataloader
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Training loop
    for epoch in range(epochs):
        running_loss = 0.0

        for batch_X, batch_y in train_loader:
            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(batch_X)

            # Calculate loss
            loss = criterion(outputs, batch_y)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Print epoch statistics
        #print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}')
        print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}')
        evaluate_model(model, train_x, train_y, "TRAIN")
        evaluate_model(model, val_x, val_y, "VALIDATION")

    return model


### Run this to train the model. You can interrupt it if it takes too long and continue on

In [36]:
input_features = 50 * 2  # = 100
output_features = 60 * 2

if auto_regressive:
    input_features = num_features * 2
    output_features = num_labels * 2

print(input_features, output_features)

norm_model = ComplexMLP(input_features, output_features)

# Define loss function and optimizer
criterion = nn.MSELoss()

optimizer = optim.Adam(norm_model.parameters(), lr=0.001)

norm_model = new_train_model(norm_model, criterion, optimizer, new_train_x, new_train_y, val_x, val_y, batch_size=64, epochs=20)

100 120
torch.Size([8000, 100])
torch.Size([8000, 120])
Epoch 1, Loss: 157.1762
Model MSE evaluated on TRAIN : 53.42602901237716
Model MSE evaluated on VALIDATION : 52.9545423526227
Epoch 2, Loss: 43.9422
Model MSE evaluated on TRAIN : 45.00088687284408
Model MSE evaluated on VALIDATION : 44.3307604812709
Epoch 3, Loss: 39.3458
Model MSE evaluated on TRAIN : 38.906961109990895
Model MSE evaluated on VALIDATION : 39.004875587928694
Epoch 4, Loss: 37.9428
Model MSE evaluated on TRAIN : 32.87745021870372
Model MSE evaluated on VALIDATION : 33.46889555617869
Epoch 5, Loss: 36.5004
Model MSE evaluated on TRAIN : 31.960838469630744
Model MSE evaluated on VALIDATION : 32.28646478842156
Epoch 6, Loss: 33.1266
Model MSE evaluated on TRAIN : 35.57486355569097
Model MSE evaluated on VALIDATION : 35.776053653390015
Epoch 7, Loss: 32.3264
Model MSE evaluated on TRAIN : 37.91527700048772
Model MSE evaluated on VALIDATION : 38.03283982450787
Epoch 8, Loss: 34.5367
Model MSE evaluated on TRAIN : 30.20

KeyboardInterrupt: 

In [12]:
evaluate_model(norm_model, train_x, train_y, "TRAIN")
evaluate_model(norm_model, val_x, val_y, "VALIDATION")

100 20 10.0
Model MSE evaluated on TRAIN : 0.3644307232142692
100 20 10.0
Model MSE evaluated on VALIDATION : 0.36405780795426457


0.36405780795426457

### Need this method for autoregressive inference. Basically doing and shifting thing

In [54]:
def ar_inference(norm_model, feature):
    print('in', feature.shape)
    answer = []
    for i in range(int(60 / num_labels)):
        pred = predict(norm_model, feature)
        answer.append(pred)
        feature = np.concatenate((feature, pred), axis=1)
        feature = feature[:, -num_features:, :]
    answer = np.concatenate(answer, axis=1)
    return answer

## Remember you need to delete the shantih_mlp.csv if you want to make a new one. I don't think it rewrites it

In [55]:
print(test_x.shape)
if not auto_regressive:
    pred_y = predict(norm_model, test_x)
else:
    pred_y = ar_inference(norm_model, test_x)

pred_y += initial_test_x

# Code to write the prediction to file
pred_output = pred_y.reshape(-1, 2)
output_df = pd.DataFrame(pred_output, columns=['x', 'y'])

output_df.index.name = 'index'

output_df.to_csv('shantih_mlp.csv')

(2100, 50, 2)
in (2100, 50, 2)
100 20 10.0
100 20 10.0
100 20 10.0
100 20 10.0
100 20 10.0
100 20 10.0
