In [4]:
import numpy as np
import wandb
import time
import torch
from torch import nn
from sklearn.model_selection import train_test_split

# Data loading
out = np.load('../training-val-test-data.npz')
th_train = out['th']
u_train = out['u']
# data = np.load('hidden-test-prediction-submission-file.npz')
# upast_test = data['upast']
# thpast_test = data['thpast']

# Function to create input-output data
def create_IO_data(u, y, na, nb):
    X, Y = [], []
    for k in range(max(na, nb), len(y)):
        X.append(np.concatenate([u[k-nb:k], y[k-na:k]]))
        Y.append(y[k])
    return np.array(X), np.array(Y)

# Define the neural network
class Network(nn.Module):
    def __init__(self, n_hidden_nodes=40, n_in=20):
        super(Network, self).__init__()
        self.layer1 = nn.Linear(n_in, n_hidden_nodes).double()
        self.layer2 = nn.Linear(n_hidden_nodes, n_hidden_nodes).double()
        self.layer3 = nn.Linear(n_hidden_nodes, 1).double()

    def forward(self, u):
        x1 = torch.nn.functional.leaky_relu(self.layer1(u))
        x2 = torch.nn.functional.leaky_relu(self.layer2(x1))
        y = self.layer3(x2)[:, 0]
        return y

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Sweep loop for na and nb
for na in range(16):
    for nb in range(16):
        X, Y = create_IO_data(u_train, th_train, na, nb)
        Xtemp, Xtest_final, Ytemp, Ytest_final = train_test_split(X, Y, test_size=0.15, random_state=42)
        Xtrain_final, Xval, Ytrain_final, Yval = train_test_split(Xtemp, Ytemp, test_size=0.1765, random_state=42)

        wandb.init(
            project="5SC28-NN-grid",
            config={
                "na": na,
                "nb": nb,
                "epochs": 280,
                "n_hidden_nodes": 40,
                "learning_rate": 0.001,
                "batch_size": 256
            }
        )

        config = wandb.config
        model = Network(n_hidden_nodes=config.n_hidden_nodes, n_in=na+nb).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

        Xtrain_tensor = torch.tensor(Xtrain_final).to(device)
        Ytrain_tensor = torch.tensor(Ytrain_final).to(device)
        Xval_tensor = torch.tensor(Xval).to(device)
        Yval_tensor = torch.tensor(Yval).to(device)

        for epoch in range(config.epochs):
            model.train()
            for i in range(0, len(Xtrain_tensor), config.batch_size):
                optimizer.zero_grad()
                y_pred = model(Xtrain_tensor[i:i+config.batch_size])
                loss = torch.mean((y_pred - Ytrain_tensor[i:i+config.batch_size])**2)
                loss.backward()
                optimizer.step()

            model.eval()
            with torch.no_grad():
                val_pred = model(Xval_tensor)
                val_loss = torch.mean((val_pred - Yval_tensor)**2).item()
                val_nrms = val_loss / torch.std(Yval_tensor).item()
                train_nrms = loss.item() / torch.std(Ytrain_tensor).item()

            wandb.log({
                "epoch": epoch,
                "train_loss": loss.item(),
                "train_NRMS": train_nrms,
                "val_loss": val_loss,
                "val_NRMS": val_nrms
            })

        torch.save(model.state_dict(), f"model_na{na}_nb{nb}.pt")
        # wandb.save(f"model_na{na}_nb{nb}.pt")
        wandb.finish()


0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇████
train_NRMS,▄▆▆▇▇██▇▇▇▅▄▄▄▃▃▃▃▃▃▂▃▂▃▂▂▂▂▂▂▁▁▁▁▁▂▃▁▁▁
train_loss,▅▆▆███▇▇▅▆▅▅▄▄▄▄▄▄▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁
val_NRMS,▅███▇▆▄▄▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▅▆▆▇████▇▆▄▄▄▄▃▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,279.0
train_NRMS,0.39551
train_loss,0.18971
val_NRMS,0.48013
val_loss,0.23057


0,1
epoch,▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇██
train_NRMS,▂▄▇▅█████▇▅▄▃▃▄▃▃▃▃▃▃▂▃▂▂▂▁▄▂▁▁▁▁▁▁▁▁▁▂▁
train_loss,▃▂▂▂▃▇▇█████▇▇▆▄▄▃▆▄▃▃▃▂▂▄▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_NRMS,▂▃▄▄▅██▆▆▅▅▂▂▂▂▃▂▂▂▂▂▂▂▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▂▅▅▆▇█▇▇▇▆▃▃▃▂▅▂▂▂▂▂▂▂▂▂▂▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,279.0
train_NRMS,0.39551
train_loss,0.18971
val_NRMS,0.48013
val_loss,0.23057


0,1
epoch,▁▁▁▁▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇████
train_NRMS,█▅▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▁▂▂▂
train_loss,█▄▄▁▁▁▂▂▂▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃
val_NRMS,▂███████▆▆▅▇▅▅▅▂▂▂▂▂▂▂▆▂▂▂▂▃▂▂▂▁▁▄▁▁▁▁▂▁
val_loss,▅▇███▇▇▇▇▅▄▄▄▄▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▁▁▂▁▁▁▂▃

0,1
epoch,279.0
train_NRMS,0.5037
train_loss,0.23999
val_NRMS,0.47845
val_loss,0.23236


0,1
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇███
train_NRMS,███▇▇▇▇▆▆▆▅▅▅▅▄▄▃▄▄▄▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁
train_loss,█▇▇▇▆▆▆▅▅▅▅▅▅▅▅▅▅▄▄▄▄▃▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁
val_NRMS,▅█▇▆▆▄▄▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁
val_loss,█▆▆▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,279.0
train_NRMS,0.41968
train_loss,0.20151
val_NRMS,0.3745
val_loss,0.17627


0,1
epoch,▁▁▁▁▁▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇█████
train_NRMS,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,▆▆███▇▇▆▅▅▄▄▄▄▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_NRMS,█▄▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,279.0
train_NRMS,0.28105
train_loss,0.13468
val_NRMS,0.25811
val_loss,0.12197


KeyboardInterrupt: 

