TODO: Split the data into train, validation, and test sets.

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are reloaded
# before each cell executes and edits to the project's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Number of samples per batch; handed to the sampler below
batch_size = 16
# omega is the size of the time block that we give to the model
omega = 20
# Directory that SimulationDataset reads from
data_dir = "./data/preprocessed/"

In [3]:
# Import the project's dataset class and build the dataset from `data_dir`,
# passing `omega` (the time-block size) as the second argument.
# NOTE(review): how SimulationDataset uses omega internally is not visible here —
# presumably it determines the length of the samples it returns; confirm in dataset.py.
from dataset import SimulationDataset
simulationdataset = SimulationDataset(data_dir, omega)

In [4]:
# Import sampler that uses Time Adjusted Sampling as described in Appendix B
# NOTE(review): the document this "Appendix B" belongs to is not cited in this notebook —
# add a reference.
# The batch size is given to the sampler (not to the DataLoader) because the sampler
# is used as the DataLoader's `batch_sampler` further down.
from sampler import TimeAdjustedSampler
sampler = TimeAdjustedSampler(simulationdataset, batch_size=batch_size)

In [5]:
# Setup dataloader
import torch

def custom_collate(batch):
    """Merge a list of ``(x, f, y)`` samples into one ``(X, Y)`` pair of tensors.

    For every sample, ``x`` and ``f`` are concatenated along dimension 2. The
    concatenated tensors are then stacked along a new leading dimension to
    form ``X``; the ``y`` entries are stacked the same way to form ``Y``.

    Args:
        batch: Iterable of 3-item samples ``(x, f, y)`` of tensors.

    Returns:
        Tuple ``(X, Y)`` of stacked tensors.
    """
    samples = list(batch)

    # Join state and forcing per sample, then add the batch dimension.
    inputs = torch.stack(
        [torch.cat((state, forcing), dim=2) for state, forcing, _ in samples]
    )
    targets = torch.stack([target for _, _, target in samples])

    return inputs, targets

from torch.utils.data import DataLoader 
# The sampler is passed as `batch_sampler`, so it is expected to yield one list of
# dataset indices per batch; custom_collate then merges the corresponding samples
# into an (X, Y) pair.
dataloader = DataLoader(simulationdataset, batch_sampler=sampler, collate_fn=custom_collate)

In [6]:
# Fetch a single batch for inspection.
# next(iter(...)) raises StopIteration if the dataloader is empty, instead of silently
# leaving `batch` undefined (or stale from an earlier run of this notebook).
batch = next(iter(dataloader))

Before collation, a batch is a list of **batch_size (n)** samples. \
So, `batch = [sample_1, sample_2, ...., sample_n]`. \
Each sample consists of three items, `[X, F, Y]`. \
Where, 
- `X` is the state of the tokamak from time `t - w` to time `t`, i.e. `t-w:t`. Size: (`w`, 500, 6)
- `F` is the forcing from time `t-w` to time `t + w`, i.e. `t-w:t+w`. Size: (2*`w`, 500, 6)
- `Y` is the state of the tokamak from time `t` to time `t + w`, i.e. `t:t+w`. Size: (`w`, 500, 6)

`w` (called `omega` in the code) is the size of the time blocks, i.e. the number of timesteps we feed into the model and expect the model to output.

**Sampling:** \
Based on `w` we sample the dataset. \
We first sample the simulation length (number of timesteps) that we want to draw from. \
Then, we sample a simulation uniformly from all simulations with that length.

Longer simulations contain more possible time blocks. Therefore, the chance that any particular block gets selected diminishes as the number of timesteps in a simulation grows. We have therefore chosen to give a simulation with more timesteps a higher chance of being sampled than a simulation with fewer timesteps.

In [None]:
# X: first element of the (X, Y) tuple returned by custom_collate
# (per-sample x and f concatenated along dim 2, then stacked over the batch)
print("X shape: ", batch[0].shape)

# Y: second element of the (X, Y) tuple returned by custom_collate
# (the samples' y entries stacked over the batch)
print("Y shape: ", batch[1].shape)

In [None]:
import json
import os

# Keras reads KERAS_BACKEND when it is first imported and the backend cannot be
# changed afterwards, so the variable must be set before ANY module that imports
# keras is loaded. The project modules below may import keras themselves
# (TODO confirm), hence they are imported only after the backend is selected.
os.environ["KERAS_BACKEND"] = "torch"
import keras
from tqdm.auto import tqdm

from models import Forward, Prior, Posterior, Decoder
from train import train_step

# Instantiate models
# Two independent Forward instances: in the training loop the first layer of
# forward_t is adapted on x_t and the first layer of forward_tplus1 on x_tplus1.
forward_t = Forward()
forward_tplus1 = Forward()
prior = Prior()
posterior = Posterior()
decoder = Decoder()
# AdamW with Keras' default hyperparameters; handed to train_step on every batch.
opt = keras.optimizers.AdamW()

# Loop over epochs
import torch  # local import so this cell does not rely on an earlier cell for `torch`

num_epochs = 1000
# All checkpoints and the loss history of this run are written to this directory.
results_dir = "./results/basic0"
# Use the GPU when one is available; otherwise stay on the CPU instead of
# crashing in `.to("cuda")` on a machine without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Models checkpointed after every epoch, keyed by the file stem they are saved under.
checkpointed_models = {
    "forward_t": forward_t,
    "forward_tplus1": forward_tplus1,
    "prior": prior,
    "posterior": posterior,
    "decoder": decoder,
}

# One entry per epoch: {"kl_loss": [...], "rec_loss": [...]} with one float per batch.
train_loss_history = []
os.makedirs(results_dir, exist_ok=True)
for epoch in tqdm(range(num_epochs)):
    # Loop over batches
    epoch_loss_history = {"kl_loss": [], "rec_loss": []}
    for step, (x_t, x_tplus1) in enumerate(dataloader):
        # Move the batch to the selected device
        x_t = x_t.to(device)
        x_tplus1 = x_tplus1.to(device)
        # Initialize scaler: the first layer of each Forward model is adapted once,
        # on the very first batch only.
        # NOTE(review): presumably layers[0] is a normalization layer — confirm in models.py.
        if epoch == 0 and step == 0:
            forward_t.layers[0].adapt(x_t)
            forward_tplus1.layers[0].adapt(x_tplus1)
        # Train
        kl_loss, rec_loss = train_step(
            x_t, x_tplus1, forward_t, forward_tplus1, prior, posterior, decoder, opt
        )
        # Store the batch-mean of each loss as a plain Python number (JSON-serializable).
        epoch_loss_history["kl_loss"].append(keras.ops.mean(kl_loss).item())
        epoch_loss_history["rec_loss"].append(keras.ops.mean(rec_loss).item())
    train_loss_history.append(epoch_loss_history)
    # Save models (overwrites the previous epoch's checkpoint)
    for name, model in checkpointed_models.items():
        model.save(f"{results_dir}/{name}.keras")
    # Save training history (the whole history is rewritten every epoch)
    with open(f"{results_dir}/history.json", "w") as history_file:
        json.dump(train_loss_history, history_file)