In [17]:
import matplotlib.pyplot as plt
from typing import List, Dict, Tuple, Any
from pathlib import Path
from tqdm import tqdm as tqdm
from ray import tune
from ray.air import Checkpoint, session
from ray.tune.schedulers import ASHAScheduler
from numpy.typing import NDArray
from .lib import *
import torch, os, pickle, time, json

In [19]:
# Allocate new tensors on the first CUDA device unless told otherwise.
torch.set_default_device("cuda:0")

# Read the pickled (input, output) constraint pair from disk ...
with open("constraints.p", "rb") as constraints_file:
    loaded_input, loaded_output = pickle.load(constraints_file)

# ... and rebuild each one as a fresh object from its means and stds.
input_constraints = InputConstraints(loaded_input.means, loaded_input.stds)
output_constraints = OutputConstraints(loaded_output.means, loaded_output.stds)

# Training and held-out datasets, both loaded with in_memory=True.
train_dataset, test_dataset = (
    SimData(bin_path, in_memory=True)
    for bin_path in ("./train.bin", "./test.bin")
)

In [None]:
###################
# Train/Test Loop #
###################

# --- Hyperparameters and run options ---
epochs = 1000            # total number of training epochs requested
k = 5                    # epochs trained between validation passes (for validation loss plots)
n = epochs // k          # number of train-then-validate rounds
batch_size = 4096
num_layers = 1
num_neurons = 512
learning_rate = 0.025
verbose = False
filename = None          # path of a pre-trained model to load; None builds a new model
save_files = True        # write the plot, model and metadata to the output directory
show_plots = True        # display figures interactively

# Snapshot of the settings above; later dumped to meta.json next to the results.
config_dict = dict(
    epochs=epochs,
    k=k,
    batch_size=batch_size,
    num_layers=num_layers,
    num_neurons=num_neurons,
    learning_rate=learning_rate,
    verbose=verbose,
    filename=filename,
    save_files=save_files,
)

# The output directory is named after the time at which training starts.
timestr = time.strftime("%d-%m-%Y-%H:%M:%S")
output_directory = Path("./") / f"training-{timestr}"
if save_files:
    # Parent first, then one sub-directory per dataset split.
    for run_dir in (
        output_directory,
        output_directory / "train",
        output_directory / "test",
    ):
        os.mkdir(run_dir)

# Construct a fresh model, or load a previously saved one.
if filename is None:
    model = VehicleModel(
        input_constraints,
        output_constraints,
        num_neurons,
        num_layers,
        verbose=verbose
    )
else:
    # This notebook saves the whole model object with torch.save(model, ...),
    # not a state dict. PyTorch >= 2.6 defaults torch.load to weights_only=True,
    # which refuses to unpickle such files, so opt out explicitly.
    # NOTE(review): full unpickling can execute arbitrary code -- only load
    # model files that you created yourself or otherwise trust.
    model = torch.load(filename, weights_only=False)

# Initialise the optimiser over all model parameters.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate
)

# Loss histories: training_loss is extended with whatever train_loop returns
# each round; validation_loss gets one test_loop result per round.
training_loss = []
validation_loss = []

model.train()

# Alternate k epochs of training with one validation pass, n times.
for _ in tqdm(range(n), "Training", disable=False):
    training_loss.extend(
        model.train_loop(
            train_dataset,
            batch_size,
            k,
            optimizer
        )
    )
    vl = model.test_loop(
        test_dataset,
        batch_size
    )
    validation_loss.append(vl)

print(f"Final Validation Loss: {validation_loss[-1]}")
print(f"Final Training Loss: {training_loss[-1]}")

# Training curve. The x-axis is derived from the data rather than from
# `epochs`: only n * k epochs are trained, so when `epochs` is not a multiple
# of `k` a hard-coded range(1, epochs + 1) would not match the number of
# recorded losses and the plot call would fail.
# (Assumes train_loop returns one loss value per epoch -- TODO confirm.)
plt.plot(
    list(range(1, len(training_loss) + 1)),
    training_loss,
    color="blue",
    label="training"
)
# Validation curve: one point after every k-th epoch (n points in total).
plt.plot(
    list(range(k, epochs + 1, k)),
    validation_loss,
    color="red",
    label="validation"
)
plt.yscale("log")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()

if save_files:
    # Save the figure before plt.show(), alongside the model and run settings.
    plt.savefig(output_directory / "loss.png")
    torch.save(model, output_directory / "model.pt")
    with open(output_directory / "meta.json", "w") as f:
        json.dump(config_dict, f, indent=1)
if show_plots:
    plt.show()

# Prediction plots for both splits; written to the matching sub-directory
# when saving is enabled, otherwise no output path is passed.
for split_dataset, split_name in ((train_dataset, "train"), (test_dataset, "test")):
    model.plot_predictions(
        split_dataset,
        output_directory / split_name if save_files else None,
        show=show_plots
    )

In [None]:
########################
# Parameter Experiment #
########################

def train(config):
    """Train and evaluate one VehicleModel for a single Ray Tune trial.

    Args:
        config: Mapping of trial hyperparameters.
            Required keys: "num_neurons", "num_layers", "lr", "batch_size".
            Optional keys:
              "epochs"   -- number of training epochs (default 1000).
              "data_dir" -- directory containing train.bin and test.bin
                            (default "/home/joe/epsrc_vehicle_model").

    Reports (via session.report), once, after training has finished:
        "test_rmse":  the value returned by model.test_loop on the test data.
        "train_rmse": the last element returned by model.train_loop.

    The model is built from the module-level `input_constraints` and
    `output_constraints`, so those must be defined before trials are launched.
    """
    torch.set_default_device("cuda:0")

    # NOTE(review): the default is an absolute path, presumably because trials
    # do not run from the notebook's working directory -- confirm.
    data_dir = config.get("data_dir", "/home/joe/epsrc_vehicle_model")
    train_dataset = SimData(os.path.join(data_dir, "train.bin"), in_memory=True)
    test_dataset = SimData(os.path.join(data_dir, "test.bin"), in_memory=True)

    epochs = config.get("epochs", 1000)
    model = VehicleModel(
        input_constraints,
        output_constraints,
        config["num_neurons"],
        config["num_layers"],
        False
    )
    model.cuda()

    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=config["lr"]
    )

    model.train()

    # Train for all epochs in one call and keep only the final entry.
    train_err = model.train_loop(
        train_dataset,
        config["batch_size"],
        epochs,
        optimizer
    )[-1]

    test_err = model.test_loop(
        test_dataset,
        config["batch_size"]
    )

    session.report({
        "test_rmse" : test_err,
        "train_rmse" : train_err
    })

# Search space sampled by Ray Tune for each trial.
config = {
    "num_layers" : tune.choice([1]),
    # Powers of two from 16 to 4096. Passed as a list: tune.choice is
    # documented to take a sequence of categories, not a one-shot generator.
    "num_neurons" : tune.choice([2**i for i in range(4, 13)]),
    "lr" : tune.loguniform(1e-3, 1e-1),
    "batch_size" : tune.choice([4096])
}

# Scheduler that minimises the reported "test_rmse" metric.
scheduler = ASHAScheduler(
    metric="test_rmse",
    mode="min",
)

# Run 250 sampled trials, each allotted 4 CPUs and 1 GPU.
result = tune.run(
    train,
    config=config,
    scheduler=scheduler,
    num_samples=250,
    resources_per_trial={'cpu' : 4, 'gpu' : 1}
)

# Persist the experiment results under a timestamped file name. The `with`
# block guarantees the file is flushed and closed (the handle was previously
# opened inline and never closed).
results_path = f"parameter-experiment-{time.strftime('%d-%m-%Y-%H:%M:%S')}.p"
with open(results_path, "wb") as results_file:
    pickle.dump(result, results_file)

In [None]:
# Trial results as a DataFrame, ordered by ascending training error.
df = result.dataframe().sort_values("train_rmse")

# Keep just the training error and the two sampled hyperparameters of
# interest, then preview the first rows.
columns_of_interest = ["train_rmse", "config/num_neurons", "config/lr"]
data = df[columns_of_interest]
data.head()