In [1]:
import torch
import torch.nn as nn
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import ml_models
import data_pipeline2 as dp
import training_models as tm

import os
import seaborn as sns
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
from torch_geometric.nn import GCNConv
from torch_geometric.nn.models import GCN

import pinns

from functools import partial
import tempfile
from pathlib import Path
import torch.nn.functional as F
from torch.utils.data import random_split
import torchvision
import torchvision.transforms as transforms
from ray import tune
from ray import train
from ray.train import Checkpoint, get_checkpoint
from ray.tune.schedulers import ASHAScheduler
import ray.cloudpickle as pickle

In [12]:
def train_pinn_1(config,data_dir = None):
    
    data_dir = 'C:/Users/lefti/OneDrive - KTH/phd_kth/PINNs'
    
    output = config['output_size']
    inputs = config['input_size']
    
    state_dict = torch.load(f'{data_dir}/models/mlp/output_size{output}input_size{inputs}.pt')
    mod = tm.train_model(config, it_amt=0, model_fnc=ml_models.MLP, data_dir=data_dir+'/data_synthetic')
    mod.load_state_dict(state_dict)

    model= pinns.decay_pinn(mod)


    #model = pinns.train_pinn_1(config, it_amt=100, model=pinn_mod, data_dir = data_dir+'/data_synthetic')
    
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    model.to(device)
    
    optimizer = optim.Adam(model.parameters(), lr = config['lr'])
    criterion = nn.MSELoss()
    
    checkpoint = get_checkpoint()
    if checkpoint:
        with checkpoint.as_directory() as checkpoint_dir:
            data_path = Path(checkpoint_dir) / "data.pkl"
            with open(data_path, "rb") as fp:
                checkpoint_state = pickle.load(fp)
            start_epoch = checkpoint_state["epoch"]
            model.load_state_dict(checkpoint_state["net_state_dict"])
            optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"])
    else:
        start_epoch = 0
        
    file_nr = 0
    chosen_sensor = 11
    train_percentage = 80
    

    df_train, df_test = dp.get_cmapss_data(file_nr, train_percentage,  data_dir = data_dir+'/data_synthetic', chosen_sensor=chosen_sensor)

    trainloader, idx = dp.get_loaded_data(df_train, config['input_size'], config['output_size'], config['batch_size'])
    testloader, idx = dp.get_loaded_data(df_test, config['input_size'], config['output_size'], config['batch_size'])

    best_val_loss = 100
    for epoch in range(start_epoch, 500):  # loop over the dataset multiple times
        running_loss = 0.0
        epoch_steps = 0

        for i, data in enumerate(trainloader):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels, tx, ty, sensor = data
            inputs, labels = inputs.to(device), labels.to(device)
            
            #ty = ty.clone().detach().requires_grad_(True)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)            
            loss = criterion(outputs, labels)
            
            # Compute physics loss (parameters mu and k in the ODE)
            decay = model.decay
            
            #x_phys = model(t_phys)
            

            xdot = pinns.finite_difference_derivative(outputs, ty)
            #print(xdot)
            
            ode_residual = xdot + decay*ty**(decay-1)
            #print(torch.mean(ode_residual**2))
            
            loss += config['lambda_'] * torch.mean(ode_residual**2)
            
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            epoch_steps += 1
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1,
                                                running_loss / epoch_steps))
                running_loss = 0.0

        # Validation loss
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        for i, data in enumerate(testloader, 0):
            with torch.no_grad():
                inputs, labels, tx, ty, sensor = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                
                loss = criterion(outputs, labels)

                val_loss += loss.cpu().numpy()
                val_steps += 1
                
        checkpoint_data = {
            "epoch": epoch,
            "net_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }
        with tempfile.TemporaryDirectory() as checkpoint_dir:
            data_path = Path(checkpoint_dir) / "data.pkl"
            with open(data_path, "wb") as fp:
                pickle.dump(checkpoint_data, fp)

            checkpoint = Checkpoint.from_directory(checkpoint_dir)
            train.report(
                {"loss": val_loss / val_steps},
                checkpoint=checkpoint,
            )

    print("Finished Training")

    
    return model

In [13]:
def test_best_model(best_result, smoke_test=False):
    best_trained_model = train_pinn_1(config)
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    best_trained_model.to(device)

    checkpoint_path = os.path.join(best_result.checkpoint.to_directory(), "checkpoint.pt")

    model_state, optimizer_state = torch.load(checkpoint_path)
    best_trained_model.load_state_dict(model_state)


    testloader, idx = dp.get_loaded_data(df_test, best_result.config['input_size'], best_result.config['output_size'], best_result.config['batch_size'])

    val_loss = 0.0
    val_steps = 0
    with torch.no_grad():
        for data in testloader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)

            loss = criterion(outputs, labels)
            val_loss += loss.cpu().numpy()
            val_steps += 1


    print("Best trial test set loss: {}".format(val_loss / val_steps))

In [14]:
# Set this to True for a smoke test that runs with a small synthetic dataset.
SMOKE_TEST = False

def custom_trial_name(trial):
    return f"trial_{trial.trial_id}"


In [15]:
def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2):
    #data_dir = os.path.abspath("./data_synthetic")
    #load_data(data_dir)
    config = {
        'input_size' : 5,
        'lr' : tune.loguniform(1e-6,1e-1),
        'batch_size' : 32,
        'hidden_size' : 8,
        'output_size' : 40,
        'layer_amt' : 3,
        'lambda_' : tune.loguniform(1e-6,1e-1)
        }
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2,
    )
    
    result = tune.run(
        partial(train_pinn_1, data_dir='delete'),
        resources_per_trial={"cpu": 1, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        trial_dirname_creator=custom_trial_name
    )

    best_trial = result.get_best_trial("loss", "min", "last")
    print(f"Best trial config: {best_trial.config}")
    print(f"Best trial final validation loss: {best_trial.last_result['loss']}")
    
    best_trained_model = train_pinn_1(best_trial.config["input_size"], best_trial.config["hidden_size"], best_trial.config["layer_amt"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    best_checkpoint = result.get_best_checkpoint(trial=best_trial, metric="accuracy", mode="max")
    with best_checkpoint.as_directory() as checkpoint_dir:
        data_path = Path(checkpoint_dir) / "data.pkl"
        with open(data_path, "rb") as fp:
            best_checkpoint_data = pickle.load(fp)

        best_trained_model.load_state_dict(best_checkpoint_data["net_state_dict"])
        

        test_acc = test_accuracy(best_trained_model, best_trial.config, device)
        print("Best trial test set accuracy: {}".format(test_acc))
        
    df = result.get_dataframe()
    
    return df


if __name__ == "__main__":
    # You can change the number of GPUs per trial here:
    df = main(num_samples=50, max_num_epochs=30, gpus_per_trial=0)

2025-03-07 04:06:47,623	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2025-03-07 04:19:38
Running for:,00:12:50.49
Memory:,13.1/15.7 GiB

Trial name,status,loc,lambda_,lr,iter,total time (s),loss
train_pinn_1_351f8_00000,TERMINATED,127.0.0.1:17756,6.31813e-06,0.0351234,8,27.6941,0.322487
train_pinn_1_351f8_00001,TERMINATED,127.0.0.1:15572,0.0545256,0.000530647,30,98.8258,0.312517
train_pinn_1_351f8_00002,TERMINATED,127.0.0.1:20584,0.00016281,0.000520125,30,98.6828,0.313851
train_pinn_1_351f8_00003,TERMINATED,127.0.0.1:21200,0.000661847,0.0091479,1,4.16035,0.325403
train_pinn_1_351f8_00004,TERMINATED,127.0.0.1:13968,4.17069e-05,0.0144673,2,8.34007,0.322224
train_pinn_1_351f8_00005,TERMINATED,127.0.0.1:14872,6.26522e-05,6.351e-06,8,27.737,0.313079
train_pinn_1_351f8_00006,TERMINATED,127.0.0.1:18748,0.00376885,4.07263e-05,8,29.0599,0.313113
train_pinn_1_351f8_00007,TERMINATED,127.0.0.1:19872,0.00435397,0.00689492,1,5.76003,0.316114
train_pinn_1_351f8_00008,TERMINATED,127.0.0.1:22872,0.019589,5.57055e-06,30,94.2962,0.313076
train_pinn_1_351f8_00009,TERMINATED,127.0.0.1:13848,0.0346523,0.00073391,8,29.9094,0.313187


[36m(func pid=17756)[0m Finished Training


Trial name,loss,should_checkpoint
train_pinn_1_351f8_00000,0.322487,True
train_pinn_1_351f8_00001,0.312517,True
train_pinn_1_351f8_00002,0.313851,True
train_pinn_1_351f8_00003,0.325403,True
train_pinn_1_351f8_00004,0.322224,True
train_pinn_1_351f8_00005,0.313079,True
train_pinn_1_351f8_00006,0.313113,True
train_pinn_1_351f8_00007,0.316114,True
train_pinn_1_351f8_00008,0.313076,True
train_pinn_1_351f8_00009,0.313187,True


[36m(func pid=17756)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00000/checkpoint_000000)


[36m(func pid=21200)[0m Finished Training[32m [repeated 3x across cluster][0m


[36m(func pid=17756)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00000/checkpoint_000002)[32m [repeated 7x across cluster][0m
[36m(func pid=17756)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00000/checkpoint_000004)[32m [repeated 6x across cluster][0m
[36m(func pid=17756)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00000/checkpoint_000006)[32m [repeated 6x across cluster][0m
[36m(func pid=15572)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00001/checkpoint_000008)[32m [repeated 6x across cluster][0m
[36m(func pid=15572)[0m Checkpoint successfully created at: Checkpoint(fil

[36m(func pid=13968)[0m Finished Training


[36m(func pid=15572)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00001/checkpoint_000018)[32m [repeated 5x across cluster][0m
[36m(func pid=15572)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00001/checkpoint_000020)[32m [repeated 5x across cluster][0m


[36m(func pid=14872)[0m Finished Training


[36m(func pid=14872)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00005/checkpoint_000000)[32m [repeated 4x across cluster][0m
[36m(func pid=20584)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00002/checkpoint_000023)[32m [repeated 5x across cluster][0m
[36m(func pid=14872)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00005/checkpoint_000003)[32m [repeated 4x across cluster][0m
[36m(func pid=14872)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00005/checkpoint_000005)[32m [repeated 6x across cluster][0m
[36m(func pid=14872)[0m Checkpoint successfully created at: Checkpoint(fil

[36m(func pid=18748)[0m Finished Training


[36m(func pid=18748)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00006/checkpoint_000000)[32m [repeated 3x across cluster][0m
[36m(func pid=18748)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00006/checkpoint_000002)[32m [repeated 2x across cluster][0m
[36m(func pid=18748)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00006/checkpoint_000004)[32m [repeated 2x across cluster][0m


[36m(func pid=19872)[0m Finished Training


[36m(func pid=18748)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00006/checkpoint_000006)[32m [repeated 2x across cluster][0m
[36m(func pid=22872)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00008/checkpoint_000000)[32m [repeated 4x across cluster][0m
[36m(func pid=13848)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00009/checkpoint_000002)[32m [repeated 3x across cluster][0m
[36m(func pid=22872)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00008/checkpoint_000003)[32m [repeated 3x across cluster][0m
[36m(func pid=13848)[0m Checkpoint successfully created at: Checkpoint(fil

[36m(func pid=16280)[0m Finished Training[32m [repeated 3x across cluster][0m


[36m(func pid=22872)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00008/checkpoint_000012)[32m [repeated 2x across cluster][0m
[36m(func pid=22872)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00008/checkpoint_000014)[32m [repeated 5x across cluster][0m
[36m(func pid=22872)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00008/checkpoint_000016)[32m [repeated 4x across cluster][0m


[36m(func pid=28480)[0m Finished Training[32m [repeated 2x across cluster][0m


[36m(func pid=28480)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00012/checkpoint_000000)[32m [repeated 4x across cluster][0m
[36m(func pid=22872)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00008/checkpoint_000020)[32m [repeated 5x across cluster][0m
[36m(func pid=22872)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00008/checkpoint_000022)[32m [repeated 4x across cluster][0m


[36m(func pid=26864)[0m Finished Training


[36m(func pid=26864)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00013/checkpoint_000000)[32m [repeated 4x across cluster][0m
[36m(func pid=22872)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00008/checkpoint_000026)[32m [repeated 5x across cluster][0m
[36m(func pid=22872)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00008/checkpoint_000028)[32m [repeated 4x across cluster][0m


[36m(func pid=3860)[0m Finished Training


[36m(func pid=3860)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00014/checkpoint_000000)[32m [repeated 4x across cluster][0m
[36m(func pid=16280)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00011/checkpoint_000020)[32m [repeated 4x across cluster][0m
[36m(func pid=16280)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00011/checkpoint_000022)[32m [repeated 2x across cluster][0m


[36m(func pid=29196)[0m Finished Training


[36m(func pid=16280)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00011/checkpoint_000024)[32m [repeated 3x across cluster][0m
[36m(func pid=16280)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00011/checkpoint_000026)[32m [repeated 2x across cluster][0m


[36m(func pid=10036)[0m Finished Training


[36m(func pid=10036)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00016/checkpoint_000000)[32m [repeated 2x across cluster][0m


[36m(func pid=18232)[0m Finished Training


[36m(func pid=10036)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00016/checkpoint_000002)[32m [repeated 4x across cluster][0m
[36m(func pid=10036)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00016/checkpoint_000004)[32m [repeated 4x across cluster][0m


[36m(func pid=26972)[0m Finished Training


[36m(func pid=10036)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00016/checkpoint_000006)[32m [repeated 4x across cluster][0m
[36m(func pid=10036)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00016/checkpoint_000008)[32m [repeated 3x across cluster][0m
[36m(func pid=10036)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00016/checkpoint_000010)[32m [repeated 2x across cluster][0m


[36m(func pid=21856)[0m Finished Training


[36m(func pid=10036)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00016/checkpoint_000012)[32m [repeated 2x across cluster][0m
[36m(func pid=10036)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00016/checkpoint_000014)[32m [repeated 3x across cluster][0m


[36m(func pid=19980)[0m Finished Training


[36m(func pid=19980)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00020/checkpoint_000000)[32m [repeated 2x across cluster][0m


[36m(func pid=11448)[0m Finished Training


[36m(func pid=19980)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00020/checkpoint_000002)[32m [repeated 2x across cluster][0m
[36m(func pid=19980)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00020/checkpoint_000004)[32m [repeated 4x across cluster][0m
[36m(func pid=19980)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00020/checkpoint_000006)[32m [repeated 2x across cluster][0m


[36m(func pid=16320)[0m Finished Training


[36m(func pid=16320)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00022/checkpoint_000000)[32m [repeated 2x across cluster][0m


[36m(func pid=28460)[0m Finished Training


[36m(func pid=28460)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00023/checkpoint_000000)[32m [repeated 2x across cluster][0m


[36m(func pid=22412)[0m Finished Training


[36m(func pid=28460)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00023/checkpoint_000002)[32m [repeated 2x across cluster][0m
[36m(func pid=22412)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00024/checkpoint_000001)[32m [repeated 3x across cluster][0m
[36m(func pid=22412)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00024/checkpoint_000003)[32m [repeated 4x across cluster][0m


[36m(func pid=17896)[0m Finished Training


[36m(func pid=22412)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00024/checkpoint_000005)[32m [repeated 5x across cluster][0m


[36m(func pid=15388)[0m Finished Training


[36m(func pid=22412)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00024/checkpoint_000007)[32m [repeated 5x across cluster][0m
[36m(func pid=17896)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00025/checkpoint_000005)[32m [repeated 3x across cluster][0m
[36m(func pid=17896)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00025/checkpoint_000007)[32m [repeated 2x across cluster][0m


[36m(func pid=29432)[0m Finished Training


[36m(func pid=29432)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00027/checkpoint_000000)
[36m(func pid=29432)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00027/checkpoint_000001)


[36m(func pid=3856)[0m Finished Training


[36m(func pid=29432)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00027/checkpoint_000002)
[36m(func pid=29432)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00027/checkpoint_000003)[32m [repeated 3x across cluster][0m
[36m(func pid=3856)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00028/checkpoint_000003)[32m [repeated 4x across cluster][0m
[36m(func pid=3856)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00028/checkpoint_000005)[32m [repeated 2x across cluster][0m
[36m(func pid=3856)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_re

[36m(func pid=17008)[0m Finished Training[32m [repeated 2x across cluster][0m


[36m(func pid=3856)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00028/checkpoint_000011)[32m [repeated 3x across cluster][0m
[36m(func pid=3856)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00028/checkpoint_000013)[32m [repeated 3x across cluster][0m
[36m(func pid=3856)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00028/checkpoint_000015)[32m [repeated 2x across cluster][0m
[36m(func pid=3856)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00028/checkpoint_000017)[32m [repeated 2x across cluster][0m


[36m(func pid=13456)[0m Finished Training
[36m(func pid=332)[0m Finished Training


[36m(func pid=3856)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00028/checkpoint_000019)[32m [repeated 2x across cluster][0m
[36m(func pid=332)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00031/checkpoint_000001)[32m [repeated 4x across cluster][0m
[36m(func pid=3856)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00028/checkpoint_000022)[32m [repeated 2x across cluster][0m
[36m(func pid=3856)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00028/checkpoint_000024)[32m [repeated 2x across cluster][0m
[36m(func pid=3856)[0m Checkpoint successfully created at: Checkpoint(filesyste

[36m(func pid=2652)[0m Finished Training


[36m(func pid=2652)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00033/checkpoint_000000)[32m [repeated 2x across cluster][0m
[36m(func pid=2652)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00033/checkpoint_000002)[32m [repeated 2x across cluster][0m


[36m(func pid=27144)[0m Finished Training


[36m(func pid=27144)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00034/checkpoint_000000)[32m [repeated 2x across cluster][0m


[36m(func pid=14432)[0m Finished Training[32m [repeated 2x across cluster][0m


[36m(func pid=14432)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00036/checkpoint_000000)[32m [repeated 4x across cluster][0m


[36m(func pid=11500)[0m Finished Training


[36m(func pid=11500)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00037/checkpoint_000000)


[36m(func pid=20604)[0m Finished Training


[36m(func pid=20604)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00039/checkpoint_000000)
[36m(func pid=17844)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00038/checkpoint_000002)[32m [repeated 3x across cluster][0m


[36m(func pid=20756)[0m Finished Training[32m [repeated 2x across cluster][0m


[36m(func pid=20756)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00040/checkpoint_000000)[32m [repeated 2x across cluster][0m
[36m(func pid=20756)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00040/checkpoint_000002)[32m [repeated 2x across cluster][0m


[36m(func pid=23892)[0m Finished Training
[36m(func pid=26348)[0m Finished Training


[36m(func pid=23892)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00041/checkpoint_000001)[32m [repeated 3x across cluster][0m
[36m(func pid=23892)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00041/checkpoint_000003)[32m [repeated 3x across cluster][0m


[36m(func pid=1320)[0m Finished Training


[36m(func pid=1320)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00043/checkpoint_000000)
[36m(func pid=1320)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00043/checkpoint_000001)
[36m(func pid=1320)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00043/checkpoint_000002)
[36m(func pid=1320)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00043/checkpoint_000003)
[36m(func pid=1320)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00043/checkpoint_000004)
[36m(func pid=1320)[0m Checkpoint succ

[36m(func pid=26696)[0m Finished Training


[36m(func pid=1320)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00043/checkpoint_000009)
[36m(func pid=26696)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00044/checkpoint_000000)


[36m(func pid=3404)[0m Finished Training
[36m(func pid=17980)[0m Finished Training


[36m(func pid=3404)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00046/checkpoint_000000)[32m [repeated 3x across cluster][0m
[36m(func pid=3404)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00046/checkpoint_000002)[32m [repeated 6x across cluster][0m
[36m(func pid=3404)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00046/checkpoint_000004)[32m [repeated 6x across cluster][0m
[36m(func pid=3404)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00046/checkpoint_000006)[32m [repeated 4x across cluster][0m
[36m(func pid=3404)[0m Checkpoint successfully created at: Checkpoint(filesyst

[36m(func pid=25228)[0m Finished Training


[36m(func pid=1320)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00043/checkpoint_000029)[32m [repeated 4x across cluster][0m


[36m(func pid=29004)[0m Finished Training


[36m(func pid=3404)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00046/checkpoint_000020)[32m [repeated 5x across cluster][0m
[36m(func pid=3404)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00046/checkpoint_000022)[32m [repeated 5x across cluster][0m
[36m(func pid=3404)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00046/checkpoint_000024)[32m [repeated 2x across cluster][0m
[36m(func pid=3404)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00046/checkpoint_000026)[32m [repeated 2x across cluster][0m


[36m(func pid=18460)[0m Finished Training


[36m(func pid=3404)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47/trial_351f8_00046/checkpoint_000028)[32m [repeated 2x across cluster][0m
2025-03-07 04:19:38,143	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to 'C:/Users/lefti/ray_results/train_pinn_1_2025-03-07_04-06-47' in 0.0833s.
2025-03-07 04:19:38,188	INFO tune.py:1041 -- Total run time: 770.56 seconds (770.39 seconds for the tuning loop).


Best trial config: {'input_size': 5, 'lr': 0.0005306468844050074, 'batch_size': 32, 'hidden_size': 8, 'output_size': 40, 'layer_amt': 3, 'lambda_': 0.05452555236989918}
Best trial final validation loss: 0.3125165038638645


TypeError: train_pinn_1() takes from 1 to 2 positional arguments but 3 were given

In [None]:
input_sizes = [40]
output_sizes = [5,10,20,40]


for output in output_sizes:

    config['output_size'] = output

    for inputs in input_sizes:

        config['input_size'] = inputs    
        
        
        
        state_dict = torch.load(f'models/mlp/output_size{output}input_size{inputs}.pt')
        mod = tm.train_model(config, it_amt=0, model_fnc=ml_models.MLP, data_dir='data_synthetic')
        mod.load_state_dict(state_dict)

        pinn_mod = pinns.decay_pinn(mod)
        
        pinn_mod = pinns.train_pinn_1(config, it_amt=100, model=pinn_mod, data_dir = 'data_synthetic')
        
        torch.save(obj = mod.state_dict(), f = 'pinn_models/mlp/'+'output_size' + str(output) + 'input_size' + str(inputs) + '.pt')

In [None]:
input_sizes = [40]
output_sizes = [5,10,20,40]


for output in output_sizes:

    config['output_size'] = output

    for inputs in input_sizes:

        config['input_size'] = inputs    
        
        
        
        state_dict = torch.load(f'models/cnn/output_size{output}input_size{inputs}.pt')
        mod = tm.train_model(config, it_amt=0, model_fnc=ml_models.CNN, data_dir='data_synthetic')
        mod.load_state_dict(state_dict)

        pinn_mod = decay_pinn(mod)
        
        pinn_mod = train_pinn_1(config, it_amt=100, model=pinn_mod, lambda_ = 1e-5, data_dir = 'data_synthetic')
        
        torch.save(obj = mod.state_dict(), f = 'pinn_models/cnn/'+'output_size' + str(output) + 'input_size' + str(inputs) + '.pt')

In [None]:
input_sizes = [40]
output_sizes = [5,10,20,40]


for output in output_sizes:

    config['output_size'] = output

    for inputs in input_sizes:

        config['input_size'] = inputs    
        
        
        
        state_dict = torch.load(f'models/lstm/output_size{output}input_size{inputs}.pt')
        mod = tm.train_model(config, it_amt=0, model_fnc=ml_models.LSTM, data_dir='data_synthetic')
        mod.load_state_dict(state_dict)

        pinn_mod = decay_pinn(mod)
        
        pinn_mod = train_pinn_1(config, it_amt=100, model=pinn_mod, lambda_ = 1e-5, data_dir = 'data_synthetic')
        
        torch.save(obj = mod.state_dict(), f = 'pinn_models/lstm/'+'output_size' + str(output) + 'input_size' + str(inputs) + '.pt')