Load data

In [1]:
import sys
sys.path.insert(1, '/home/mlfm/tam/python_graph_utilities_v2/Codes/')
import torch
from dataset import OneDDatasetBuilder, OneDDatasetLoader
from trainV1 import train, eval
from plot import *
from preprocessing import dataset_split_to_loader
from networks_lstm import PARC
import os

os.environ["CUDA_VISIBLE_DEVICES"]="4"
CUDA_LAUNCH_BLOCKING=1
torch.cuda.empty_cache()

In [2]:
# define arguments
class objectview(object):
    def __init__(self, d) -> None:
        self.__dict__ = d
    def setattr(self, attr_name, attr_value):
        self.__dict__[attr_name] = attr_value

args = objectview(d={
    'total_time': 4.8,
    'n_times': 201,
    'batch_size': 1000,
    'batch_n_times': 50,
    'batch_step': 1,
    'batch_recursive': True,
    'device': torch.device("cuda:1" if torch.cuda.is_available() else "cpu"),
    'lr': 5e-7,
    'weight_decay': 5e-4,
    'epoch': 100,
    'criterion': torch.nn.MSELoss(),
    'n_hiddenfields': 64,
    'n_hiddens': 10
})

In [3]:
# Build a dataset
# dataset = OneDDatasetBuilder(
#     raw_dir='/data1/tam/datasets',
#     root_dir='/data1/tam/downloaded_datasets_new',
#     data_names='all',
#     time_names=[str(i).zfill(3) for i in range(201)],
#     data_type=torch.float32
# )

In [4]:
# Load to normalize and batch
dataset = OneDDatasetLoader(
    root_dir='/data1/tam/downloaded_datasets_new',
    sub_dir='normed_and_batched',
    data_names='all',
    time_names=[str(i).zfill(3) for i in range(201)]
)

# normalized_dataset = dataset.normalizing(
#     sub_dir='normalized',
#     scalers = {
#         'node_attr' : ['minmax_scaler', 0],
#         'edge_attr' : ['quantile_transformer', 0],
#         'pressure' : ['quantile_transformer', None],
#         'flowrate' : ['quantile_transformer', None]
#     }
# )

# batched_dataset = normalized_dataset.batching(
#     batch_size = args.batch_size,
#     batch_n_times = args.batch_n_times, 
#     recursive = args.batch_recursive, 
#     sub_dir='/normed_and_batched', 
#     step=args.batch_step
# )

Train

In [5]:
## Prepare data
train_loader, test_loader = dataset_split_to_loader(
    dataset = dataset,
    subset_ids = {
        'train': list(range(0, 10)),
        'test': list(range(20, 30))
    },
    n_datas_per_batch = 6
)
train_loader, test_loader

(<torch_geometric.loader.dataloader.DataLoader at 0x7fd9f6d3c610>,
 <torch_geometric.loader.dataloader.DataLoader at 0x7fd9f6765250>)

In [6]:
## Prepare model
_test_data = dataset[0]
model = PARC(
    n_fields=2,
    n_timesteps=_test_data.pressure.size(1),
    n_meshfields=(_test_data.node_attr.size(1),_test_data.edge_attr.size(1)), # Tuple(n_node_fields, n_mesh_fields)
    n_bcfields=1,
    n_hiddenfields=args.n_hiddenfields,
    n_hiddens=args.n_hiddens
).to(args.device)
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
args.setattr(attr_name='optimizer', attr_value=optimizer)

In [7]:
model.derivative_solver.node_encoder

Sequential(
  (0): Linear(in_features=3, out_features=64, bias=True)
  (1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
  (2): ReLU()
  (3): Linear(in_features=64, out_features=64, bias=True)
)

In [8]:
# Training
total_train_loss = []
total_eval_loss = []
# batch = enumerate(list(range(0,10)))
for epoch in range(args.epoch):
    torch.cuda.empty_cache()
    train_loss = 0
    # for data in train_dataset:
    for i in range(train_loader.__len__()):
        data = next(iter(train_loader))
        train_loss += train(model=model, data=data, args=args)

    train_loss /= train_loader.__len__() # len(train_dataset)
    total_train_loss.append(train_loss)

    eval_loss = 0
    # for data in eval_dataset:
    for i in range(test_loader.__len__()):
        data = next(iter(test_loader))
        eval_loss += eval(model=model, data=data, args=args)
    eval_loss /= test_loader.__len__() #len(eval_dataset)
    total_eval_loss.append(eval_loss)
    
    # if (epoch > 25):
    #     args.train_lambda = 0.5
    print(f'Epoch {epoch}: train loss = {train_loss}; eval loss = {eval_loss}')
    if (epoch+1) % 25 == 0:
        torch.save(model.state_dict(), f'models/parc_v1_epoch{epoch+1}.pth')

Epoch 0: train loss = 602.2242705059535; eval loss = 139.743755050117
Epoch 1: train loss = 118.87491770323157; eval loss = 223.91126038217303
Epoch 2: train loss = 255.6846462578943; eval loss = 194.48642327942824
Epoch 3: train loss = 97.43796755214633; eval loss = 45.207445164017265
Epoch 4: train loss = 51.64960468117961; eval loss = 73.37836138730121
Epoch 5: train loss = 73.07506272756508; eval loss = 58.69086557959542
Epoch 6: train loss = 34.051194994582744; eval loss = 22.435280514247527
Epoch 7: train loss = 26.20055476057953; eval loss = 29.937368083121207
Epoch 8: train loss = 34.00693237963062; eval loss = 31.08453346387989
Epoch 9: train loss = 19.953155670069197; eval loss = 10.980107007292927
Epoch 10: train loss = 8.77344737561221; eval loss = 8.126261771632935
Epoch 11: train loss = 7.928302824194661; eval loss = 8.972786363611366
Epoch 12: train loss = 7.999344060868781; eval loss = 8.527865625274968
Epoch 13: train loss = 7.867796152376282; eval loss = 8.37850514281

In [None]:
import matplotlib.pyplot as plt
plt.plot(total_train_loss[:100])
plt.plot(total_eval_loss[:100])
# plt.ylim(0,10)
plt.show()

In [None]:
# Load to evaluate
dataset = OneDDatasetLoader(
    root_dir='/data1/tam/downloaded_datasets_new',
    sub_dir='normalized',
    data_names='all',
    time_names=[str(i).zfill(3) for i in range(201)]
)

data = dataset[0]

model = PARC(
    n_fields=2,
    n_timesteps=data.pressure.size(1),
    n_meshfields=(data.node_attr.size(1),_test_data.edge_attr.size(1)), # Tuple(n_node_fields, n_mesh_fields)
    n_bcfields=1,
    n_hiddenfields=args.n_hiddenfields,
    n_hiddens=args.n_hiddens
).to(args.device)
model.load_state_dict(torch.load(
    'models/parc_v1_epoch50.pth',
    map_location={'cuda:1': 'cuda:0'}
))




model.n_timesteps = data.pressure.size(1)
timestep = args.total_time / model.n_timesteps

## Connectivity/edge_index: Tensor(2, n_edges)
edge_index = torch.cat([
    data.edge_index, 
    torch.flip(data.edge_index, dims=[0]
)], dim=1).to(args.device)

## Mesh features: Tuple(NodeTensor, EdgeTensor)
mesh_features = (
    data.node_attr.to(args.device).float(),            
    torch.cat([data.edge_attr.to(args.device).float()]*2,dim=0)
)

## Fields tensor(pressure, flowrate): Tensor(n_nodes, n_times, n_fields)
F_initial = torch.cat([
    data.pressure[:,0].unsqueeze(1), 
    data.flowrate[:,0].unsqueeze(1)
], dim=-1).to(args.device).float() # concat pressure and flowrate

## Boundary value tensor: Tensor(n_nodes, n_times)
F_bc = torch.zeros((data.number_of_nodes, model.n_timesteps))
F_bc[0,:] = data.flowrate[0,:]
F_bc = F_bc.to(args.device).float()

## Predict output
with torch.no_grad():
    Fs, F_dots = model(
        F_initial=F_initial, 
        mesh_features=mesh_features, 
        edge_index=edge_index, 
        F_boundary=F_bc, 
        timestep=timestep
    )

    ## Ground truth Fields tensor(pressure, flowrate): Tensor(n_nodes, 1:n_times, n_fields)
    Fs_hat = torch.cat([
        data.pressure.unsqueeze(-1), 
        data.flowrate.unsqueeze(-1)
    ], dim=-1).to(args.device).float() # concat pressure and flowrate
    Fs_hat = Fs_hat[:,1:,:]
    
    ## Ground truth Fields time derivative tensor

import matplotlib.pyplot as plt


for i_node in [1000, 2000, 10000]:
    i_field = 1
    y_pred = Fs.cpu().numpy()[i_node,:,i_field]
    y_true = Fs_hat.cpu().numpy()[i_node,:,i_field]
    x = [i * 4.0 /200 for i in range(y_pred.shape[0])]
    # print(data.node_attr.numpy()[i_node, 6])
    # plt.ylim(-1,1)
    plt.plot(x, y_pred, c='red', label='GNN Crank-Nicolson')
    plt.plot(x, y_true, c='blue', linestyle='dashdot', label='ground_truth')
    # plt.ylim([-1,1])
    plt.legend(loc='upper right')
    plt.ylabel('Flowrate', fontsize=20)
    plt.xlabel('Time', fontsize=20)
    plt.show()