In [1]:
%cd /home/ltchen/gnnpp
import sys
import json
from exploration.graph_creation import *
from torch_geometric.loader import DataLoader
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.optim import AdamW
import wandb
from pytorch_lightning.loggers import WandbLogger
from utils.data import (
    load_dataframes,
    load_distances,
    normalize_features_and_create_graphs,
    rm_edges,
    summary_statistics,
)
from models.graphensemble.multigraph import *

/home/ltchen/gnnpp


In [11]:
PROJECTNAME = "reproduce_gnn"

In [2]:
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))
DIRECTORY = os.getcwd()
SAVEPATH = os.path.join(DIRECTORY, "explored_models/gnn_new_attr_24h/models")
JSONPATH = os.path.join(DIRECTORY, "trained_models/best_24h/params.json")

In [3]:
with open(JSONPATH, "r") as f:
    print(f"[INFO] Loading {JSONPATH}")
    args_dict = json.load(f)
config = args_dict
print(config)
print(config['lr'])
print(config['max_dist'])
print(type(config))
print(type(config['lr']))
print(type(config['gnn_hidden']))
'''{"batch_size":8,
"gnn_hidden":265,
"gnn_layers":2,
"heads":8,
"lr":0.0002,
"max_dist":100,
"max_epochs": 31}'''

[INFO] Loading /home/ltchen/gnnpp/trained_models/best_24h/params.json
{'batch_size': 8, 'gnn_hidden': 265, 'gnn_layers': 2, 'heads': 8, 'lr': 0.0002, 'max_dist': 100, 'max_epochs': 31}
0.0002
100
<class 'dict'>
<class 'float'>
<class 'int'>


'{"batch_size":8,\n"gnn_hidden":265,\n"gnn_layers":2,\n"heads":8,\n"lr":0.0002,\n"max_dist":100,\n"max_epochs": 31}'

In [4]:
dataframes = load_dataframes(mode="eval", leadtime= "24h") # load newly created dataframes
dataframes = summary_statistics(dataframes)
dist = load_distances(dataframes["stations"])


#self-created
l_graphs_train_rf, l_tests = normalize_features_and_create_graphs1(df_train=dataframes['train'], df_valid_test=[dataframes['test_rf'], dataframes['test_f']], station_df=dataframes['stations'], attributes=["geo"], edges=[("geo", 100)], sum_stats = True)

l_graphs_test_rf, l_graphs_test_f = l_tests

l_graphs_test = l_graphs_test_rf

#moritz
m_graphs_train_rf, m_tests = normalize_features_and_create_graphs(
    training_data=dataframes["train"],
    valid_test_data=[dataframes["test_rf"], dataframes["test_f"]],
    mat=dist,
    max_dist=config['max_dist'],
)
m_graphs_test_rf, m_graphs_test_f = m_tests

m_graphs_test = m_graphs_test_rf

# print(graphs_test_rf[0].x.shape) (1342, 36)



[INFO] Dataframes exist. Will load pandas dataframes.
[INFO] Calculating summary statistics for train
[INFO] Calculating summary statistics for test_rf
[INFO] Calculating summary statistics for test_f
[INFO] Loading distances from file...
[INFO] Normalizing features...
fit_transform
transform 1
transform 2
[INFO] Converting temperature values...


100%|██████████| 3448/3448 [00:16<00:00, 204.35it/s]
100%|██████████| 732/732 [00:02<00:00, 271.53it/s]
100%|██████████| 730/730 [00:02<00:00, 254.49it/s]


[INFO] Normalizing features...
[INFO] Creating graph data...


In [5]:
l_train_loader = DataLoader(l_graphs_train_rf, batch_size=config['batch_size'], shuffle=True)
m_train_loader = DataLoader(m_graphs_train_rf, batch_size=config['batch_size'], shuffle=True)

In [7]:
print(l_graphs_train_rf[0])
print(m_graphs_train_rf[0])

graph1 = l_graphs_train_rf[0]
graph2 = m_graphs_train_rf[0]

print(type(graph1))
print(type(graph2))
comparison = np.array(graph1.x) == np.array(graph2.x)
diff_indices = np.where(comparison == False)[0]
print(np.array(graph1.x) ==np.array(graph2.x))
print(diff_indices)
print("a[diff]:", graph1.x[diff_indices])
print("b[diff]:", graph2.x[diff_indices])
print(np.allclose(graph1.x, graph2.x, atol=1e-1))

Data(x=[122, 65], edge_index=[2, 1420], edge_attr=[1420, 1], y=[122, 1], pos=[122, 2], timestamp=1997-01-02 00:00:00, n_idx=[122])
Data(x=[122, 65], edge_index=[2, 1420], edge_attr=[1420, 1], y=[122], timestamp=1997-01-02 00:00:00, n_idx=[122])
<class 'torch_geometric.data.data.Data'>
<class 'torch_geometric.data.data.Data'>
[[ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 ...
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]]
[  1   4   8  12  13  13  14  15  15  16  17  21  27  30  30  30  31  32
  33  34  34  34  35  35  35  36  36  37  38  39  40  40  41  41  42  42
  43  43  43  43  44  44  44  44  45  47  47  48  48  49  50  51  51  52
  55  56  56  56  56  57  59  60  61  61  62  62  63  63  66  67  67  68
  69  70  70  71  71  72  73  73  74  74  74  75  76  77  77  77  78  78
  79  80  83  84  88  89  90  91  92  93  94

In [8]:
emb_dim=20
# edge_dim=l_graphs_test_f[0].num_edge_features
edge_dim = 1
in_channels = m_graphs_train_rf[0].x.shape[1] + emb_dim - 1

In [12]:
with wandb.init(
    project=PROJECTNAME, id=f"training_run_24h", config=args_dict, tags=["final_training"], reinit=True
):
    config = wandb.config

    multigraph = Multigraph(
        embedding_dim=emb_dim,
        # edge_dim=edge_dim,
        in_channels=in_channels,
        hidden_channels_gnn=config['gnn_hidden'],
        out_channels_gnn=config['gnn_hidden'],
        num_layers_gnn=config['gnn_layers'],
        heads=config['heads'],
        hidden_channels_deepset=config['gnn_hidden'],
        optimizer_class=AdamW,
        optimizer_params=dict(lr=config['lr']),
    )
    torch.compile(multigraph)

    # understand what this is
    batch = next(iter(m_train_loader))
    # batch = batch  # .to("cuda")
    # multigraph  # .to("cuda")
    multigraph.forward(batch)

    wandb_logger = WandbLogger(project=PROJECTNAME)
    checkpoint_callback = ModelCheckpoint(
        dirpath=SAVEPATH, filename=f"run_24h", monitor="train_loss", mode="min", save_top_k=1
    )

    # print("[INFO] Training model...")
    trainer = L.Trainer(
        max_epochs=config['max_epochs'],
        log_every_n_steps=1,
        accelerator="gpu",
        devices=1,
        enable_progress_bar=True,
        logger=wandb_logger,
        callbacks=checkpoint_callback,
    )

    trainer.fit(model=multigraph, train_dataloaders=m_train_loader) # trainer speichern und entweder neuladen oder
wandb.finish()

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/ltchen/.conda/envs/gnn_env4/lib/python3.10/site-packages/pytorch_lightning/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
/home/ltchen/.conda/envs/gnn_env4/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
/home/ltchen/.conda/envs/gnn_env4/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /home/ltchen/gnnpp/explored_models/gnn_new_attr_24h/models exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name        | Type              | Params | Mode 
----------------------------------------------------------
0 | encoder  

Epoch 30: 100%|██████████| 431/431 [00:13<00:00, 31.56it/s, v_num=_24h, train_loss_step=2.170, train_loss_epoch=1.360]

`Trainer.fit` stopped: `max_epochs=31` reached.


Epoch 30: 100%|██████████| 431/431 [00:13<00:00, 30.91it/s, v_num=_24h, train_loss_step=2.170, train_loss_epoch=1.360]


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇██
train_loss_epoch,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▄▃▃▂▂▁▂▁▁▁▂▁▂▁▂▁▁▂▁▁▂▂▁▁▂▂▂▁▁▁▁▁▁▂▂▁▁▁▁
trainer/global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇██

0,1
epoch,30.0
train_loss_epoch,1.3614
train_loss_step,2.17139
trainer/global_step,13360.0


In [20]:
emb_dim=20
# edge_dim=l_graphs_test_f[0].num_edge_features
edge_dim = 1
in_channels = m_graphs_train_rf[0].x.shape[1] + emb_dim - 1

tensor([[ 0.0000e+00, -7.3829e-01, -7.6410e-01,  ...,  6.1836e-01,
          9.9941e-01,  3.4422e-02],
        [ 1.0000e+00, -7.3700e-01, -7.7860e-01,  ...,  1.5635e+00,
          9.9941e-01,  3.4422e-02],
        [ 2.0000e+00, -7.3185e-01, -7.3317e-01,  ...,  7.6520e-01,
          9.9941e-01,  3.4422e-02],
        ...,
        [ 1.1900e+02,  2.3386e+00,  6.4645e-01,  ..., -6.1813e-01,
          9.9941e-01,  3.4422e-02],
        [ 1.2000e+02,  4.7996e+00,  3.9940e+00,  ..., -6.0907e-01,
          9.9941e-01,  3.4422e-02],
        [ 1.2100e+02,  5.9139e+00,  4.3452e+00,  ..., -4.7565e-01,
          9.9941e-01,  3.4422e-02]])

TypeError: 'method' object is not iterable