In [1]:
import torch
import einops
import torch.nn.functional as F
import numpy as np
from torch_geometric.data import Data
from tqdm import tqdm
import pandas as pd

import lovely_tensors as lt
lt.monkey_patch()



In [2]:
# Load the full simulation table from the parquet file (pyarrow backend)
# and leave the frame as the cell's last expression so it is displayed.
parquet_path = 'data/df_optimized_6.parquet'
dataframe = pd.read_parquet(parquet_path, engine='pyarrow')
dataframe

Unnamed: 0,scenario,time,PNode_1,PNode_2,PNode_3,PNode_4,PNode_5,PNode_6,PNode_7,PNode_8,...,fLink_28,fLink_29,fLink_30,fLink_31,fLink_32,fLink_33,fLink_34,leak,location_1,location_2
0,1,1,0.0,69.887001,66.305000,66.033997,65.552002,65.267998,65.247002,65.165001,...,28.799999,100.800003,46.799999,-7.2,-72.000000,90.000000,223.199997,0,0,0
1,1,2,0.0,69.908997,67.016998,66.791000,66.386002,66.151001,66.132004,66.070999,...,25.200001,90.000000,50.400002,-10.8,-68.400002,82.800003,208.800003,0,0,0
2,1,3,0.0,69.930000,67.703003,67.528000,67.216003,67.033997,67.019997,66.972000,...,21.600000,79.199997,43.200001,-3.6,-61.200001,72.000000,187.199997,0,0,0
3,1,4,0.0,69.938004,67.961998,67.810997,67.542000,67.384003,67.373001,67.333000,...,21.600000,72.000000,39.599998,-7.2,-57.599998,68.400002,165.600006,0,0,0
4,1,5,0.0,69.947998,68.299004,68.171997,67.944000,67.810997,67.801003,67.765999,...,25.200001,64.800003,36.000000,-7.2,-50.400002,61.200001,151.199997,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17519995,334,17212,0.0,69.763000,66.357002,65.560997,65.139000,64.942001,64.849998,64.791000,...,129.600006,201.600006,129.600006,14.4,-79.199997,108.000000,331.200012,0,0,0
17519996,334,17213,0.0,69.779999,66.610001,65.889000,65.504997,65.321999,65.235001,65.179001,...,136.800003,201.600006,126.000000,14.4,-79.199997,108.000000,309.600006,0,0,0
17519997,334,17214,0.0,69.814003,67.151001,66.530998,66.203003,66.049004,65.973999,65.926003,...,118.800003,176.399994,108.000000,14.4,-64.800003,90.000000,295.200012,0,0,0
17519998,334,17215,0.0,69.844002,67.603996,67.089996,66.818001,66.691002,66.629997,66.593002,...,111.599998,162.000000,93.599998,14.4,-61.200001,86.400002,259.200012,0,0,0


In [3]:
# Node features: every column whose name starts with "PNode", in the order
# the columns appear in `dataframe`.
pnode_columns = [column for column in dataframe.columns if column.startswith("PNode")]
node_frame = dataframe[pnode_columns]
print(node_frame)

# Convert the selected columns into one tensor with one row per dataframe row
# and one column per PNode_* column.
node_features = torch.tensor(node_frame.to_numpy())
print(node_features)

          PNode_1    PNode_2    PNode_3    PNode_4    PNode_5    PNode_6  \
0             0.0  69.887001  66.305000  66.033997  65.552002  65.267998   
1             0.0  69.908997  67.016998  66.791000  66.386002  66.151001   
2             0.0  69.930000  67.703003  67.528000  67.216003  67.033997   
3             0.0  69.938004  67.961998  67.810997  67.542000  67.384003   
4             0.0  69.947998  68.299004  68.171997  67.944000  67.810997   
...           ...        ...        ...        ...        ...        ...   
17519995      0.0  69.763000  66.357002  65.560997  65.139000  64.942001   
17519996      0.0  69.779999  66.610001  65.889000  65.504997  65.321999   
17519997      0.0  69.814003  67.151001  66.530998  66.203003  66.049004   
17519998      0.0  69.844002  67.603996  67.089996  66.818001  66.691002   
17519999      0.0  69.867996  67.966003  67.536003  67.308998  67.200996   

            PNode_7    PNode_8    PNode_9   PNode_10  ...   PNode_23  \
0         65.24

In [4]:
# Edge attributes: every column whose name starts with "fLink", in the order
# the columns appear in `dataframe`.
flink_columns = [column for column in dataframe.columns if column.startswith("fLink")]
edge_frame = dataframe[flink_columns]
print(edge_frame)

# Convert the selected columns into one tensor with one row per dataframe row
# and one column per fLink_* column.
edge_attr = torch.tensor(edge_frame.to_numpy())
print(edge_attr)

              fLink_1      fLink_2      fLink_3      fLink_4      fLink_5  \
0         3337.199951  3186.000000  1245.599976  1220.400024  1119.599976   
1         2973.600098  2836.800049  1130.400024  1112.400024  1011.599976   
2         2584.800049  2462.399902   982.799988   964.799988   882.000000   
3         2419.199951  2311.199951   907.200012   892.799988   813.599976   
4         2196.000000  2095.199951   828.000000   817.200012   741.599976   
...               ...          ...          ...          ...          ...   
17519995  5529.600098  5281.200195  2156.399902  2124.000000  1911.599976   
17519996  5317.200195  5079.600098  2044.800049  2016.000000  1839.599976   
17519997  4849.200195  4626.000000  1886.400024  1854.000000  1674.000000   
17519998  4406.399902  4212.000000  1702.800049  1677.599976  1504.800049   
17519999  4032.000000  3855.600098  1548.000000  1522.800049  1382.400024   

              fLink_6      fLink_7      fLink_8     fLink_9    fLink_10  ..

In [6]:
# Scenario id of every row. The double-bracket selection keeps a 2-D,
# single-column frame, so the resulting tensor has shape (num_rows, 1).
scenario_column = dataframe[["scenario"]]
scenario = torch.tensor(scenario_column.to_numpy())

In [7]:
# Leak-location labels: one row per dataframe row, built from the two integer
# columns "location_1" and "location_2".
locations = torch.tensor(
    dataframe[["location_1", "location_2"]].to_numpy().astype(np.int64)
)

# Use ONE class count for both columns. Without an explicit `num_classes`,
# F.one_hot infers the width from the max of each input separately, so the two
# encodings can end up with different widths; the sum below would then raise,
# or silently broadcast when one slice has width 1.
num_location_classes = int(locations.max()) + 1

# Column 0 of each one-hot encoding (location id 0) is dropped by `[:, 1:]`
# -- presumably 0 means "no leak at this slot"; TODO confirm with the data spec.
# The sum gives a multi-hot vector with a 1 at each reported location.
# NOTE(review): if both columns hold the same non-zero id, that entry is 2.
y_loc = (
    F.one_hot(locations[:, 0], num_classes=num_location_classes)[:, 1:]
    + F.one_hot(locations[:, 1], num_classes=num_location_classes)[:, 1:]
)

In [8]:
# Graph connectivity: read the edge list and keep its two endpoint columns.
edge_table = pd.read_csv("edge_index.csv")
edge_endpoints = edge_table[["Node1", "Node2"]].to_numpy()

# Shift every node id down by one -- presumably converting 1-based ids in the
# CSV to 0-based indices; TODO confirm against the CSV.
# NOTE(review): the result has shape (num_edges, 2); torch_geometric's `Data`
# expects `edge_index` as (2, num_edges) -- confirm the consumer transposes it.
edge_index = torch.tensor(edge_endpoints - 1)

In [9]:
# Bundle every tensor built above into one dictionary so the whole dataset
# can be serialized as a single object. Key order matches the original literal.
torch_dataset = dict(
    scenario=scenario,
    node_features=node_features,
    edge_attr=edge_attr,
    edge_index=edge_index,
    y=y_loc,
)

In [10]:
# Serialize the bundled dataset dictionary to disk with torch.save.
output_path = "data/graph_data_torch.pt"
torch.save(torch_dataset, output_path)