Notes from https://github.com/Junyoungpark/GNNAtoZ 

# 0. Bring Your Own Graphs

In [None]:
## Instantiating a graph  
import networkx as nx
import matplotlib.pyplot as plt

In [None]:
# Build a small undirected graph with nodes 1, 2, 3 connected as 1 - 2 - 3.
G = nx.Graph()
G.add_nodes_from([1, 2, 3])
G.add_edges_from([(1, 2), (2, 3)])

# Render the graph with networkx's default drawing settings.
nx.draw(G)

In [None]:
## DGL

import dgl
import torch

In [None]:
# edges 0->1, 0->2, 0->3, 1->3
u, v = torch.tensor([0, 0, 0, 1]), torch.tensor([1, 2, 3, 3])
g = dgl.graph((u, v))
print(g)

In [None]:
g = dgl.graph((u, v), num_nodes=8)
print(g)

In [None]:
node_feat_dim = 7 # the node feature dim
edge_feat_dim = 3 # the edge feature dim

g.ndata['feat'] = torch.randn(g.number_of_nodes(), node_feat_dim)
g.edata['feat'] = torch.randn(g.number_of_edges(), edge_feat_dim)
print(g)

In [None]:
def plot_dgl_graph(g):
    """Draw a DGL graph with matplotlib.

    The graph is converted with ``dgl.to_networkx`` and drawn with
    ``nx.draw`` on a new figure (``figsize=[10,5]``) using small blue
    nodes and thin red edges; the figure is then shown.
    """
    nx_graph = dgl.to_networkx(g)
    plt.figure(figsize=[10,5])
    nx.draw(
        nx_graph,
        node_color='blue',
        edge_color="red",
        node_size=20,
        width=1,
    )
    plt.show()

In [None]:
plot_dgl_graph(g)

# 1. Message Passing on DGL

In [None]:
import dgl
import torch
import torch.nn as nn

In [None]:
u, v = torch.tensor([0, 0, 0, 1]), torch.tensor([1, 2, 3, 3])
g = dgl.graph((u, v), num_nodes=8)
g = dgl.add_self_loop(g)

node_feat_dim = 32
edge_feat_dim = 3

g.ndata["feat"] = torch.randn(g.number_of_nodes(), node_feat_dim)
g.edata["feat"] = torch.randn(g.number_of_edges(), edge_feat_dim)

print(g)

In [None]:
class MessagePassingGCN(nn.Module):
    """Bias-free linear projection followed by a sum over incoming messages.

    The message and reduce steps are written as plain Python callbacks
    (``message_func`` / ``reduce_func``) that are handed to ``g.pull``.
    """

    def __init__(self, input_dim: int, output_dim: int):
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim, bias=False)

    def forward(self, g, nf):
        """Project ``nf`` with the linear layer and return the aggregated "h" field."""
        # Work on the graph object returned by `local_var()` rather than on
        # the caller's handle.
        local_g = g.local_var()
        local_g.ndata["z"] = self.linear(nf)

        # `pull` is invoked for every node of the graph:
        #   1. message generation - each edge carries its source node's "z"
        #      (see `message_func`);
        #   2. the messages are collected at the destination nodes;
        #   3. reduction - each destination node sums the messages it
        #      received (see `reduce_func`) and stores the result as "h";
        #   4. no separate node-update step is applied here.
        # For further details, refer to the DGL API documentation.
        local_g.pull(
            v=local_g.nodes(),
            message_func=self.message_func,
            reduce_func=self.reduce_func,
        )
        return local_g.ndata["h"]

    def message_func(self, edges):
        """Use the source node's "z" feature as the message on each edge."""
        source_z = edges.src["z"]
        return {"z": source_z}

    def reduce_func(self, nodes):
        """Sum the received "z" messages over dimension 1 of the mailbox."""
        received = nodes.mailbox["z"]
        return {"h": received.sum(dim=1)}




In [None]:
gc_out_dim = 256
gc = MessagePassingGCN(node_feat_dim, gc_out_dim)

In [None]:
h_updated = gc(g, g.ndata['feat'])
print(h_updated.shape)

In [None]:
%%timeit
h_updated = gc(g, g.ndata['feat'])

In [None]:
# A slightly optimized version of the simple GCN, using the built-in functions from dgl.function

class MessagePassingGCN(nn.Module):
    """Bias-free linear projection followed by a sum over incoming messages.

    Same computation as the hand-written variant, but the message and
    reduce steps are DGL built-in functions instead of Python callbacks.

    Args:
        input_dim: size of the input node features.
        output_dim: size of the projected / aggregated node features.
    """

    def __init__(self, input_dim: int, output_dim: int):
        super(MessagePassingGCN, self).__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=output_dim, bias=False)
        # `copy_u` copies the source-node field "z" into the message field
        # "z". It replaces the legacy `copy_src` alias, which newer DGL
        # releases no longer provide.
        self.message_func = dgl.function.copy_u("z", "z")
        # Sum the "z" messages arriving at each node into the node field "h".
        self.reduce_func = dgl.function.sum("z", "h")

    def forward(self, g, nf):
        """Project ``nf`` with the linear layer and return the aggregated "h" field."""
        g = g.local_var()  # work on the graph object returned by local_var()
        z = self.linear(nf)
        g.ndata["z"] = z

        # Pull messages into every node of the graph.
        g.pull(v=g.nodes(), message_func=self.message_func, reduce_func=self.reduce_func)
        return g.ndata["h"]


In [None]:
gc = MessagePassingGCN(node_feat_dim, gc_out_dim)

In [None]:
h_updated = gc(g, g.ndata['feat'])
print(h_updated.shape)

In [None]:
%%timeit
h_updated = gc(g, g.ndata['feat'])

# 2. Implement Famous GNNs

In [None]:
import dgl
import torch
import dgl.nn.pytorch.conv as dglconv

In [None]:
u, v = torch.tensor([0, 0, 0, 1]), torch.tensor([1, 2, 3, 3])
g = dgl.graph((u, v), num_nodes=8)
g = dgl.add_self_loop(g)

node_feat_dim = 32
edge_feat_dim = 3

g.ndata['feat'] = torch.randn(g.number_of_nodes(), node_feat_dim)
g.edata['feat'] = torch.randn(g.number_of_edges(), edge_feat_dim)

In [None]:
gc_out_dim = 16

gc = dglconv.GraphConv(in_feats=node_feat_dim, out_feats=gc_out_dim)

In [None]:
h_updated = gc(g, g.ndata['feat'])

In [None]:
# Graph Attention Networks
gat = dglconv.GATConv(in_feats=node_feat_dim,
                      out_feats=gc_out_dim,
                      num_heads=4)

In [None]:
h_updated_gat = gat(g, g.ndata['feat'])

In [None]:
h_updated_gat.shape # [#.nodes x #. attn head x # out dim]

In [None]:
g.ndata['feat']

In [None]:
# mini batch

batched_g = dgl.batch([g, g])

In [None]:
# Per-graph node/edge counts as plain Python ints.
n_nodes = batched_g.batch_num_nodes().tolist()
n_edges = batched_g.batch_num_edges().tolist()

# Totals for the batch as a whole.
print("Number of graphs in the batched graphs : {} \n".format(batched_g.batch_size))
print("Total number of nodes : {}".format(batched_g.num_nodes()))
print("Total number of edges : {} \n".format(batched_g.num_edges()))

# Breakdown per member graph.
print("Per graph number of nodes : {}".format(n_nodes))
print("Per graph number of edges : {} \n".format(n_edges))

In [None]:
# Run the same graph-convolution layer on the batched graph.
h_updated_batched = gc(batched_g, batched_g.ndata['feat'])
print(h_updated_batched.shape) # note that the first output dimension is now doubled: the batch holds two copies of g

In [None]:
# Graph Readouts

readout = h_updated.sum(dim=0)
print(readout.shape)
print(readout)

In [None]:
g.ndata['updated_h'] = h_updated
readout = dgl.readout_nodes(g, 'updated_h', op='sum')

In [None]:
print(readout.shape)
print(readout)

# 3. How to train GraphSAGE

In [None]:
import torch
import dgl
import torch.nn as nn
import dgl.nn as dglnn
import torch.nn.functional as F

In [None]:
dataset = dgl.data.CiteseerGraphDataset()
graph = dataset[0] # since it only has one graph :)

In [None]:
plot_dgl_graph(graph)

In [None]:
%%latex
\begin{equation}
h=\text{GraphSAGE}^{(2)}(\text{ReLU}((\text{GraphSAGE}^{(1)}(\mathcal{G}, X))))
\end{equation}

In [None]:
class SAGE(nn.Module):
    """Two-layer GraphSAGE: SAGEConv -> ReLU -> SAGEConv.

    Both layers use the "mean" aggregator; ``hid_feats`` is the width of
    the intermediate representation.
    """

    def __init__(self, in_feats, hid_feats, out_feats):
        super().__init__()
        self.conv1 = dglnn.SAGEConv(in_feats, hid_feats, aggregator_type="mean")
        self.conv2 = dglnn.SAGEConv(hid_feats, out_feats, aggregator_type="mean")

    def forward(self, g, inputs):
        """Return the second layer's output for graph ``g`` and features ``inputs``."""
        hidden = F.relu(self.conv1(g, inputs))
        return self.conv2(g, hidden)

In [None]:
graph.ndata.keys()

In [None]:
# Pull the node-level tensors stored on the graph under these keys.
node_features = graph.ndata["feat"]
node_labels = graph.ndata["label"]
# Masks used below to index logits/labels for each split.
train_mask = graph.ndata["train_mask"]
val_mask = graph.ndata["val_mask"]
test_mask = graph.ndata["test_mask"]

n_features = node_features.shape[1]  # size of the feature matrix's second dimension
n_labels = len(node_labels.unique())  # number of distinct label values present

In [None]:
def evaluate(model, graph, features, labels, mask):
    """Return the accuracy of ``model`` on the entries selected by ``mask``.

    The model is switched to eval mode (and left there) and run as
    ``model(graph, features)`` without gradient tracking. The prediction
    for each row is the index of its largest logit; the result is the
    fraction of masked rows whose prediction equals the label.
    """
    model.eval()
    with torch.no_grad():
        masked_logits = model(graph, features)[mask]
        targets = labels[mask]
        predictions = masked_logits.argmax(dim=1)
        n_correct = (predictions == targets).sum().item()
        return n_correct * 1.0 / len(targets)

In [None]:
# Two-layer GraphSAGE with a 100-unit hidden layer, trained with Adam
# using the optimizer's default hyperparameters.
model = SAGE(in_feats=n_features, hid_feats=100, out_feats=n_labels)
opt = torch.optim.Adam(model.parameters())

for epoch in range(100):
    # `evaluate` below leaves the model in eval mode, so train mode is
    # re-enabled at the start of every epoch.
    model.train()
    # forward prop by using all nodes
    logits = model(graph, node_features)
    # compute loss on the training nodes only
    loss = F.cross_entropy(logits[train_mask], node_labels[train_mask])
    
    # compute train & validation accuracy
    # (each call runs its own no-grad forward pass, before this epoch's
    # optimizer step is applied)
    train_acc = evaluate(model, graph, node_features, node_labels, train_mask)
    val_acc = evaluate(model, graph, node_features, node_labels, val_mask)
    # backward prop
    opt.zero_grad()
    loss.backward()
    opt.step()
    print("Epoch {:05d} | Loss {:.4f} | Train Acc {:.4f} | Val Acc {:.4f}".format(
        epoch, loss.item(), train_acc, val_acc))

In [None]:
model(graph, node_features)

In [None]:
logits = model(graph, node_features)
logits

In [None]:
logits.max()

# Data

In [37]:
import dgl

In [38]:
import pandas as pd
df = pd.read_parquet("data/wind/2019-01-24_outlier_removed.parquet")

In [39]:
df = df[~df["rt_plant_id"].isin([2397, 2420, 2538])]
df

Unnamed: 0,epoch,production,rt_plant_id,forecast_dt,UGRD_80.m.above.ground.SW,UGRD_80.m.above.ground.NW,UGRD_80.m.above.ground.NE,UGRD_80.m.above.ground.SE,VGRD_80.m.above.ground.SW,VGRD_80.m.above.ground.NW,VGRD_80.m.above.ground.NE,VGRD_80.m.above.ground.SE,production_lower,production_upper,is_outlier,production_cleaned
0,1548288000,10.0,1503,2019-01-24 03:00:00+03:00,-2.381990,-1.811990,-1.301990,-1.89199,17.66920,15.35920,15.56920,10.63920,0.0,10.0,False,10.0
1,1548291600,10.0,1503,2019-01-24 04:00:00+03:00,-3.177610,-2.647610,-2.297610,-3.02761,16.50070,14.29070,14.46070,10.24070,0.0,10.0,False,10.0
2,1548295200,9.0,1503,2019-01-24 05:00:00+03:00,-3.593300,-3.043300,-2.573300,-3.40330,15.63730,13.43730,13.17730,9.39729,0.0,10.0,False,9.0
3,1548298800,2.7,1503,2019-01-24 06:00:00+03:00,-4.027460,-3.627460,-2.697460,-3.66746,14.39660,13.14660,11.37660,8.86664,0.0,10.0,False,2.7
4,1548302400,1.3,1503,2019-01-24 07:00:00+03:00,-4.512970,-4.192970,-2.722970,-3.82297,13.09370,12.96370,9.64367,8.58367,0.0,10.0,False,1.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2650409,1643299200,20.0,2374,2022-01-27 19:00:00+03:00,-0.014146,-0.894145,0.225854,-1.14415,-9.65097,-5.86097,-5.42097,-4.08097,0.0,20.0,False,20.0
2650410,1643302800,18.5,2374,2022-01-27 20:00:00+03:00,0.542844,-0.977156,-0.307156,-1.18716,-9.24466,-5.59466,-4.40466,-3.31466,0.0,20.0,False,18.5
2650411,1643306400,18.1,2374,2022-01-27 21:00:00+03:00,-0.170332,-1.980330,-0.790332,-1.39033,-9.34740,-5.69740,-3.84740,-3.20740,0.0,20.0,False,18.1
2650412,1643310000,13.7,2374,2022-01-27 22:00:00+03:00,-0.388333,-2.448330,-0.778333,-1.53833,-8.65053,-5.44053,-3.42053,-2.99053,0.0,20.0,False,13.7


In [40]:
# corr based adjacency

data = df[["forecast_dt", "rt_plant_id", "production"]]
data = pd.pivot_table(data, index="forecast_dt", columns="rt_plant_id", values="production")

In [41]:
# Keep only absolute correlations strictly above THRESHOLD; every other
# entry (including undefined correlations) becomes 0.
THRESHOLD = 0.5
A = data.corr().abs()
A = A.where(A > THRESHOLD).fillna(0)
A

rt_plant_id,672,749,757,968,969,1194,1459,1460,1470,1472,...,2180,2184,2224,2225,2235,2288,2291,2323,2326,2374
rt_plant_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
672,1.000000,0.0,0.818082,0.554406,0.698107,0.720863,0.000000,0.000000,0.000000,0.000000,...,0.647598,0.000000,0.000000,0.752212,0.00000,0.0,0.820361,0.554928,0.526020,0.000000
749,0.000000,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.00000,0.0,0.000000,0.000000,0.000000,0.000000
757,0.818082,0.0,1.000000,0.625912,0.732977,0.666182,0.000000,0.000000,0.000000,0.000000,...,0.661928,0.000000,0.000000,0.779743,0.00000,0.0,0.782253,0.617477,0.612668,0.000000
968,0.554406,0.0,0.625912,1.000000,0.760793,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.738206,0.000000,0.000000,0.518439,0.00000,0.0,0.538423,0.795903,0.661067,0.554586
969,0.698107,0.0,0.732977,0.760793,1.000000,0.536955,0.000000,0.000000,0.000000,0.000000,...,0.889086,0.000000,0.000000,0.623860,0.00000,0.0,0.684870,0.727144,0.701659,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2288,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.816407,0.502665,0.503858,...,0.000000,0.000000,0.550695,0.000000,0.62266,1.0,0.000000,0.000000,0.000000,0.000000
2291,0.820361,0.0,0.782253,0.538423,0.684870,0.688397,0.000000,0.000000,0.000000,0.000000,...,0.664631,0.000000,0.000000,0.668477,0.00000,0.0,1.000000,0.528145,0.532720,0.000000
2323,0.554928,0.0,0.617477,0.795903,0.727144,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.750917,0.000000,0.000000,0.565051,0.00000,0.0,0.528145,1.000000,0.786605,0.000000
2326,0.526020,0.0,0.612668,0.661067,0.701659,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.707634,0.000000,0.000000,0.594457,0.00000,0.0,0.532720,0.786605,1.000000,0.000000


In [32]:
import scipy.sparse as sp
# Sparse COO representation of the thresholded correlation matrix A.
sp_mx = sp.coo_matrix(A)
# Build a DGL graph from the sparse matrix. `eweight_name` presumably makes
# DGL keep the matrix's nonzero values as edge data under "weight" — confirm
# against the dgl.from_scipy documentation.
G = dgl.from_scipy(sp_mx, eweight_name="weight")

In [36]:
G.adjacency_matrix()
# print(G.edata["weight"])

tensor(indices=tensor([[ 0,  0,  0,  ..., 97, 97, 97],
                       [ 0,  2,  3,  ..., 89, 90, 97]]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]),
       size=(98, 98), nnz=5854, layout=torch.sparse_coo)

In [1]:
import pandas as pd

In [7]:
df1 = pd.read_csv("/home/mert/Desktop/thesis/thesis/data/wind/production_large.csv")
df2 = pd.read_parquet("/home/mert/Desktop/thesis/thesis/data/wind/production_large.parquet")

In [13]:
a = pd.read_csv("/home/mert/Desktop/thesis/thesis/data/wind/ege_farms.csv")[["name", "rt_plant_id"]].drop_duplicates()

In [17]:
df1[df1.name_x != df1.name_y]

Unnamed: 0.1,Unnamed: 0,epoch,production,rt_plant_id,name_x,name_y


In [15]:
df1 = pd.merge(df1, a, on="rt_plant_id")
df1.head()

Unnamed: 0.1,Unnamed: 0,epoch,production,rt_plant_id,name_x,name_y
0,1,1483218000,4.6,1503,BORES,BORES
1,2,1483221600,5.0,1503,BORES,BORES
2,3,1483225200,5.2,1503,BORES,BORES
3,4,1483228800,5.6,1503,BORES,BORES
4,5,1483232400,5.0,1503,BORES,BORES


In [8]:
print(df1.shape, df2.shape)

(4458208, 5) (4458208, 3)


In [None]:
print(df.shape)
nodes = df[~df["rt_plant_id"].isin([2397, 2420, 2538])]
nodes = nodes[["rt_plant_id", "forecast_dt", "production", *[col for col in nodes.columns if col.startswith(("UGRD", "VGRD"))]]]
nodes = nodes.reset_index(drop=True)
print(nodes.shape)
nodes

In [None]:
nodes.groupby("forecast_dt").rt_plant_id.count().plot()

In [None]:
nodes.forecast_dt.nunique()

In [None]:
nodes[nodes["forecast_dt"] == nodes.forecast_dt.unique()[0]]

In [None]:
import torch
import dgl
import torch.nn as nn
import dgl.nn as dglnn
import torch.nn.functional as F

In [None]:
dataset = dgl.data.CiteseerGraphDataset()
graph = dataset[0] # since it only has one graph :)

In [None]:
graph

In [None]:
import dgl
from dgl.data import DGLDataset
import torch
import numpy as np

def _correlation_edges(production, threshold):
    """Return ``(src, dst, weight)`` arrays for strongly correlated column pairs.

    Args:
        production: wide DataFrame with one column per node id and one row
            per timestamp.
        threshold: a pair of columns becomes an edge when the absolute value
            of their correlation is strictly greater than this.

    Returns:
        Three 1-D numpy arrays of equal length: source node ids, destination
        node ids (both int64, taken from the column labels) and the absolute
        correlation of each pair. Both directions of a pair are emitted,
        self-pairs are dropped, and edges are ordered by source, then
        destination.
    """
    corr = production.corr().abs()
    values = corr.to_numpy()
    # Undefined (NaN) correlations compare False and therefore yield no edge.
    strong = (values > threshold) & (values != 0)
    # `corr` has identical row and column labels, so the diagonal holds the
    # self-pairs.
    strong &= ~np.eye(len(corr), dtype=bool)
    rows, cols = np.nonzero(strong)
    node_ids = corr.index.to_numpy().astype(np.int64)
    return node_ids[rows], node_ids[cols], values[rows, cols]


class WindPowerDataset(DGLDataset):
    """Single-graph dataset of wind plants linked by production correlation.

    Nodes are plants, re-indexed to ``0..n-1`` in ascending order of
    ``rt_plant_id``. Two plants are connected (in both directions) when the
    absolute correlation of their production series exceeds ``threshold``;
    that value is stored as the edge feature ``'weight'``. Node features
    (``'feat'``) are the ``UGRD*`` / ``VGRD*`` columns and node labels
    (``'label'``) the ``production`` column at the ``snapshot`` timestamp.

    Args:
        data_path: parquet file with at least the columns ``rt_plant_id``,
            ``forecast_dt``, ``production`` and the ``UGRD*`` / ``VGRD*``
            columns.
        excluded_plants: ``rt_plant_id`` values to drop before building
            the graph.
        threshold: absolute-correlation cut-off for creating an edge.
        snapshot: ``forecast_dt`` value whose rows provide node features
            and labels.
    """

    def __init__(self,
                 data_path="data/wind/2019-01-24_outlier_removed.parquet",
                 excluded_plants=(2397, 2420, 2538),
                 threshold=0.5,
                 snapshot="2019-01-24 03:00:00+03:00"):
        # The configuration is stored before calling the base constructor
        # because DGLDataset's constructor is what triggers `process()`.
        self.data_path = data_path
        self.excluded_plants = list(excluded_plants)
        self.threshold = threshold
        self.snapshot = snapshot
        super().__init__(name="wind")

    def process(self):
        """Read the parquet file and build ``self.graph``.

        Raises:
            ValueError: if the snapshot does not contain exactly one row
                per plant, in which case node features could not be aligned
                with the node ids used by the edges.
        """
        df = pd.read_parquet(self.data_path)
        # Copy so that the id re-mapping below does not write into a slice
        # of the frame returned by read_parquet.
        df = df[~df["rt_plant_id"].isin(self.excluded_plants)].copy()

        # Dense node ids: ascending plant id -> 0..n-1.
        plant_ids = np.sort(df["rt_plant_id"].astype(int).unique())
        n_plants = len(plant_ids)
        plant_mapping = {plant_id: node_id for node_id, plant_id in enumerate(plant_ids)}
        df["rt_plant_id"] = df["rt_plant_id"].astype(int).map(plant_mapping)

        # Edges from the correlation of the per-plant production series.
        production = pd.pivot_table(
            df[["forecast_dt", "rt_plant_id", "production"]],
            index="forecast_dt", columns="rt_plant_id", values="production")
        src, dst, weight = _correlation_edges(production, self.threshold)
        edges_src = torch.from_numpy(src)
        edges_dst = torch.from_numpy(dst)
        edge_features = torch.from_numpy(weight)

        # Node features/labels from one timestamp, ordered by node id so
        # that row i describes node i (the order in the file is not the
        # node-id order).
        weather_cols = [col for col in df.columns if col.startswith(("UGRD", "VGRD"))]
        nodes_data = df[df["forecast_dt"] == self.snapshot].sort_values("rt_plant_id")
        if nodes_data["rt_plant_id"].tolist() != list(range(n_plants)):
            raise ValueError(
                "snapshot {!r} must contain exactly one row per plant "
                "({} plants expected, {} rows found)".format(
                    self.snapshot, n_plants, len(nodes_data)))
        node_features = torch.from_numpy(nodes_data[weather_cols].to_numpy())
        node_labels = torch.from_numpy(nodes_data["production"].to_numpy())

        self.graph = dgl.graph((edges_src, edges_dst), num_nodes=n_plants)
        self.graph.ndata['feat'] = node_features
        self.graph.ndata['label'] = node_labels
        self.graph.edata['weight'] = edge_features

    def __getitem__(self, i):
        """Return the dataset's only graph; the index ``i`` is ignored."""
        return self.graph

    def __len__(self):
        """Return 1: the dataset holds a single graph."""
        return 1

In [None]:
# https://github.com/dmlc/dgl/blob/master/examples/pytorch/gcn/gcn.py

"""GCN using DGL nn package
References:
- Semi-Supervised Classification with Graph Convolutional Networks
- Paper: https://arxiv.org/abs/1609.02907
- Code: https://github.com/tkipf/gcn
"""
import torch
import torch.nn as nn
from dgl.nn.pytorch import GraphConv

class GCN(nn.Module):
    """Stack of ``GraphConv`` layers applied to a fixed graph ``g``.

    Layer widths are ``in_feats -> n_hidden`` (input layer), then
    ``n_layers - 1`` hidden ``n_hidden -> n_hidden`` layers, then
    ``n_hidden -> n_classes`` (output layer). ``activation`` is passed to
    every layer except the output layer. Dropout with probability
    ``dropout`` is applied to the input of every layer except the first.
    """

    def __init__(self, g, in_feats, n_hidden, n_classes, n_layers, activation, dropout):
        super().__init__()
        self.g = g
        # Layers are created in input -> hidden -> output order.
        convs = [GraphConv(in_feats, n_hidden, activation=activation)]
        convs.extend(
            GraphConv(n_hidden, n_hidden, activation=activation)
            for _ in range(n_layers - 1)
        )
        convs.append(GraphConv(n_hidden, n_classes))
        self.layers = nn.ModuleList(convs)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, features):
        """Run ``features`` through all layers on the stored graph."""
        h = features
        for idx, layer in enumerate(self.layers):
            # The raw input features are not dropped out; every later
            # layer receives a dropped-out version of the previous output.
            layer_input = h if idx == 0 else self.dropout(h)
            h = layer(self.g, layer_input)
        return h

In [None]:
import numpy as np
import torch
import torch.nn.functional as F
import dgl
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset

# from gcn import GCN

In [None]:
# Load the Cora citation dataset and take its graph.
data = CoraGraphDataset() # len 1, num_nodes=2708, num_edges=10556

g = data[0]

# Node-level tensors stored on the graph: features, labels and the boolean
# masks used below to select the train/validation/test nodes.
features = g.ndata['feat']
labels = g.ndata['label']
train_mask = g.ndata['train_mask']
val_mask = g.ndata['val_mask']
test_mask = g.ndata['test_mask']
in_feats = features.shape[1]  # size of the feature matrix's second dimension
n_classes = data.num_classes
n_edges = g.number_of_edges()
# Print the edge/class counts and the number of nodes selected by each mask.
print("""----Data statistics------'
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
        (n_edges, n_classes,
            train_mask.int().sum().item(),
            val_mask.int().sum().item(),
            test_mask.int().sum().item()))

In [None]:
g = dgl.remove_self_loop(g)
g = dgl.add_self_loop(g)
n_edges = g.number_of_edges()
print(n_edges)

In [None]:
# Per-node normalisation factor: in-degree raised to the power -1/2.
degs = g.in_degrees().float()
norm = torch.pow(degs, -0.5)
# A zero in-degree gives inf above; reset those entries to 0.
norm[torch.isinf(norm)] = 0

In [None]:
g.ndata['norm'] = norm.unsqueeze(1)

In [None]:
args = {
    "n_hidden": 16,
    "dropout": 0.5,
    "n_layers": 1,
    "lr": 0.01,
    "n_epochs": 1000,
    "weight_decay": 5e-4
}

In [None]:
model = GCN(g,
            in_feats,
            args["n_hidden"],
            n_classes,
            args["n_layers"],
            F.relu,
            args["dropout"])

In [None]:
loss_fcn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(), lr=args["lr"], weight_decay=args["weight_decay"])


In [None]:
import time

def evaluate(model, features, labels, mask):
    """Return the accuracy of ``model`` on the entries selected by ``mask``.

    The model is switched to eval mode (and left there) and run as
    ``model(features)`` without gradient tracking. The prediction for each
    row is the index of its largest logit; the result is the fraction of
    masked rows whose prediction equals the label.
    """
    model.eval()
    with torch.no_grad():
        masked_logits = model(features)[mask]
        targets = labels[mask]
        predictions = masked_logits.argmax(dim=1)
        n_correct = (predictions == targets).sum().item()
        return n_correct * 1.0 / len(targets)


# Per-epoch wall-clock durations of the training step; the first three
# epochs are not timed.
dur = []
for epoch in range(args["n_epochs"]):
    # `evaluate` below leaves the model in eval mode, so train mode is
    # re-enabled at the start of every epoch.
    model.train()
    if epoch >= 3:
        t0 = time.time()
    # forward
    logits = model(features)
    loss = loss_fcn(logits[train_mask], labels[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch >= 3:
        dur.append(time.time() - t0)

    acc = evaluate(model, features, labels, val_mask)
    # `np.mean([])` returns nan but also emits a RuntimeWarning; use an
    # explicit nan until the first timed epoch so the printed line is the
    # same without the warnings.
    mean_dur = np.mean(dur) if dur else float("nan")
    print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}". format(epoch, mean_dur, loss.item(),
                                            acc, n_edges / mean_dur / 1000))

print()
# Final accuracy on the held-out test nodes.
acc = evaluate(model, features, labels, test_mask)
print("Test accuracy {:.2%}".format(acc))

In [None]:
dataset = WindPowerDataset()


In [None]:
dataset[0]

In [None]:
graph = dataset[0]

print(graph)

In [None]:
plot_dgl_graph(graph)

In [None]:
nodes_data = nodes_data[nodes_data["forecast_dt"] == "2019-01-24 03:00:00+03:00"]

In [None]:
df = pd.read_parquet("data/wind/2019-01-24_outlier_removed.parquet")    
df = df[~df["rt_plant_id"].isin([2397, 2420, 2538])]


In [None]:
edge_features = torch.from_numpy(edges_data['Weight'].to_numpy())
edges_src = torch.from_numpy(edges_data['Src'].to_numpy())
edges_dst = torch.from_numpy(edges_data['Dst'].to_numpy())


In [None]:
edges_src

In [None]:
# NOTE(review): 'Age' and 'Club' are not among the columns this file selects
# into `nodes_data` elsewhere (rt_plant_id, forecast_dt, production,
# UGRD*/VGRD*) — this looks like leftover tutorial code; confirm before
# running.
node_features = torch.from_numpy(nodes_data['Age'].to_numpy())
# Labels are the integer category codes of the 'Club' column.
node_labels = torch.from_numpy(nodes_data['Club'].astype('category').cat.codes.to_numpy())
# Edge weights and endpoints from the long-format edge table.
edge_features = torch.from_numpy(edges_data['Weight'].to_numpy())
edges_src = torch.from_numpy(edges_data['Src'].to_numpy())
edges_dst = torch.from_numpy(edges_data['Dst'].to_numpy())
