In [1]:
import sys
import os

current_directory = os.getcwd()        
parent_directory = os.path.dirname(current_directory)
sys.path.append(parent_directory)

import torch
import torch.nn as nn
from torch_geometric.nn import GCNConv
from torch_geometric.utils import dense_to_sparse, add_self_loops

import math

from preprocess.BaselinePrerocess import baseline_process
from preprocess.GraphTransformerPrerocess import graph_water_transformer_cov_process_for_gate_predictor
from preprocess.graph import graph_topology_5


In [2]:
n_hours = 72
K = 24 
masked_value = 1e-10
split_1 = 0.8
split_2 = 0.9
sigma2 = 0.1
epsilon = 0.5

In [3]:
train_X_mask, val_X_mask, test_X_mask, \
train_ws_y, val_ws_y, test_ws_y, \
scaler, ws_scaler = baseline_process(n_hours, K, masked_value, split_1, split_2)

In [4]:
train_cov, val_cov, test_cov, \
train_tws_reshape, val_tws_reshape, test_tws_reshape, \
train_gate_pump_y, val_gate_pump_y, test_gate_pump_y, \
train_ws_y, val_ws_y, test_ws_y, \
scaler, ws_scaler, gate_scalar = graph_water_transformer_cov_process_for_gate_predictor(n_hours, K, masked_value, split_1, split_2)

Index(['WS_S1', 'WS_S4', 'FLOW_S25A', 'GATE_S25A', 'HWS_S25A', 'TWS_S25A',
       'FLOW_S25B', 'GATE_S25B', 'GATE_S25B2', 'HWS_S25B', 'TWS_S25B',
       'PUMP_S25B', 'FLOW_S26', 'GATE_S26_1', 'GATE_S26_2', 'HWS_S26',
       'TWS_S26', 'PUMP_S26', 'MEAN_RAIN'],
      dtype='object')
train_tws/val_tws/test_tws: (77069, 5, 72) (9634, 5, 72) (19268, 5, 72) 
 train_cov/val_cov/test_cov: (77069, 96, 12) (9634, 96, 12) (19268, 96, 12) 
 train_ws_y/val_ws_y/test_ws_y: (77069, 96) (9634, 96) (19268, 96) 
  train_gate_pump_y/val_gate_pump_y/test_gate_pump_y: (77069, 24, 7) (9634, 24, 7) (19268, 24, 7)


In [5]:
train_adj_mat, val_adj_mat, test_adj_mat = graph_topology_5(n_hours, K, sigma2, epsilon, len(train_ws_y), len(val_ws_y), len(test_ws_y))

node_indices: [0 0 0 0 1 1 2 2 3 3 4 4] 
neighbor_indices: [1 2 3 4 0 2 0 1 0 4 0 3]
number of nodes: 5, number of edges: 12


In [6]:
# ===== model parameters ======
head_size = 96*2
num_heads = 3
ff_dim = 96
num_transformer_blocks = 1
dropout = 0.5
atte_reg = 1e-2
l1_reg = 1e-5
l2_reg = 1e-5
gcn_unit1 = 32
gcn_unit2 = 16
lstm_units = 32
learning_rate = 5e-4
decay_steps = 10000
decay_rate = 0.95
PATIENCE = 500
EPOCHS = 3000
BATCH = 512

input_shape = train_cov.shape[1:]
print(input_shape)

(96, 12)


In [7]:
# output = model(input)
# loss = criterion(output, target)
# loss = loss + torch.norm(model.layer.weight, p=2)

In [8]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [25]:
class TransformerEncoder(nn.Module):
    def __init__(self, input_shape, num_heads, dropout, epsilon):
        super(TransformerEncoder, self).__init__()

        self.MultAtten = nn.MultiheadAttention(
            embed_dim=input_shape[0],
            num_heads=num_heads,
            dropout=dropout,
        )
        self.Dropout1 = nn.Dropout(dropout)
        self.layer_norm1 = nn.LayerNorm(input_shape[::-1], epsilon)

        self.conv1d_1 = nn.Conv1d(input_shape[0], ff_dim, 3, 1, 1)
        self.Dropout2 = nn.Dropout(dropout)
        self.act1 = nn.ReLU()

        self.conv1d_2 = nn.Conv1d(ff_dim, input_shape[-1], 3, 1, 1)
        self.act2 = nn.ReLU()
        self.layer_norm2 = nn.LayerNorm(input_shape[::-1], epsilon)
    def forward(self, inputs):
        print(inputs.shape)
        inputs = inputs.permute(0, 2, 1)
        x, _ = self.MultAtten(inputs, inputs, inputs)
        x = self.Dropout1(x)
        res = x + inputs
        x = self.layer_norm1(res)

        print(x.shape)
        # Feed Forward Part
        x = x.permute(0, 2, 1)
        x  = self.conv1d_1(x)
        x = self.Dropout2(x)
        x = self.act1(x)
        # print(x.shape)

        x = self.conv1d_2(x)
        x = self.act2(x)
        x = self.layer_norm2(res)
        res = x + res
        res = res.permute(0, 2, 1)
        return res

inputs = torch.rand([1, 96, 12])
model = TransformerEncoder(input_shape, num_heads, dropout, epsilon)
cov = model(inputs)
print(cov.shape)

torch.Size([1, 96, 12])
torch.Size([1, 12, 96])
torch.Size([1, 96, 12])


In [22]:
count_parameters(model)

73068

In [23]:
from torch.nn.parameter import UninitializedParameter
import torch
from prettytable import PrettyTable

def count_parameters(model):
    table = PrettyTable(["Name", "Parameters"])
    total_params = 0
    for name, parameter in model.named_parameters():
        if not parameter.requires_grad:
            continue
        params = parameter.numel()
        table.add_row([name, params])
        total_params += params
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params

model.forward(inputs)

total_params = count_parameters(model)
print(total_params)


torch.Size([1, 96, 12])
torch.Size([1, 12, 96])
+---------------------------+------------+
|            Name           | Parameters |
+---------------------------+------------+
|  MultAtten.in_proj_weight |   27648    |
|   MultAtten.in_proj_bias  |    288     |
| MultAtten.out_proj.weight |    9216    |
|  MultAtten.out_proj.bias  |     96     |
|     layer_norm1.weight    |    1152    |
|      layer_norm1.bias     |    1152    |
|      conv1d_1.weight      |   27648    |
|       conv1d_1.bias       |     96     |
|      conv1d_2.weight      |    3456    |
|       conv1d_2.bias       |     12     |
|     layer_norm2.weight    |    1152    |
|      layer_norm2.bias     |    1152    |
+---------------------------+------------+
Total Trainable Params: 73068
73068


In [229]:
cov = model(inputs)

fc1 = nn.Linear(input_shape[1], 5)
cov = fc1(cov)

conv_reshape = cov.view(-1, 5, input_shape[0])
conv_reshape.shape

torch.Size([1, 5, 96])

In [245]:
inp_lap = torch.rand([5, 5])
inp_seq = torch.rand([2, 5, 72])

edge_index = dense_to_sparse(inp_lap)[0]
# print(edge_index)
edge_index, _ = add_self_loops(edge_index, num_nodes=inp_lap.shape[0])
# print(edge_index)

gcn1 = GCNConv(72, 32)
gcn2 = GCNConv(32, 16)
act1 = nn.ReLU()
act2 = nn.ReLU()
x = act1(gcn1(inp_seq, edge_index)) #(5, 32)
print(x.shape)
x = act2(gcn2(x, edge_index))  #(5, 16)
print(x.shape)

torch.Size([2, 5, 32])
torch.Size([2, 5, 16])


In [231]:
count_parameters(gcn1)

2336

In [232]:
lstm = nn.LSTM(72, lstm_units)
xx = lstm(inp_seq)[0] #(5, 32)

In [233]:
x = torch.concat([conv_reshape, x, xx], dim=2)

In [234]:
x.shape

torch.Size([1, 5, 144])

In [235]:
class LuongAttention(nn.Module):
    def __init__(self):
        super(LuongAttention, self).__init__()

    def forward(self, query, value, key):
        dim = query.size(-1)
        scores = torch.bmm(query, key.transpose(-2, -1)) / math.sqrt(dim)
        attention_weights = torch.softmax(scores, dim=-1)
        output = torch.bmm(attention_weights, value)
        return output, attention_weights

attention = LuongAttention()
attention(x, x, x)[0].shape

torch.Size([1, 5, 144])

In [222]:
flatten = nn.Flatten()
x = flatten(x)

In [223]:
fc = nn.LazyLinear(96)
fc(x).shape



torch.Size([1, 96])

In [9]:
class LuongAttention(nn.Module):
    def __init__(self):
        super(LuongAttention, self).__init__()

    def forward(self, query, value, key):
        dim = query.size(-1)
        scores = torch.bmm(query, key.transpose(-2, -1)) / math.sqrt(dim)
        attention_weights = torch.softmax(scores, dim=-1)
        output = torch.bmm(attention_weights, value)
        return output, attention_weights

In [10]:
class Graph_Water_Transformer_Cov(nn.Module):
    def __init__(self, num_transformer_blocks, gcn_unit1, gcn_unit2):
        super(Graph_Water_Transformer_Cov, self).__init__()

        self.num_transformer_blocks = num_transformer_blocks
        self.gcn_unit1 = gcn_unit1
        self.gcn_unit2 = gcn_unit2
        

        self.transformer_encoder = TransformerEncoder(input_shape, num_heads, dropout, epsilon)
        self.fc1 = nn.Linear(input_shape[1], 5)

        self.gcn1 = GCNConv(72, self.gcn_unit1)
        self.gcn2 = GCNConv(self.gcn_unit1, self.gcn_unit2)
        self.gcn_act1 = nn.ReLU()
        self.gcn_act2 = nn.ReLU()



        self.lstm = nn.LSTM(72, lstm_units)
        self.attention = LuongAttention()
        self.final_fc = nn.LazyLinear(96)



    def forward(self, cov_inputs, inp_seq, inp_lap):
        # ======================== covariates with transformer ========================
        cov = cov_inputs
        for _ in range(self.num_transformer_blocks):
            cov = self.transformer_encoder(cov)
        cov = self.fc1(cov)
        conv_reshape = cov.view(-1, 5, input_shape[0])


        # ======================== water levels with GNN ========================
        # GCN
        edge_index = dense_to_sparse(inp_lap)[0]
        edge_index, _ = add_self_loops(edge_index, num_nodes=inp_lap.shape[0])
        x = self.gcn_act1(self.gcn1(inp_seq, edge_index)) #(5, 32)
        x = self.gcn_act2(self.gcn2(x, edge_index))  #(5, 16)

        # LSTM
        xx = self.lstm(inp_seq)[0] #(5, 32)

        # ======================== CONCAT and Attention ========================
        x = torch.concat([conv_reshape, x, xx], dim=2)
        x = self.attention(x, x, x)[0]

        x = self.final_fc(torch.flatten(x))
        return x

inputs = torch.rand([1, 96, 12])
inp_lap = torch.rand([5, 5])
inp_seq = torch.rand([1, 5, 72])

model = Graph_Water_Transformer_Cov(1, gcn_unit1, gcn_unit2)
model(inputs, inp_seq, inp_lap).shape



torch.Size([96])