In [1]:
import sys
import os
import networkx as nx
import torch
import torch_geometric
from torch_geometric import nn
from torch_geometric.utils.convert import from_networkx
from torch_geometric.data import Dataset, Data, DenseDataLoader, DataLoader
import pandas as pd
import numpy as np
from tqdm import *
import pickle

#load local packages
sys.path.append(os.path.join(os.getcwd(), '../..'))
from ConuForecast.src.graph_utils import GraphEngine, DBconnector
from ConuForecast.src.graph_dataset_builder import ConuGraphDataset


In [2]:
# Database connection settings. Every value can be overridden through an
# environment variable so credentials do not have to be edited into the
# notebook; the fallbacks are the values this notebook originally hardcoded.
# NOTE(review): the positional order is presumably
# (host, port, database, user, password) -- confirm against DBconnector.
CONN = DBconnector(
    os.environ.get('CONU_DB_HOST', '172.17.0.1'),
    int(os.environ.get('CONU_DB_PORT', 5555)),
    os.environ.get('CONU_DB_NAME', 'base-ina'),
    os.environ.get('CONU_DB_USER', 'postgres'),
    os.environ.get('CONU_DB_PASSWORD', 'postgres'),
)

# Event identifier; the model and precipitation names are derived from it.
EVENT = 'sjj-47'
MODEL = f'model_{EVENT}'
PRECIP = f'precipitation_{EVENT}'

# Attribute names passed to ConuGraphDataset as `attrs_dict`, grouped by
# graph element ('nodes' / 'edges').
ATTRS = {
    'nodes': ['elevation', 'area', 'imperv', 'slope', 'rainfall', 'depth_above_invert'],
    'edges': ['flow_rate',]
    }

# NOTE(review): ET is not referenced anywhere else in the visible notebook.
ET = '2001-01-01 00:40:00'


# Graph engine for the single EVENT above; the per-event loop in the next cell
# rebinds `conu_basin_graphs` for each event it processes.
conu_basin_graphs = GraphEngine(model=MODEL, event=EVENT, precip=PRECIP, conn=CONN)

In [3]:
events = ['sjj-47', 'sjj-160']
df = []  # one ConuGraphDataset per event, in the same order as `events`

for event in events:
    model = f'model_{event}'
    precip = f'precipitation_{event}'

    conu_basin_graphs = GraphEngine(model=model, event=event, precip=precip, conn=CONN)

    # Create the dataset folders if they are missing. Unlike a chain of
    # os.mkdir calls behind a bare `except`, this still creates `raw` and
    # `processed` when the event folder already exists, and it lets real
    # failures (e.g. permission errors) surface instead of hiding them.
    os.makedirs(f'../../data/{event}/raw', exist_ok=True)
    os.makedirs(f'../../data/{event}/processed', exist_ok=True)

    conu_dataset = ConuGraphDataset(
        f'../../data/{event}/',
        time_step=2, 
        graph_manager=conu_basin_graphs, 
        attrs_dict= ATTRS, 
        clean=False
        )
    df.append(conu_dataset)

# Display the list of datasets as the cell output.
df

[ConuGraphDataset(144), ConuGraphDataset(144)]

In [49]:
from torch.utils.data import ChainDataset, ConcatDataset

# Shuffled mini-batch loader over the concatenation of every per-event
# dataset collected in `df`.
batch_df = DataLoader(
    ConcatDataset(df),
    batch_size=32,
    shuffle=True,
    num_workers=8,
)

[Batch(batch=[135941], edge_attrs=[162100, 1], edge_index=[2, 162100], x=[135941, 4], y=[135941])]

In [50]:
# Index-based split: the slice [:25] is used for training and the remainder
# for testing.
# NOTE(review): `conu_dataset` is the dataset built in the LAST iteration of
# the event loop, not the concatenation of all events -- confirm this is the
# intended source for the split.
train_dataset = conu_dataset[:25]
test_dataset = conu_dataset[25:]

In [51]:
# Mini-batch loaders: training batches are shuffled, test batches are not.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=8,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=8,
)

In [24]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    """Three GCNConv layers, a mean-pool readout and a linear output head.

    Args:
        hidden_channels: Output width of every graph-convolution layer.
        dataset: Object exposing ``num_node_features`` (input width of the
            first convolution) and ``num_classes`` (output width of the
            final linear layer).
    """

    def __init__(self, hidden_channels, dataset):
        super().__init__()
        # Seed torch's RNG before the layers are created.
        torch.manual_seed(12345)
        # Size the first layer from the `dataset` argument rather than from a
        # notebook-level global, so the class works with any dataset passed in.
        self.conv1 = GCNConv(dataset.num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        """Compute one output row per graph in the batch.

        Args:
            x: Node feature tensor; cast to float before use.
            edge_index: Edge index tensor; cast to long before use.
            batch: Passed to ``global_mean_pool`` to group nodes
                (presumably the node-to-graph assignment vector -- confirm).

        Returns:
            The output of the final linear layer applied to the pooled
            embeddings.
        """
        edge_index = edge_index.long()
        x = x.float()

        # 1. Obtain node embeddings (no activation after the last convolution)
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier; dropout is only active in training mode
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)

        return x

# Build the network sized from `conu_dataset` and print its layer summary.
model = GCN(dataset=conu_dataset, hidden_channels=64)
print(model)

GCN(
  (conv1): GCNConv(5, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=1, bias=True)
)


In [25]:
# Fresh model, optimiser and loss function used by train() / test() below.
model = GCN(dataset=conu_dataset, hidden_channels=64)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
# NOTE(review): the printed model summary shows a single output unit
# (out_features=1). With only one class, cross-entropy and argmax are
# degenerate, which would explain a constant accuracy of 1.0 -- confirm
# `num_classes` and the label encoding.
criterion = torch.nn.CrossEntropyLoss()

def train():
    """Run one optimisation pass over ``train_loader``.

    Relies on the notebook-level ``model``, ``optimizer``, ``criterion`` and
    ``train_loader``. For each batch: forward pass, loss, backward pass,
    optimiser step, then gradients are cleared.
    """
    model.train()

    for batch in train_loader:
        logits = model(batch.x, batch.edge_index, batch.batch)
        batch_loss = criterion(logits, batch.y)
        batch_loss.backward()
        optimizer.step()
        # Clear gradients so they do not accumulate into the next batch.
        optimizer.zero_grad()
        
        

def test(loader):
    """Return the accuracy of the notebook-level ``model`` on ``loader``.

    Args:
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``
            and ``y``; must also have a ``dataset`` attribute supporting
            ``len()``.

    Returns:
        float: number of matching predictions divided by
        ``len(loader.dataset)``.
    """
    model.eval()

    correct = 0
    # Evaluation never calls backward(), so disable autograd to avoid building
    # and holding a computation graph for every batch.
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Use the class with highest probability.
            correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.


# Train for 9 epochs (numbered 1..9); after each one, report accuracy on the
# training and test loaders.
for epoch in tqdm(range(1, 10)):
    train()
    train_acc, test_acc = test(train_loader), test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

 11%|█         | 1/9 [00:02<00:21,  2.73s/it]

Epoch: 001, Train Acc: 1.0000, Test Acc: 1.0000


 22%|██▏       | 2/9 [00:05<00:19,  2.74s/it]

Epoch: 002, Train Acc: 1.0000, Test Acc: 1.0000


 33%|███▎      | 3/9 [00:08<00:16,  2.76s/it]

Epoch: 003, Train Acc: 1.0000, Test Acc: 1.0000


 44%|████▍     | 4/9 [00:11<00:13,  2.75s/it]

Epoch: 004, Train Acc: 1.0000, Test Acc: 1.0000


 56%|█████▌    | 5/9 [00:13<00:11,  2.79s/it]

Epoch: 005, Train Acc: 1.0000, Test Acc: 1.0000


 67%|██████▋   | 6/9 [00:16<00:08,  2.78s/it]

Epoch: 006, Train Acc: 1.0000, Test Acc: 1.0000


 78%|███████▊  | 7/9 [00:19<00:05,  2.78s/it]

Epoch: 007, Train Acc: 1.0000, Test Acc: 1.0000


 89%|████████▉ | 8/9 [00:22<00:02,  2.78s/it]

Epoch: 008, Train Acc: 1.0000, Test Acc: 1.0000


100%|██████████| 9/9 [00:25<00:00,  2.78s/it]

Epoch: 009, Train Acc: 1.0000, Test Acc: 1.0000



