In [1]:
import os

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.functional as nn

from tqdm import tqdm
from torch_geometric.data import Data

##Adding src path to get our code
import sys
sys.path.append('../src')

from utils.graph_utils import interpolate_element_into_nodes, bin_to_torchGraph
from models.GCNN_node import GCNN_node
from datasets.Ice_graph_dataset import Ice_graph_dataset

## Test notebook to try ideas and explain them 

Some of the implemented functions are loaded and demonstrated in the following cells.
Later, the raw code behind them is presented for testing.

Note: the main code is in /src.

In [2]:
# Load every .npz snapshot found in ../data, in sorted filename order.
# Each entry of `file_graphs` is a plain dict {array name -> ndarray}.
data_dir = '../data'
file_graphs = []
for filename in sorted(os.listdir(data_dir)):
    # Match the real extension; a bare suffix test would also accept e.g. "foo_npz".
    if not filename.endswith('.npz'):
        continue
    # np.load returns a lazily-read NpzFile that keeps the archive open.
    # dict(...) materialises every array, then the context manager closes the handle.
    with np.load(os.path.join(data_dir, filename)) as npz_file:
        file_graphs.append(dict(npz_file))

In [3]:

# Interpolate element information to nodes. #To be changed!
# Each pair is (node-level key to create, element-level key to read from).
features_interp = [
    ('Ci_node', 'Concentration'),
    ('Thickness_node', 'Thickness'),
    ('Damage_node', 'Damage'),
    ('S0_node', 'Sigma_0'),
    ('S1_node', 'Sigma_1'),
]
file_graphs = interpolate_element_into_nodes(file_graphs, features_interp)

# Create torch graphs from the selected node features and a given target node index.
feature_list = [
    'M_VT_x', 'M_VT_y',
    'M_wind_x', 'M_wind_y',
    'M_ocean_x', 'M_ocean_y',
    'x', 'y',
    'Ci_node', 'Thickness_node', 'Damage_node',
]
target_index = 3030
graph_list = bin_to_torchGraph(file_graphs, feature_list, target_index)

# Torch dataset used to store/retrieve/transform the graphs.
ice_dataset = Ice_graph_dataset(graph_list)
# Take the first sample to try the toy model on.
example_graph = next(iter(ice_dataset))

# Model hyper-parameters.
num_features = example_graph.x.size(-1)  # node feature dimension
hidden_channels = 6
num_classes = 2  # x and y

model = GCNN_node(num_features, hidden_channels, num_classes)

# Test forward pass on the example graph.
output = model(
    example_graph.x,
    example_graph.edge_index,
    example_graph.edge_attr,
)

# Show the 2-element prediction.
print(output)

Interpolating features from element to nodes...: 100%|██████████| 25/25 [00:27<00:00,  1.11s/it]
Converting bins to torch graphs...: 0it [00:00, ?it/s]

tensor([0.0514, 0.2110], grad_fn=<ViewBackward0>)





### Raw Code

In [4]:
# List every array stored in one snapshot (index 12) together with its shape.
for array_name, array in file_graphs[12].items():
    print(array_name, array.shape)

Damage (137736,)
Concentration (137736,)
Thickness (137736,)
Sigma_0 (137736,)
Sigma_1 (137736,)
M_VT_x (70886,)
M_VT_y (70886,)
M_wind_x (70886,)
M_wind_y (70886,)
M_ocean_x (70886,)
M_ocean_y (70886,)
x (70886,)
y (70886,)
t (137736, 3)
i (70886,)
Ci_node (70886,)
Thickness_node (70886,)
Damage_node (70886,)
S0_node (70886,)
S1_node (70886,)
sum_elements (70886,)


In [5]:
# Average each per-element field onto the nodes: a node's value is the mean of the
# field over every element (row of `t`) that lists the node as a vertex.
# (node-level key to write, element-level key to read)
node_from_element = (
    ('Ci_node', 'Concentration'),
    ('Thickness_node', 'Thickness'),
    ('Damage_node', 'Damage'),
    ('S0_node', 'Sigma_0'),
    ('S1_node', 'Sigma_1'),
)

for file in tqdm(file_graphs,"Interpolating Ci from element to nodes..."):
    elements = np.asarray(file['t'])
    # Flattened vertex ids, row-major: e0v0, e0v1, e0v2, e1v0, ...
    node_ids = elements.ravel()
    verts_per_element = elements.shape[1]

    # Number of elements touching each node.
    # np.add.at is an unbuffered scatter-add, so repeated node ids accumulate
    # correctly; it replaces the per-element Python loop.
    # Assumes the vertices of one element are distinct -- TODO confirm for this mesh.
    element_count = np.zeros(file['x'].shape)
    np.add.at(element_count, node_ids, 1)

    for node_key, element_key in node_from_element:
        node_sum = np.zeros(file['x'].shape)
        # Repeat each element value once per vertex so it lines up with node_ids.
        np.add.at(node_sum, node_ids, np.repeat(file[element_key], verts_per_element))
        # Nodes that belong to no element divide 0/0 and come out as NaN.
        file[node_key] = node_sum / element_count

    file['sum_elements'] = element_count

Interpolating Ci from element to nodes...: 100%|██████████| 25/25 [00:31<00:00,  1.25s/it]


In [6]:
# Sanity check: do the node-averaged fields keep the statistics of the element fields?
one_file = file_graphs[12]

node_vars = ['Ci_node', 'Thickness_node', 'Damage_node', 'S0_node', 'S1_node']
element_vars = ['Concentration', 'Thickness', 'Damage', 'Sigma_0', 'Sigma_1']

print("Node vs Element distributions\n")
for node_key, element_key in zip(node_vars, element_vars):
    node_values = one_file[node_key]
    element_values = one_file[element_key]

    # Each line prints "<node statistic> - <element statistic>".
    print(f"{element_key} distributions")
    print("\tStd: ", node_values.std(), "-", element_values.std())
    print("\tMean: ", node_values.mean(), "-", element_values.mean())

Node vs Element distributions

Concentration distributions
	Std:  0.2774466430714578 - 0.2770468
	Mean:  0.8938239535221482 - 0.8954742
Thickness distributions
	Std:  0.6762332596798135 - 0.67523193
	Mean:  1.2166780835472275 - 1.2214028
Damage distributions
	Std:  0.10938263753400297 - 0.13377665
	Mean:  0.03525301232316462 - 0.03522985
Sigma_0 distributions
	Std:  2678.028312898291 - 3091.5708
	Mean:  -1527.0704480992035 - -1541.5564
Sigma_1 distributions
	Std:  2334.660421478878 - 2658.9407
	Mean:  1743.758830423752 - 1768.2036


In [7]:
#Compute per hour:
#node features [num_nodes, num_node_features]
#edges [2, num_edges]
#edge attributes [num_edges, num_edge_features]
graph_list=[]
target_idx = 15000
feature_list = ['M_VT_x', 'M_VT_y', 'M_wind_x', 'M_wind_y', 'M_ocean_x', 'M_ocean_y', 'x', 'y', 'Ci_node', 'Thickness_node', 'Damage_node' ]

# The last snapshot has no successor to take a target from, so it is skipped.
snapshots = file_graphs[:-1]

# enumerate() hides the length from tqdm; pass total so a real progress bar is drawn.
for i,hour_graph in tqdm(enumerate(snapshots), desc="Converting bins to graphs...", total=len(snapshots)):
    # Target: x/y of the target node in the NEXT snapshot.
    # NOTE(review): y keeps the dtype of the stored coordinates (float64 in the captured
    # output) while x/edge_attr are float32 -- confirm this is intended before training.
    target_coords = torch.tensor([file_graphs[i+1]['x'][target_idx],file_graphs[i+1]['y'][target_idx]])

    # Selected feature names, in the order they are stored in the snapshot dict.
    # idx_list[k] is the name of column k of node_features.
    idx_list = [key for key in hour_graph if key in feature_list]

    # Edge lengths need both coordinates; fail before doing any heavy work.
    if 'x' not in idx_list or 'y' not in idx_list:
        raise ValueError("Unable to find coordinates for nodes in graph mesh. \nDid you include it in the feature list?")
    coord_idx = [idx_list.index('x'), idx_list.index('y')]

    # One column per feature -> [num_nodes, num_node_features], float32.
    node_features = torch.from_numpy(
        np.stack([hour_graph[key] for key in idx_list], axis=1)
    ).to(torch.float32)

    # Every triangle (a, b, c) contributes the ordered pairs (a, b), (b, c), (a, c).
    # Duplicates are removed as ORDERED pairs, so (a, b) and (b, a) are both kept when
    # two triangles list a shared edge in opposite order.
    # NOTE(review): an edge listed in only one order stays one-directional -- confirm
    # whether the graph should be symmetrised.
    triangles = np.asarray(hour_graph['t'])
    directed_pairs = np.concatenate(
        [triangles[:, [0, 1]], triangles[:, [1, 2]], triangles[:, [0, 2]]]
    )
    # np.unique(axis=0) yields the same set of pairs as set() over tuples, in sorted order.
    unique_pairs = np.unique(directed_pairs, axis=0).astype(np.int64)
    edges = torch.from_numpy(unique_pairs).t().contiguous()  # [2, num_edges]

    # Edge attribute: Euclidean distance between the two end nodes, computed from the
    # x/y columns of node_features. Shape [num_edges, 1], float32.
    node_xy = node_features[:, coord_idx]
    edge_attr = torch.norm(node_xy[edges[1]] - node_xy[edges[0]], dim=1, keepdim=True).to(torch.float32)

    #Now we can create our torch-geometric graph using the "Data" class
    ice_graph = Data(x=node_features, edge_index=edges, edge_attr=edge_attr, y=target_coords)

    graph_list.append(ice_graph)

Converting bins to graphs...: 24it [00:18,  1.30it/s]


In [8]:
#get std,mean for graph data. z-score  vs implemented minmax? instance vs whole data normalization? WIP

# Per-feature statistics, averaged over the graphs (every graph weighs the same,
# whatever its node/edge count). Results are numpy arrays with one entry per feature.

# Standard deviation = sqrt of the mean per-graph variance.
# The previous version squared the variance, which is not a standard deviation.
x_std = torch.stack(
    [graph.x.var(dim=0) for graph in graph_list]
).mean(dim=0).sqrt().numpy()

edge_attr_std = torch.stack(
    [graph.edge_attr.var(dim=0) for graph in graph_list]
).mean(dim=0).sqrt().numpy()


x_mean = torch.stack(
    [graph.x.mean(dim=0) for graph in graph_list]
).mean(dim=0).numpy()

edge_attr_mean = torch.stack(
    [graph.edge_attr.mean(dim=0) for graph in graph_list]
).mean(dim=0).numpy()



In [9]:
from torch_geometric.data import Dataset
from torch_geometric.transforms import NormalizeFeatures

class Ice_graph_dataset(Dataset):
    """In-memory dataset over a list of pre-built graph objects.

    Items are normalised with ``NormalizeFeatures`` on ``x``, ``edge_attr`` and ``y``
    when accessed through indexing or iteration.

    Args:
        data_list: sequence of graph ``Data`` objects to serve.

    NOTE(review): ``y`` is in the normalised attributes. The notebook shows the
    normalised target averaging exactly 0.5 over its two entries, i.e. the entries
    sum to one and the absolute position is lost -- confirm this is intended.
    """

    def __init__(self,data_list):
        # Register the transform with the base class. Dataset.__getitem__ applies
        # self.transform to whatever get() returns, so get() must not apply it too
        # (it used to, which ran the transform twice per item).
        super().__init__(transform=NormalizeFeatures(attrs=['x','edge_attr','y']))

        # Graphs served by this dataset
        self.data_list = data_list

    def len(self):
        """Return the number of stored graphs."""
        return len(self.data_list)

    def get(self, idx):
        """Return the stored graph at ``idx``, untransformed."""
        return self.data_list[idx]

In [10]:
# Wrap the graphs built in this notebook in the Ice_graph_dataset class defined in the
# cell above (that definition shadows the one imported from datasets.Ice_graph_dataset).
dataset = Ice_graph_dataset(graph_list)

In [11]:
# Mean of the target as served by the dataset (first item, after its transform) next to
# the mean of a raw target taken straight from graph_list (index 5, a different graph).
next(iter(dataset)).y.mean(),graph_list[5].y.mean()


(tensor(0.5000, dtype=torch.float64), tensor(770845.0310, dtype=torch.float64))

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(nn.Module):
    """Two GCNConv layers, a mean over all nodes, then a linear output layer.

    Args:
        num_features: size of each input node feature vector.
        hidden_channels: width of both graph-convolution layers.
        output_size: number of values predicted for the whole graph.
    """

    def __init__(self, num_features, hidden_channels, output_size):
        super().__init__()
        self.conv1 = GCNConv(num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        # Maps the pooled graph embedding to the output values.
        self.fc = nn.Linear(hidden_channels, output_size)

    def forward(self, x, edge_index, edge_attr):
        """Return one ``output_size`` vector for the graph described by the inputs.

        ``edge_attr`` is passed to both convolutions as their third positional argument.
        """
        hidden = F.relu(self.conv1(x, edge_index, edge_attr))
        hidden = F.relu(self.conv2(hidden, edge_index, edge_attr))

        # Global pooling: average the node embeddings over the node dimension
        # (... not sure how elegant).
        pooled = hidden.mean(dim=0)

        # Fully connected layer for the final output.
        return self.fc(pooled)

# Smoke-test the GCN on the last graph of graph_list.
# Indexing the list is explicit and does not rely on a loop variable left over
# from the cell that built the graphs.
sample_graph = graph_list[-1]

num_features = sample_graph.x.shape[-1]  # Node feature dimension
hidden_channels = 6
num_classes = 2  # x and y of the target node

model = GCN(num_features, hidden_channels, num_classes)


# Forward pass
output = model(sample_graph.x, sample_graph.edge_index, sample_graph.edge_attr)

# Print the output (2-element x, y prediction)
print(output)


tensor([ 141197.1875, -459843.0938], grad_fn=<ViewBackward0>)
