In [None]:
import torch
import os
import math
import numpy as np
import matplotlib.pyplot as plt
import torch_geometric
import random 
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import torch_geometric.transforms as T

### Loading in features / preparing Data objects

In [None]:
# Load the saved oil & gas regression object, then overwrite its `edge_index`
# with the edge set stored in the "co2+ch4-1000km" file.
# NOTE(review): presumably a torch_geometric `Data` object whose edges connect
# sites within 1000 km of each other — confirm against the script that wrote
# these files.
# NOTE(review): torch.load relies on pickle by default, so only load files from a
# trusted source. Recent PyTorch releases default to `weights_only=True`, which
# may refuse non-tensor objects — confirm on the installed version.
oil_gas_regression_data = torch.load('pyg_objects/oil+gas/regression_data')
oil_gas_regression_data.edge_index = torch.load('pyg_objects/oil+gas/co2+ch4-1000km')

In [None]:
import networkx as nx
from torch_geometric.utils import to_networkx

# Node positions handed to the drawing call below.
# NOTE(review): presumably a mapping of node index -> (x, y) coordinates, which is
# what `nx.draw(pos=...)` expects — confirm against the file's producer.
node_coords = torch.load('pyg_objects/oil+gas/co2+ch4-location-mapping')

# Convert the PyG Data object to an undirected NetworkX graph.
graph = to_networkx(oil_gas_regression_data, to_undirected=True)

# Draw the graph at the loaded coordinates. The previous version also computed a
# spiral layout that was never used, and requested labels with a font size of 0;
# labels are now switched off explicitly instead.
nx.draw(graph, pos=node_coords, with_labels=False, node_color='skyblue', node_size=0.5)

# Display the plot
plt.show()

### Defining and initializing GCN

In [None]:
class GCNRegression(torch.nn.Module):
    """Two GCNConv layers followed by a linear regression head.

    Layer widths are ``in_channels -> hidden_channels -> 2 * hidden_channels
    -> out_channels``. All parameters are converted to float64.

    Note: constructing the model calls ``torch.manual_seed(637)``, which resets
    the global torch RNG as a side effect.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        torch.manual_seed(637)
        wide_channels = hidden_channels * 2
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, wide_channels)
        # Linear head producing the regression output(s).
        self.fc = torch.nn.Linear(wide_channels, out_channels)
        self.double()

    def forward(self, x, edge_index):
        """Run both graph convolutions (each followed by ReLU) and the linear head.

        Returns the head output after ``squeeze(1)``; that call only removes
        dimension 1 when it has size 1 and leaves the tensor unchanged otherwise.
        """
        hidden = F.relu(self.conv1(x, edge_index))
        # Dropout between the two convolutions is currently disabled.
        hidden = F.relu(self.conv2(hidden, edge_index))
        output = self.fc(hidden)
        return output.squeeze(1)

# Build the GCN regressor for the oil & gas data (two outputs per node),
# together with its Adam optimizer and MSE criterion.
gcn_regression_model = GCNRegression(
    in_channels=oil_gas_regression_data.num_features,
    hidden_channels=16,
    out_channels=2,
)
gcn_regression_optimizer = torch.optim.Adam(
    gcn_regression_model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)
gcn_regression_criterion = torch.nn.MSELoss()

# Last expression of the cell: show the model's layer summary.
gcn_regression_model

In [None]:
def gcn_train(model, data, optimizer, criterion):
    """Run one optimisation step of a graph model on the training nodes.

    Args:
        model: module called as ``model(data.x, data.edge_index)``.
        data: object exposing ``x``, ``edge_index``, ``y`` and ``train_mask``.
        optimizer: optimizer holding the model's parameters.
        criterion: loss callable applied to (predictions, targets).

    Returns:
        The training loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    train_idx = data.train_mask
    node_outputs = model(data.x, data.edge_index)
    # Only nodes selected by the training mask contribute to the loss.
    loss = criterion(node_outputs[train_idx], data.y[train_idx])

    loss.backward()
    optimizer.step()
    return loss.item()

def gcn_test(model, data):
    """Evaluate a graph model on the test nodes.

    The forward pass runs under ``torch.no_grad()`` so that no autograd graph is
    built; the returned predictions therefore do not keep intermediate
    activations alive when callers store them.

    Args:
        model: module called as ``model(data.x, data.edge_index)``.
        data: object exposing ``x``, ``edge_index``, ``y`` and ``test_mask``.

    Returns:
        Tuple ``(mse, predictions)``: the test-set mean squared error as a Python
        float, and the raw regression outputs for the test nodes.
    """
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)

        # Regression outputs are used directly (no argmax).
        predictions = out[data.test_mask]
        mse_loss = F.mse_loss(predictions, data.y[data.test_mask])

    return mse_loss.item(), predictions


In [None]:
def dist_sweep(model, edges_path, data, optimizer, criterion, distances):
    """Train and evaluate `model` once per edge-distance threshold.

    For every entry in `distances` the model is re-initialised, the edge set
    stored at ``edges_path + str(distance) + "km"`` is loaded into
    ``data.edge_index``, and the model is trained for 20 passes of 100 epochs
    each. The model is NOT reset between passes, so the 20 recorded test losses
    trace cumulative training (100, 200, ..., 2000 epochs).

    Args:
        model: graph model accepted by `gcn_train` / `gcn_test`.
        edges_path: path prefix of the saved edge-index files.
        data: graph data object; its ``edge_index`` is overwritten in place.
        optimizer: optimizer holding `model`'s parameters.
        criterion: loss callable forwarded to `gcn_train`.
        distances: iterable of distance values used to build the file names.

    Returns:
        Tuple ``(preds, all_losses)``. `preds` is a flat list with one detached
        test-prediction tensor per (distance, pass); `all_losses` holds one list
        of 20 test losses per distance.
    """
    preds = []
    all_losses = []

    for distance in distances:
        print('----', distance, '----')

        # Start every distance from freshly initialised weights: Xavier-uniform
        # for matrices, zeros for 1-D parameters such as biases.
        with torch.no_grad():
            for param in model.parameters():
                if not param.requires_grad:
                    continue
                if param.dim() > 1:
                    torch.nn.init.xavier_uniform_(param)
                else:
                    torch.nn.init.zeros_(param)

        # The weights were just reset, so also drop the optimizer's running
        # statistics (step counts, moment estimates); otherwise state accumulated
        # for the previous distance would steer the first updates of this one.
        optimizer.state.clear()

        # The edge file depends only on the distance, so load it once here rather
        # than on each of the 20 passes.
        data.edge_index = torch.load(edges_path + str(distance) + "km")

        distance_losses = []
        for training_pass in range(20):
            print(training_pass)
            for _epoch in range(100):
                gcn_train(model=model,
                          data=data,
                          optimizer=optimizer,
                          criterion=criterion)

            loss, pred = gcn_test(model=model, data=data)
            # Detach so that stored predictions never keep an autograd graph alive.
            preds.append(pred.detach())
            distance_losses.append(loss)
        all_losses.append(distance_losses)
    return preds, all_losses


In [None]:
# Fresh GCN regressor for the oil & gas sweep (two outputs per node), this time
# optimised with NAdam. The no-op self-assignment of `oil_gas_regression_data`
# that used to sit in this cell has been removed.
gcn_regression_model = GCNRegression(in_channels=oil_gas_regression_data.num_features,
                                     hidden_channels=16,
                                     out_channels=2)
gcn_regression_optimizer = torch.optim.NAdam(gcn_regression_model.parameters(), lr=0.01, weight_decay=5e-4)
gcn_regression_criterion = torch.nn.MSELoss()  # Mean Squared Error Loss for regression

# Distance values; each one selects the edge file "<edges_path><distance>km".
distances = [1, 2, 3, 4, 5, 25, 50, 100, 200, 500, 1000, 2000, 5000, 10000]
preds, losses = dist_sweep(model=gcn_regression_model,
                           edges_path='pyg_objects/oil+gas/co2+ch4-',
                           data=oil_gas_regression_data,
                           optimizer=gcn_regression_optimizer,
                           criterion=gcn_regression_criterion,
                           distances=distances)

### Comparison with MLP

In [None]:
from torch.nn import Linear

class MLP(torch.nn.Module):
    """Graph-free baseline: one hidden layer with ReLU and dropout.

    Layer widths are ``input_dim -> hidden_channels -> output_dim`` and all
    parameters are converted to float64.

    Note: constructing the model calls ``torch.manual_seed(12345)``, which
    resets the global torch RNG as a side effect.
    """

    def __init__(self, input_dim, hidden_channels, output_dim):
        super().__init__()
        torch.manual_seed(12345)
        self.lin1 = Linear(input_dim, hidden_channels)
        self.lin2 = Linear(hidden_channels, output_dim)
        self.double()

    def forward(self, x):
        """Return the raw regression outputs for the feature matrix `x`."""
        hidden = F.relu(self.lin1(x))
        # Dropout (p=0.5) is active only while the module is in training mode.
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        return self.lin2(hidden)

# MLP baseline for the oil & gas data: same feature count as the graph model,
# 16 hidden units, two regression outputs.
model = MLP(
    input_dim=oil_gas_regression_data.num_features,
    hidden_channels=16,
    output_dim=2,
)
print(model)

In [None]:
def mlp_train(model, data, optimizer, criterion):
    """Run one optimisation step of a feature-only model on the training nodes.

    Args:
        model: module called as ``model(data.x)``.
        data: object exposing ``x``, ``y`` and ``train_mask``.
        optimizer: optimizer holding the model's parameters.
        criterion: loss callable applied to (predictions, targets).

    Returns:
        The training loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    train_idx = data.train_mask
    outputs = model(data.x)
    # Only rows selected by the training mask contribute to the loss.
    loss = criterion(outputs[train_idx], data.y[train_idx])

    loss.backward()
    optimizer.step()
    return loss.item()

def mlp_test(model, data, criterion):
    """Evaluate a feature-only model on the test nodes.

    The forward pass and loss run under ``torch.no_grad()``: only a float is
    returned, so building an autograd graph here would be wasted work.

    Args:
        model: module called as ``model(data.x)``.
        data: object exposing ``x``, ``y`` and ``test_mask``.
        criterion: loss callable applied to (predictions, targets).

    Returns:
        The criterion value on the test nodes as a Python float.
    """
    model.eval()
    with torch.no_grad():
        out = model(data.x)

        # Restrict both predictions and targets to the test nodes.
        predictions = out[data.test_mask]
        mse_loss = criterion(predictions, data.y[data.test_mask])

    return mse_loss.item()

In [None]:
# Optimizer and loss for the oil & gas MLP baseline.
optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=5e-4)
criterion = torch.nn.MSELoss()

# Full-batch training; range(1, 500) yields 499 epochs.
for epoch in range(1, 500):
    loss = mlp_train(
        model=model,
        data=oil_gas_regression_data,
        optimizer=optimizer,
        criterion=criterion,
    )
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

# Held-out evaluation once training has finished.
test_loss = mlp_test(
    model=model,
    data=oil_gas_regression_data,
    criterion=criterion,
)
print(f'Test Loss: {test_loss:.4f}')

In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np

# One colour per distance, spread evenly over the plasma colour map.
cmap = plt.cm.plasma
norm = mcolors.Normalize(vmin=0, vmax=len(losses)-1)

# Each curve holds the test losses recorded after successive training passes
# for one distance.
for i, (distance_losses, distance_km) in enumerate(zip(losses, distances)):
    plt.plot(distance_losses, label=str(distance_km)+'km', color=cmap(norm(i)))

# MLP baseline: use the test loss computed by the oil & gas MLP cell above
# instead of a pasted number, so the reference line cannot go stale when the
# notebook is re-run top to bottom.
plt.axhline(y=test_loss, color='gray', linestyle='--', label='MLP loss')

plt.xlabel('# of training passes')
plt.ylabel('MSE Loss')
plt.title("Oil & Gas Regression")
plt.legend(fontsize='small')
plt.show()

In [None]:
# Now we will test the coal data

In [None]:
# Load the saved coal regression object and display it as the cell's output.
# NOTE(review): presumably a torch_geometric `Data` object — confirm against the
# script that wrote this file.
# NOTE(review): torch.load relies on pickle by default, so only load files from a
# trusted source. Recent PyTorch releases default to `weights_only=True`, which
# may refuse non-tensor objects — confirm on the installed version.
coal_regression_data = torch.load('pyg_objects/coal/coal_regression_data')
coal_regression_data

In [None]:
# GCN regressor for the coal data: a single regression output per node,
# optimised with NAdam against an MSE criterion.
gcn_regression_model = GCNRegression(
    in_channels=coal_regression_data.num_features,
    hidden_channels=16,
    out_channels=1,
)
gcn_regression_optimizer = torch.optim.NAdam(
    gcn_regression_model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)
gcn_regression_criterion = torch.nn.MSELoss()

# Distance values; each one selects the edge file "<edges_path><distance>km".
distances = [1, 2, 3, 4, 5, 25, 50, 100, 200, 500, 1000, 2000]
preds, losses = dist_sweep(
    model=gcn_regression_model,
    edges_path='pyg_objects/coal/coal-ch4-edges-',
    data=coal_regression_data,
    optimizer=gcn_regression_optimizer,
    criterion=gcn_regression_criterion,
    distances=distances,
)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np

# One colour per distance, spread evenly over the plasma colour map.
cmap = plt.cm.plasma
norm = mcolors.Normalize(vmin=0, vmax=len(losses)-1)

# Each curve holds the test losses recorded after successive training passes
# for one distance.
for i, (distance_losses, distance_km) in enumerate(zip(losses, distances)):
    plt.plot(distance_losses, color=cmap(norm(i)), label=str(distance_km)+'km')

# NOTE(review): hard-coded baseline, presumably copied from an earlier run of the
# coal MLP cell further down — re-check it whenever that cell's result changes.
plt.axhline(y=490899825.2230, color='gray', linestyle='--', label='MLP loss')

plt.xlabel('# of training passes')
plt.ylabel('MSE Loss')
plt.title("Coal Regression")
plt.legend(fontsize='small')
plt.show()

In [None]:
# MLP baseline for the coal data: 16 hidden units, one regression output.
model = MLP(
    input_dim=coal_regression_data.num_features,
    hidden_channels=16,
    output_dim=1,
)
optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=5e-4)
criterion = torch.nn.MSELoss()

# Full-batch training; range(1, 500) yields 499 epochs.
for epoch in range(1, 500):
    loss = mlp_train(
        model=model,
        data=coal_regression_data,
        optimizer=optimizer,
        criterion=criterion,
    )
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

# Held-out evaluation once training has finished.
test_loss = mlp_test(
    model=model,
    data=coal_regression_data,
    criterion=criterion,
)
print(f'Test Loss: {test_loss:.4f}')
