In [6]:
import osmnx as ox
import networkx as nx
from scipy.sparse import csr_matrix
import scipy.sparse as sp
import pandas as pd
import pickle

from collections import namedtuple

import dgl
from dgl.nn import GraphConv
import torch
from torch import nn
import torch.nn.functional as F

### Data Extraction

In [8]:
data = pd.read_pickle("/Users/mario/Downloads/Texas.pkl")

In [58]:
data = data.dropna()
data.head()

Unnamed: 0,Experiment,Town,County,State,Treatment,Search_Place,Graph
832,"(BaylU, 1845)",Waco,McLennan,Texas,1,"Waco, Texas","(236070980, 236070991, 236070999, 236071000, 2..."
833,"(BaylU, 1861)",Independence,Washington,Texas,0,"Independence, Texas","(4200548405, 4200548406, 4200548407, 420054840..."
834,"(DaBapU, 1898)",Dallas,Dallas,Texas,1,"Dallas, Texas","(29604777, 30759132, 81538002, 81538022, 81538..."
835,"(DaBapU, 1898)",San Antonio,Bexar,Texas,0,"San Antonio, Texas","(149452547, 149452554, 149452556, 149452652, 1..."
836,"(DaBapU, 1898)",Houston,Harris,Texas,0,"Houston, Texas","(151365298, 151365299, 151365329, 151365331, 1..."


In [60]:
def get_nodes(graphs):
    
    all_features_list = []
    
    for G in graphs:
        nodes = list(G.nodes)
        degree_tensor = torch.tensor(list(G.degree()))
        in_degree_tensor = torch.tensor(list(G.in_degree()))
        all_features_tensor = torch.cat([degree_tensor.unsqueeze(1), in_degree_tensor.unsqueeze(1)], dim=1)
        all_features_list.append(all_features_tensor)

    combined_feature_tensor = torch.cat(all_features_list, dim=0)
    return combined_feature_tensor

In [61]:
def get_edges(graphs):
    
    all_sparse_tensors = []
    
    for G in graphs:
        adj_coo = nx.to_scipy_sparse_array(G, dtype=float, format='coo')
        
        # Extract the row indices, column indices, and data
        row, col = adj_coo.row, adj_coo.col
        data = adj_coo.data
        
        # Convert to a PyTorch sparse COO tensor
        indices = torch.tensor([row, col], dtype=torch.int64)
        values = torch.tensor(data, dtype=torch.float32)
        sparse_tensor = torch.sparse_coo_tensor(indices, values, adj_coo.shape)
        
        all_sparse_tensors.append(sparse_tensor)
        
    return all_sparse_tensors

In [62]:
node_features = get_nodes(data["Graph"])

In [63]:
edge_features = get_edges(data["Graph"])

### Data Preprocessing

In [88]:
class AttentionPool(nn.Module):
      """ 
      
      Attention-based pooling layer for graph classification.
      Handles graphs of different sizes. 
      
      """

      def __init__(self, units):
        super(AttentionPool, self).__init__()
        self.units = units
        
        self.attention_dense = nn.Linear(units, units)
        
        self.score_dense = nn.Linear(units, 1)

      def forward(self, node_features, adj_matrix):
        # Calculate attention scores
        attention_inputs = self.attention_dense(node_features)
        attention_logits = torch.matmul(attention_inputs, attention_inputs.t())
        # Mask out self-attention
        attention_logits -= torch.eye(attention_logits.size(0)) * 1e9
        # Might change to softmax, to normalze weightos over the logits in the attention pool
        #attention_weights = F.sigmoid(self.score_dense(attention_logits))
        attention_weights = F.softmax(attention_logits, dim=-1)
        
        # Apply attention to node features
        pooled_features = torch.matmul(attention_weights, node_features)

        return pooled_features

In [89]:
def pool_features(node_features, edge_matrices):
    pooled_features_list = []
    for node_features_batch, adj_matrix_batch in zip(node_features, edge_matrices):
        # 
        units = node_features_batch.shape[1] # Adjust units based on the input feature size
        attention_pool = AttentionPool(units=units)
        pooled_features = attention_pool(node_features_batch, adj_matrix_batch)
        pooled_features_list.append(pooled_features)
    return pooled_features_list

In [70]:
# ME CODE
node_features = [n.float() for n in node_features]

In [90]:
test_node = node_features[0:5]
test_edge = edge_features[0:5]
test_pool = pool_features(test_node, test_edge)

### Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score

In [91]:
class GCNConv(nn.Module):
    """Graph Convolutional Layer"""

    def __init__(self, in_features, out_features):
        super(GCNConv, self).__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, node_features, adj_matrix):
        # Normalize adjacency matrix
        adj_matrix = F.normalize(adj_matrix, dim=1, p=1, eps=1e-9)
        # Transform features with the linear layer
        node_features_transformed = self.linear(node_features)
        # Perform graph convolution
        x = torch.matmul(adj_matrix, node_features_transformed)
        return x

In [92]:
class GraphCNN(nn.Module):
      """Graphical CNN model for network classification."""
    
      def __init__(self, in_features, hidden_features):
        super(GraphCNN, self).__init__()
        self.gcn1 = GCNConv(in_features, hidden_features)
        self.gcn2 = GCNConv(hidden_features, hidden_features)
        self.attention_pool = AttentionPool(hidden_features)
        self.fc1 = nn.Linear(hidden_features, 64)
        self.fc2 = nn.Linear(64, 2)
    
      def forward(self, node_features, adj_matrix):
        x = self.gcn1(node_features, adj_matrix)
        x = F.relu(x)
        x = self.gcn2(x, adj_matrix)
        x = F.relu(x)
        x = self.attention_pool(x, adj_matrix)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=0.5)
        output = F.sigmoid(self.fc2(x))
        return output
    

In [100]:
# Hyperparameters
learning_rate = 0.001
num_epochs = 50
hidden_features = 128

# Initialize the model
model = GraphCNN(in_features=5, hidden_features=hidden_features)  # Adjust in_features
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Training loop
model.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    output = model(node_features_train, adj_matrix_train)
    loss = criterion(output, labels_train)
    loss.backward()
    optimizer.step()
    
    # Print loss periodically
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluation on test data
model.eval()
with torch.no_grad():
    output = model(node_features_test, adj_matrix_test)
    _, predicted = torch.max(output, 1)
    accuracy = accuracy_score(labels_test, predicted)
    print(f'Test Accuracy: {accuracy:.4f}')


Epoch [10/50], Loss: 0.6618
Epoch [20/50], Loss: 0.6513
Epoch [30/50], Loss: 0.6173
Epoch [40/50], Loss: 0.5862
Epoch [50/50], Loss: 0.5743
Test Accuracy: 0.6000
