# GNN 


Code based on 

https://colab.research.google.com/drive/1h3-vJGRVloF5zStxL5I0rSy4ZUPNsjy8?usp=sharing#scrollTo=zF5bw3m9UrMy

In [31]:
import torch
from torch.nn import Linear
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
import pandas as pd
import random
from math import floor
import networkx as nx
import matplotlib.pyplot as plt
from torch_geometric.utils import to_networkx
import time



In [25]:
# Import data
X = pd.read_csv('./Data/df_X_county.csv')
X['constant'] = 1
y = pd.read_csv('./Data/df_y_county.csv')

X = X.to_numpy()
y = y.to_numpy()

X = torch.tensor(X)
y = torch.tensor(y)

In [26]:
# make masks
n = X.shape[0]
randomassort = list(range(n))
random.shuffle(randomassort)
max_train = floor(len(randomassort) * .1)
train_mask_idx = torch.tensor(randomassort[:max_train])
test_mask_idx = torch.tensor(randomassort[max_train:])
train_mask = torch.zeros(n); test_mask = torch.zeros(n)
train_mask.scatter_(0, train_mask_idx, 1)
test_mask.scatter_(0, test_mask_idx, 1)
train_mask = train_mask.type(torch.bool)
test_mask = test_mask.type(torch.bool)

In [28]:
data = Data(x = X, y = y)

data.test_mask = test_mask
data.train_mask = train_mask
print(data)
print('==============================================================')

# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')

Data(x=[17006, 13], y=[17006, 1], test_mask=[17006], train_mask=[17006])
Number of nodes: 17006
Number of edges: 0
Average node degree: 0.00
Number of training nodes: 1700
Training node label rate: 0.10


AttributeError: 'GlobalStorage' object has no attribute 'edge_index'

In [32]:
def visualize_graph(G, color):
    plt.figure(figsize=(7,7))
    plt.xticks([])
    plt.yticks([])
    nx.draw_networkx(G, pos=nx.spring_layout(G, seed=42), with_labels=False,
                     node_color=color, cmap="Set2")
    plt.show()
    
    
def visualize_embedding(h, color, epoch=None, loss=None):
    plt.figure(figsize=(7,7))
    plt.xticks([])
    plt.yticks([])
    h = h.detach().cpu().numpy()
    plt.scatter(h[:, 0], h[:, 1], s=140, c=color, cmap="Set2")
    if epoch is not None and loss is not None:
        plt.xlabel(f'Epoch: {epoch}, Loss: {loss.item():.4f}', fontsize=16)
    plt.show()


G = to_networkx(data, to_undirected=True)
visualize_graph(G, color=data.y)

AttributeError: 'NoneType' object has no attribute 't'

In [15]:

class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        torch.manual_seed(1234)
        self.conv1 = GCNConv(data.num_features, 4)
        self.conv2 = GCNConv(4, 4)
        self.conv3 = GCNConv(4, 2)
        self.pred = Linear(2, 1)

    def forward(self, x, edge_index):
        h = self.conv1(x, edge_index)
        h = h.tanh()
        h = self.conv2(h, edge_index)
        h = h.tanh()
        h = self.conv3(h, edge_index)
        h = h.tanh()  # Final GNN embedding space.
        
        # Apply a final (linear) classifier.
        out = self.pred(h)

        return out, h

model = GCN()
print(model)

GCN(
  (conv1): GCNConv(13, 4)
  (conv2): GCNConv(4, 4)
  (conv3): GCNConv(4, 2)
  (pred): Linear(in_features=2, out_features=1, bias=True)
)


In [4]:
from IPython.display import Javascript  # Restrict height of output cell.
#display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 430})'''))

model = GCN()
criterion = torch.nn.MSECriterion()  # Define loss criterion.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  # Define optimizer.

def train(data):
    optimizer.zero_grad()  # Clear gradients.
    out, h = model(data.x, data.edge_index)  # Perform a single forward pass.
    loss = criterion(out[data.train_mask], data.y[data.train_mask])  # Compute the loss solely based on the training nodes.
    loss.backward()  # Derive gradients.
    optimizer.step()  # Update parameters based on gradients.
    return loss, h

for epoch in range(401):
    loss, h = train(data)
    if epoch % 10 == 0:
        visualize_embedding(h, color=data.y, epoch=epoch, loss=loss)
        time.sleep(0.3)

NameError: name 'dataset' is not defined