In [9]:
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.loader import DataLoader as GeoDataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import numpy as np
from tqdm import tqdm
from torch_geometric.nn import GCNConv, global_mean_pool, GINConv, BatchNorm, global_add_pool


with open('/Users/nasibhuseynzade/Downloads/zinc_dataset.pkl','rb') as f:
    dataset = pickle.load(f)

In [4]:
def train_test_model(model, dataset, rewired_dataset, num_epochs=4, batch_size=32, learning_rate=0.0005):

    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

    rewired_train_size = int(0.8 * len(rewired_dataset))
    rewired_test_size = len(rewired_dataset) - rewired_train_size
    rewired_train_dataset, rewired_test_dataset = torch.utils.data.random_split(rewired_dataset, [rewired_train_size, rewired_test_size])

    train_loader = GeoDataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = GeoDataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    rewired_train_loader = GeoDataLoader(rewired_train_dataset, batch_size=batch_size, shuffle=True)
    rewired_test_loader = GeoDataLoader(rewired_test_dataset, batch_size=batch_size, shuffle=False)



    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = torch.nn.MSELoss()

    train_losses = []
    original_r2_scores = []
    rewired_r2_scores = []

    for epoch in tqdm(range(num_epochs)):
 
        model.train()
        total_loss = 0

        for batch in train_loader:
            batch = batch.to(device)
            batch.x = batch.x.float()
            batch.y = batch.y.float().view(-1, 1)
            optimizer.zero_grad()
            out = model(batch)  
            
            loss = F.mse_loss(out, batch.y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item() * batch.num_graphs

        train_loss = total_loss / len(train_loader)
        train_losses.append(train_loss)

        model.eval()
        y_true = []
        y_pred = []
        test_preds, test_targets = [], []
        with torch.no_grad():
            for batch in test_loader:
                batch = batch.to(device)
                batch.x = batch.x.float()
                batch.y = batch.y.float().view(-1, 1)
            
                out = model(batch)
                test_targets.append(batch.y.cpu().numpy())
                test_preds.append(out.cpu().numpy())
    
        test_targets = np.concatenate(test_targets)
        test_preds = np.concatenate(test_preds)
        test_r2 = r2_score(test_targets, test_preds)
        original_r2_scores.append(test_r2)

        rewired_test_preds, rewired_test_targets = [], []
        with torch.no_grad():
            for batch in rewired_test_loader:
                batch = batch.to(device)
                batch.x = batch.x.float()
                batch.y = batch.y.float().view(-1, 1)
                out = model(batch)
                rewired_test_preds.append(batch.y.cpu().numpy())
                rewired_test_targets.append(out.cpu().numpy())

        rewired_test_preds = np.concatenate(rewired_test_preds)
        rewired_test_targets = np.concatenate(rewired_test_targets)
        rewired_test_r2 = r2_score(rewired_test_targets, rewired_test_preds)
        rewired_r2_scores.append(rewired_test_r2)

        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Original Test R2: {test_r2:.4f}, Rewired Test R2: {rewired_test_r2:.4f}')

    
    return train_losses, original_r2_scores, rewired_r2_scores



In [3]:
class GINModel(torch.nn.Module):
    def __init__(self, num_features, num_classes=1, hidden_dim=64, depth=3):
        super(GINModel, self).__init__()

        # Define GIN layers
        self.convs = torch.nn.ModuleList()
        self.convs.append(GINConv(
            torch.nn.Sequential(
                torch.nn.Linear(num_features, hidden_dim),
                torch.nn.ReLU(),
                torch.nn.Linear(hidden_dim, hidden_dim),
                torch.nn.ReLU()
            )
        ))

        # Additional GIN layers
        for _ in range(depth - 1):
            self.convs.append(GINConv(
                torch.nn.Sequential(
                    torch.nn.Linear(hidden_dim, hidden_dim),
                    torch.nn.ReLU(),
                    torch.nn.Linear(hidden_dim, hidden_dim),
                    torch.nn.ReLU()
                )
            ))

        # Batch normalization layers
        self.batch_norms = torch.nn.ModuleList([BatchNorm(hidden_dim) for _ in range(depth)])

        # Final regression layer
        self.final_lin = torch.nn.Linear(hidden_dim, num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        for conv, batch_norm in zip(self.convs, self.batch_norms):
            x = conv(x, edge_index)
            x = batch_norm(x)
            x = F.relu(x)
        x = global_add_pool(x, data.batch)  # Pool to get a graph-level representation
        x = self.final_lin(x)  # Final regression output
        return x

In [None]:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model=GINModel(num_features=dataset[0].x.shape[1])
r2_values = train_test_model(model, dataset, num_epochs=4)

In [5]:
import numpy as np
from math import inf

def choose_edge_to_add(x, edge_index, degrees):
    # chooses edge (u, v) to add which minimizes y[u]*y[v]
    n = x.size
    m = edge_index.shape[1]
    y = x / ((degrees + 1) ** 0.5)
    products = np.outer(y, y)
    for i in range(m):
        u = edge_index[0, i]
        v = edge_index[1, i]
        products[u, v] = inf
    for i in range(n):
        products[i, i] = inf
    smallest_product = np.argmin(products)
    return (smallest_product % n, smallest_product // n)

def compute_degrees(edge_index, num_nodes=None):
    # returns array of degrees of all nodes
    if num_nodes is None:
        num_nodes = np.max(edge_index) + 1
    degrees = np.zeros(num_nodes)
    m = edge_index.shape[1]
    for i in range(m):
        degrees[edge_index[0, i]] += 1
    return degrees

def add_edge(edge_index, u, v):
    new_edge = np.array([[u, v],[v, u]])
    return np.concatenate((edge_index, new_edge), axis=1)

def adj_matrix_multiply(edge_index, x):
    # given an edge_index, computes Ax, where A is the corresponding adjacency matrix
    n = x.size
    y = np.zeros(n)
    m = edge_index.shape[1]
    for i in range(m):
        u = edge_index[0, i]
        v = edge_index[1, i]
        y[u] += x[v]
    return y

def compute_spectral_gap(edge_index, x):
	m = edge_index.shape[1]
	n = np.max(edge_index) + 1
	degrees = compute_degrees(edge_index, num_nodes=n)
	y = adj_matrix_multiply(edge_index, x / (degrees ** 0.5)) / (degrees ** 0.5)
	for i in range(n):
		if x[i] > 1e-9:
			return 1 - y[i]/x[i]
	return 0.

def _edge_rewire(edge_index, edge_type, x=None, num_iterations=50, initial_power_iters=50):
	m = edge_index.shape[1]
	n = np.max(edge_index) + 1
	if x is None:
		x = 2 * np.random.random(n) - 1
	degrees = compute_degrees(edge_index, num_nodes=n)
	for i in range(initial_power_iters):
		x = x - x.dot(degrees ** 0.5) * (degrees ** 0.5)/sum(degrees)
		y = x + adj_matrix_multiply(edge_index, x / (degrees ** 0.5)) / (degrees ** 0.5)
		x = y / np.linalg.norm(y)
	for I in range(num_iterations):
		i, j = choose_edge_to_add(x, edge_index, degrees=degrees)
		edge_index = add_edge(edge_index, i, j)
		degrees[i] += 1
		degrees[j] += 1
		edge_type = np.append(edge_type, 1)
		edge_type = np.append(edge_type, 1)
		x = x - x.dot(degrees ** 0.5) * (degrees ** 0.5)/sum(degrees)
		y = x + adj_matrix_multiply(edge_index, x / (degrees ** 0.5)) / (degrees ** 0.5)
		x = y / np.linalg.norm(y)
	return edge_index, edge_type, x

def edge_rewire(edge_index, x=None, edge_type=None, num_iterations=50, initial_power_iters=5):
    m = edge_index.shape[1]
    n = np.max(edge_index) + 1
    if x is None:
        x = 2 * np.random.random(n) - 1
    if edge_type is None:
        edge_type = np.zeros(m, dtype=np.int64)
    return _edge_rewire(edge_index, edge_type=edge_type, x=x, num_iterations=num_iterations, initial_power_iters=initial_power_iters)





import torch
from torch_geometric.transforms import NormalizeFeatures
import matplotlib.pyplot as plt
import pickle


fosr_dataset=dataset.copy()

print("Rewiring started")
for i in range(len(dataset)):
    
    edge_index, edge_type, _ = edge_rewire(dataset[i].edge_index.numpy(), num_iterations=2)
    fosr_dataset[i].edge_index = torch.tensor(edge_index)
    fosr_dataset[i].edge_type = torch.tensor(edge_type)
print("Rewiring ended")

Rewiring started
Rewiring ended


In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model=GINModel(num_features=dataset[0].x.shape[1])

In [10]:
train_losses, original_r2_scores, rewired_r2_scores = train_test_model(model, dataset, fosr_dataset, num_epochs=4, batch_size=32, learning_rate=0.0005)

 25%|██▌       | 1/4 [00:46<02:18, 46.06s/it]

Epoch 1/4, Train Loss: 69.6996, Original Test R2: 0.0726, Rewired Test R2: 0.3589


 50%|█████     | 2/4 [01:22<01:20, 40.16s/it]

Epoch 2/4, Train Loss: 55.5451, Original Test R2: 0.5211, Rewired Test R2: -0.0886


 75%|███████▌  | 3/4 [01:58<00:38, 38.28s/it]

Epoch 3/4, Train Loss: 50.8030, Original Test R2: 0.6545, Rewired Test R2: 0.5219


100%|██████████| 4/4 [02:42<00:00, 40.63s/it]

Epoch 4/4, Train Loss: 48.0309, Original Test R2: 0.4309, Rewired Test R2: 0.5242



