In [63]:
import torch
import numpy as np
# import matplotlib.pyplot as plt

# from datasets import get_dataset
#from basics import train, get_data, accuracy
from extensions import ExtendedData
# from torch_geometric.datasets import StochasticBlockModelDataset
# from synthetic_data import SBM
from nett import sample_dcsbm
# import torch_geometric.utils 

from torch_geometric.data import Data
from sklearn.datasets import make_blobs
# from optimize import evaluate_model, evaluate_params

# import platform
# platform.system()
# Prefer the GPU when CUDA is available, otherwise use the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Set to True to draw the graph in the optional visualisation cell.
plot_net = False

# Seed the global NumPy and PyTorch generators before any random step so that
# "Restart & Run All" reproduces the same numbers.
# NOTE(review): sample_dcsbm / create_masks are assumed to draw from one of
# these global generators -- confirm; if they use another RNG, seed that too.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

n = 300 # change to 600, 1200

# Symmetric K x K matrix with p on the diagonal and q off the diagonal.
p = 0.07
q = 0.01
K = 3
B = torch.full((K, K), q).fill_diagonal_(p)

# Uniformly shrink every entry of B (presumably to obtain a sparser graph --
# the printed mean degree below is the quantity to watch when tuning this).
density_scale = 0.2
B = B * density_scale
# data = StochasticBlockModelDataset('data/SBM', [n//3, n//3, n//3], B, num_channels=3, class_sep=.1)[0]
# data = SBM('data/SBM', [n//3, n//3, n//3], B, num_channels=3, class_sep=2)[0]

# Node features: n points in K dimensions drawn around K blob centers.
# `centers` is kept because the nearest-center baseline needs it later.
X, y, centers = make_blobs(
    n_samples=n,
    n_features=K,
    centers=K,
    cluster_std=10,
    random_state=seed,
    return_centers=True,  # also return the blob centers
)

# The blob labels y are passed to sample_dcsbm together with B to obtain the
# edge list; the same labels are used as node targets.
pyg_data = Data(
    edge_index=sample_dcsbm(y, B),
    x=torch.tensor(X).float(),
    y=torch.tensor(y).long(),
)

# data = get_data('squirrel')

# Re-wrap as ExtendedData, which provides the mask / degree / plotting helpers
# used below.
data = ExtendedData.from_dict(pyg_data.to_dict())
data.create_masks('balanced')
# data.to('cpu')

# Print mean degree
print(torch.mean(data.get_degrees()))
data.plot_features_3d(use_svd=True)


tensor(1.6733, dtype=torch.float64)


In [64]:
# Optional drawing of the graph held in `data`; skipped unless `plot_net` is truthy.
if plot_net:
    # graph_visualizer is imported only on this branch, so the package is
    # required only when plotting is switched on. To install it (prefer %pip
    # over !pip so the package lands in the kernel's environment):
    #   %pip install git+https://github.com/aaamini/graph_visualizer.git
    from graph_visualizer import visualize_graph

    # Visualize the graph
    visualize_graph(data)


In [65]:
# performance of the Bayes classifier
from sklearn.metrics import accuracy_score

# Function to find the closest center for each point in X
def predict_labels(X, centers):
    """Assign every row of ``X`` to its nearest center (Euclidean distance).

    Args:
        X: 2-D floating-point torch tensor with one point per row.
        centers: array-like (NumPy array, nested list or tensor) with one
            center per row and as many columns as ``X``.

    Returns:
        1-D ``torch.long`` tensor whose ``i``-th entry is the row index in
        ``centers`` of the center closest to ``X[i]``.
    """
    # Put the centers on X's dtype and device so cdist never receives
    # mismatched operands (e.g. float64 or GPU features); as_tensor also
    # accepts an existing tensor without the copy-construct warning that
    # torch.tensor(tensor) emits.
    centers = torch.as_tensor(centers, dtype=X.dtype, device=X.device)
    distances = torch.cdist(X, centers)
    return torch.argmin(distances, dim=1)

# Calculate accuracy for each test split and report the average
def evaluate_bayes_classifier(data, centers):
    """Average nearest-center accuracy over the test splits stored in ``data``.

    For each column of ``data.masks['test']`` the selected rows of ``data.x``
    are labelled with ``predict_labels`` and scored against the matching
    entries of ``data.y``.

    Args:
        data: object exposing tensors ``x`` and ``y`` and a ``masks['test']``
            entry. The test mask is either 2-D with one column per split, or
            1-D for a single split.
        centers: cluster centers, forwarded unchanged to ``predict_labels``.

    Returns:
        Mean of the per-split accuracies (``numpy.float64``).
    """
    test_masks = data.masks['test']
    if test_masks.ndim == 1:
        # Single split: view it as a one-column matrix so the loop still applies.
        test_masks = test_masks[:, None]

    accuracies = []
    for i in range(test_masks.shape[1]):
        test_mask = test_masks[:, i]
        # Work on CPU copies: the centers are host data and sklearn cannot
        # convert GPU tensors, so this keeps the baseline usable after `data`
        # has been moved to another device.
        X_test = data.x[test_mask].cpu()
        y_test = data.y[test_mask].cpu()
        y_pred = predict_labels(X_test, centers)
        # Hand sklearn plain NumPy arrays rather than torch tensors.
        accuracy = accuracy_score(y_test.numpy(), y_pred.cpu().numpy())
        accuracies.append(accuracy)
        # print(f'Test Accuracy for split {i+1}: {accuracy:.4f}')
    return np.mean(accuracies)

In [66]:
from tqdm import tqdm
import numpy as np
import torch

# from np_gnn.models import NPGNN
from model_interfaces import ACM_GCNP, NPGNN_AB
from basics import train_model_class

torch.manual_seed(42)

# Models to compare: each entry pairs a model class with the hyper-parameters
# handed to train_model_class for it.
models = [
    {
        'model_class': NPGNN_AB,
        'hyper_params': {'spec_train': True, 'kern_fn': None, 'norm': False, 'Shift': False, 'pct': 1},
    },
    {
        'model_class': ACM_GCNP,
        'hyper_params': {'dropout': 0.5},
    },
]

# Settings shared by every run; all are forwarded to train_model_class
# (lr / wd are presumably learning rate and weight decay).
lr = 1e-2
wd = 5e-4
# nreps = data.masks['train'].shape[1]
nreps = 3
n_iter = 1500

# Per-model score lists, keyed by class name; one entry is appended per repetition.
val_results = {}
tst_results = {}
for model in models:
    val_results[model['model_class'].__name__] = []
    tst_results[model['model_class'].__name__] = []

for rep in tqdm(range(nreps), desc='Training Repetitions'):
    tqdm.write(f"Rep {rep + 1}/{nreps}")
    for model in models:
        model_class, hyper_params = model['model_class'], model['hyper_params']
        model_name = model_class.__name__

        best_acc, test_acc, iterations = train_model_class(
            model_class, hyper_params, data, rep, n_iter, lr, wd
        )
        val_results[model_name].append(best_acc)
        tst_results[model_name].append(test_acc)

        # Per-model line, indented so it sits under its "Rep i/n" header.
        tqdm.write(f"    {model_class.__name__} - val = {best_acc:.3f}, tst = {test_acc:.3f}, # iter = {iterations + 1:4d}")

# Summary: mean and (population) standard deviation across repetitions.
for model in models:
    model_name = model['model_class'].__name__
    val_scores = val_results[model_name]
    tst_scores = tst_results[model_name]
    mean_val, std_val = np.mean(val_scores), np.std(val_scores)
    mean_tst, std_tst = np.mean(tst_scores), np.std(tst_scores)
    tqdm.write(f"{model_name} - mean val = {mean_val:.3f} ± {std_val:.3f}, mean tst = {mean_tst:.3f} ± {std_tst:.3f}")

# Feature-only nearest-center baseline, averaged over the test splits in `data`.
print(f'\n---\nAverage Test Accuracy (Optimal): {evaluate_bayes_classifier(data, centers):.4f}')



'has_cuda' is deprecated, please use 'torch.backends.cuda.is_built()'

Training Repetitions:   0%|          | 0/3 [00:00<?, ?it/s]

Rep 1/3
    NPGNN_AB - val = 0.383, tst = 0.433, # iter =  213


Training Repetitions:  33%|███▎      | 1/3 [00:00<00:01,  1.13it/s]

    ACM_GCNP - val = 0.767, tst = 0.833, # iter =  257
Rep 2/3


Training Repetitions:  33%|███▎      | 1/3 [00:01<00:01,  1.13it/s]

    NPGNN_AB - val = 0.333, tst = 0.417, # iter = 1149


Training Repetitions:  67%|██████▋   | 2/3 [00:03<00:01,  1.68s/it]

    ACM_GCNP - val = 0.733, tst = 0.717, # iter =  540
Rep 3/3


Training Repetitions:  67%|██████▋   | 2/3 [00:03<00:01,  1.68s/it]

    NPGNN_AB - val = 0.533, tst = 0.433, # iter = 1490


Training Repetitions: 100%|██████████| 3/3 [00:04<00:00,  1.60s/it]

    ACM_GCNP - val = 0.817, tst = 0.767, # iter =  284
NPGNN_AB - mean val = 0.417 ± 0.085, mean tst = 0.428 ± 0.008
ACM_GCNP - mean val = 0.772 ± 0.034, mean tst = 0.772 ± 0.048

---
Average Test Accuracy (Optimal): 0.6433





In [70]:
# from jacobi_conv.PolyConv import PolyConvFrame, JacobiConv
# model = PolyConvFrame(JacobiConv)
# all_ones = torch.ones(data.edge_index.shape[1], dtype=torch.float).to(device)
# model(data.x, data.edge_index, all_ones).shape

# acm_gcnp_model_inputs = ACM_GCNP.get_model_inputs(data)
# acm_gcnp_model = ACM_GCNP({'dropout':.5}).to(device)