In [1]:
import yaml
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as TVDatasets

import torch_geometric
from torch_geometric.data import Data as GraphData 

from torch_geometric.nn import GCNConv, GATConv, APPNP, SAGEConv
from torch_geometric.nn.models.label_prop import LabelPropagation
from torch_geometric.datasets import Planetoid
from torch_geometric.utils import to_networkx

from sklearn.metrics import classification_report
# from sklearn.calibration import CalibrationDisplay

import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

%matplotlib inline
import networkx as nx
import matplotlib.pyplot as plt

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Torch is running on {device}")


import sys
from gnn_cp.data.data_manager import GraphDataManager
from gnn_cp.models.graph_models import GCN, GAT, APPNPNet, SAGE
from gnn_cp.models.model_manager import GraphModelManager
from gnn_cp.data.utils import make_dataset_instances
import gnn_cp.cp.transformations as cp_t
import gnn_cp.cp.graph_transformations as cp_gt
from gnn_cp.cp.graph_cp import GraphCP

Torch is running on cuda
Torch Graph Models are running on cuda
Torch Graph Models are running on cuda
Torch Graph Models are running on cuda
Torch Graph Models are running on cuda


In [3]:
# Both the YAML config file and the results folder live under the same
# baselines folder, addressed relative to the notebook's working directory.
baselines_root = "../baselines"
config_file_dir = f"{baselines_root}/config.yaml"  # path of the config *file*
results_dir = f"{baselines_root}/results"

In [9]:
# Load the baseline settings.
# NOTE(review): yaml.FullLoader can build more than plain mappings/lists/scalars.
# If the config file only holds plain data, consider yaml.safe_load instead.
with open(config_file_dir, 'r') as f:
    # An empty YAML document parses to None; fall back to an empty mapping so
    # the lookups below yield empty defaults instead of raising AttributeError.
    config = yaml.load(f, Loader=yaml.FullLoader) or {}

# Everything this notebook reads lives under the top-level "baseline" key.
# `or {}` also covers a key that is present but left empty (parsed as None).
baseline_config = config.get("baseline") or {}
general_dataset_config = baseline_config.get("general_dataset_config") or {}


# The directories below must already exist under results_dir; fail early with
# a readable message rather than deep inside the loading code.
assert os.path.isdir(results_dir), "The results path does not exist!"

models_cache_dir = os.path.join(results_dir, "models")
assert os.path.isdir(models_cache_dir), "Directory to trained models is not found! Maybe first run the make_baselines.py file"
data_dir = os.path.join(results_dir, "datasets")
assert os.path.isdir(data_dir), "Directory to Data Files is not found!"
splits_dir = os.path.join(results_dir, "splits")
assert os.path.isdir(splits_dir), "Directory to Data Splits is not found!"

# Dataset keys and model class names are the keys of the respective config sections.
dataset_names = list((baseline_config.get("datasets") or {}).keys())
models_config = baseline_config.get("models") or {}
model_classes = list(models_config.keys())

# Directory to store results for CPs; exist_ok makes re-running the cell a
# no-op and avoids the check-then-create race of isdir() followed by mkdir().
cp_results_dir = os.path.join(results_dir, "cp_results")
os.makedirs(cp_results_dir, exist_ok=True)


# region
# Build the dataset-split and model instances for every (dataset, model) pair
# and collect accuracy statistics over the returned instances.
results_dict = {}   # results_dict[dataset][model] -> {"mean", "sd", "best"}
model_acc_df = []   # one row dict per (dataset, model) pair
errors = []         # messages for pairs that could not be evaluated

# Menu strings for optionally picking a dataset / model interactively via
# input(); the batch loops below do not use them.
dataset_str_list = '\n'.join([f'{i}: {dataset_name}' for i, dataset_name in enumerate(dataset_names)])
model_str_list = '\n'.join([f'{i}: {model_name}' for i, model_name in enumerate(model_classes)])

for dataset_key in dataset_names:
    results_dict[dataset_key] = {}

    # Loading the dataset does not depend on the model class, so do it once
    # per dataset instead of once per (dataset, model) pair.
    dataset_manager = GraphDataManager(data_dir, splits_dir)
    dataset = dataset_manager.get_dataset_from_key(dataset_key).data

    for model_class_name in model_classes:
        print(f"dataset = {dataset_key}")
        try:
            instances = make_dataset_instances(data_dir, splits_dir, models_cache_dir, dataset_key, model_class_name, models_config)
        except Exception as e:
            # Best effort: record the failure and move on to the next pair.
            error_message = f"Error: {e} for {dataset_key} and {model_class_name}"
            print(error_message)
            errors.append(error_message)
            continue

        instances_accuracy = [instance["accuracy"] for instance in instances]
        if not instances_accuracy:
            # np.max raises on an empty sequence; treat "no instances" like any
            # other failed pair instead of aborting the whole run.
            error_message = f"Error: no instances returned for {dataset_key} and {model_class_name}"
            print(error_message)
            errors.append(error_message)
            continue

        best_model_accuracy = np.max(instances_accuracy)
        mean_model_accuracy = float(np.mean(instances_accuracy))
        sd_model_accuracy = float(np.std(instances_accuracy))

        # Scale to percent *before* rounding: `x.round(3) * 100` reintroduces
        # float noise such as 79.80000000000001 or 0.8999999999999999.
        mean_pct = round(mean_model_accuracy * 100, 1)
        sd_pct = round(sd_model_accuracy * 100, 1)
        print(f"acc={mean_pct} +- {sd_pct}")

        model_acc_df.append({
            "dataset": dataset_key,
            "model": model_class_name,
            "accuracy_mean": mean_pct,
            # Despite the key name this is the standard deviation (np.std), in
            # percent; the key is kept because other cells read it.
            "accuracy_var": sd_pct,
            # Kept as an unscaled fraction, exactly as np.max returns it.
            "best_accuracy": best_model_accuracy,
        })
        # Each key appears exactly once (the earlier literal repeated "mean"
        # and "sd", so the later values silently won), and "sd" is now in
        # percent like "mean".
        results_dict[dataset_key][model_class_name] = {
            "mean": round(mean_model_accuracy * 100, 2),
            "sd": round(sd_model_accuracy * 100, 2),
            "best": best_model_accuracy,
        }

    

dataset = cora_ml
Dataset Loaded Successfully!
Following labeled splits:
class 0: train=20, val=20
class 1: train=20, val=20
class 2: train=20, val=20
class 3: train=20, val=20
class 4: train=20, val=20
class 5: train=20, val=20
class 6: train=20, val=20
Loading Models
Loading Models GCN
Accuracy: 0.8231307550644565 +- 0.009074238748131642
acc=82.3 +- 0.8999999999999999
dataset = cora_ml
Dataset Loaded Successfully!
Following labeled splits:
class 0: train=20, val=20
class 1: train=20, val=20
class 2: train=20, val=20
class 3: train=20, val=20
class 4: train=20, val=20
class 5: train=20, val=20
class 6: train=20, val=20
Loading Models
Loading Models GAT
Accuracy: 0.798268876611418 +- 0.03126421006141403
acc=79.80000000000001 +- 3.1
dataset = cora_ml
Dataset Loaded Successfully!
Following labeled splits:
class 0: train=20, val=20
class 1: train=20, val=20
class 2: train=20, val=20
class 3: train=20, val=20
class 4: train=20, val=20
class 5: train=20, val=20
class 6: train=20, val=20
Loa

In [25]:
# Summary table indexed by (dataset, model), built from the collected row dicts.
# adaptive_coverage: 0.66 + 0.34 * acc with acc as a fraction, which equals
#   acc + 0.66 * (1 - acc); reported in percent.
# best_accuracy: converted from a fraction to percent with one decimal.
r = (
    pd.DataFrame(model_acc_df)
    .set_index(["dataset", "model"])
    .assign(
        adaptive_coverage=lambda table: np.round(1 * 0.66 + 0.34 * (table["accuracy_mean"] / 100), 3) * 100,
        best_accuracy=lambda table: (table["best_accuracy"] * 100).round(1),
    )
)

In [None]:
def _format_mean_and_spread(row):
    """Render one table row as '<mean> +- <spread>' (mean to 1 decimal, spread to 2)."""
    mean_part = np.round(row['accuracy_mean'], 1)
    spread_part = np.round(row['accuracy_var'], 2)
    return f"{mean_part} +- {spread_part}"


# Human-readable accuracy column, built row by row.
r["accuracy"] = r.apply(_format_mean_and_spread, axis=1)

# Emit the report columns as LaTeX source so it can be pasted into a document.
latex_columns = ["accuracy", "best_accuracy", "adaptive_coverage"]
print(r[latex_columns].to_latex())