In [1]:
import json

# Sanity check on the exported GNN predictions: for every learning problem in
# the file, print how many individuals are listed as both positive and
# negative. Any non-zero count means the two example sets overlap.
json_file_path = "configs/mutag_gnn_preds.json"
with open(json_file_path, mode="r", encoding="utf-8") as json_file:
    settings = json.load(json_file)

for str_target_concept, examples in settings.items():
    positive_examples = set(examples["positive_examples"])
    negative_examples = set(examples["negative_examples"])
    print(len(positive_examples & negative_examples))

0


In [2]:
import json

# Same sanity check for the learning problems stored under the "problems" key
# of configs/mutag.json: print, per problem, how many individuals appear in
# both the positive and the negative example set.
json_file_path = "configs/mutag.json"
with open(json_file_path, mode="r", encoding="utf-8") as json_file:
    settings = json.load(json_file)

for str_target_concept, examples in settings["problems"].items():
    positive_examples = set(examples["positive_examples"])
    negative_examples = set(examples["negative_examples"])
    print(len(positive_examples & negative_examples))

0


In [1]:
import json
import os
import shutil
import time

import numpy as np
import torch
import torch as th
import torch.nn.functional as F
from torch import nn

from src.dglnn_local.subgraphx import HeteroSubgraphX
from src.gnn_explainers.configs import get_configs
from src.gnn_explainers.dataset import RDFDatasets
from src.gnn_explainers.hetro_features import HeteroFeature
from src.gnn_explainers.model import RGCN
from src.gnn_explainers.trainer import train_gnn
from src.gnn_explainers.utils import get_nodes_dict

In [5]:
def get_lp_mutag(gnn_pred_dt, idx_map):
    """Turn binary GNN predictions into the single "carcino" learning problem.

    Args:
        gnn_pred_dt: Mapping from a key (also present in ``idx_map``) to the
            predicted class label.
        idx_map: Mapping from the same keys to a record with an ``"IRI"`` entry.

    Returns:
        ``{"carcino": {"positive_examples": [...], "negative_examples": [...]}}``
        holding the IRIs predicted as ``1`` (positive) and as ``0`` (negative),
        each in the iteration order of ``gnn_pred_dt``. Entries with any other
        label are ignored.
    """

    def iris_predicted_as(label):
        # Collect the IRIs of all entries whose prediction equals `label`.
        selected = []
        for node in gnn_pred_dt:
            if gnn_pred_dt[node] == label:
                selected.append(idx_map[node]["IRI"])
        return selected

    return {
        "carcino": {
            "positive_examples": iris_predicted_as(1),
            "negative_examples": iris_predicted_as(0),
        }
    }


def get_lp_mutag_train_test(gnn_pred_dt_train, gnn_pred_dt_test, idx_map):
    """Build the "carcino" learning problem with separate train/test splits.

    Args:
        gnn_pred_dt_train: Mapping from key to predicted label for the
            training entries.
        gnn_pred_dt_test: Mapping from key to predicted label for the test
            entries.
        idx_map: Mapping from those keys to a record with an ``"IRI"`` entry.

    Returns:
        ``{"carcino": {...}}`` with four IRI lists: positives (label ``1``) and
        negatives (label ``0``) for the train split, then for the test split.
        Entries with any other label are ignored.
    """

    def iris_with_label(predictions, label):
        # IRIs of the entries in `predictions` predicted as `label`, in order.
        matching = []
        for node in predictions:
            if predictions[node] == label:
                matching.append(idx_map[node]["IRI"])
        return matching

    carcino_problem = {
        "positive_examples_train": iris_with_label(gnn_pred_dt_train, 1),
        "negative_examples_train": iris_with_label(gnn_pred_dt_train, 0),
        "positive_examples_test": iris_with_label(gnn_pred_dt_test, 1),
        "negative_examples_test": iris_with_label(gnn_pred_dt_test, 0),
    }
    return {"carcino": carcino_problem}


def get_lp_aifb(gnn_pred_dt, idx_map):
    """Build one one-vs-rest learning problem per predicted class.

    Args:
        gnn_pred_dt: Mapping from a key (also present in ``idx_map``) to the
            predicted integer class label.
        idx_map: Mapping from the same keys to a record with an ``"IRI"`` entry.

    Returns:
        A dict keyed ``"id<class+1>instance"``. For each class the positives are
        the IRIs predicted as that class and the negatives are the IRIs
        predicted as any other class. Classes appear in order of first
        occurrence in ``gnn_pred_dt``.
    """
    # Group the keys by the class they were predicted as.
    nodes_by_class = {}
    for node, predicted in gnn_pred_dt.items():
        if predicted not in nodes_by_class:
            nodes_by_class[predicted] = []
        nodes_by_class[predicted].append(node)

    problems = {}
    for target, members in nodes_by_class.items():
        problem_name = f"id{target + 1}instance"
        positives = [idx_map[node]["IRI"] for node in members]
        negatives = []
        for other, other_members in nodes_by_class.items():
            if other != target:
                negatives.extend(idx_map[node]["IRI"] for node in other_members)
        problems[problem_name] = {
            "positive_examples": positives,
            "negative_examples": negatives,
        }

    return problems


def get_lp_aifb_train_test(gnn_pred_dt_train, gnn_pred_dt_test, idx_map):
    """Build one-vs-rest learning problems with separate train/test splits.

    Args:
        gnn_pred_dt_train: Mapping from key to predicted integer class for the
            training entries.
        gnn_pred_dt_test: Mapping from key to predicted integer class for the
            test entries.
        idx_map: Mapping from those keys to a record with an ``"IRI"`` entry.

    Returns:
        A dict keyed ``"id<class+1>instance"`` for every class predicted in
        either split. Each value holds four IRI lists: positives and negatives
        for the train split, then for the test split. A class missing from a
        split gets an empty positive list for that split.
    """

    def group_by_class(predictions):
        # class label -> keys predicted as that class, in encounter order
        grouped = {}
        for node, predicted in predictions.items():
            if predicted not in grouped:
                grouped[predicted] = []
            grouped[predicted].append(node)
        return grouped

    def to_iris(nodes):
        return [idx_map[node]["IRI"] for node in nodes]

    def iris_outside(grouped, target):
        # IRIs of every entry predicted as a class other than `target`.
        collected = []
        for cls, nodes in grouped.items():
            if cls != target:
                collected.extend(to_iris(nodes))
        return collected

    train_groups = group_by_class(gnn_pred_dt_train)
    test_groups = group_by_class(gnn_pred_dt_test)

    # Union of the classes: train classes first, then classes seen only in test.
    class_order = list(train_groups)
    class_order.extend(cls for cls in test_groups if cls not in train_groups)

    problems = {}
    for target in class_order:
        problem_name = f"id{target + 1}instance"
        problems[problem_name] = {
            "positive_examples_train": to_iris(train_groups.get(target, [])),
            "negative_examples_train": iris_outside(train_groups, target),
            "positive_examples_test": to_iris(test_groups.get(target, [])),
            "negative_examples_test": iris_outside(test_groups, target),
        }

    return problems

In [16]:
# Load (or train) the RGCN for `dataset`, predict class labels for the train
# and test nodes, and export those predictions as learning problems (JSON).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dataset = "mutag"
dataset = dataset.lower()

# Choose the learning-problem builders that belong to the selected dataset so
# that changing `dataset` above does not silently emit MUTAG-shaped problems.
lp_builders = {
    "mutag": (get_lp_mutag, get_lp_mutag_train_test),
    "aifb": (get_lp_aifb, get_lp_aifb_train_test),
}
if dataset not in lp_builders:
    raise ValueError(
        f"No learning-problem builder for dataset {dataset!r}; "
        f"expected one of {sorted(lp_builders)}"
    )
build_lp, build_lp_train_test = lp_builders[dataset]

# Hyper-parameters come from the per-dataset configuration.
configs = get_configs(dataset)
hidden_dim = configs["hidden_dim"]
num_bases = configs["n_bases"]
lr = configs["lr"]
weight_decay = configs["weight_decay"]
epochs = configs["max_epoch"]
validation = configs["validation"]
hidden_layers = configs["num_layers"] - 1
act = None

my_dataset = RDFDatasets(dataset, root="data/", validation=validation)
g = my_dataset.g.to(device)
out_dim = my_dataset.num_classes
e_types = g.etypes
category = my_dataset.category
train_idx = my_dataset.train_idx.to(device)
test_idx = my_dataset.test_idx.to(device)
labels = my_dataset.labels.to(device)

if validation:
    valid_idx = my_dataset.valid_idx.to(device)
    test_idx = torch.cat([test_idx, valid_idx], dim=0)

idx_map = my_dataset.idx_map
# pred_idx = torch.cat([train_idx, test_idx], dim=0)
pred_idx = test_idx
input_feature = HeteroFeature({}, get_nodes_dict(g), hidden_dim, act=act).to(device)
model = RGCN(
    hidden_dim,
    hidden_dim,
    out_dim,
    e_types,
    num_bases,
    category,
).to(device)

# Define the optimizer
optimizer = th.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

loss_fn = F.cross_entropy
# Register the input features as a sub-module and optimise their parameters
# too, so the model/optimizer layout matches what the checkpoint expects.
model.add_module("input_feature", input_feature)
optimizer.add_param_group({"params": input_feature.parameters()})

# Train only when no checkpoint exists yet.
PATH = f"trained_models/{dataset}_trained.pt"
if not os.path.isfile(PATH):
    train_gnn(dataset=dataset, PATH=PATH)

# map_location lets a checkpoint saved on GPU be restored on a CPU-only machine
# (`device` above falls back to CPU when CUDA is unavailable).
checkpoint = torch.load(PATH, map_location=device)
model.load_state_dict(checkpoint["model_state_dict"])
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
epoch = checkpoint["epoch"]
loss = checkpoint["loss"]

# Inference only: switch off training-mode layers (if the model has any).
model.eval()
feat = model.input_feature()

# NOTE(review): this resets test_idx to the plain test split and drops the
# validation indices merged in above when `validation` is set - confirm that
# predictions are meant to cover the test split only.
test_idx = my_dataset.test_idx.to(device)
labels = my_dataset.labels.to(device)
gt = labels[test_idx].tolist()
with torch.no_grad():  # no gradients needed to read off the predicted classes
    pred_logit = model(g, feat)[category]
gnn_preds_test = pred_logit[test_idx].argmax(dim=1).tolist()
gnn_preds_train = pred_logit[train_idx].argmax(dim=1).tolist()

# Node index -> predicted class, per split.
gnn_pred_dt_train = dict(zip(train_idx.tolist(), gnn_preds_train))
# Fix: test predictions must be keyed by test_idx. They were previously zipped
# with train_idx, which attached them to training nodes and truncated the
# result to the shorter of the two sequences.
gnn_pred_dt_test = dict(zip(test_idx.tolist(), gnn_preds_test))

lp_data = build_lp(gnn_pred_dt_test, idx_map)
lp_data_train_test = build_lp_train_test(gnn_pred_dt_train, gnn_pred_dt_test, idx_map)

# File path where you want to store the JSON data
file_path = f"configs/{dataset}_gnn_preds.json"
file_path_train_test = f"configs/{dataset}_gnn_preds_train_test.json"

# Writing the dictionary to a JSON file with indentation (utf-8, matching the
# encoding the reading cells use).
with open(file_path, "w", encoding="utf-8") as json_file:
    json.dump(lp_data, json_file, indent=4)

with open(file_path_train_test, "w", encoding="utf-8") as json_file:
    json.dump(lp_data_train_test, json_file, indent=4)

Done loading data from cached files.


In [15]:
# For every target concept, print its name followed by the number of distinct
# positive and distinct negative training examples.
for str_target_concept in lp_data_train_test:
    examples = lp_data_train_test[str_target_concept]
    print(str_target_concept)
    positive_examples = {*examples["positive_examples_train"]}
    print(len(positive_examples))
    negative_examples = {*examples["negative_examples_train"]}
    print(len(negative_examples))
    # print(len(positive_examples.intersection(negative_examples)))

id1instance
60
80
id2instance
48
92
id4instance
12
128
id3instance
20
120


In [13]:
# Distinct class labels the GNN predicted on the training nodes.
{*gnn_preds_train}

{0, 1, 2, 3}

In [4]:
from src.utils.create_lp import create_lp_aifb, create_lp_mutag

# Create the learning-problem files for both datasets. Per the cell output they
# are stored at configs/mutag.json and configs/aifb.json.
create_lp_mutag()
create_lp_aifb()

Learning Problem created for Mutag dataset and stored at configs/mutag.json
Learning Problem created for AIFB dataset and stored at configs/aifb.json


In [5]:
from src.logical_explainers.EvoLearner import train_evo, train_evo_fid

# Run both EvoLearner entry points with their default arguments.
# NOTE(review): presumably train_evo_fid is the variant trained against the GNN
# predictions (fidelity) - confirm in src/logical_explainers/EvoLearner.py.
train_evo()
train_evo_fid()

0
0
0
0
