In [1]:
import os
import pickle
import sys
from copy import deepcopy

sys.path.append(os.path.join(os.path.dirname("__file__"), ".."))

import networkx as nx
import numpy as np
import torch
import torch_geometric
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid

# Cora citation graph, stored under ../data.
cora_dataset = Planetoid(name="Cora", root="../data")

# Toy 3-node graph (path 0 - 1 - 2): one scalar feature per node, and every
# undirected edge listed in both directions as a column of `edge_index`.
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)
edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]], dtype=torch.long)

# Optional visual checks, left disabled:
# g = torch_geometric.utils.to_networkx(cora_dataset.data, to_undirected=True)
# nx.draw(g, node_size=100)
#
# g = torch_geometric.utils.to_networkx(T.LargestConnectedComponents()(cora_dataset.data))
# nx.draw(g, node_size=100)

In [2]:
def get_data(
    dataset_name,
    train_fraction=1,
    lcc=False,
    added_edge_fraction=0,
    feature_noise_ratio=0,
    **kwargs
):
    """Load a Planetoid dataset and optionally perturb its graph.

    Args:
        dataset_name: Name of the dataset. One of "Cora", "Pubmed", "citeseer".
        train_fraction: Fraction of training labels preserved for the training set.
            Any value other than 1 loads a precomputed mask from
            "../attack_data/<dataset_name>/train_mask_tr_<train_fraction>_seed_<seed % 10>.p"
            and therefore requires ``seed`` in ``kwargs``.
        lcc: If True, keep only the largest connected component of the graph.
        added_edge_fraction: Fraction of added (or deleted) random edges. Use a
            positive (negative) number for randomly adding (deleting) edges.
        feature_noise_ratio: Noise ratio for the additive independent Gaussian
            noise on the features. The noise is scaled by the mean of the
            per-row maxima of the feature matrix.
        **kwargs: Optional extras.
            seed (int): selects which precomputed train mask to load
                (used modulo 10); only read when ``train_fraction != 1``.
            attacked_nodes: when present, previously saved attacked node ids
                are loaded from "../attack_data/<dataset_name>/test-node.pkl".
                The value itself is not used.

    Returns:
        The Planetoid dataset object, with an extra ``loss`` attribute set to
        "softmax".

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names.
        KeyError: If ``train_fraction != 1`` and no ``seed`` was passed.
        OSError: If a required mask / node-id file cannot be opened.
    """
    data_path = os.path.join(
        os.path.dirname(os.path.realpath("__file__")), "..", "data"
    )

    # Load data:
    if dataset_name not in ("Cora", "Pubmed", "citeseer"):
        raise ValueError("data_type {} is not valid!".format(dataset_name))
    dataset = Planetoid(
        root=data_path, name=dataset_name, transform=T.NormalizeFeatures()
    )
    dataset.loss = "softmax"

    # NOTE(review): every step below reads or writes through `dataset[0]`, which is
    # re-evaluated on each statement. If indexing yields a fresh object each time
    # (as PyG datasets appear to do) and the dataset defines no item assignment,
    # the `dataset[0] = ...` lines will raise and the attribute writes will be
    # silently lost. Confirm against the installed torch_geometric version and,
    # if so, persist the modified Data object explicitly.

    # work with lcc only
    if lcc:
        dataset[0] = T.LargestConnectedComponents()(dataset[0])

    # Reduce the number of training examples by loading a precomputed subset
    # of the original training mask:
    if train_fraction != 1:
        train_mask_file = "../attack_data/{}/train_mask_tr_{}_seed_{}.p".format(
            dataset_name, train_fraction, kwargs["seed"] % 10
        )
        # NOTE: unpickling executes arbitrary code; only load trusted mask files.
        with open(train_mask_file, "rb") as f:
            new_train_mask = pickle.load(f)
        dataset[0].train_mask = torch.BoolTensor(new_train_mask).to(
            dataset[0].y.device
        )
        print("Load train_mask at {}".format(train_mask_file))

    # Add random edges for untargeted attacks:
    if added_edge_fraction > 0:
        dataset[0] = add_random_edge(
            dataset[0], added_edge_fraction=added_edge_fraction
        )
    elif added_edge_fraction < 0:
        dataset[0] = remove_edge_random(
            dataset[0], remove_edge_fraction=-added_edge_fraction
        )

    # Perturb features for untargeted attacks:
    if feature_noise_ratio > 0:
        x_max_mean = dataset[0].x.max(1)[0].mean()
        dataset[0].x = (
            dataset[0].x
            + torch.randn(dataset[0].x.shape) * x_max_mean * feature_noise_ratio
        )

    # For adversarial attacks:
    dataset[0].dataset_name = dataset_name
    if "attacked_nodes" in kwargs:
        attack_path = os.path.join(
            os.path.dirname(os.path.realpath("__file__")),
            "..",
            "attack_data",
            dataset_name,
        )
        os.makedirs(attack_path, exist_ok=True)
        # A missing or unreadable file propagates to the caller. The former
        # fallback that sampled and saved fresh node ids sat after a bare
        # `raise`, so it never ran, and it referenced names that are not
        # defined in this notebook.
        with open(os.path.join(attack_path, "test-node.pkl"), "rb") as f:
            node_ids = pickle.load(f)
        dataset[0].node_ids = node_ids
        print("Load previous attacked node_ids saved in {}.".format(attack_path))
    return dataset


def remove_edge_random(data, remove_edge_fraction):
    """Randomly remove a certain fraction of edges.

    The number of undirected edges is taken to be half the number of columns
    of ``data.edge_index``. Each removal pops one randomly chosen directed
    edge together with its reverse, when the reverse is present.

    Args:
        data: Object exposing a ``(2, E)`` integer tensor ``edge_index``.
            It is deep-copied, not modified.
        remove_edge_fraction: Fraction of the undirected edges to remove.

    Returns:
        A deep copy of ``data`` whose ``edge_index`` holds the surviving edges,
        on the same device as the input. The shape is ``(2, 0)`` when every
        edge has been removed.
    """
    data_c = deepcopy(data)
    num_edges = int(data_c.edge_index.shape[1] / 2)
    num_removed_edges = int(num_edges * remove_edge_fraction)
    # Go through CPU so that CUDA tensors can be converted as well.
    edges = [tuple(pair) for pair in data_c.edge_index.detach().cpu().t().tolist()]
    for _ in range(num_removed_edges):
        if not edges:
            # Nothing left to remove (e.g. the fraction was above 1).
            break
        idx = np.random.choice(len(edges))
        src, dst = edges.pop(idx)
        try:
            edges.remove((dst, src))
        except ValueError:
            # The reverse direction was not stored; only one direction is dropped.
            pass
    # reshape(-1, 2) keeps a well-formed (2, 0) result when no edge survives.
    data_c.edge_index = (
        torch.tensor(edges, dtype=torch.long, device=data.edge_index.device)
        .reshape(-1, 2)
        .t()
        .contiguous()
    )
    return data_c


def add_random_edge(data, added_edge_fraction=0):
    """Add random edges to the original data's edge_index.

    The number of undirected edges is taken to be half the number of columns
    of ``data.edge_index``. Each new edge connects two distinct nodes that are
    not yet connected in either direction, and is appended in both directions.

    Args:
        data: Object exposing a ``(2, E)`` integer tensor ``edge_index`` and a
            tensor ``x`` whose first dimension is the number of nodes.
        added_edge_fraction: Number of edges to add, as a fraction of the
            existing undirected edges.

    Returns:
        ``data`` itself when ``added_edge_fraction == 0``; otherwise a deep copy
        whose ``edge_index`` is the original one followed by the new edges.

    Raises:
        ValueError: If more edges are requested than there are unconnected
            node pairs (the sampling loop could otherwise never finish).
    """
    if added_edge_fraction == 0:
        return data
    data_c = deepcopy(data)
    num_edges = int(data.edge_index.shape[1] / 2)
    num_added_edges = int(num_edges * added_edge_fraction)
    if num_added_edges <= 0:
        # The fraction rounds down to no edge at all: nothing to append.
        return data_c

    num_nodes = data.x.shape[0]
    # Unordered node pairs that are already connected; a set gives O(1) lookups.
    taken = {
        frozenset(pair)
        for pair in data.edge_index.detach().cpu().t().tolist()
        if pair[0] != pair[1]
    }
    free_pairs = num_nodes * (num_nodes - 1) // 2 - len(taken)
    if num_added_edges > free_pairs:
        raise ValueError(
            "Cannot add {} edges: only {} unconnected node pairs exist.".format(
                num_added_edges, free_pairs
            )
        )

    added_edges = []
    for _ in range(num_added_edges):
        while True:
            src, dst = (
                int(v) for v in np.random.choice(num_nodes, size=2, replace=False)
            )
            pair = frozenset((src, dst))
            if pair in taken:
                continue
            taken.add(pair)
            added_edges.append((src, dst))
            added_edges.append((dst, src))
            break

    added_edge_index = torch.tensor(
        added_edges, dtype=torch.long, device=data.edge_index.device
    ).t()
    data_c.edge_index = torch.cat([data.edge_index, added_edge_index], 1)
    return data_c


def get_edge_corrupted_data(data, corrupt_fraction, is_original_included=True):
    """Replace a fraction of the edges by random edges.

    Args:
        data: PyG data instance (needs a ``(2, E)`` ``edge_index`` and a node
            feature tensor ``x``). It is deep-copied, not modified.
        corrupt_fraction: fraction of edges being removed and then the
            corresponding random edge added.
        is_original_included: if True, the removed original edges may be drawn
            again as random edges; if False, no edge of the original graph
            (kept or removed) can be drawn.

    Returns:
        data_edge_corrupted: new data instance where the edge is replaced by
        random edges. Its ``edge_index`` lists the new edges first, then the
        surviving original edges.

    Raises:
        ValueError: If there are fewer admissible node pairs than edges to add
            (the sampling loop could otherwise never finish).
    """
    data_edge_corrupted = deepcopy(data)
    num_edges = int(data.edge_index.shape[1] / 2)
    num_corrupted_edges = int(num_edges * corrupt_fraction)
    edges = [tuple(pair) for pair in data.edge_index.detach().cpu().t().tolist()]
    # Snapshot taken before any removal: `edges` shrinks in the loop below.
    original_edges = set(edges)
    num_nodes = data.x.shape[0]

    # Remove edges:
    removed_edges = set()
    num_removed = 0
    for _ in range(num_corrupted_edges):
        if not edges:
            break
        idx = np.random.choice(len(edges))
        src, dst = edges.pop(idx)
        try:
            edges.remove((dst, src))
        except ValueError:
            # The reverse direction was not stored.
            pass
        removed_edges.update(((src, dst), (dst, src)))
        num_removed += 1

    # Setting up excluded edges when adding:
    remaining_edges = [
        edge for edge in dict.fromkeys(edges) if edge not in removed_edges
    ]
    if is_original_included:
        edges_exclude = set(remaining_edges)
    else:
        # Exclude the whole original graph, including what was just removed.
        edges_exclude = original_edges | removed_edges

    taken = {frozenset(edge) for edge in edges_exclude if edge[0] != edge[1]}
    free_pairs = num_nodes * (num_nodes - 1) // 2 - len(taken)
    if num_removed > free_pairs:
        raise ValueError(
            "Cannot add {} edges: only {} admissible node pairs exist.".format(
                num_removed, free_pairs
            )
        )

    # Add edges:
    added_edges = []
    for _ in range(num_removed):
        while True:
            src, dst = (
                int(v) for v in np.random.choice(num_nodes, size=2, replace=False)
            )
            pair = frozenset((src, dst))
            if pair in taken:
                continue
            taken.add(pair)
            added_edges.append((src, dst))
            added_edges.append((dst, src))
            break

    # reshape(-1, 2) keeps a well-formed (2, 0) result for an empty edge list.
    data_edge_corrupted.edge_index = (
        torch.tensor(
            added_edges + remaining_edges,
            dtype=torch.long,
            device=data.edge_index.device,
        )
        .reshape(-1, 2)
        .t()
        .contiguous()
    )
    return data_edge_corrupted

In [3]:
# Load Cora with every perturbation option left at its default.
dataset = get_data(dataset_name="Cora")
dataset


Cora()

In [4]:
from models.gat import GIBGAT
from models.gcn import GIBGCN

from modules.train import train_node_level

In [5]:
class Config(dict):
    """Dictionary whose entries can also be read and written as attributes.

    The instance is installed as its own ``__dict__``, so ``cfg.key`` and
    ``cfg["key"]`` always refer to the same entry.
    """

    def __init__(self, *args, **kwargs):
        # Accepts exactly what ``dict`` accepts (mapping, iterable of pairs, keywords).
        dict.__init__(self, *args, **kwargs)
        self.__dict__ = self


# Experiment configuration for GIBGCN on Cora. The model is sized from the
# loaded dataset, and the loss type is read from the attribute `get_data` set.
conf_GIBGCN = Config(
    {
        "exp_name": "GIBGCN_CORA",
        "model": GIBGCN(dataset.num_features, dataset.num_classes, latent_size=8),
        "model_name": "GIBGCN_CORA",
        "dataset_name": "Cora",
        "lr": 0.03,
        "weight_decay": 5e-4,
        "beta1": 0.001,
        "beta2": 0.01,
        "CHECKPOINT_PATH": "../saved_models",
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "loss_type": dataset.loss,
    }
)
conf_GIBGCN

{'exp_name': 'GIBGCN_CORA',
 'model': GIBGCN(
   (conv1): GCNConv(1433, 8)
   (conv2): GCNConv(8, 7)
 ),
 'model_name': 'GIBGCN_CORA',
 'dataset_name': 'Cora',
 'lr': 0.03,
 'weight_decay': 0.0005,
 'beta1': 0.001,
 'beta2': 0.01,
 'CHECKPOINT_PATH': '../saved_models',
 'device': device(type='cuda'),
 'loss_type': 'softmax'}

In [6]:
# Train GIBGCN on Cora. The call returns a pair (presumably the trained
# Lightning module and a results dict; confirm against modules.train).
pl_GIBGCN, result_GIBGCN = train_node_level(conf_GIBGCN, dataset)

Global seed set to 42
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mstepp1[0m. Use [1m`wandb login --relogin`[0m to force relogin


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Global seed set to 42
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type   | Params
---------------------------------
0 | model | GIBGCN | 23.1 K
---------------------------------
23.1 K    Trainable params
0         Non-trainable params
23.1 K    Total params
0.092     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

TypeError: Unwrapping the module did not yield a `LightningModule`, got <class 'models.gcn.GIBGCN'> instead.

In [None]:
# Helper that prints the accuracy of each available split as a percentage.
def print_results(result_dict):
    """Print train/val accuracy when present, and always the test accuracy.

    Args:
        result_dict: Mapping from split name ("train", "val", "test") to an
            accuracy in [0, 1]. The "test" entry is required.

    Raises:
        KeyError: If ``result_dict`` has no "test" entry.
    """
    optional_templates = {
        "train": "Train accuracy: %4.2f%%",
        "val": "Val accuracy:   %4.2f%%",
    }
    for split, template in optional_templates.items():
        if split in result_dict:
            print(template % (100.0 * result_dict[split]))
    print("Test accuracy:  %4.2f%%" % (100.0 * result_dict["test"]))
    
# `result` is never defined in this notebook; the training cell binds its
# results to `result_GIBGCN`.
print_results(result_GIBGCN)

In [None]:
# (Re)load the TensorBoard notebook extension, then open TensorBoard on the
# logs under lightning_logs/ (path is relative to the notebook's working directory).
%reload_ext tensorboard
%tensorboard --logdir lightning_logs/