In [None]:
# Load IPython's autoreload extension and enable mode 2 so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Set JOBLIB_TEMP_FOLDER for this kernel's environment
# (presumably so joblib writes its temporary files under /tmp — TODO confirm).
%env JOBLIB_TEMP_FOLDER=/tmp

In [None]:
# Show the kernel's working directory; the relative '..' paths used in this
# notebook are resolved against it.
!pwd

In [None]:
import sys

# Make the parent directory importable. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate '..' entry each time.
if '..' not in sys.path:
    sys.path.append('..')

In [None]:
import sys

# True when the 'google.colab' package is already loaded in this interpreter.
IN_COLAB = 'google.colab' in sys.modules

# Repository root relative to this notebook. The former
# `'..' if IN_COLAB else '..'` selected the same value on both branches, so the
# no-op conditional is dropped; branch on IN_COLAB here if the Colab layout
# ever needs a different location.
REPO_DIR = '..'

# Code

In [None]:
import os
import itertools
import collections
import tqdm.auto as tqdm
import time
import gc 
from IPython.display import display

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import random
import pickle

import tensorflow as tf
import sonnet as snt
import graph_nets
from graph_nets.graphs import GraphsTuple
import graph_attribution as gatt

# Silence every UserWarning. The one this targets comes from tf/graph_nets:
#   "Converting sparse IndexedSlices to a dense Tensor of unknown shape"
import warnings
warnings.simplefilter("ignore", UserWarning)

# Report the versions of the core libraries this run is using.
for mod in (tf, snt, gatt):
    print(f'{mod.__name__:20s} = {mod.__version__}')

## Graph Attribution specific imports

In [None]:
from graph_attribution import tasks
from graph_attribution import graphnet_models as gnn_models
from graph_attribution import graphnet_techniques as techniques
from graph_attribution import datasets
from graph_attribution import experiments
from graph_attribution import templates
from graph_attribution import graphs as graph_utils

# Override the data location on the `datasets` module with <REPO_DIR>/data,
# then echo the value that was actually stored.
data_root = os.path.join(REPO_DIR, 'data')
datasets.DATA_DIR = data_root
print(f'Reading data from: {datasets.DATA_DIR}')

# Load Experiment data, a task and attribution techniques

In [None]:
# List the task names, model block types and attribution techniques that the
# graph_attribution package exposes (each is computed, then printed, in turn).
task_names = [t.name for t in tasks.Task]
print(f'Available tasks: {task_names}')

block_type_names = [m.name for m in gnn_models.BlockType]
print(f'Available model types: {block_type_names}')

technique_names = list(techniques.get_techniques_dict(None, None).keys())
print(f'Available ATT techniques: {technique_names}')

In [None]:
def train_and_evaluate(
        task_type,
        block_type
):
    """Train one GNN for a task and run every attribution method on it.

    The experiment (data splits, task object, attribution methods) comes from
    ``experiments.get_experiment_setup`` and the hyper-parameters from
    ``gatt.hparams.get_hparams``. An ``experiments.GNN`` is trained for
    ``hp.epochs`` epochs with Adam, after which
    ``experiments.generate_result`` is called once per attribution method.

    Args:
        task_type: Task identifier forwarded to ``datasets``, ``experiments``
            and ``gatt.hparams`` (this notebook passes names like 'benzene').
        block_type: Model block identifier forwarded to the same helpers
            (this notebook passes names like 'gcn').

    Returns:
        A ``(results, losses)`` tuple. ``results`` is a ``pd.DataFrame`` built
        from the list of per-method ``generate_result`` outputs. ``losses`` is
        a dict with keys ``'train'`` and ``'test'``, each a NumPy array with
        one loss value per epoch.
    """
    # The returned directory is not used below; the call itself is retained in
    # case it validates `task_type` or has side effects — TODO confirm and drop.
    datasets.get_task_dir(task_type)
    exp, task, methods = experiments.get_experiment_setup(task_type, block_type)
    task_act, task_loss = task.get_nn_activation_fn(), task.get_nn_loss_fn()
    graph_utils.print_graphs_tuple(exp.x_train)

    hp = gatt.hparams.get_hparams({'block_type': block_type, 'task_type': task_type})

    model = experiments.GNN(node_size=hp.node_size,
                            edge_size=hp.edge_size,
                            global_size=hp.global_size,
                            y_output_size=task.n_outputs,
                            block_type=gnn_models.BlockType(hp.block_type),
                            activation=task_act,
                            target_type=task.target_type,
                            n_layers=hp.n_layers)
    # Forward pass before training; presumably this lets Sonnet create the
    # model variables ahead of building the optimisation step — TODO confirm.
    model(exp.x_train)

    optimizer = snt.optimizers.Adam(hp.learning_rate)
    opt_one_epoch = gatt.training.make_tf_opt_epoch_fn(
        exp.x_train, exp.y_train, hp.batch_size, model, optimizer, task_loss)

    pbar = tqdm.tqdm(range(hp.epochs))
    losses = collections.defaultdict(list)
    for _ in pbar:
        train_loss = opt_one_epoch(exp.x_train, exp.y_train).numpy()
        losses['train'].append(train_loss)
        # One test-set forward pass per epoch (the earlier code ran it twice
        # and discarded the first result).
        test_pred = model(exp.x_test)[0]
        losses['test'].append(task_loss(exp.y_test, test_pred).numpy())
        pbar.set_postfix({key: values[-1] for key, values in losses.items()})

    losses = {key: np.array(values) for key, values in losses.items()}

    # Cap the training graphs handed to the attribution step at this many.
    max_train_graphs = 1000
    n_graphs = int(graph_nets.utils_tf.get_num_graphs(exp.x_train))

    if n_graphs >= max_train_graphs:
        # Sample from range(n_graphs) so every graph, including the last one,
        # is eligible. The earlier range(0, n_graphs - 1) excluded the last
        # index and raised ValueError when there were exactly 1000 graphs.
        # Note: the sample is unseeded, so it differs between runs.
        indexes = np.array(sorted(random.sample(range(n_graphs), max_train_graphs)))
        train_data = graph_utils.get_graphs_tf(exp.x_train, indexes)
        print("Sampling is done!")
    else:
        train_data = exp.x_train

    results = []
    for method in tqdm.tqdm(methods.values(), total=len(methods)):
        results.append(experiments.generate_result(
            model, method, task, exp.x_test, exp.y_test, exp.att_test, train_data, 0.01))

    # Release TF/Keras state and the model before the next run in the sweep.
    tf.keras.backend.clear_session()
    del model
    gc.collect()
    return pd.DataFrame(results), losses

In [None]:
from itertools import product

In [None]:
# Debug aid: display the absolute path that the relative name 'datasets'
# resolves to from the current working directory.
os.path.abspath('datasets')

In [None]:
# Sweep every (block type, task, trial) combination and collect the outputs.
# `results` and `losses` are keyed by the (block_type, task_type, trial) tuple.
results = {}
losses = {}
task_types = ['benzene', 'logic7', 'logic8', 'logic10', 'crippen']
block_types = ['gcn', 'gat', 'mpnn', 'graphnet']
n_trials = 5

# The loop variables use their own names so they do not overwrite the lists
# they iterate over; with the old shared names, re-running this cell iterated
# over the characters of the last string and then failed on the int `run`.
for block_type, task_type, trial in product(block_types, task_types, range(n_trials)):
    print((block_type, task_type, trial))
    # Any exception propagates unchanged (the old try/except only re-raised it).
    result, loss = train_and_evaluate(
        task_type=task_type,
        block_type=block_type
    )
    results[(block_type, task_type, trial)] = result
    losses[(block_type, task_type, trial)] = loss
    print(result)

In [None]:
import pickle

# Persist the per-run attribution results (only `results`; `losses` is not
# written here) as a single pickle under ../data.
# NOTE(review): the file name says "gan" while the block types swept above are
# gcn/gat/mpnn/graphnet — likely a typo for "gat"; left unchanged because other
# code may read this exact file name.
results_path = f'../data/egsar_results_gcn&gan&mpnn&graphnet({n_trials}_trials).pickle'
with open(results_path, 'wb') as out_file:
    pickle.dump(results, out_file, pickle.HIGHEST_PROTOCOL)

In [None]:
# Completion marker: printed once this final cell of the notebook is reached.
print("Done!")