### Fingerprint Example

In [None]:
import argparse

from ogb.graphproppred import DglGraphPropPredDataset, Evaluator

import numpy as np
from sklearn.ensemble import RandomForestClassifier

In [None]:
# import fingerprint
from gtrick import ogb2fp

### Define the Training Process

In [None]:
def run_graph_pred(args):
    """Fit random forests on fingerprint features and print their scores.

    Loads the dataset named by ``args.dataset`` from ``args.dataset_path``,
    obtains fingerprint features and labels through ``ogb2fp``, and trains a
    freshly constructed ``RandomForestClassifier`` for each of ``args.runs``
    runs on the training split. Every model is scored on the validation and
    test splits with the dataset's evaluator. Per-run scores are printed,
    followed by the mean and standard deviation over all runs.

    Args:
        args: Namespace providing ``dataset``, ``dataset_path``, ``runs``,
            ``min_samples_leaf`` and ``n_estimators``.

    Returns:
        None. Results are only printed.
    """
    dataset = DglGraphPropPredDataset(
        name=args.dataset, root=args.dataset_path)
    evaluator = Evaluator(name=args.dataset)

    # Fingerprint features and labels for the whole dataset.
    X, y = ogb2fp(args.dataset, root=args.dataset_path)

    # Slice features and labels with the dataset's own split indices.
    splits = dataset.get_idx_split()
    train_idx = splits["train"]
    val_idx = splits["valid"]
    test_idx = splits["test"]
    X_train, y_train = X[train_idx], y[train_idx]
    X_val, y_val = X[val_idx], y[val_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    def score(model, features, labels):
        """Return the dataset metric of ``model`` on one split."""
        # Column 1 of predict_proba, reshaped to a column vector before it is
        # handed to the evaluator.
        probabilities = model.predict_proba(features)[:, 1].reshape(-1, 1)
        result = evaluator.eval({'y_true': labels, 'y_pred': probabilities})
        return result[dataset.eval_metric]

    val_metrics = []
    test_metrics = []

    for run_number in range(1, args.runs + 1):
        print('\nRun {}'.format(run_number))

        # No random_state is passed, so each run builds a differently seeded
        # forest; class_weight gives label 1 ten times the weight of label 0.
        forest = RandomForestClassifier(
            n_estimators=args.n_estimators,
            min_samples_leaf=args.min_samples_leaf,
            criterion='entropy',
            class_weight={0: 1, 1: 10},
            n_jobs=-1,
        )
        # Labels are flattened before fitting.
        forest.fit(X_train, y_train.flatten())

        val_metric = score(forest, X_val, y_val)
        test_metric = score(forest, X_test, y_test)

        val_metrics.append(val_metric)
        test_metrics.append(test_metric)

        print(f'Valid: {val_metric:.4f}, Test: {test_metric:.4f}')
        print()

    # Summary across runs: mean ± standard deviation.
    print(f'Valid: {np.mean(val_metrics):.4f} ± {np.std(val_metrics):.4f}')
    print(f'Test: {np.mean(test_metrics):.4f} ± {np.std(test_metrics):.4f}')

### Run Experiment

In [None]:
# Command-line style configuration for the experiment.
parser = argparse.ArgumentParser(description='train graph property prediction')

# Options are registered in a fixed order from a (flag, keyword-arguments)
# table. `--device` is accepted, but run_graph_pred does not read it.
for flag, options in (
    ('--dataset', {'type': str, 'default': 'ogbg-molhiv',
                   'choices': ['ogbg-molhiv']}),
    ('--dataset_path', {'type': str, 'default': '/dev/dataset',
                        'help': 'path to dataset'}),
    ('--device', {'type': int, 'default': 0}),
    ('--min_samples_leaf', {'type': int, 'default': 2}),
    ('--n_estimators', {'type': int, 'default': 1000}),
    ('--runs', {'type': int, 'default': 3}),
):
    parser.add_argument(flag, **options)

# An explicit empty argument list makes argparse ignore sys.argv, so every
# option takes its default value.
args = parser.parse_args(args=[])
print(args)

# Train and evaluate with the parsed configuration.
run_graph_pred(args)