In [1]:
import sys
from GPT_GNN.data import *
from GPT_GNN.model import *
from warnings import filterwarnings
filterwarnings("ignore")

import argparse

In [None]:
# Build the command-line / notebook configuration for the pre-training run.
# Every option has a default, so parsing an empty argument list yields a
# complete configuration.
parser = argparse.ArgumentParser(description='Pre-training HGT on a given graph (heterogeneous / homogeneous)')

# ---------------------------------------------------------------------------
# GPT-GNN arguments
# ---------------------------------------------------------------------------
parser.add_argument('--attr_ratio', type=float, default=0.5,
                    help='Ratio of attr-loss against link-loss, range: [0-1]')
parser.add_argument('--attr_type', type=str, default='text',
                    choices=['text', 'vec'],
                    help='The type of attribute decoder')
parser.add_argument('--neg_samp_num', type=int, default=255,
                    help='Maximum number of negative sample for each target node.')
parser.add_argument('--queue_size', type=int, default=256,
                    help='Max size of adaptive embedding queue.')
# NOTE(review): this help text is identical to the one on --data_dir and looks
# like a copy-paste; presumably this option points at word2vec data — confirm
# and reword.
parser.add_argument('--w2v_dir', type=str, default='/datadrive/dataset/w2v_all',
                    help='The address of preprocessed graph.')

# ---------------------------------------------------------------------------
# Dataset arguments
# ---------------------------------------------------------------------------
parser.add_argument('--data_dir', type=str, default='./processed_OAG/graph_CS.pk',
                    help='The address of preprocessed graph.')
parser.add_argument('--pretrain_model_dir', type=str, default='./Pretrained_model',
                    help='The address for storing the models and optimization results.')
parser.add_argument('--cuda', type=int, default=-1,
                    help='Avaiable GPU ID')
parser.add_argument('--sample_depth', type=int, default=6,
                    help='How many layers within a mini-batch subgraph')
parser.add_argument('--sample_width', type=int, default=128,
                    help='How many nodes to be sampled per layer per type')

# ---------------------------------------------------------------------------
# Model arguments
# ---------------------------------------------------------------------------
parser.add_argument('--conv_name', type=str, default='hgt',
                    choices=['hgt', 'gcn', 'gat', 'rgcn', 'han', 'hetgnn'],
                    help='The name of GNN filter. By default is Heterogeneous Graph Transformer (hgt)')
parser.add_argument('--n_hid', type=int, default=400,
                    help='Number of hidden dimension')
parser.add_argument('--n_heads', type=int, default=8,
                    help='Number of attention head')
parser.add_argument('--n_layers', type=int, default=3,
                    help='Number of GNN layers')
# NOTE(review): both flags are `store_true` with default=True, so passing them
# changes nothing and there is no way to disable either norm from the command
# line. Left as-is to keep the existing interface; consider a --no-* variant.
parser.add_argument('--prev_norm', help='Whether to add layer-norm on the previous layers', action='store_true', default=True)
parser.add_argument('--last_norm', help='Whether to add layer-norm on the last layers',     action='store_true', default=True)
# Dropout is a ratio (default 0.2), so it must be parsed as a float; with
# type=int a value such as `--dropout 0.3` was rejected by argparse.
parser.add_argument('--dropout', type=float, default=0.2,
                    help='Dropout ratio')

# ---------------------------------------------------------------------------
# Optimization arguments
# ---------------------------------------------------------------------------
parser.add_argument('--max_lr', type=float, default=1e-3,
                    help='Maximum learning rate.')
parser.add_argument('--scheduler', type=str, default='cycle',
                    help='Name of learning rate scheduler.' , choices=['cycle', 'cosine'])
parser.add_argument('--n_epoch', type=int, default=20,
                    help='Number of epoch to run')
parser.add_argument('--n_pool', type=int, default=8,
                    help='Number of process to sample subgraph')
parser.add_argument('--n_batch', type=int, default=32,
                    help='Number of batch (sampled graphs) for each epoch')
parser.add_argument('--batch_size', type=int, default=256,
                    help='Number of output nodes for training')
parser.add_argument('--clip', type=float, default=0.5,
                    help='Gradient Norm Clipping')


# Inside a Jupyter kernel sys.argv carries the launcher's own arguments
# (e.g. `-f <connection-file>`), which a strict parse_args() rejects with
# SystemExit. parse_known_args() still applies any recognised options but
# tolerates the extras, so this cell runs both in a notebook and as a script.
args, _unrecognized_argv = parser.parse_known_args()

In [None]:
# Hand the parsed configuration to `args_print`.
# NOTE(review): `args_print` is not defined in this notebook — presumably it comes
# from one of the GPT_GNN star-imports and echoes each option/value; confirm.
args_print(args)