In [1]:
import networkx as nx
import numpy as np
import pandas as pd

In [2]:
import torch

In [3]:
import os
from sys import platform

# Move the working directory two levels up (presumably to the project root
# that holds the `src` package imported right after this — confirm).
# os.path.dirname understands the platform's own separator and drive roots,
# so no per-platform branch or manual string splitting is needed, and a
# shallow path no longer collapses to an empty string.
# NOTE: the move is relative to the current directory, so re-running this
# cell climbs two more levels each time.
os.chdir(os.path.dirname(os.path.dirname(os.getcwd())))
from src import *

comet_ml is installed but `COMET_API_KEY` is not set.


In [4]:
from ogb.nodeproppred import PygNodePropPredDataset

In [5]:
def _load_past_results(csv_path, columns):
    """Read previously saved results, or start an empty table.

    Args:
        csv_path: Path of the CSV file holding earlier results.
        columns: Column names for the empty fallback table.

    Returns:
        The DataFrame read from ``csv_path``; if the file does not exist or
        is completely empty, an empty DataFrame with ``columns``.

    Raises:
        Any other error from ``pd.read_csv`` (e.g. a malformed file). Such a
        file is overwritten at the end of the notebook, so silently treating
        it as "no past results" would discard the earlier results.
    """
    try:
        return pd.read_csv(csv_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        return pd.DataFrame(columns=columns)


past_nc_res_df = _load_past_results(
    './results/node_class.csv',
    ['Acc','Std','Max','Runs','Embed','PCA','Reg','Dataset','Agg'],
)

past_ep_res_df = _load_past_results(
    './results/edge_pred.csv',
    ['AUC','Std','Max','Runs','Embed','PCA','Reg','Dataset','Agg'],
)

In [6]:
# Column-oriented accumulator for edge-prediction summaries: every key owns
# its own list, and run_edge appends one value per key for each recorded row.
ep_results_dict = {
    column: []
    for column in ('AUC', 'Std', 'Max', 'Runs', 'Embed', 'PCA', 'Reg', 'Dataset', 'Agg')
}

In [7]:
# Column-oriented accumulator for node-classification summaries: every key
# owns its own list, and run_node appends one value per key for each recorded row.
nc_results_dict = {
    column: []
    for column in ('Acc', 'Std', 'Max', 'Runs', 'Embed', 'PCA', 'Reg', 'Dataset', 'Agg')
}

In [8]:
def run_edge(dataset, num_runs, features, pca, reg):
    """Run the edge-prediction experiment and record its summary statistics.

    All five arguments are forwarded to ``run_edge_prediction`` and are also
    stored verbatim in the ``Dataset``, ``Runs``, ``Embed``, ``PCA`` and
    ``Reg`` columns of the module-level ``ep_results_dict``.

    The returned results are loaded into a DataFrame (presumably one column
    per aggregator and one row per run — confirm against
    ``run_edge_prediction``). Its column-wise mean, standard deviation and
    maximum are displayed, and for each of the aggregators ``'mean'``,
    ``'gcn'``, ``'eve'`` and ``'pool'`` present in the results one row is
    appended to ``ep_results_dict`` (mean under ``'AUC'``).

    Returns:
        None. The function works through display output and by mutating
        ``ep_results_dict``.
    """
    edge_results = run_edge_prediction(dataset, num_runs, features=features, pca=pca, reg=reg)

    df = pd.DataFrame(edge_results)

    # Compute each summary once; it is both displayed and recorded below.
    means = df.mean()
    stds = df.std()
    maxes = df.max()

    display(means)
    display(stds)
    display(maxes)

    for agg in ['mean', 'gcn', 'eve', 'pool']:
        # An aggregator absent from the results is skipped on purpose. The
        # check runs before any append, so the column lists always stay the
        # same length, and unrelated errors are no longer silently swallowed.
        if not all(agg in stat.index for stat in (means, stds, maxes)):
            continue

        ep_results_dict['AUC'].append(means[agg])
        ep_results_dict['Std'].append(stds[agg])
        ep_results_dict['Max'].append(maxes[agg])
        ep_results_dict['Runs'].append(num_runs)
        ep_results_dict['Embed'].append(features)
        ep_results_dict['PCA'].append(pca)
        ep_results_dict['Reg'].append(reg)
        ep_results_dict['Dataset'].append(dataset)
        ep_results_dict['Agg'].append(agg)

In [9]:
def run_node(dataset, num_runs, features, pca, reg):
    """Run the node-classification experiment and record its summary statistics.

    All five arguments are forwarded to ``run_node_classification`` and are
    also stored verbatim in the ``Dataset``, ``Runs``, ``Embed``, ``PCA`` and
    ``Reg`` columns of the module-level ``nc_results_dict``.

    The returned results are loaded into a DataFrame (presumably one column
    per aggregator and one row per run — confirm against
    ``run_node_classification``). Its column-wise mean, standard deviation
    and maximum are displayed, and for each of the aggregators ``'mean'``,
    ``'gcn'``, ``'eve'`` and ``'pool'`` present in the results one row is
    appended to ``nc_results_dict`` (mean under ``'Acc'``).

    Returns:
        None. The function works through display output and by mutating
        ``nc_results_dict``.
    """
    node_results = run_node_classification(dataset, num_runs, features=features, pca=pca, reg=reg)

    df = pd.DataFrame(node_results)

    # Compute each summary once; it is both displayed and recorded below.
    means = df.mean()
    stds = df.std()
    maxes = df.max()

    display(means)
    display(stds)
    display(maxes)

    for agg in ['mean', 'gcn', 'eve', 'pool']:
        # An aggregator absent from the results is skipped on purpose. The
        # check runs before any append, so the column lists always stay the
        # same length, and unrelated errors are no longer silently swallowed.
        if not all(agg in stat.index for stat in (means, stds, maxes)):
            continue

        nc_results_dict['Acc'].append(means[agg])
        nc_results_dict['Std'].append(stds[agg])
        nc_results_dict['Max'].append(maxes[agg])
        nc_results_dict['Runs'].append(num_runs)
        nc_results_dict['Embed'].append(features)
        nc_results_dict['PCA'].append(pca)
        nc_results_dict['Reg'].append(reg)
        nc_results_dict['Dataset'].append(dataset)
        nc_results_dict['Agg'].append(agg)

# Edge Prediction

In [10]:
# Load the dataset object with use_text=False; the helper returns a pair and
# the second element is discarded here.
# NOTE(review): binding `_` at top level shadows IPython's "last output"
# variable for the rest of the session — confirm this is acceptable.
data,_ = get_raw_text_arxiv(use_text=False)

In [11]:
# Display the loaded object's repr as a quick check of its fields and sizes.
data

Data(num_nodes=169343, edge_index=[2, 1166243], x=[169343, 128], node_year=[169343, 1], y=[169343], train_mask=[169343], val_mask=[169343], test_mask=[169343])

# Node Classification

In [12]:
# Node classification on 'ogbn' with the 'default' features:
# 3 runs, PCA disabled, reg set to 0.
kwargs = dict(
    dataset='ogbn',
    num_runs=3,
    features='default',
    pca=False,
    reg=0,
)

run_node(**kwargs)

Running on: cuda
torch.Size([169343, 128])


100%|██████████| 3/3 [00:17<00:00,  5.84s/it]
100%|██████████| 3/3 [00:04<00:00,  1.38s/it]
100%|██████████| 3/3 [00:26<00:00,  8.75s/it]
100%|██████████| 3/3 [00:09<00:00,  3.18s/it]


mean    0.502877
gcn     0.511786
eve     0.430412
pool    0.488207
dtype: float32

mean    0.006232
gcn     0.004039
eve     0.015020
pool    0.005441
dtype: float32

mean    0.507026
gcn     0.516203
eve     0.441804
pool    0.494023
dtype: float32

In [13]:
# Node classification on 'ogbn' with the 'LM' features:
# 3 runs, PCA enabled, reg set to 0.
kwargs = dict(
    dataset='ogbn',
    num_runs=3,
    features='LM',
    pca=True,
    reg=0,
)

run_node(**kwargs)

Running on: cuda
torch.Size([169343, 384])


100%|██████████| 3/3 [00:06<00:00,  2.13s/it]
100%|██████████| 3/3 [00:05<00:00,  1.72s/it]
100%|██████████| 3/3 [50:39<00:00, 1013.24s/it]
100%|██████████| 3/3 [00:23<00:00,  7.78s/it]


mean    0.695177
gcn     0.689937
eve     0.653478
pool    0.684046
dtype: float32

mean    0.001035
gcn     0.001039
eve     0.001044
pool    0.001508
dtype: float32

mean    0.696356
gcn     0.690986
eve     0.654404
pool    0.685472
dtype: float32

# Output

In [14]:
# Turn the column-oriented accumulators filled by run_node / run_edge into
# tables: each dict key becomes a column, each appended entry a row.
node_results_df = pd.DataFrame(data=nc_results_dict)
edge_results_df = pd.DataFrame(data=ep_results_dict)

In [15]:
# Append this session's results to the previously saved ones.
# ignore_index=True renumbers the combined rows 0..n-1. Without it each input
# keeps its own row labels, so labels repeat in the result and label-based
# lookups such as .loc[0] return several rows.
total_node_results = pd.concat([past_nc_res_df, node_results_df], ignore_index=True)
total_edge_results = pd.concat([past_ep_res_df, edge_results_df], ignore_index=True)

In [16]:
# Persist the combined tables, replacing the previous CSV files.
# The loading cell tolerates missing result files, so ./results may not exist
# yet; to_csv does not create directories, so create it first rather than
# failing here after all the experiments have already run.
os.makedirs('./results', exist_ok=True)
total_node_results.to_csv('./results/node_class.csv',index=False)
total_edge_results.to_csv('./results/edge_pred.csv',index=False)