In [6]:
#!pip install networkx
#!pip install dgl
!pip install dataclasses

Collecting dataclasses
  Downloading https://files.pythonhosted.org/packages/e1/d2/6f02df2616fd4016075f60157c7a0452b38d8f7938ae94343911e0fb0b09/dataclasses-0.7-py3-none-any.whl
Installing collected packages: dataclasses
Successfully installed dataclasses-0.7


In [7]:
import os
import gc
import argparse
import time
import numpy as np
import networkx as nx
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import lr_scheduler
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from dgl.nn.pytorch.conv import SAGEConv

import pickle as pkl
import pandas as pd
import numpy as np

from dataclasses import dataclass, field, asdict, make_dataclass
from typing import List, Callable
from collections import defaultdict
from itertools import product

from importlib import reload

import utils
import graph_model
import graph_utils
reload(utils)
reload(graph_model)
reload(graph_utils)

from utils import get_metrics_dict
from graph_utils import build_graph, get_train_val_test_masks
from graph_model import GraphSAGE

from sklearn.preprocessing import MultiLabelBinarizer

In [2]:
# Where saved training artefacts go, and where the pickled input frames live.
PATH_TO_MODELS = "models/"
PATH_TO_DATA = "/scratch/mz2476/wiki/data/aligned_datasets/"

# Prefer the GPU whenever torch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
device

'cuda'

In [4]:
# Load the three pickled frames (graph structure, text embeddings, labels)
# and merge them into a single per-article frame.
# NOTE: pickle executes arbitrary code on load; only use files from a trusted source.
def _read_pickle(file_name):
    """Unpickle `file_name` from PATH_TO_DATA and return the stored object."""
    with open(os.path.join(PATH_TO_DATA, file_name), "rb") as handle:
        return pkl.load(handle)


wiki_graph_df = _read_pickle("graph_df.pkl")
wiki_feature_df = _read_pickle("text_embed_en.pkl")
wiki_label_df = _read_pickle("en_outlinks_tokens_df.pkl")

# Index-aligned joins; columns that clash get the suffix '1' / '2' on the left
# side. Rows are finally ordered by node_id.
joined_df = (
    wiki_feature_df
    .join(wiki_graph_df, lsuffix='1')
    .join(wiki_label_df, lsuffix='2')
    .sort_values(by='node_id')
)

In [5]:
joined_df.head(2)

Unnamed: 0,QID1,text_1000_embed,QID2,node_id,to_nodes,QID,title,raw_outlinks,outlinks,raw_tokens,tokens,mid_level_categories
0,Q6199,"[tensor(-0.0119), tensor(-0.0165), tensor(-0.0...",Q6199,0,"[10810, 31108, 1477, 32954, 33284, 3969, 6429,...",Q6199,Anarchism,"[[[Anti-authoritarianism|anti-authoritarian]],...","[Anti-authoritarianism, political philosophy, ...","[anarchism, is, an, anti, authoritarianism, an...","[anarchism, anti, authoritarianism, anti, auth...","[History_And_Society.History and society, Hist..."
1,Q38404,"[tensor(-0.0052), tensor(-0.0246), tensor(-0.0...",Q38404,1,"[29931, 9899, 5124, 26669, 6874, 1103, 1103, 1...",Q38404,Autism,"[[[Psychiatry]], [[Interpersonal relationship|...","[Psychiatry, Interpersonal relationship, commu...","[autism, is, developmental, disorder, characte...","[autism, developmental, disorder, characterize...","[STEM.Medicine, STEM.Biology, History_And_Soci..."


In [6]:
%%time
G = build_graph(joined_df, directed=True)

CPU times: user 19.8 s, sys: 303 ms, total: 20.1 s
Wall time: 19.8 s


In [7]:
# Multi-hot label matrix: one column per distinct value found in
# `mid_level_categories`, stored as a float tensor.
mlb = MultiLabelBinarizer()
labels = torch.FloatTensor(mlb.fit_transform(joined_df.mid_level_categories))

# Split the nodes into train / validation / test masks.
train_mask, val_mask, test_mask = get_train_val_test_masks(G.number_of_nodes())

# Features available for every node: its own index and its text embedding
# (NaNs in the embedding are replaced by 0).
_n_nodes = G.number_of_nodes()
G.ndata['node_id'] = torch.arange(_n_nodes)
embeds = np.stack(joined_df.text_1000_embed.values)
embeds = np.nan_to_num(embeds, nan=0.)
G.ndata['_text_embed'] = torch.FloatTensor(embeds)

# Features populated ONLY for training nodes: rows outside the train mask are
# zeroed by the multiplication.
# NOTE(review): '_topics' feeds each training node's own labels to the model
# as input -- presumably intended as a label-propagation signal; confirm this
# does not let the model simply copy its targets during training.
_train_rows = train_mask[:, None].float()
G.ndata['_topics'] = labels * _train_rows
# Zero-width placeholder so a "no extra features" configuration can be run.
G.ndata['_empty'] = torch.zeros(_n_nodes, 0)

In [8]:
from functools import partial

@dataclass(frozen=True)
class Args:
    """Immutable bundle of hyperparameters for one GraphSAGE training run.

    Instances are converted with ``asdict`` and passed as keyword arguments to
    ``GraphSAGE``; the optimisation fields are also read directly when the
    optimizer and LR scheduler are built.

    Note: the defaults of ``n_classes`` and ``num_nodes`` are evaluated once,
    when this class statement runs, from the notebook globals ``labels`` and
    ``G``. Re-run this cell if either of them changes.
    """
    # --- model architecture ---
    embedding_dim    : int = 150
    n_hidden         : int = 150
    n_layers         : int = 2
    aggregator_type  : str = "mean" # ``mean``, ``gcn``, ``pool``, ``lstm``
    activation       : Callable = partial(F.leaky_relu, negative_slope=0.1)

    # --- problem size (taken from the loaded data at class-definition time) ---
    n_classes    : int = labels.shape[1]
    num_nodes    : int = G.number_of_nodes()
    features_dim : int = 0  # overridden per run with the width of G.ndata["features"]

    # --- optimisation ---
    lr           : float = 0.01   # Adam learning rate
    weight_decay : float = 0.     # Adam weight decay
    dropout      : float = 0.1    # presumably consumed by GraphSAGE -- confirm in graph_model
    step_size    : int = 200      # StepLR period
    n_epochs     : int = 300      # epoch count handed to train_GraphSAGE
        
args = Args()

In [9]:
reload(graph_utils)

from graph_utils import predict, train_GraphSAGE, get_gpu_memory_map

In [10]:
# G.ndata["features"] = G.ndata['_text_embed'][:, 0:1]

# args = Args(
#             features_dim=G.ndata["features"].shape[1],
#             embedding_dim=10,
#             n_hidden=10,
#             n_layers=4,
#             aggregator_type="gcn",
#             lr=0.001,
#         )

# model = GraphSAGE(**asdict(args))


# model.to(device)
# labels = labels.to(device)
# G.ndata["features"] = G.ndata["features"].to(device)
# G.ndata["node_id"] = G.ndata["node_id"].to(device)
# train_mask = train_mask.to(device)
# val_mask = val_mask.to(device)

In [11]:
# list_features = [
#     ['_empty'],
#     ['_topics'],
#     ['_text_embed'],
#     ['_topics', '_text_embed']
# ]
# list_emb_dim = [100, 200, 300]
# list_n_hidden = [50, 100, 150]
# list_n_layers = [1, 2, 3]
# list_aggregator = [
# #     "mean",
# #     "gcn", 
# #     "pool",
#     "lstm"
# ]

In [15]:
# Assemble the model input by placing the selected node-feature blocks side by
# side (column-wise), then show the resulting shape.
features_names = ['_topics', '_text_embed']
_feature_blocks = tuple(G.ndata[key] for key in features_names)
G.ndata["features"] = torch.cat(_feature_blocks, dim=1)
G.ndata["features"].shape

torch.Size([33823, 345])

In [16]:
# Hyperparameter grid for run_grid_search, which reads these module-level lists
# when it is called and trains one model per element of their Cartesian product.
# Each entry of `list_features` is a list of G.ndata keys whose tensors are
# concatenated column-wise to form the model's input features.
list_features = [
    ['_empty'],
    ['_topics'],
    ['_text_embed'],
    ['_topics', '_text_embed']
]
list_emb_dim = [200]
list_n_hidden = [200]
list_n_layers = [1]
# Aggregators to try; commented-out entries are currently disabled.
list_aggregator = [
#     "mean",
    "gcn", 
#     "pool",
#     "lstm"
]

def run_grid_search(G, labels, train_mask, val_mask, device, FNAME,
                    features_grid=None, emb_dim_grid=None, n_hidden_grid=None,
                    n_layers_grid=None, aggregator_grid=None):
    """Train one GraphSAGE model per hyperparameter combination and collect metrics.

    Args:
        G: graph whose ``ndata`` holds the candidate feature tensors and
            ``node_id``. ``G.ndata["features"]`` and ``G.ndata["node_id"]``
            are overwritten / moved to ``device`` as a side effect.
        labels: label tensor handed to the loss and to ``train_GraphSAGE``.
        train_mask, val_mask: node masks handed to ``train_GraphSAGE``.
        device: torch device (or device string) to train on.
        FNAME: file name, relative to ``PATH_TO_MODELS``, that receives the
            accumulated metrics list after every finished configuration.
        features_grid: lists of ``G.ndata`` keys to concatenate as input
            features; defaults to the notebook-level ``list_features``.
        emb_dim_grid, n_hidden_grid, n_layers_grid, aggregator_grid: values to
            sweep; default to ``list_emb_dim``, ``list_n_hidden``,
            ``list_n_layers`` and ``list_aggregator`` respectively.

    Returns:
        list with one entry per configuration, each being whatever
        ``train_GraphSAGE`` returned for that run.

    Raises:
        ValueError: if a feature combination is empty.
    """
    # Fall back to the notebook-level lists so existing six-argument calls keep
    # their behaviour; passing grids explicitly avoids depending on hidden state.
    if features_grid is None:
        features_grid = list_features
    if emb_dim_grid is None:
        emb_dim_grid = list_emb_dim
    if n_hidden_grid is None:
        n_hidden_grid = list_n_hidden
    if n_layers_grid is None:
        n_layers_grid = list_n_layers
    if aggregator_grid is None:
        aggregator_grid = list_aggregator

    # Make sure the checkpoint location exists before any training time is spent.
    save_path = os.path.join(PATH_TO_MODELS, FNAME)
    os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)

    # These tensors do not depend on the configuration: move them once.
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    G.ndata["node_id"] = G.ndata["node_id"].to(device)

    metrics_list = []

    for features_names, embedding_dim, n_hidden, n_layers, aggregator_type in product(
            features_grid, emb_dim_grid, n_hidden_grid, n_layers_grid, aggregator_grid):

        print(50*"--")
        print("features_names, embedding_dim, n_hidden, n_layers, aggregator_type:\n", 
              features_names, embedding_dim, n_hidden, n_layers, aggregator_type)

        if len(features_names) == 0:
            raise ValueError("features_names must contain at least one G.ndata key")
        if len(features_names) == 1:
            G.ndata["features"] = G.ndata[features_names[0]]
        else:
            G.ndata["features"] = torch.cat([G.ndata[name] for name in features_names], dim=1)
        G.ndata["features"] = G.ndata["features"].to(device)

        args = Args(
            features_dim=G.ndata["features"].shape[1],
            embedding_dim=embedding_dim,
            n_hidden=n_hidden,
            n_layers=n_layers,
            aggregator_type=aggregator_type,
            n_epochs=400,
            dropout=0.2,
        )

        model = GraphSAGE(**asdict(args))
        model.to(device)

        # Optional per-class positive weighting (inverse label frequency on the
        # training nodes); currently switched off.
        use_loss_reweighting = False
        pos_weights = 1 / labels[train_mask].mean(dim=0) if use_loss_reweighting else None
        criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weights)
        model_parameters = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.Adam(model_parameters, lr=args.lr, weight_decay=args.weight_decay)
        exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=0.01)

        metrics = train_GraphSAGE(model, criterion, optimizer, exp_lr_scheduler, 
                        device, "test", asdict(args), args.n_epochs,
                        G, labels, train_mask, val_mask)
        metrics_list.append(metrics)
        # Checkpoint after every configuration so an interrupted sweep keeps
        # the runs that already finished.
        torch.save(metrics_list, save_path)

        print(get_gpu_memory_map())
        # Drop this run's references first; otherwise the collector and the CUDA
        # caching allocator cannot release the model/optimizer memory and the
        # next configuration is built while the old one is still resident.
        del model, model_parameters, optimizer, exp_lr_scheduler, criterion
        gc.collect()
        torch.cuda.empty_cache()

    return metrics_list

In [17]:
# Sweep the grid lists as currently defined and keep the per-run metrics in `d`.
# NOTE(review): every run cell in this notebook saves to the same
# "results.pth", so each sweep overwrites the file written by the previous one.
d = run_grid_search(G, labels, train_mask, val_mask, device, "results.pth")

----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_empty'] 200 200 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.05785 | Validation f1_micro 0.631
Epoch 199 | Train Loss: 0.03425 | Validation f1_micro 0.615
Epoch 299 | Train Loss: 0.03383 | Validation f1_micro 0.619
Epoch 399 | Train Loss: 0.03366 | Validation f1_micro 0.618

Training complete in 0m 28s
Best val f1_micro: 0.6400 

{0: 1203}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics'] 200 200 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.05605 | Validation f1_micro 0.639
Epoch 199 | Train Loss: 0.03361 | Validation f1_micro 0.615
Epoch 299 | Train Loss: 0.03313 | Validation f1_micro 0.62
Epoch 399 | Train Loss: 0.03297 | Validation f1_micro 0.619

Training complete in 0m 30s
Best val f1_micro: 0.6460 

{0: 1265}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_text_embed'] 200 200 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.05521 | Validation f1_micro 0.636
Epoch 199 | Train Loss: 0.03194 | Validation f1_micro 0.614
Epoch 299 | Train Loss: 0.03161 | Validation f1_micro 0.617
Epoch 399 | Train Loss: 0.03122 | Validation f1_micro 0.615

Training complete in 0m 36s
Best val f1_micro: 0.6420 

{0: 1449}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics', '_text_embed'] 200 200 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.05373 | Validation f1_micro 0.642
Epoch 199 | Train Loss: 0.03128 | Validation f1_micro 0.614
Epoch 299 | Train Loss: 0.03084 | Validation f1_micro 0.613
Epoch 399 | Train Loss: 0.03063 | Validation f1_micro 0.61

Training complete in 0m 38s
Best val f1_micro: 0.6460 

{0: 1447}


In [13]:
# 1 layer
# NOTE(review): the log below sweeps embedding_dim in {150, 200} and n_hidden
# in {100, 200}, which is not the grid defined in this file's grid cell -- the
# grid lists held different values when this cell was executed (count 13,
# before the grid cell's count 16). Results are also saved to the same
# "results.pth" as the other sweeps and therefore overwrite them.
d = run_grid_search(G, labels, train_mask, val_mask, device, "results.pth")

----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_empty'] 150 100 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.06469 | Validation f1_micro 0.628
Epoch 199 | Train Loss: 0.0372 | Validation f1_micro 0.619
Epoch 299 | Train Loss: 0.03686 | Validation f1_micro 0.62
Epoch 399 | Train Loss: 0.0366 | Validation f1_micro 0.618

Training complete in 0m 40s
Best val f1_micro: 0.6380 

{0: 893}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_empty'] 150 200 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.05606 | Validation f1_micro 0.634
Epoch 199 | Train Loss: 0.03063 | Validation f1_micro 0.608
Epoch 299 | Train Loss: 0.03017 | Validation f1_micro 0.607
Epoch 399 | Train Loss: 0.02986 | Validation f1_micro 0.61

Training complete in 0m 27s
Best val f1_micro: 0.6400 

{0: 1063}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_empty'] 200 100 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.05969 | Validation f1_micro 0.631
Epoch 199 | Train Loss: 0.03429 | Validation f1_micro 0.616
Epoch 299 | Train Loss: 0.03382 | Validation f1_micro 0.617
Epoch 399 | Train Loss: 0.03354 | Validation f1_micro 0.615

Training complete in 0m 26s
Best val f1_micro: 0.6390 

{0: 1045}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_empty'] 200 200 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.05252 | Validation f1_micro 0.637
Epoch 199 | Train Loss: 0.02807 | Validation f1_micro 0.609
Epoch 299 | Train Loss: 0.0275 | Validation f1_micro 0.608
Epoch 399 | Train Loss: 0.02735 | Validation f1_micro 0.61

Training complete in 0m 28s
Best val f1_micro: 0.6430 

{0: 1105}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics'] 150 100 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.06201 | Validation f1_micro 0.632
Epoch 199 | Train Loss: 0.0363 | Validation f1_micro 0.615
Epoch 299 | Train Loss: 0.03594 | Validation f1_micro 0.613
Epoch 399 | Train Loss: 0.03572 | Validation f1_micro 0.613

Training complete in 0m 26s
Best val f1_micro: 0.6380 

{0: 1031}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics'] 150 200 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.05406 | Validation f1_micro 0.641
Epoch 199 | Train Loss: 0.0292 | Validation f1_micro 0.609
Epoch 299 | Train Loss: 0.02896 | Validation f1_micro 0.607
Epoch 399 | Train Loss: 0.02873 | Validation f1_micro 0.609

Training complete in 0m 28s
Best val f1_micro: 0.6440 

{0: 1055}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics'] 200 100 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.05834 | Validation f1_micro 0.638
Epoch 199 | Train Loss: 0.03347 | Validation f1_micro 0.615
Epoch 299 | Train Loss: 0.033 | Validation f1_micro 0.617
Epoch 399 | Train Loss: 0.03282 | Validation f1_micro 0.618

Training complete in 0m 27s
Best val f1_micro: 0.6410 

{0: 1051}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics'] 200 200 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.05032 | Validation f1_micro 0.644
Epoch 199 | Train Loss: 0.02657 | Validation f1_micro 0.604
Epoch 299 | Train Loss: 0.02608 | Validation f1_micro 0.605
Epoch 399 | Train Loss: 0.02584 | Validation f1_micro 0.603

Training complete in 0m 30s
Best val f1_micro: 0.6480 

{0: 1159}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_text_embed'] 150 100 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.06093 | Validation f1_micro 0.633
Epoch 199 | Train Loss: 0.03368 | Validation f1_micro 0.611
Epoch 299 | Train Loss: 0.03323 | Validation f1_micro 0.61
Epoch 399 | Train Loss: 0.033 | Validation f1_micro 0.608

Training complete in 0m 32s
Best val f1_micro: 0.6360 

{0: 1317}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_text_embed'] 150 200 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.05387 | Validation f1_micro 0.639
Epoch 199 | Train Loss: 0.02791 | Validation f1_micro 0.607
Epoch 299 | Train Loss: 0.02753 | Validation f1_micro 0.604
Epoch 399 | Train Loss: 0.02719 | Validation f1_micro 0.605

Training complete in 0m 35s
Best val f1_micro: 0.6420 

{0: 1331}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_text_embed'] 200 100 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.05629 | Validation f1_micro 0.64
Epoch 199 | Train Loss: 0.03085 | Validation f1_micro 0.604
Epoch 299 | Train Loss: 0.03042 | Validation f1_micro 0.607
Epoch 399 | Train Loss: 0.03014 | Validation f1_micro 0.608

Training complete in 0m 33s
Best val f1_micro: 0.6410 

{0: 1369}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_text_embed'] 200 200 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.0507 | Validation f1_micro 0.643
Epoch 199 | Train Loss: 0.02613 | Validation f1_micro 0.601
Epoch 299 | Train Loss: 0.02571 | Validation f1_micro 0.599
Epoch 399 | Train Loss: 0.02538 | Validation f1_micro 0.602

Training complete in 0m 36s
Best val f1_micro: 0.6430 

{0: 1403}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics', '_text_embed'] 150 100 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.05751 | Validation f1_micro 0.634
Epoch 199 | Train Loss: 0.03165 | Validation f1_micro 0.606
Epoch 299 | Train Loss: 0.03128 | Validation f1_micro 0.603
Epoch 399 | Train Loss: 0.03108 | Validation f1_micro 0.604

Training complete in 0m 34s
Best val f1_micro: 0.6370 

{0: 1299}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics', '_text_embed'] 150 200 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.0513 | Validation f1_micro 0.643
Epoch 199 | Train Loss: 0.02642 | Validation f1_micro 0.596
Epoch 299 | Train Loss: 0.0259 | Validation f1_micro 0.598
Epoch 399 | Train Loss: 0.02552 | Validation f1_micro 0.594

Training complete in 0m 36s
Best val f1_micro: 0.6460 

{0: 1339}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics', '_text_embed'] 200 100 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.05387 | Validation f1_micro 0.639
Epoch 199 | Train Loss: 0.02936 | Validation f1_micro 0.6
Epoch 299 | Train Loss: 0.02888 | Validation f1_micro 0.598
Epoch 399 | Train Loss: 0.02865 | Validation f1_micro 0.597

Training complete in 0m 35s
Best val f1_micro: 0.6410 

{0: 1429}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics', '_text_embed'] 200 200 1 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.04861 | Validation f1_micro 0.639
Epoch 199 | Train Loss: 0.02451 | Validation f1_micro 0.592
Epoch 299 | Train Loss: 0.02399 | Validation f1_micro 0.593
Epoch 399 | Train Loss: 0.02373 | Validation f1_micro 0.589

Training complete in 0m 38s
Best val f1_micro: 0.6450 

{0: 1469}


In [14]:
# Average of the "Best val f1_micro" values over the four 1-layer
# (embedding_dim, n_hidden) settings, one row per input-feature set. The
# numbers are copied by hand from the training log of the 1-layer sweep.
for _label, _best_scores in (
    ("node_id:", [0.6380, 0.6400, 0.6390, 0.6430]),
    ("topics + node:", [0.6380, 0.6440, 0.6410, 0.6480]),
    ("text + node:", [0.6360, 0.6420, 0.6410, 0.6430]),
    ("text + topics + node:", [0.6370, 0.6460, 0.6410, 0.6450]),
):
    print(_label, np.mean(_best_scores))

node_id: 0.64
topics + node: 0.64275
text + node: 0.6405000000000001
text + topics + node: 0.64225


In [34]:
# 2 layers
# NOTE(review): the log below sweeps n_layers=2 with both the "mean" and "gcn"
# aggregators, embedding_dim in {150, 200} and n_hidden in {100, 200}; that is
# not the grid defined in this file's grid cell, so the grid lists held
# different values when this cell ran. Results go to the same "results.pth" as
# the other sweeps and overwrite them.
d = run_grid_search(G, labels, train_mask, val_mask, device, "results.pth")

----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_empty'] 150 100 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.04908 | Validation f1_micro 0.589
Epoch 199 | Train Loss: 0.009274 | Validation f1_micro 0.582
Epoch 299 | Train Loss: 0.009153 | Validation f1_micro 0.584
Epoch 399 | Train Loss: 0.009009 | Validation f1_micro 0.581

Training complete in 0m 43s
Best val f1_micro: 0.5930 

{0: 1551}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_empty'] 150 100 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.07734 | Validation f1_micro 0.595
Epoch 199 | Train Loss: 0.05388 | Validation f1_micro 0.614
Epoch 299 | Train Loss: 0.05335 | Validation f1_micro 0.62
Epoch 399 | Train Loss: 0.05321 | Validation f1_micro 0.62

Training complete in 0m 28s
Best val f1_micro: 0.6250 

{0: 1507}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_empty'] 150 200 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.04174 | Validation f1_micro 0.598
Epoch 199 | Train Loss: 0.005263 | Validation f1_micro 0.592
Epoch 299 | Train Loss: 0.004953 | Validation f1_micro 0.592
Epoch 399 | Train Loss: 0.004813 | Validation f1_micro 0.587

Training complete in 0m 47s
Best val f1_micro: 0.6020 

{0: 1687}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_empty'] 150 200 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.07126 | Validation f1_micro 0.61
Epoch 199 | Train Loss: 0.04782 | Validation f1_micro 0.614
Epoch 299 | Train Loss: 0.04725 | Validation f1_micro 0.614
Epoch 399 | Train Loss: 0.04697 | Validation f1_micro 0.616

Training complete in 0m 32s
Best val f1_micro: 0.6270 

{0: 1681}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_empty'] 200 100 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.04622 | Validation f1_micro 0.585
Epoch 199 | Train Loss: 0.009195 | Validation f1_micro 0.582
Epoch 299 | Train Loss: 0.008763 | Validation f1_micro 0.59
Epoch 399 | Train Loss: 0.008641 | Validation f1_micro 0.585

Training complete in 0m 43s
Best val f1_micro: 0.5970 

{0: 1581}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_empty'] 200 100 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.07543 | Validation f1_micro 0.596
Epoch 199 | Train Loss: 0.05302 | Validation f1_micro 0.62
Epoch 299 | Train Loss: 0.0525 | Validation f1_micro 0.617
Epoch 399 | Train Loss: 0.05211 | Validation f1_micro 0.616

Training complete in 0m 29s
Best val f1_micro: 0.6230 

{0: 1557}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_empty'] 200 200 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.03728 | Validation f1_micro 0.598
Epoch 199 | Train Loss: 0.004825 | Validation f1_micro 0.594
Epoch 299 | Train Loss: 0.004521 | Validation f1_micro 0.592
Epoch 399 | Train Loss: 0.004548 | Validation f1_micro 0.595

Training complete in 0m 48s
Best val f1_micro: 0.6010 

{0: 1711}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_empty'] 200 200 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.06679 | Validation f1_micro 0.623
Epoch 199 | Train Loss: 0.04515 | Validation f1_micro 0.612
Epoch 299 | Train Loss: 0.04462 | Validation f1_micro 0.612
Epoch 399 | Train Loss: 0.04425 | Validation f1_micro 0.614

Training complete in 0m 33s
Best val f1_micro: 0.6290 

{0: 1711}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics'] 150 100 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.03167 | Validation f1_micro 0.498
Epoch 199 | Train Loss: 0.01008 | Validation f1_micro 0.546
Epoch 299 | Train Loss: 0.009969 | Validation f1_micro 0.542
Epoch 399 | Train Loss: 0.01006 | Validation f1_micro 0.541

Training complete in 0m 43s
Best val f1_micro: 0.5510 

{0: 1607}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics'] 150 100 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.0773 | Validation f1_micro 0.602
Epoch 199 | Train Loss: 0.05374 | Validation f1_micro 0.612
Epoch 299 | Train Loss: 0.05333 | Validation f1_micro 0.611
Epoch 399 | Train Loss: 0.05305 | Validation f1_micro 0.613

Training complete in 0m 29s
Best val f1_micro: 0.6250 

{0: 1507}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics'] 150 200 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.01668 | Validation f1_micro 0.5
Epoch 199 | Train Loss: 0.005494 | Validation f1_micro 0.563
Epoch 299 | Train Loss: 0.005254 | Validation f1_micro 0.562
Epoch 399 | Train Loss: 0.005395 | Validation f1_micro 0.566

Training complete in 0m 48s
Best val f1_micro: 0.5690 

{0: 1685}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics'] 150 200 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.06688 | Validation f1_micro 0.627
Epoch 199 | Train Loss: 0.04603 | Validation f1_micro 0.615
Epoch 299 | Train Loss: 0.04542 | Validation f1_micro 0.615
Epoch 399 | Train Loss: 0.0453 | Validation f1_micro 0.614

Training complete in 0m 35s
Best val f1_micro: 0.6340 

{0: 1659}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics'] 200 100 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.02583 | Validation f1_micro 0.521
Epoch 199 | Train Loss: 0.007411 | Validation f1_micro 0.549
Epoch 299 | Train Loss: 0.007338 | Validation f1_micro 0.55
Epoch 399 | Train Loss: 0.007067 | Validation f1_micro 0.553

Training complete in 0m 46s
Best val f1_micro: 0.5620 

{0: 1653}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics'] 200 100 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.07593 | Validation f1_micro 0.599
Epoch 199 | Train Loss: 0.05287 | Validation f1_micro 0.614
Epoch 299 | Train Loss: 0.05259 | Validation f1_micro 0.618
Epoch 399 | Train Loss: 0.05237 | Validation f1_micro 0.613

Training complete in 0m 31s
Best val f1_micro: 0.6230 

{0: 1619}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics'] 200 200 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.01725 | Validation f1_micro 0.517
Epoch 199 | Train Loss: 0.004669 | Validation f1_micro 0.556
Epoch 299 | Train Loss: 0.004386 | Validation f1_micro 0.567
Epoch 399 | Train Loss: 0.004328 | Validation f1_micro 0.563

Training complete in 0m 50s
Best val f1_micro: 0.5720 

{0: 1777}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics'] 200 200 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.06405 | Validation f1_micro 0.631
Epoch 199 | Train Loss: 0.04352 | Validation f1_micro 0.607
Epoch 299 | Train Loss: 0.04258 | Validation f1_micro 0.604
Epoch 399 | Train Loss: 0.0425 | Validation f1_micro 0.603

Training complete in 0m 35s
Best val f1_micro: 0.6350 

{0: 1753}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_text_embed'] 150 100 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.0458 | Validation f1_micro 0.602
Epoch 199 | Train Loss: 0.008842 | Validation f1_micro 0.59
Epoch 299 | Train Loss: 0.008535 | Validation f1_micro 0.587
Epoch 399 | Train Loss: 0.008345 | Validation f1_micro 0.59

Training complete in 0m 48s
Best val f1_micro: 0.6030 

{0: 1897}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_text_embed'] 150 100 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.07629 | Validation f1_micro 0.603
Epoch 199 | Train Loss: 0.05257 | Validation f1_micro 0.608
Epoch 299 | Train Loss: 0.05204 | Validation f1_micro 0.605
Epoch 399 | Train Loss: 0.05173 | Validation f1_micro 0.61

Training complete in 0m 36s
Best val f1_micro: 0.6210 

{0: 1659}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_text_embed'] 150 200 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.03344 | Validation f1_micro 0.61
Epoch 199 | Train Loss: 0.0038 | Validation f1_micro 0.603
Epoch 299 | Train Loss: 0.003664 | Validation f1_micro 0.602
Epoch 399 | Train Loss: 0.003619 | Validation f1_micro 0.603

Training complete in 0m 55s
Best val f1_micro: 0.6170 

{0: 1925}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_text_embed'] 150 200 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.06909 | Validation f1_micro 0.618
Epoch 199 | Train Loss: 0.04709 | Validation f1_micro 0.609
Epoch 299 | Train Loss: 0.04621 | Validation f1_micro 0.612
Epoch 399 | Train Loss: 0.04585 | Validation f1_micro 0.61

Training complete in 0m 42s
Best val f1_micro: 0.6300 

{0: 1819}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_text_embed'] 200 100 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.03574 | Validation f1_micro 0.593
Epoch 199 | Train Loss: 0.006299 | Validation f1_micro 0.586
Epoch 299 | Train Loss: 0.006259 | Validation f1_micro 0.586
Epoch 399 | Train Loss: 0.006029 | Validation f1_micro 0.581

Training complete in 0m 50s
Best val f1_micro: 0.5950 

{0: 1879}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_text_embed'] 200 100 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.07794 | Validation f1_micro 0.592
Epoch 199 | Train Loss: 0.05332 | Validation f1_micro 0.61
Epoch 299 | Train Loss: 0.05266 | Validation f1_micro 0.604
Epoch 399 | Train Loss: 0.05248 | Validation f1_micro 0.607

Training complete in 0m 37s
Best val f1_micro: 0.6180 

{0: 1805}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_text_embed'] 200 200 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.02897 | Validation f1_micro 0.603
Epoch 199 | Train Loss: 0.003236 | Validation f1_micro 0.607
Epoch 299 | Train Loss: 0.003047 | Validation f1_micro 0.606
Epoch 399 | Train Loss: 0.002923 | Validation f1_micro 0.602

Training complete in 0m 56s
Best val f1_micro: 0.6130 

{0: 2013}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_text_embed'] 200 200 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.06667 | Validation f1_micro 0.625
Epoch 199 | Train Loss: 0.04522 | Validation f1_micro 0.604
Epoch 299 | Train Loss: 0.04444 | Validation f1_micro 0.61
Epoch 399 | Train Loss: 0.04432 | Validation f1_micro 0.606

Training complete in 0m 42s
Best val f1_micro: 0.6320 

{0: 1907}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics', '_text_embed'] 150 100 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.02575 | Validation f1_micro 0.534
Epoch 199 | Train Loss: 0.007693 | Validation f1_micro 0.574
Epoch 299 | Train Loss: 0.007473 | Validation f1_micro 0.567
Epoch 399 | Train Loss: 0.007379 | Validation f1_micro 0.574

Training complete in 0m 51s
Best val f1_micro: 0.5780 

{0: 1897}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics', '_text_embed'] 150 100 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.07474 | Validation f1_micro 0.609
Epoch 199 | Train Loss: 0.05204 | Validation f1_micro 0.611
Epoch 299 | Train Loss: 0.05146 | Validation f1_micro 0.613
Epoch 399 | Train Loss: 0.05121 | Validation f1_micro 0.614

Training complete in 0m 37s
Best val f1_micro: 0.6260 

{0: 1737}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics', '_text_embed'] 150 200 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.01383 | Validation f1_micro 0.532
Epoch 199 | Train Loss: 0.003875 | Validation f1_micro 0.582
Epoch 299 | Train Loss: 0.003838 | Validation f1_micro 0.579
Epoch 399 | Train Loss: 0.003678 | Validation f1_micro 0.582

Training complete in 0m 57s
Best val f1_micro: 0.5910 

{0: 1995}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics', '_text_embed'] 150 200 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.0677 | Validation f1_micro 0.624
Epoch 199 | Train Loss: 0.04623 | Validation f1_micro 0.622
Epoch 299 | Train Loss: 0.04565 | Validation f1_micro 0.616
Epoch 399 | Train Loss: 0.04545 | Validation f1_micro 0.621

Training complete in 0m 42s
Best val f1_micro: 0.6320 

{0: 1847}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics', '_text_embed'] 200 100 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.02822 | Validation f1_micro 0.535
Epoch 199 | Train Loss: 0.007694 | Validation f1_micro 0.565
Epoch 299 | Train Loss: 0.007399 | Validation f1_micro 0.559
Epoch 399 | Train Loss: 0.007178 | Validation f1_micro 0.565

Training complete in 0m 52s
Best val f1_micro: 0.5700 

{0: 1871}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics', '_text_embed'] 200 100 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.07272 | Validation f1_micro 0.611
Epoch 199 | Train Loss: 0.0511 | Validation f1_micro 0.613
Epoch 299 | Train Loss: 0.05038 | Validation f1_micro 0.615
Epoch 399 | Train Loss: 0.05021 | Validation f1_micro 0.615

Training complete in 0m 38s
Best val f1_micro: 0.6250 

{0: 1885}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics', '_text_embed'] 200 200 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.01575 | Validation f1_micro 0.549
Epoch 199 | Train Loss: 0.003496 | Validation f1_micro 0.578
Epoch 299 | Train Loss: 0.003375 | Validation f1_micro 0.581
Epoch 399 | Train Loss: 0.003289 | Validation f1_micro 0.585

Training complete in 0m 58s
Best val f1_micro: 0.5880 

{0: 2047}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics', '_text_embed'] 200 200 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.0634 | Validation f1_micro 0.63
Epoch 199 | Train Loss: 0.04375 | Validation f1_micro 0.604
Epoch 299 | Train Loss: 0.04299 | Validation f1_micro 0.607
Epoch 399 | Train Loss: 0.04275 | Validation f1_micro 0.609

Training complete in 0m 44s
Best val f1_micro: 0.6340 

{0: 1955}


In [None]:
# Average "Best val f1_micro" per feature set.
# NOTE(review): the scores are hand-copied constants; the last two rows appear to
# match the gcn-aggregator runs in the grid-search output above — confirm the rest.
for feature_set, best_val_scores in [
    ("node_id", [0.6250, 0.6270, 0.6290]),
    ("topics + node", [0.6250, 0.6340, 0.6230, 0.6350]),
    ("text + node", [0.6210, 0.6300, 0.6180, 0.6320]),
    ("text + topics + node", [0.6260, 0.6320, 0.6250, 0.6340]),
]:
    print(feature_set + ":", np.mean(best_val_scores))

In [32]:
# Run the grid search; the saved output below logs one section per
# (features_names, embedding_dim, n_hidden, n_layers, aggregator_type) configuration.
# NOTE(review): run_grid_search is defined outside this section; "results.pth" is
# presumably the file the results are written to — confirm against its definition.
d = run_grid_search(G, labels, train_mask, val_mask, device, "results.pth")

----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_empty'] 200 100 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.04718 | Validation f1_micro 0.588
Epoch 199 | Train Loss: 0.009601 | Validation f1_micro 0.585
Epoch 299 | Train Loss: 0.009374 | Validation f1_micro 0.582

Training complete in 0m 38s
Best val f1_micro: 0.5940 

{0: 11675}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_empty'] 200 100 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.07598 | Validation f1_micro 0.6
Epoch 199 | Train Loss: 0.05263 | Validation f1_micro 0.62
Epoch 299 | Train Loss: 0.05195 | Validation f1_micro 0.621

Training complete in 0m 22s
Best val f1_micro: 0.6260 

{0: 1557}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics'] 200 100 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.0318 | Validation f1_micro 0.514
Epoch 199 | Train Loss: 0.008936 | Validation f1_micro 0.555
Epoch 299 | Train Loss: 0.008693 | Validation f1_micro 0.547

Training complete in 0m 34s
Best val f1_micro: 0.5590 

{0: 1665}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics'] 200 100 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.0737 | Validation f1_micro 0.603
Epoch 199 | Train Loss: 0.05215 | Validation f1_micro 0.61
Epoch 299 | Train Loss: 0.05152 | Validation f1_micro 0.611

Training complete in 0m 23s
Best val f1_micro: 0.6230 

{0: 1617}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_text_embed'] 200 100 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.03539 | Validation f1_micro 0.595
Epoch 199 | Train Loss: 0.006095 | Validation f1_micro 0.59
Epoch 299 | Train Loss: 0.006004 | Validation f1_micro 0.588

Training complete in 0m 38s
Best val f1_micro: 0.6000 

{0: 1901}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_text_embed'] 200 100 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.0737 | Validation f1_micro 0.606
Epoch 199 | Train Loss: 0.05056 | Validation f1_micro 0.608
Epoch 299 | Train Loss: 0.05005 | Validation f1_micro 0.606

Training complete in 0m 27s
Best val f1_micro: 0.6200 

{0: 1823}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics', '_text_embed'] 200 100 2 mean


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.02387 | Validation f1_micro 0.54
Epoch 199 | Train Loss: 0.006177 | Validation f1_micro 0.569
Epoch 299 | Train Loss: 0.006039 | Validation f1_micro 0.567

Training complete in 0m 39s
Best val f1_micro: 0.5740 

{0: 1957}
----------------------------------------------------------------------------------------------------
features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
 ['_topics', '_text_embed'] 200 100 2 gcn


  'precision', 'predicted', average, warn_for)


Epoch 99 | Train Loss: 0.07333 | Validation f1_micro 0.61
Epoch 199 | Train Loss: 0.05063 | Validation f1_micro 0.613
Epoch 299 | Train Loss: 0.05005 | Validation f1_micro 0.611

Training complete in 0m 29s
Best val f1_micro: 0.6270 

{0: 1891}


[{'train': defaultdict(list,
              {'loss_hist': [1.4740639925003052,
                0.4492360055446625,
                0.28481194376945496,
                0.26651832461357117,
                0.27330002188682556,
                0.2582458555698395,
                0.24719974398612976,
                0.23596566915512085,
                0.21538092195987701,
                0.19785867631435394,
                0.18656332790851593,
                0.17797784507274628,
                0.17096729576587677,
                0.16886968910694122,
                0.16582553088665009,
                0.1608845740556717,
                0.1562299281358719,
                0.15216860175132751,
                0.1494113802909851,
                0.14838650822639465,
                0.1464831680059433,
                0.14446032047271729,
                0.1418657749891281,
                0.13913321495056152,
                0.1366763412952423,
                0.13436786830425262,
     

In [25]:
# NOTE(review): leftover interactive post-mortem debugging (IPython `%debug` magic).
# It needs a live ipdb prompt, so it does not belong in a Restart & Run All flow;
# remove this cell once the backward() failure shown in its output is understood.
%debug

> [0;32m/scratch/mz2476/miniconda3/envs/graph/lib/python3.7/site-packages/torch/autograd/__init__.py[0m(93)[0;36mbackward[0;34m()[0m
[0;32m     91 [0;31m    Variable._execution_engine.run_backward(
[0m[0;32m     92 [0;31m        [0mtensors[0m[0;34m,[0m [0mgrad_tensors[0m[0;34m,[0m [0mretain_graph[0m[0;34m,[0m [0mcreate_graph[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 93 [0;31m        allow_unreachable=True)  # allow_unreachable flag
[0m[0;32m     94 [0;31m[0;34m[0m[0m
[0m[0;32m     95 [0;31m[0;34m[0m[0m
[0m
ipdb> u
> [0;32m/scratch/mz2476/miniconda3/envs/graph/lib/python3.7/site-packages/torch/tensor.py[0m(118)[0;36mbackward[0;34m()[0m
[0;32m    116 [0;31m                [0mproducts[0m[0;34m.[0m [0mDefaults[0m [0mto[0m[0;31m [0m[0;31m`[0m[0;31m`[0m[0;32mFalse[0m[0;31m`[0m[0;31m`[0m[0;34m.[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    117 [0;31m        """
[0m[0;32m--> 118 [0;31m        [0mtorch[0m[0;3

In [None]:
# features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
#  ['_empty'] 100 150 2 mean
# Epoch 99 | Train Loss: 0.05154 | Validation f1_micro 0.601
# Epoch 199 | Train Loss: 0.008532 | Validation f1_micro 0.598
# Epoch 299 | Train Loss: 0.008263 | Validation f1_micro 0.594
# Epoch 399 | Train Loss: 0.008067 | Validation f1_micro 0.592
# Training complete in 0m 43s
# Best val f1_micro: 0.6050 

# features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
#  ['_empty'] 100 150 1 gcn
# Epoch 99 | Train Loss: 0.06535 | Validation f1_micro 0.626
# Epoch 199 | Train Loss: 0.03704 | Validation f1_micro 0.624
# Epoch 299 | Train Loss: 0.03665 | Validation f1_micro 0.625
# Epoch 399 | Train Loss: 0.03643 | Validation f1_micro 0.624

# Training complete in 0m 24s
# Best val f1_micro: 0.6410 


# features_names, embedding_dim, n_hidden, n_layers, aggregator_type:
#  ['_empty'] 300 150 1 gcn
# Epoch 99 | Train Loss: 0.0509 | Validation f1_micro 0.643
# Epoch 199 | Train Loss: 0.02739 | Validation f1_micro 0.606
# Epoch 299 | Train Loss: 0.02689 | Validation f1_micro 0.603
# Epoch 399 | Train Loss: 0.02658 | Validation f1_micro 0.604

# Training complete in 0m 31s
# Best val f1_micro: 0.6430 

In [27]:
# Make sure the "test" directory under PATH_TO_MODELS exists before training.
# os.makedirs(..., exist_ok=True) replaces the exists()/mkdir() pair: it also
# creates PATH_TO_MODELS itself when that parent is missing (os.mkdir would raise
# FileNotFoundError) and has no check-then-create race.
# NOTE(review): presumably this is where the next cell's train_GraphSAGE(..., "test", ...)
# writes its artifacts — confirm against that function.
os.makedirs(PATH_TO_MODELS + "test", exist_ok=True)

In [28]:
# Single training run labelled "test", configured from `args` (passed both as a
# dict via asdict(args) and through args.n_epochs).
# NOTE(review): the saved output of this cell stops after epoch 90 with
# "RuntimeError: CUDA out of memory" (21.49 GiB already allocated), so GPU memory
# was nearly exhausted when this ran; the cause is not visible from this cell.
# NOTE(review): `d` is also assigned by the run_grid_search cell; whichever cell
# ran last determines its value.
d = train_GraphSAGE(model, criterion, optimizer, exp_lr_scheduler, 
                    device, "test", asdict(args), args.n_epochs,
                    G, train_mask, val_mask)

Epoch 0 | Train Loss: 0.7314 | Validation f1_micro 0.097


  'precision', 'predicted', average, warn_for)


Epoch 10 | Train Loss: 0.1809 | Validation f1_micro 0.162
Epoch 20 | Train Loss: 0.1503 | Validation f1_micro 0.236
Epoch 30 | Train Loss: 0.1304 | Validation f1_micro 0.329
Epoch 40 | Train Loss: 0.1144 | Validation f1_micro 0.435
Epoch 50 | Train Loss: 0.1014 | Validation f1_micro 0.505
Epoch 60 | Train Loss: 0.0922 | Validation f1_micro 0.546
Epoch 70 | Train Loss: 0.08526 | Validation f1_micro 0.572
Epoch 80 | Train Loss: 0.08002 | Validation f1_micro 0.592
Epoch 90 | Train Loss: 0.07542 | Validation f1_micro 0.603


RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 22.38 GiB total capacity; 21.49 GiB already allocated; 8.06 MiB free; 324.69 MiB cached)

In [34]:
# import subprocess

# def get_gpu_memory_map():
#     """Get the current gpu usage.

#     Returns
#     -------
#     usage: dict
#         Keys are device ids as integers.
#         Values are memory usage as integers in MB.
#     """
#     result = subprocess.check_output(
#         [
#             'nvidia-smi', '--query-gpu=memory.used',
#             '--format=csv,nounits,noheader'
#         ], encoding='utf-8')
#     # Convert lines into a dictionary
#     gpu_memory = [int(x) for x in result.strip().split('\n')]
#     gpu_memory_map = dict(zip(range(len(gpu_memory)), gpu_memory))
#     return gpu_memory_map

In [35]:
# def pretty_size(size):
# 	"""Pretty prints a torch.Size object"""
# 	assert(isinstance(size, torch.Size))
# 	return " × ".join(map(str, size))

# def dump_tensors(gpu_only=True):
# 	"""Prints a list of the Tensors being tracked by the garbage collector."""
# 	import gc
# 	total_size = 0
# 	for obj in gc.get_objects():
# 		try:
# 			if torch.is_tensor(obj):
# 				if not gpu_only or obj.is_cuda:
# 					print("%s:%s%s %s" % (type(obj).__name__, 
# 										  " GPU" if obj.is_cuda else "",
# 										  " pinned" if obj.is_pinned else "",
# 										  pretty_size(obj.size())))
# 					total_size += obj.numel()
# 			elif hasattr(obj, "data") and torch.is_tensor(obj.data):
# 				if not gpu_only or obj.is_cuda:
# 					print("%s → %s:%s%s%s%s %s" % (type(obj).__name__, 
# 												   type(obj.data).__name__, 
# 												   " GPU" if obj.is_cuda else "",
# 												   " pinned" if obj.data.is_pinned else "",
# 												   " grad" if obj.requires_grad else "", 
# 												   " volatile" if obj.volatile else "",
# 												   pretty_size(obj.data.size())))
# 					total_size += obj.data.numel()
# 		except Exception as e:
# 			pass        
# 	print("Total size:", total_size)

In [None]:
# torch.save({
#     'state_dict': model.state_dict(),
#     'options': options,
#         }, f'{PATH_TO_MODELS}/node_id.pt')
# print("Model saved.")

In [102]:
# load_pretrained = True

# if load_pretrained:
#     if device == 'cuda':
#         model_pt = torch.load(f'{PATH_TO_MODELS}/node_id_topics.pt')
#     else:
#         model_pt = torch.load(f'{PATH_TO_MODELS}/node_id_topics.pt', map_location=torch.device('cpu'))
#     options = model_pt['options']
    
#     model = GraphSAGE(**options)
#     model.load_state_dict(model_pt['state_dict'])
#     model.to(device)

# y_pred = (torch.exp(model(G)) > threshold).float()
# get_metrics_dict(labels[val_mask].cpu(), y_pred[val_mask].cpu())
    