## Model

In [None]:
"""GCN using DGL nn package

References:
- Semi-Supervised Classification with Graph Convolutional Networks
- Paper: https://arxiv.org/abs/1609.02907
- Code: https://github.com/tkipf/gcn
"""
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F
from dgl.nn.pytorch import GraphConv
import dgl.function as fn
from dgl.nn import GATConv
from dgl.nn.pytorch.conv import SAGEConv
import math



class GCN(nn.Module):
    """Two-layer graph convolutional network built from DGL ``GraphConv``.

    The graph is bound at construction time, so ``forward`` only receives the
    node feature matrix.
    """

    def __init__(self,
                 g,
                 in_feats,
                 n_hidden,
                 n_classes,
                 activation,
                 dropout=0.5):
        """
        Args:
            g: graph handed to both ``GraphConv`` layers on every forward pass.
            in_feats: size of each input node feature vector.
            n_hidden: output size of the first convolution.
            n_classes: output size of the second convolution.
            activation: activation applied by the first convolution.
            dropout: dropout probability applied between the two layers.
        """
        super(GCN, self).__init__()

        self.g = g

        self.gcn_layer1 = GraphConv(in_feats, n_hidden, activation=activation)

        self.gcn_layer2 = GraphConv(n_hidden, n_classes)

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, features):
        """Run conv -> dropout -> conv and return the second layer's output."""
        h = self.gcn_layer1(self.g, features)
        h = self.dropout(h)
        return self.gcn_layer2(self.g, h)

    def freeze_features(self, freeze):
        """Enable or disable gradients of an optional ``emb`` layer.

        This class does not create an ``emb`` attribute itself, so the call is
        a no-op unless a caller or subclass attached one. Previously it always
        raised ``AttributeError``.
        """
        emb = getattr(self, 'emb', None)
        if emb is None:
            return
        emb.weight.requires_grad = not freeze

    def freeze_graph(self, freeze):
        """Enable or disable gradients of both convolution weight matrices.

        Only the ``weight`` parameter of each layer is toggled; any other
        parameter of the layers is left untouched.
        """
        trainable = not freeze
        self.gcn_layer1.weight.requires_grad = trainable
        self.gcn_layer2.weight.requires_grad = trainable


class GAT(nn.Module):
    """Multi-head graph attention network assembled from DGL ``GATConv``.

    ``gat_layers`` holds ``n_layers`` attention layers followed by one output
    layer. ``encode`` runs the first ``n_layers`` layers and flattens the head
    dimension; ``forward`` additionally applies the output layer and averages
    over its heads.
    """

    def __init__(self,
                 g,
                 in_feats,
                 n_hidden,
                 n_classes,
                 activation,
                 dropout,
                 n_layers,
                 heads,
                 attn_drop,
                 negative_slope,
                 residual):
        """
        Args:
            g: graph used by ``forward``.
            in_feats: size of each input node feature vector.
            n_hidden: per-head output size of every non-output layer.
            n_classes: per-head output size of the output layer.
            activation: activation of every non-output layer.
            dropout: feature dropout passed to every layer.
            n_layers: number of attention layers before the output layer.
            heads: head count per layer; ``heads[-1]`` belongs to the output
                layer and ``heads[-2]`` to the layer feeding it.
            attn_drop: attention dropout passed to every layer.
            negative_slope: LeakyReLU slope passed to every layer.
            residual: residual flag for all layers except the first.
        """
        super().__init__()
        self.g = g
        self.num_layers = n_layers
        self.activation = activation

        # Options every layer shares.
        common = dict(feat_drop=dropout,
                      attn_drop=attn_drop,
                      negative_slope=negative_slope)

        # Input projection never uses a residual connection.
        convs = [GATConv(in_feats, n_hidden, heads[0],
                         residual=False, activation=activation, **common)]
        # Hidden layers consume the concatenated heads of the previous layer.
        for idx in range(1, n_layers):
            convs.append(GATConv(n_hidden * heads[idx - 1], n_hidden, heads[idx],
                                 residual=residual, activation=activation,
                                 **common))
        # Output projection has no activation.
        convs.append(GATConv(n_hidden * heads[-2], n_classes, heads[-1],
                             residual=residual, activation=None, **common))
        self.gat_layers = nn.ModuleList(convs)

    def forward(self, inputs):
        """Return the output layer's result averaged over its heads."""
        hidden = self.encode(self.g, inputs)
        output_layer = self.gat_layers[-1]
        return output_layer(self.g, hidden).mean(1)

    def encode(self, g, inputs):
        """Run the first ``num_layers`` layers, flattening heads after each."""
        hidden = inputs
        for layer in self.gat_layers[:self.num_layers]:
            hidden = layer(g, hidden).flatten(1)
        return hidden



class GraphSAGE(nn.Module):
    """Stack of DGL ``SAGEConv`` layers ending in an output layer.

    ``layers`` contains one input layer, ``n_layers - 1`` hidden layers and one
    output layer that has no activation.
    """

    def __init__(self,
                 g,
                 in_feats,
                 n_hidden,
                 n_classes,
                 activation,
                 dropout,
                 n_layers,
                 aggregator_type):
        """
        Args:
            g: graph used by ``forward``.
            in_feats: size of each input node feature vector.
            n_hidden: output size of the input and hidden layers.
            n_classes: output size of the final layer.
            activation: activation of every layer except the last.
            dropout: feature dropout passed to every layer.
            n_layers: number of layers before the output layer.
            aggregator_type: aggregator name forwarded to ``SAGEConv``.
        """
        super().__init__()

        def make_conv(d_in, d_out, act):
            return SAGEConv(d_in, d_out, aggregator_type,
                            feat_drop=dropout, activation=act)

        convs = [make_conv(in_feats, n_hidden, activation)]
        convs.extend(make_conv(n_hidden, n_hidden, activation)
                     for _ in range(n_layers - 1))
        convs.append(make_conv(n_hidden, n_classes, None))

        self.layers = nn.ModuleList(convs)
        self.g = g

    def forward(self, features):
        """Encode the features, then apply the output layer."""
        hidden = self.encode(self.g, features)
        output_layer = self.layers[-1]
        return output_layer(self.g, hidden)

    def encode(self, g, features):
        """Apply every layer except the last one and return the result."""
        hidden = features
        for conv in list(self.layers)[:-1]:
            hidden = conv(g, hidden)
        return hidden
    
    
"""
Graph InfoClust in DGL
Implementation is based on https://github.com/dmlc/dgl/tree/master/examples/pytorch/dgi
"""

class Encoder(nn.Module):
    """GCN encoder that can optionally shuffle node features first.

    ``n_layers`` is accepted but not used: the wrapped ``GCN`` always has two
    convolutions, both ``n_hidden`` wide on the output side.
    """

    def __init__(self, g, in_feats, n_hidden, n_layers, activation, dropout):
        super().__init__()
        self.g = g
        self.conv = GCN(g,
                        in_feats=in_feats,
                        n_hidden=n_hidden,
                        n_classes=n_hidden,
                        activation=activation,
                        dropout=dropout)

    def forward(self, features, corrupt=False):
        """Encode ``features``; with ``corrupt`` the rows are permuted first.

        The permutation is drawn with ``torch.randperm`` over the number of
        nodes in the graph.
        """
        if not corrupt:
            return self.conv(features)
        shuffled_rows = torch.randperm(self.g.number_of_nodes())
        return self.conv(features[shuffled_rows])


class Discriminator(nn.Module):
    """Bilinear scorer computing ``features @ (weight @ summary)``."""

    def __init__(self, n_hidden):
        super().__init__()
        # Allocated uninitialised; reset_parameters() fills it in.
        self.weight = nn.Parameter(torch.empty(n_hidden, n_hidden))
        self.reset_parameters()

    def uniform(self, size, tensor):
        """Fill ``tensor`` in place from U(-1/sqrt(size), 1/sqrt(size)).

        Nothing is written when ``tensor`` is ``None``.
        """
        limit = 1.0 / math.sqrt(size)
        if tensor is None:
            return
        tensor.data.uniform_(-limit, limit)

    def reset_parameters(self):
        """Re-draw ``weight``, scaling the range by its first dimension."""
        self.uniform(self.weight.size(0), self.weight)

    def forward(self, features, summary):
        """Score every row of ``features`` against ``summary``."""
        projected_summary = torch.matmul(self.weight, summary)
        return torch.matmul(features, projected_summary)
    
class DiscriminatorK(nn.Module):
    """Parameter-free scorer: row-wise dot product of features and summaries.

    ``n_hidden`` is accepted for signature parity with ``Discriminator`` but
    is not used.
    """

    def __init__(self, n_hidden):
        super().__init__()

    def forward(self, features, summary):
        """Return the per-row dot products as a tensor of shape ``(n, 1, 1)``.

        ``features`` must be 2-D ``(n, h)``; ``summary`` is viewed as
        ``(n, h, 1)`` so it must hold exactly ``n * h`` elements.
        """
        rows, width = features.size()
        lhs = features.view(rows, 1, width)
        rhs = summary.view(rows, width, 1)
        return torch.bmm(lhs, rhs)


    
class GIC(nn.Module):
    """Graph InfoClust model: GCN encoder, graph/cluster discriminators, and a
    linear classification head.

    ``calc_loss`` contrasts real against feature-shuffled embeddings on two
    levels (one graph summary, per-node cluster summaries) and mixes both
    losses with ``alpha``.
    """

    def __init__(self, g, in_feats, n_hidden, n_classes, activation, dropout, n_layers, K, beta, alpha):
        """
        Args:
            g: graph handed to the encoder.
            in_feats: size of each input node feature vector.
            n_hidden: embedding size.
            n_classes: output size of the linear head used by ``forward``.
            activation: activation passed to the encoder.
            dropout: dropout probability passed to the encoder.
            n_layers: forwarded to ``Encoder``.
            K: number of clusters.
            beta: value passed to the clusterator as its softmax temperature.
            alpha: weight of the graph-level loss; ``1 - alpha`` weights the
                cluster-level loss.
        """
        super().__init__()
        # Plain hyper-parameters.
        self.n_hidden = n_hidden
        self.g = g
        self.K = K
        self.beta = beta
        self.alpha = alpha

        # Sub-modules are created in a fixed order so that parameter
        # initialisation and state_dict layout stay stable.
        self.encoder = Encoder(g, in_feats, n_hidden, n_layers, activation, dropout)
        self.discriminator = Discriminator(n_hidden)
        self.discriminator2 = Discriminator(n_hidden)
        self.discriminatorK = DiscriminatorK(n_hidden)
        self.loss = nn.BCEWithLogitsLoss()
        self.cluster = Clusterator(n_hidden, K)
        self.fc = nn.Linear(n_hidden, n_classes)

    def calc_loss(self, features, mask):
        """Return the combined graph-level and cluster-level contrastive loss.

        Only the rows selected by ``mask`` contribute.
        """
        real = self.encoder(features, corrupt=False)[mask]
        fake = self.encoder(features, corrupt=True)[mask]

        graph_summary = torch.sigmoid(real.mean(dim=0))
        centres, assignments = self.cluster(real, self.beta)
        cluster_summary = torch.sigmoid(assignments @ centres)

        def bce(scores, is_real):
            # Real embeddings are labelled 1, corrupted ones 0.
            make_target = torch.ones_like if is_real else torch.zeros_like
            return self.loss(scores, make_target(scores))

        graph_real = bce(self.discriminator(real, graph_summary), True)
        graph_fake = bce(self.discriminator(fake, graph_summary), False)
        graph_term = self.alpha * (graph_real + graph_fake)

        cluster_real = bce(self.discriminatorK(real, cluster_summary), True)
        cluster_fake = bce(self.discriminatorK(fake, cluster_summary), False)
        cluster_term = (1 - self.alpha) * (cluster_real + cluster_fake)

        return graph_term + cluster_term

    def forward(self, features):
        """Classify the uncorrupted embeddings with the linear head."""
        embeddings = self.encoder(features, False)
        return self.fc(embeddings)

    def encode(self, g, inputs):
        """Return uncorrupted embeddings; ``g`` is ignored."""
        return self.encoder(inputs, False)
    

def cluster(data, k, temp, num_iter, init, cluster_temp):
    '''
    pytorch (differentiable) implementation of soft k-means clustering.
    Modified from https://github.com/bwilder0/clusternet

    Rows of ``data`` are L2-normalised, then ``num_iter`` soft k-means steps
    are run using dot-product similarity against L2-normalised centres.

    Args:
        data: 2-D tensor with one row per point.
        k: unused; the number of clusters is the number of rows of ``init``.
        temp: unused.
        num_iter: number of update steps.
        init: 2-D tensor of initial cluster centres.
        cluster_temp: softmax temperature (tensor or number) that multiplies
            the similarities.

    Returns:
        ``(mu, r)``: the centres after the last update and the soft
        assignments. ``r`` is computed from the similarities to the centres
        as they were *before* the last update. With ``num_iter == 0`` the
        untouched ``init`` is returned together with the assignments to its
        normalised rows.

    All work happens on ``data.device``. Earlier versions moved everything to
    CUDA whenever a GPU was present, which broke CPU training on GPU machines
    with a device mismatch in the caller.
    '''
    device = data.device
    mu = init.to(device)
    if torch.is_tensor(cluster_temp):
        cluster_temp = cluster_temp.to(device)

    # The epsilon guards against all-zero rows.
    data = data / (data.norm(dim=1) + 1e-8)[:, None]

    dist = None
    for _ in range(num_iter):
        # similarities between all data points and the normalised centres
        mu = mu / mu.norm(dim=1)[:, None]
        dist = torch.mm(data, mu.transpose(0, 1))

        # cluster responsibilities via softmax
        r = F.softmax(cluster_temp * dist, dim=1)
        # total responsibility of each cluster
        cluster_r = r.sum(dim=0)
        # mean of points in each cluster weighted by responsibility
        cluster_mean = r.t() @ data
        # update cluster means
        mu = torch.diag(1 / cluster_r) @ cluster_mean

    if dist is None:
        # No update step ran; score the data against the initial centres.
        unit_mu = mu / mu.norm(dim=1)[:, None]
        dist = torch.mm(data, unit_mu.transpose(0, 1))

    r = F.softmax(cluster_temp * dist, dim=1)

    return mu, r

class Clusterator(nn.Module):
    '''
    Soft k-means clustering head.
    Modified from https://github.com/bwilder0/clusternet

    Holds ``K`` randomly drawn initial centres of width ``nout`` in the plain
    attribute ``init`` (it is neither a parameter nor a registered buffer).
    The forward pass takes precomputed embeddings and a softmax temperature
    and returns the cluster centres ``mu`` and the soft assignments ``r``.
    The optional ``num_iter`` sets how many k-means steps the warm-up uses.
    '''
    def __init__(self, nout, K):
        super().__init__()
        self.sigmoid = nn.Sigmoid()
        self.K = K
        self.nout = nout
        # Starting centres, drawn uniformly from [0, 1).
        self.init = torch.rand(self.K, nout)

    def forward(self, embeds, cluster_temp, num_iter=10):
        # Warm-up: run the full number of steps from the stored centres.
        warm_mu, _ = cluster(embeds, self.K, 1, num_iter,
                             self.init, torch.tensor(cluster_temp))
        # One more step from a detached copy, so gradients only flow through
        # this final update.
        start = warm_mu.clone().detach()
        return cluster(embeds, self.K, 1, 1,
                       start, torch.tensor(cluster_temp))


## Functions

In [None]:
import time

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

In [None]:
def get_masks(n,
              main_ids,
              main_labels,
              test_ratio,
              val_ratio,
              seed=1):
    """
    Build train/val/test indicator masks from a stratified random split.

    ``main_ids`` is first split into dev/test using ``test_ratio``; the dev
    part is then split into train/val using ``val_ratio``. Both splits are
    stratified by label and use ``seed`` as the random state.

    Returns three float arrays of length ``n`` that hold 1 at the ids assigned
    to the respective split and 0 elsewhere.
    """
    train_mask, val_mask, test_mask = (np.zeros(n) for _ in range(3))

    dev_ids, test_ids, dev_labels, _ = train_test_split(
        main_ids, main_labels,
        stratify=main_labels, test_size=test_ratio, random_state=seed)

    train_ids, val_ids, _, _ = train_test_split(
        dev_ids, dev_labels,
        stratify=dev_labels, test_size=val_ratio, random_state=seed)

    for flags, ids in ((train_mask, train_ids),
                       (val_mask, val_ids),
                       (test_mask, test_ids)):
        flags[ids] = 1

    return train_mask, val_mask, test_mask


In [None]:
def evaluate(model, features, labels, mask):
    """
    Return the micro-averaged F1 score of ``model`` on the masked nodes.

    The model is switched to eval mode and run without gradient tracking; the
    predicted class of a node is the index of its largest logit.
    """
    model.eval()
    with torch.no_grad():
        masked_logits = model(features)[mask]
        y_true = labels[mask].detach().cpu().numpy()
        y_pred = torch.max(masked_logits, dim=1)[1].detach().cpu().numpy()
        return f1_score(y_true, y_pred, average='micro')

In [None]:
import numpy as np
import dgl
from dgl import DGLGraph

def train_gcn_lp(graph,
              features,
              labels,
              model,
              args,
              seed=1,
              n_hidden=64,
              n_epochs=200,
              lr=1e-2,
              weight_decay=5e-4,
              dropout=0.5,
              verbose=False,
              cuda=False, modelType="GAT"):
    """Train a graph model on the ``is_main`` nodes and return node embeddings.

    Args:
        graph: graph whose ``graph.nodes[i]['is_main']`` flag marks the
            labelled nodes; it is also converted with ``DGLGraph(graph)``.
        features: node feature matrix, convertible to ``torch.FloatTensor``.
        labels: node labels, convertible to ``torch.LongTensor``.
        model: model class, called as ``model(g, in_feats=..., n_hidden=...,
            n_classes=..., activation=..., dropout=..., **args)``. The
            instance must provide ``encode(g, features)``.
        args: extra keyword arguments for the model constructor.
        seed: currently unused.
        n_hidden: hidden size passed to the model.
        n_epochs: number of training epochs.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
        dropout: dropout probability passed to the model.
        verbose: print per-epoch statistics.
        cuda: move tensors and the model to ``cuda:0``.
        modelType: ``"GIC"`` adds ``model.calc_loss`` to the training loss.

    Returns:
        NumPy array produced by ``model.encode(model.g, features)`` after
        reloading the best checkpoint.

    Side effects:
        The state dict with the best F1 so far is written to
        ``best_model.pt`` in the working directory.
    """
    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels)

    # Only nodes flagged ``is_main`` enter the loss and the F1 score.
    mask = torch.BoolTensor(
        [bool(graph.nodes[i]['is_main']) for i in range(len(labels))])

    # One output unit per distinct label value. Counted while the labels are
    # still on the CPU, because NumPy cannot read a CUDA tensor.
    n_classes = len(np.unique(labels.numpy()))

    if cuda:
        torch.cuda.set_device("cuda:0")
        features = features.cuda()
        labels = labels.cuda()
        # This function has one mask only; the former train/val/test mask
        # lines referenced names that do not exist here.
        mask = mask.cuda()

    g = DGLGraph(graph)
    g = dgl.transform.add_self_loop(g)
    n_edges = g.number_of_edges()

    # Symmetric-normalisation factor deg^-1/2; zero-degree nodes get 0.
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0

    if cuda:
        norm = norm.cuda()

    g.ndata['norm'] = norm.unsqueeze(1)

    in_feats = features.shape[1]

    ##########
    ##########  HERE WE USE MODEL
    ##########
    model = model(g,
                in_feats=in_feats,
                n_hidden=n_hidden,
                n_classes=n_classes,
                activation=F.relu,
                dropout=dropout, **args)
    if cuda:
        model.cuda()

    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.9,
                                                           patience=20,
                                                           min_lr=1e-10)

    best_f1 = -100
    # Durations of timed epochs; the first three count as warm-up.
    dur = []
    for epoch in range(n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        logits = model(features)
        loss = loss_fcn(logits[mask], labels[mask])
        if modelType == "GIC":
            loss += model.calc_loss(features, mask)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        f1 = evaluate(model, features, labels, mask)
        # The scheduler minimises, so feed it 1 - F1.
        scheduler.step(1 - f1)
        if f1 > best_f1:
            best_f1 = f1
            torch.save(model.state_dict(), 'best_model.pt')

        if verbose:
            # No timing exists during warm-up; report NaN without calling
            # np.mean on an empty list.
            mean_dur = np.mean(dur) if dur else float('nan')
            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | F1 {:.4f} | "
                  "ETputs(KTEPS) {:.2f}".format(epoch, mean_dur, loss.item(),
                                                f1, n_edges / mean_dur / 1000))

    model.load_state_dict(torch.load('best_model.pt'))

    embeddings = model.encode(model.g, features).detach().cpu().numpy()

    return embeddings

## LP Model

In [None]:
class GCN_Model_LP:
    """Adapter that trains a graph model and exposes its node embeddings."""

    def __init__(self, graph, features, model, args, labels=None, dim=80, modelName="GAT"):
        """
        Args:
            graph: graph passed on to ``train_gcn_lp``.
            features: node feature matrix passed on to ``train_gcn_lp``.
            model: model class passed on to ``train_gcn_lp``.
            args: extra constructor keyword arguments for ``model``.
            labels: node labels passed on to ``train_gcn_lp``.
            dim: stored only; it is not forwarded to training.
            modelName: forwarded as ``modelType`` (``"GIC"`` enables the
                extra GIC loss).
        """
        self.graph = graph
        self.features = features
        self.dim = dim
        self.labels = labels
        self.embeddings = None
        self.model = model
        self.args = args
        self.modelName = modelName

    def learn_embeddings(self):
        """Train the model and store the embeddings in ``self.embeddings``."""
        # modelName must go in by keyword: the sixth positional parameter of
        # train_gcn_lp is ``seed``, so passing it positionally left modelType
        # at its default and the GIC loss was never used.
        self.embeddings = train_gcn_lp(self.graph,
                                       self.features,
                                       self.labels,
                                       self.model,
                                       self.args,
                                       modelType=self.modelName)

    def get_embeddings_for_ids(self, ids):
        """Return the embeddings whose row index is in ``ids``, in row order."""
        return [embedding
                for i, embedding in enumerate(self.embeddings)
                if i in ids]

## Experiments

In [None]:
from datasets import Cora, CiteseerM10, Dblp

# (display name, dataset constructor) pairs; the experiment loop calls each
# constructor with no arguments.
datasets = [
   ('Cora', Cora),
   ('CiteseerM10', CiteseerM10),
   ('DBLP', Dblp)
]


from text_transformers import SBert, LDA, W2V, Sent2Vec, Doc2Vec, BOW, TFIDF

# Extra constructor keyword arguments for each graph model.
gatargs = {
              "n_layers": 1,
              "heads": [8, 1],
              "attn_drop": 0.6,
              "negative_slope": 0.2,
              "residual": False
          }

gsargs = {"n_layers": 2, "aggregator_type": "mean"}
gicargs = {
                "n_layers": 1,
                "K": 128,
                "beta": 100,
                "alpha": 0.5
            }

# (task name, factory) pairs. Each factory takes a dataset and builds the
# LpTask for one graph-model / text-embedding combination. ``test_ratios`` is
# looked up when a factory is called, so it may be defined after this cell.
# Every entry pairs the text transformer and the argument dict that its name
# advertises. Previously both GIC(W2V) and GIC(S-BERT) used TFIDF, and the
# GAT/GraphSAGE Sent2Vec entries were handed ``gicargs``, which their
# constructors do not accept.
tasks = [
    ('GAT(W2V)', lambda ds: LpTask(ds, test_ratios, W2V, GCN_Model_LP, GAT, gatargs, d=100, labels=True)),
    ('GraphSAGE(W2V)', lambda ds: LpTask(ds, test_ratios, W2V, GCN_Model_LP, GraphSAGE, gsargs, d=100, labels=True)),
    ('GIC(W2V)', lambda ds: LpTask(ds, test_ratios, W2V, GCN_Model_LP, GIC, gicargs, d=100, labels=True, modelName="GIC")),
    ('GAT(S-BERT)', lambda ds: LpTask(ds, test_ratios, SBert, GCN_Model_LP, GAT, gatargs, d=100, labels=True)),
    ('GraphSAGE(S-BERT)', lambda ds: LpTask(ds, test_ratios, SBert, GCN_Model_LP, GraphSAGE, gsargs, d=100, labels=True)),
    ('GIC(S-BERT)', lambda ds: LpTask(ds, test_ratios, SBert, GCN_Model_LP, GIC, gicargs, d=100, labels=True, modelName="GIC")),
    ('GAT(TFIDF)', lambda ds: LpTask(ds, test_ratios, TFIDF, GCN_Model_LP, GAT, gatargs, d=100, labels=True)),
    ('GraphSAGE(TFIDF)', lambda ds: LpTask(ds, test_ratios, TFIDF, GCN_Model_LP, GraphSAGE, gsargs, d=100, labels=True)),
    ('GIC(TFIDF)', lambda ds: LpTask(ds, test_ratios, TFIDF, GCN_Model_LP, GIC, gicargs, d=100, labels=True, modelName="GIC")),
    ('GAT(Sent2Vec)', lambda ds: LpTask(ds, test_ratios, Sent2Vec, GCN_Model_LP, GAT, gatargs, d=100, labels=True)),
    ('GraphSAGE(Sent2Vec)', lambda ds: LpTask(ds, test_ratios, Sent2Vec, GCN_Model_LP, GraphSAGE, gsargs, d=100, labels=True)),
    ('GIC(Sent2Vec)', lambda ds: LpTask(ds, test_ratios, Sent2Vec, GCN_Model_LP, GIC, gicargs, d=100, labels=True, modelName="GIC")),
]

In [None]:
# Split ratios handed to every LpTask built by the task factories.
test_ratios = [0.5, 0.7, 0.9, 0.95]

In [None]:
import warnings
# Suppress all warnings from here on to keep the experiment output readable.
warnings.simplefilter("ignore")

In [None]:
from tqdm import tqdm
from task import LpTask

# Scores per "<1 - test_ratio> - <dataset> - <task>" key.
res = {}

for ds_name, ds_constr in tqdm(datasets, desc='datasets'):
    ds = ds_constr()
    for task_name, task_constr in tqdm(tasks, desc='Tasks'):
        task_res = task_constr(ds).evaluate()
        res.update({
            f'{1 - test_ratio:.2f} - {ds_name} - {task_name}': task_res[test_ratio]
            for test_ratio in task_res
        })

        # Print the accumulated results after every task as a progress log.
        print(res)

In [None]:
import pandas as pd

def _split_model_label(label):
    """Split ``"Graph Text"`` or ``"Graph(Text)"`` into ``(graph, text)``.

    Missing parts are reported as NaN.
    """
    tokens = label.split()
    if len(tokens) >= 2:
        return tokens[0], tokens[1]
    if not tokens:
        return np.nan, np.nan
    token = tokens[0]
    head, paren, tail = token.partition("(")
    if paren and head and len(tail) > 1 and tail.endswith(")"):
        return head, tail[:-1]
    return token, np.nan


def beautify_results(res):
    """Turn the raw results dict into a tidy DataFrame.

    Args:
        res: mapping from a string key of the form
            ``"<ratio> - <dataset> - <models>"`` to a sequence of scores.
            ``<models>`` is either ``"Graph Text"`` (whitespace separated) or
            ``"Graph(Text)"``.

    Returns:
        DataFrame with the columns ``name``, ``mn`` (mean score), ``error``
        (standard deviation), ``test_ratio``, ``dataset``, ``graph_model``
        and ``text_model``. Key parts that are absent become NaN. The
        ``test_ratio`` column holds the leading part of the key verbatim;
        the experiment loops in this file format it as ``1 - test_ratio``.
        An empty ``res`` gives an empty frame with the same columns.
    """
    columns = ["name", "mn", "error",
               "test_ratio", "dataset", "graph_model", "text_model"]
    rows = []
    for name, scores in res.items():
        parts = name.split(" - ")
        dataset = parts[1] if len(parts) > 1 else np.nan
        if len(parts) > 2:
            graph_model, text_model = _split_model_label(parts[2])
        else:
            graph_model, text_model = np.nan, np.nan
        rows.append({"name": name,
                     "mn": np.mean(scores),
                     "error": np.std(scores),
                     "test_ratio": parts[0],
                     "dataset": dataset,
                     "graph_model": graph_model,
                     "text_model": text_model})
    return pd.DataFrame(rows, columns=columns)

In [None]:
# Summarise the scores collected in ``res`` as a table.
df = beautify_results(res)

In [None]:
# Bare expression: shows the table as the notebook cell's output.
df

# Node classification

In [None]:
import numpy as np
import dgl
from dgl import DGLGraph
import time

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

def get_masks(n,
              main_ids,
              main_labels,
              test_ratio,
              val_ratio,
              seed=1):
    """
    Build train/val/test indicator masks from a stratified random split.

    ``main_ids`` is first split into dev/test using ``test_ratio``; the dev
    part is then split into train/val using ``val_ratio``. Both splits are
    stratified by label and use ``seed`` as the random state.

    Returns three float arrays of length ``n`` that hold 1 at the ids assigned
    to the respective split and 0 elsewhere.
    """
    train_mask, val_mask, test_mask = (np.zeros(n) for _ in range(3))

    dev_ids, test_ids, dev_labels, _ = train_test_split(
        main_ids, main_labels,
        stratify=main_labels, test_size=test_ratio, random_state=seed)

    train_ids, val_ids, _, _ = train_test_split(
        dev_ids, dev_labels,
        stratify=dev_labels, test_size=val_ratio, random_state=seed)

    for flags, ids in ((train_mask, train_ids),
                       (val_mask, val_ids),
                       (test_mask, test_ids)):
        flags[ids] = 1

    return train_mask, val_mask, test_mask


def evaluate(model, features, labels, mask):
    """
    Return the micro-averaged F1 score of ``model`` on the masked nodes.

    The model is switched to eval mode and run without gradient tracking; the
    predicted class of a node is the index of its largest logit.
    """
    model.eval()
    with torch.no_grad():
        masked_logits = model(features)[mask]
        y_true = labels[mask].detach().cpu().numpy()
        y_pred = torch.max(masked_logits, dim=1)[1].detach().cpu().numpy()
        return f1_score(y_true, y_pred, average='micro')

def train_gcn(dataset,
              test_ratio=0.5,
              val_ratio=0.2,
              seed=1,
              n_hidden=64,
              n_epochs=200,
              lr=1e-2,
              weight_decay=5e-4,
              dropout=0.5,
              verbose=True,
              cuda=False, MODEL=GCN, gat_args=None, modelType = "GAT"):
    """Train a node classifier and return its F1 score on the test split.

    Args:
        dataset: object whose ``get_data()`` returns a dict with the keys
            ``features``, ``labels``, ``ids``, ``main_ids``, ``main_labels``,
            ``graph`` and ``n_classes``.
        test_ratio: share of the main nodes held out for testing.
        val_ratio: share of the remaining nodes held out for validation.
        seed: random state of both splits.
        n_hidden: hidden size passed to the model.
        n_epochs: number of training epochs.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
        dropout: dropout probability passed to the model.
        verbose: print per-epoch statistics and the final test score.
        cuda: move tensors and the model to ``cuda:0``.
        MODEL: model class, called as ``MODEL(g, in_feats=..., n_hidden=...,
            n_classes=..., activation=..., dropout=..., **gat_args)``.
        gat_args: extra keyword arguments for ``MODEL``; ``None`` means none.
        modelType: ``"GIC"`` adds ``model.calc_loss`` to the training loss.

    Returns:
        Micro F1 on the test split, measured with the checkpoint that scored
        best on the validation split.

    Side effects:
        The best state dict is written to ``best_model.pt`` in the working
        directory.
    """
    # A None default avoids sharing one mutable dict between calls.
    model_kwargs = {} if gat_args is None else gat_args

    data = dataset.get_data()

    features = torch.FloatTensor(data['features'])
    labels = torch.LongTensor(data['labels'])

    n = len(data['ids'])
    train_mask, val_mask, test_mask = get_masks(n,
                                                data['main_ids'],
                                                data['main_labels'],
                                                test_ratio=test_ratio,
                                                val_ratio=val_ratio,
                                                seed=seed)

    train_mask = torch.BoolTensor(train_mask)
    val_mask = torch.BoolTensor(val_mask)
    test_mask = torch.BoolTensor(test_mask)

    if cuda:
        torch.cuda.set_device("cuda:0")
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = DGLGraph(data['graph'])
    g = dgl.transform.add_self_loop(g)
    n_edges = g.number_of_edges()

    # Symmetric-normalisation factor deg^-1/2; zero-degree nodes get 0.
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0

    if cuda:
        norm = norm.cuda()

    g.ndata['norm'] = norm.unsqueeze(1)

    in_feats = features.shape[1]

    # + 1 for unknown class
    n_classes = data['n_classes'] + 1

    ##########
    ##########  HERE WE USE MODEL
    ##########
    model = MODEL(g,
                in_feats=in_feats,
                n_hidden=n_hidden,
                n_classes=n_classes,
                activation=F.relu,
                dropout=dropout, **model_kwargs)
    if cuda:
        model.cuda()

    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.9,
                                                           patience=20,
                                                           min_lr=1e-10)

    best_f1 = -100
    # Durations of timed epochs; the first three count as warm-up.
    dur = []
    for epoch in range(n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        if modelType == "GIC":
            loss += model.calc_loss(features, train_mask)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        f1 = evaluate(model, features, labels, val_mask)
        # The scheduler minimises, so feed it 1 - F1.
        scheduler.step(1 - f1)
        if f1 > best_f1:
            best_f1 = f1
            torch.save(model.state_dict(), 'best_model.pt')

        if verbose:
            # No timing exists during warm-up; report NaN without calling
            # np.mean on an empty list.
            mean_dur = np.mean(dur) if dur else float('nan')
            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | F1 {:.4f} | "
                  "ETputs(KTEPS) {:.2f}".format(epoch, mean_dur, loss.item(),
                                                f1, n_edges / mean_dur / 1000))

    model.load_state_dict(torch.load('best_model.pt'))
    f1 = evaluate(model, features, labels, test_mask)

    if verbose:
        print()
        print("Test F1 {:.2}".format(f1))

    return f1

In [None]:
from datasets import Cora, CiteseerM10, Dblp

# (display name, dataset constructor) pairs; the experiment loop calls each
# constructor with no arguments.
datasets = [
   ('Cora', Cora),
   ('CiteseerM10', CiteseerM10),
   ('DBLP', Dblp)
]


from text_transformers import SBert, LDA, W2V, Sent2Vec, Doc2Vec, BOW, TFIDF

# (display name, transformer instance) pairs; each instance is handed to
# ``ds.transform_features`` in the experiment loop.
text_transformers = [
    ("TFIDF", TFIDF()),
    ("W2V(d=300)", W2V(train=False, d=300)),
    ("S-BERT(d=300)", SBert(train=False, d=300)),
    ("Sent2Vec(d=600)", Sent2Vec(train=False, d=600)),
]

In [None]:
# One training run per seed; the seed sets the random state of the data split.
seeds = [1, 2]
# Share of the labelled nodes held out for testing in each configuration.
test_ratios = [0.5, 0.7, 0.9, 0.95]

In [None]:
# (model class, model name, extra constructor kwargs) triples. The name is
# also passed to train_gcn as ``modelType``.
models = [
    (GAT, "GAT", {
      "n_layers": 1,
      "heads": [8, 1],
      "attn_drop": 0.6,
      "negative_slope": 0.2,  
      "residual": False
    }),
    (GIC, "GIC", {
        "n_layers": 1,
        "K": 128,
        "beta": 100,
        "alpha": 0.5
    }),
    (GraphSAGE, "GraphSAGE", {"n_layers": 2, "aggregator_type": "mean"})
]

In [None]:
from tqdm import tqdm
# Scores per "<1 - test_ratio> - <dataset> - <model> <text transformer>" key.
res = {}
for ds_name, ds_constr in tqdm(datasets, desc='datasets'):
    ds = ds_constr()
    for text_trans_name, text_transformer in tqdm(text_transformers, "transformers"):
        # Re-embed the node texts before training on them.
        ds.transform_features(text_transformer)
        for model, m_name, args in models:
            for test_ratio in tqdm(test_ratios, desc='test ratio'):
                # One training run per seed.
                scores = [
                    train_gcn(ds, test_ratio, seed=seed, verbose=False,
                              MODEL=model, gat_args=args, modelType=m_name)
                    for seed in seeds
                ]

                nm = f'{1 - test_ratio:.2f} - {ds_name} - {m_name} {text_trans_name}'
                res[nm] = scores
                print()
                print(nm, scores, np.mean(scores), np.std(scores))

In [None]:
# Summarise the node-classification scores collected in ``res`` as a table.
df = beautify_results(res)

In [None]:
# Bare expression: shows the table as the notebook cell's output.
df