In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import time
import numpy as np
import torch
from torch.nn import Dropout, ELU
import torch.nn.functional as F
from torch import nn
from dgl.nn.pytorch import GATConv
import itertools 
import dgl
from collections import defaultdict as ddict
from tqdm import tqdm
import torch_geometric.data
import torch_geometric
from torch_geometric.datasets import WebKB, WikipediaNetwork, WikiCS, Actor
import pandas as pd
import qgrid
from collections import Counter 


np.set_printoptions(suppress=True, formatter={'float_kind':'{:0.4f}'.format})

In [None]:
class GATOptimized(nn.Module):
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 out_dim,
                 num_layers=1,
                 heads=8,
                 activation=F.elu,
                 feat_drop=.6,
                 attn_drop=.6,
                 negative_slope=.2,
                 residual=False):
        super(GATOptimized, self).__init__()
        self.num_layers = num_layers
        self.gat_layers = nn.ModuleList()
        self.activation = activation
        # input projection (no residual)
        self.gat_layers.append(GATConv(
            in_dim, hidden_dim, heads,
            feat_drop, attn_drop, negative_slope, False, self.activation))
        # hidden layers
        for l in range(1, num_layers):
            # due to multi-head, the in_dim = num_hidden * num_heads
            self.gat_layers.append(GATConv(
                hidden_dim * heads, hidden_dim, heads,
                feat_drop, attn_drop, negative_slope, residual, self.activation))
        # output projection
        self.gat_layers.append(GATConv(
            hidden_dim * heads, out_dim, 1,
            feat_drop, attn_drop, negative_slope, residual, None))

    def forward(self, graph, inputs):
        h = inputs
        g = graph
        for l in range(self.num_layers):
            h = self.gat_layers[l](g, h).flatten(1)
        # output projection
        logits = self.gat_layers[-1](g, h).mean(1)
        return logits

In [None]:
class MLP(torch.nn.Module):
    def __init__(self, in_dim, out_dim):
        super(MLP, self).__init__()
        self.lin = torch.nn.Linear(in_dim, out_dim)

    def forward(self, x):
        return self.lin(x)

In [5]:
def get_web(data):
    device = torch.device('cuda:0') # if torch.cuda.is_available() else 'cpu')
    
    graph = dgl.graph((data.data.edge_index[0], data.data.edge_index[1])).to(device)
    graph = dgl.remove_self_loop(graph)
    graph = dgl.add_self_loop(graph)
    
    train_idx = np.where(data.data.train_mask[:, 0])[0]
    val_idx = np.where(data.data.val_mask[:, 0])[0]
    test_idx = np.where(data.data.test_mask[:, 0])[0]
    
    node_features = data.data.x.to(device)
    labels = data.data.y.to(device)
    
    classes = labels.unique().cpu().numpy()
    num_classes = classes.shape[0]
    num_nodes, in_dim = node_features.shape
    
    return graph, node_features, num_nodes, in_dim, labels, classes, num_classes, train_idx, val_idx, test_idx

In [6]:
def get_citation(name):
    if name.lower() == 'cora':
        data = dgl.data.CoraGraphDataset(verbose=False)
    elif name.lower() == 'citeseer':
        data = dgl.data.CiteseerGraphDataset(verbose=False)
    elif name.lower() == 'pubmed':
        data = dgl.data.PubmedGraphDataset(verbose=False)
    else:
        raise ValueError('Unknown name: {}'.format(name))
        
    device = torch.device('cuda:0') # if torch.cuda.is_available() else 'cpu')
    graph = data[0].to(device)
    graph = dgl.remove_self_loop(graph)
    graph = dgl.add_self_loop(graph)
    node_features = graph.ndata['feat'].to(device)
    num_nodes, in_dim = node_features.shape
    labels = graph.ndata['label'].to(device)
    classes = labels.unique().cpu().numpy()
    num_classes = classes.shape[0]
    train_idx, val_idx, test_idx = np.where(data[0].ndata['train_mask'])[0], np.where(data[0].ndata['val_mask'])[0], np.where(data[0].ndata['test_mask'])[0]
    return graph, node_features, num_nodes, in_dim, labels, classes, num_classes, train_idx, val_idx, test_idx

In [7]:
def get_input(name):
    if name in ['Texas', 'Cornell', 'Wisconsin']:
        data = WebKB('./pyg', name)
        return get_web(data)
    elif name in ['Chameleon', 'Squirrel']:
        data = WikipediaNetwork('./pyg', name)
        return get_web(data)
    elif name in ['Wikics']:
        data = WikiCS('./pyg', name)
        data.data.test_mask = data.data.test_mask.unsqueeze(-1)
        return get_web(data)
    elif name in ['Actor']:
        data = Actor('./pyg')
        data.data.test_mask = data.data.test_mask.unsqueeze(-1)
        return get_web(data)
    elif name in ['cora', 'citeseer', 'pubmed']:
        return get_citation(name)

In [8]:
def argmax(arr, ix):
    best_epoch = -1
    best_value = -1
    for i, el in enumerate(arr):
        if el[ix] > best_value:
            best_value = el[ix]
            best_epoch = i
    return best_value, best_epoch 

In [9]:
def train_gnn(gnn, optimizer, graph, node_features, train_idx, train_labels):
    gnn.train()
    
    logits = gnn(graph, node_features).squeeze()[train_idx]
    loss = F.cross_entropy(logits, train_labels.long())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    return loss

def eval_gnn(gnn, optimizer, graph, node_features, idxs, labels, metrics):
    gnn.eval()
    
    logits = gnn(graph, node_features).squeeze()
    
    losses = []
    accs = []
    for idx in idxs:
        y = labels[idx]
        with torch.no_grad():
            pred = logits[idx]
            losses.append(F.cross_entropy(pred, y.long()).detach().item())
            accs.append((torch.Tensor([(y == pred.max(1)[1]).sum().item()/y.shape[0]])).detach().item())
    
    metrics['loss'].append(losses)
    metrics['acc'].append(accs)

In [78]:
repeats = 1
    
name = 'Texas'
# graph, node_features, num_nodes, in_dim, labels, classes, num_classes, train_idx, val_idx, test_idx = get_input(name)
data = WebKB('./pyg', name)
graph, node_features, num_nodes, in_dim, labels, classes, num_classes, train_idx, val_idx, test_idx = pyg_to_dgl_graph(data)

label_mapping = get_label_mapping(classes, repeat=2)
hidden_dim = 8

test_acc = []
for r in range(repeats):
# gnn = GAT(in_dim, hidden_dim, num_classes).cuda()
    gnn = GATOptimized(in_dim, hidden_dim, num_classes).cuda()

    parameters = [gnn.parameters()]
    optimizer = torch.optim.Adam(itertools.chain(*parameters), lr=0.005, weight_decay=5e-4)

    train_labels = labels[train_idx]

    metrics = ddict(list)
    for epoch in range(500):
        train_gnn(gnn, optimizer, graph, node_features, train_idx, train_labels) 
        eval_gnn(gnn, optimizer, graph, node_features, [train_idx, val_idx, test_idx], labels, metrics)

        if epoch % 200 == 0:
            print(metrics['acc'][-1], metrics['loss'][-1])


    best_acc, best_epoch = argmax(metrics['acc'], ix=1)
    print(f"Best epoch: {best_epoch}, Best accuracy: {metrics['acc'][best_epoch]} repeatitions")
    test_acc.append(metrics['acc'][best_epoch][-1])
    
print(f'{name}: Mean test accuracy {np.mean(test_acc):.3f}+-{np.std(test_acc):.3f} in {repeats}')
plot_interactive([metrics], ['Train', 'Val', 'Test'], title='Accuracy', metric_name='acc')

[0.5517241358757019, 0.5423728823661804, 0.5675675868988037] [1.3296223878860474, 1.3659027814865112, 1.295862078666687]
[0.9080459475517273, 0.5593220591545105, 0.4864864945411682] [0.35996708273887634, 1.595511555671692, 1.7725038528442383]
[0.9080459475517273, 0.5593220591545105, 0.4054054021835327] [0.28869491815567017, 1.680111289024353, 2.025010585784912]
Best epoch: 36, Best accuracy: [0.7471264600753784, 0.6610169410705566, 0.6486486196517944] repeatitions
Texas: Mean test accuracy 0.649+-0.000 in 1


In [10]:
def combine_inputs(features, combined_idx):
    return torch.hstack([features[list(node_idx)] for node_idx in zip(*combined_idx)])

def combine_labels(labels, label_mapping, combined_idx):
    return torch.Tensor([label_mapping[tuple(labels[i].item() for i in idx)] for idx in combined_idx])

def get_label_mapping(classes, repeat=2):
    combined_classes = list(itertools.product(classes, repeat=repeat))
    return {c: i for i, c in enumerate(combined_classes)}

def compute_accuracy(labels, logits, idx):
    pred = logits[idx]
    y = labels[idx]
    return torch.Tensor([(y == pred.max(1)[1]).sum().item()/y.shape[0]]).item()

def get_neighborhood_classes(graph, train_idx):
    us, vs = graph.edges()
    us = us.cpu().numpy()
    vs = vs.cpu().numpy()

    combined_idx = []
    for ix in range(us.shape[0]):
        if us[ix] in train_idx and vs[ix] in train_idx:
            combined_idx.append((us[ix], vs[ix]))
    return combined_idx

def get_edge_node_probs(combined_logits_all, graph, num_classes):
    edge_label_probs = F.softmax(combined_logits_all, dim=1)
    
    nodes = graph.nodes().cpu().numpy()
    edges = graph.edges()
    node_combined_label_probs = []
    for idx in nodes:
        node_edge_probs = edge_label_probs[edges[1][edges[0] == idx]]
        node_edge_probs_agg = node_edge_probs.mean(0)
        pair_to_ind_probs = node_edge_probs_agg.reshape((num_classes, num_classes)).sum(1)
        node_combined_label_probs.append(F.softmax(pair_to_ind_probs, dim=0))

    return torch.vstack(node_combined_label_probs)

In [42]:
def train_model(gnn, mlp_unary, mlp_binary, optimizer, graph, node_features, train_idx, labels, combined_idx, combined_labels, use_combined_loss, use_edge_loss, num_classes):
    gnn.train(), mlp_unary.train(), mlp_binary.train()
    
    h = gnn(graph, node_features).squeeze()
    
    unary_logits = mlp_unary(h)[train_idx]
    unary_loss = F.cross_entropy(unary_logits, labels[train_idx].long())
    
    combined_loss = 0
    if use_combined_loss:
        combined_inputs = combine_inputs(h, combined_idx)
        combined_logits = mlp_binary(combined_inputs)
        combined_loss = F.cross_entropy(combined_logits, combined_labels.long())    
    
    edge_loss = 0
    if use_edge_loss:
        combined_inputs = combine_inputs(h, combined_idx)
        combined_logits = mlp_binary(combined_inputs)
        edge_logits = get_edge_node_probs(combined_logits, graph, num_classes) 
        edge_loss = F.cross_entropy(edge_logits[train_idx], labels[train_idx].long())
    
    loss = unary_loss + combined_loss + edge_loss 
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    return unary_loss.detach().item(), combined_loss

In [43]:
def evaluate_model(gnn, mlp_unary, mlp_binary, optimizer, graph, node_features, labels, idxs, metrics, combined_idx, num_classes):
    gnn.eval(), mlp_unary.eval(), mlp_binary.eval()
    
    h = gnn(graph, node_features).squeeze()
    logits = mlp_unary(h)
    
    combined_inputs = combine_inputs(h, combined_idx)
    combined_logits = mlp_binary(combined_inputs)
    edge_logits = get_edge_node_probs(combined_logits, graph, num_classes) 
    
    losses = []
    accs = []
    accs_edge = []
    accs_total = []
    for idx in idxs:
        y = labels[idx]
        with torch.no_grad():
            pred = logits[idx]
            pred_edge = edge_logits[idx]
            pred_total = pred + pred_edge
            losses.append(F.cross_entropy(pred, y.long()).detach().item())
            accs.append((torch.Tensor([(y == pred.max(1)[1]).sum().item()/y.shape[0]])).detach().item())
            accs_edge.append((torch.Tensor([(y == pred_edge.max(1)[1]).sum().item()/y.shape[0]])).detach().item())
            accs_total.append((torch.Tensor([(y == pred_edge.max(1)[1]).sum().item()/y.shape[0]])).detach().item())
    
    metrics['loss'].append(losses)
    metrics['acc'].append(accs)
    metrics['acc_edge'].append(accs_edge)
    metrics['acc_total'].append(accs_total)

In [17]:
def plot_interactive(metrics_list, legend, title, logx=False, logy=False, metric_name='loss', start_from=0):
    import plotly.graph_objects as go
    fig = go.Figure()
    dash_opt = ['dash', 'dot']
    
    for mi, metrics in enumerate(metrics_list):
        metric_results = metrics[metric_name]
        xs = [list(range(len(metric_results)))] * len(metric_results[0])
        ys = list(zip(*metric_results))


        for i in range(len(ys)):
            fig.add_trace(go.Scatter(x=xs[i][start_from:], y=ys[i][start_from:],
                                     mode='lines+markers',
                                     name=legend[i + mi*3], line={'dash': dash_opt[mi]}))

    fig.update_layout(
        title=title,
        title_x=0.5,
        xaxis_title='Epoch',
        yaxis_title='',
        font=dict(
            size=40,
        ),
        height=600,
    )

    if logx:
        fig.update_layout(xaxis_type="log")
    if logy:
        fig.update_layout(yaxis_type="log")

    fig.show()

In [45]:
def format_lst(lst):
    return ','.join([f"{el:.3f}" for el in lst])

def run(num_epochs=10, hidden_dim=8, out_dim=64, label_repeat=2, use_combined_loss=False, use_edge_loss=True, print_epochs=10, num_runs=1, name='cora'):
    
    
    device = torch.device('cuda:0') # if torch.cuda.is_available() else 'cpu')
    
    graph, node_features, num_nodes, in_dim, labels, classes, num_classes, train_idx, val_idx, test_idx = get_input(name)
    label_mapping = get_label_mapping(classes, repeat=label_repeat)
    
    combined_idx = get_neighborhood_classes(graph, train_idx)
    combined_labels = combine_labels(labels, label_mapping, combined_idx).to(device)
#     binary_labels = combine_labels(labels, label_mapping, train_idx, repeat=label_repeat).to(device)

    test_acc = []
    for r in range(num_runs):
        metrics = ddict(list)
        print(f'Round {r}')
        gnn = GATOptimized(in_dim, hidden_dim, out_dim).to(device)
        mlp_unary = MLP(out_dim, num_classes).to(device)
        mlp_binary = MLP(2*out_dim, len(label_mapping)).to(device)

        parameters = [gnn.parameters(), mlp_unary.parameters(), mlp_binary.parameters()]
        optimizer = torch.optim.Adam(itertools.chain(*parameters), lr=0.005, weight_decay=5e-4)
        
        
        for epoch in range(num_epochs):
            epoch_time = time.time()
            unary_loss, binary_loss = train_model(gnn, mlp_unary, mlp_binary, optimizer, graph, node_features, train_idx, labels, 
                                                  combined_idx, combined_labels, use_combined_loss, use_edge_loss, num_classes)
            evaluate_model(gnn, mlp_unary, mlp_binary, optimizer, graph, node_features, labels, [train_idx, val_idx, test_idx], metrics, combined_idx, num_classes)

    #         pbar.set_description(f"Loss: {unary_loss + binary_loss}, Unary loss: {unary_loss}, Binary loss: {binary_loss}")
            if print_epochs and epoch % print_epochs == 0:
#                 print(f"Epoch: {epoch} Loss: {unary_loss + binary_loss}, Unary loss: {unary_loss}, Binary loss: {binary_loss} "
#                           f"Accuracy: {metrics['acc'][-1]} Time: {time.time() - epoch_time:.2f}")
                print('Epoch:', epoch, '---Acc:', format_lst(metrics['acc'][-1]), '---Loss:', format_lst(metrics['loss'][-1]),
                     "Time:", format_lst([time.time() - epoch_time]))


        best_acc, best_epoch = argmax(metrics['acc'], ix=1)
        print(f"Best epoch: {best_epoch}, Best accuracy: {metrics['acc'][best_epoch]}")
        test_acc.append(metrics['acc'][best_epoch][-1])
    
    print(f'{name}: Mean test accuracy {np.mean(test_acc):.3f}+-{np.std(test_acc):.3f} in {num_runs} rounds')
    
    return metrics, best_epoch

In [47]:
name='cora'
names = ['Texas', 'Cornell', 'Wisconsin', 'Chameleon', 'Squirrel', 'Wikics', 'Actor', 'cora', 'citeseer', 'pubmed']
names = ['Texas']
for name in names[:1]:
    print(name)    
    print('With combined loss', end=' ')
    metrics_all, best_epoch = run(num_epochs=1, use_combined_loss=True, use_edge_loss=True, print_epochs=0, label_repeat=2, num_runs=1, name=name) # For big graphs it takes lot of time to compute binary loss (40x more)
    print()

Texas
With combined loss 

DGLError: [08:14:09] /opt/dgl/src/runtime/cuda/cuda_device_api.cc:97: Check failed: e == cudaSuccess || e == cudaErrorCudartUnloading: CUDA: device-side assert triggered
Stack trace:
  [bt] (0) /mnt/nfs/home/s.ivanov/envs/default/lib/python3.6/site-packages/dgl/libdgl.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x4f) [0x7f0679c4414f]
  [bt] (1) /mnt/nfs/home/s.ivanov/envs/default/lib/python3.6/site-packages/dgl/libdgl.so(dgl::runtime::CUDADeviceAPI::AllocDataSpace(DLContext, unsigned long, unsigned long, DLDataType)+0xfc) [0x7f067a45fcfc]
  [bt] (2) /mnt/nfs/home/s.ivanov/envs/default/lib/python3.6/site-packages/dgl/libdgl.so(dgl::runtime::NDArray::Empty(std::vector<long, std::allocator<long> >, DLDataType, DLContext)+0x177) [0x7f067a31f287]
  [bt] (3) /mnt/nfs/home/s.ivanov/envs/default/lib/python3.6/site-packages/dgl/libdgl.so(dgl::runtime::NDArray::CopyTo(DLContext const&) const+0xc0) [0x7f067a353ee0]
  [bt] (4) /mnt/nfs/home/s.ivanov/envs/default/lib/python3.6/site-packages/dgl/libdgl.so(dgl::UnitGraph::CopyTo(std::shared_ptr<dgl::BaseHeteroGraph>, DLContext const&)+0x2eb) [0x7f067a43310b]
  [bt] (5) /mnt/nfs/home/s.ivanov/envs/default/lib/python3.6/site-packages/dgl/libdgl.so(dgl::HeteroGraph::CopyTo(std::shared_ptr<dgl::BaseHeteroGraph>, DLContext const&)+0xf5) [0x7f067a365105]
  [bt] (6) /mnt/nfs/home/s.ivanov/envs/default/lib/python3.6/site-packages/dgl/libdgl.so(+0xc5a19b) [0x7f067a37219b]
  [bt] (7) /mnt/nfs/home/s.ivanov/envs/default/lib/python3.6/site-packages/dgl/libdgl.so(DGLFuncCall+0x48) [0x7f067a303328]
  [bt] (8) /home/s.ivanov/sync/miniconda3/envs/default/lib/python3.6/lib-dynload/../../libffi.so.7(+0x69dd) [0x7f070c03a9dd]



In [19]:
names = ['Texas', 'Cornell', 'Wisconsin', 'Chameleon', 'Squirrel', 'Wikics', 'Actor', 'cora', 'citeseer', 'pubmed']
names = ['Texas']
for name in names:
    print('Dataset:', name)

    device = torch.device('cuda:0') # if torch.cuda.is_available() else 'cpu')
    num_epochs=400
    use_combined_loss=True
    print_epochs=0
    label_repeat=2
    num_runs=1
    hidden_dim=8
    out_dim=64

    graph, node_features, num_nodes, in_dim, labels, classes, num_classes, train_idx, val_idx, test_idx = get_input(name)
    label_mapping = get_label_mapping(classes, repeat=label_repeat)

    combined_idx = get_neighborhood_classes(graph, train_idx)
    combined_labels = combine_labels(labels, label_mapping, combined_idx).to(device)
    #     binary_labels = combine_labels(labels, label_mapping, train_idx, repeat=label_repeat).to(device)

    test_acc = []
    for r in range(num_runs):
        metrics = ddict(list)
        print(f'Round {r}')
        gnn = GATOptimized(in_dim, hidden_dim, out_dim).to(device)
        mlp_unary = MLP(out_dim, num_classes).to(device)
        mlp_binary = MLP(2*out_dim, len(label_mapping)).to(device)

        parameters = [gnn.parameters(), mlp_unary.parameters(), mlp_binary.parameters()]
        optimizer = torch.optim.Adam(itertools.chain(*parameters), lr=0.005, weight_decay=5e-4)


        for epoch in range(num_epochs):
            epoch_time = time.time()
            unary_loss, binary_loss = train_model(gnn, mlp_unary, mlp_binary, optimizer, graph, node_features, train_idx, labels, combined_idx, combined_labels, use_combined_loss)
            evaluate_model(gnn, mlp_unary, mlp_binary, optimizer, graph, node_features, labels, [train_idx, val_idx, test_idx], metrics)

    #         pbar.set_description(f"Loss: {unary_loss + binary_loss}, Unary loss: {unary_loss}, Binary loss: {binary_loss}")
            if print_epochs and epoch % print_epochs == 0:
    #                 print(f"Epoch: {epoch} Loss: {unary_loss + binary_loss}, Unary loss: {unary_loss}, Binary loss: {binary_loss} "
    #                           f"Accuracy: {metrics['acc'][-1]} Time: {time.time() - epoch_time:.2f}")
                print('Epoch:', epoch, '---Acc:', format_lst(metrics['acc'][-1]), '---Loss:', format_lst(metrics['loss'][-1]),
                     "Time:", format_lst([time.time() - epoch_time]))


        best_acc, best_epoch = argmax(metrics['acc'], ix=1)
        print(f"Best epoch: {best_epoch}, Best accuracy: {metrics['acc'][best_epoch]}")
        test_acc.append(metrics['acc'][best_epoch][-1])

    print(f'{name}: Mean test accuracy {np.mean(test_acc):.3f}+-{np.std(test_acc):.3f} in {num_runs} rounds')

    print(f'Last iteration accuracy', metrics['acc'][-1])

    gnn.eval(), mlp_unary.eval(), mlp_binary.eval()

    h = gnn(graph, node_features).squeeze()
    logits = mlp_unary(h)

    nodes = graph.nodes().cpu().numpy()
    combined_idx_all = get_neighborhood_classes(graph, idx)
    combined_labels_all = combine_labels(labels, label_mapping, combined_idx_all).to(device)
    combined_inputs_all = combine_inputs(h, combined_idx_all)
    combined_logits_all = mlp_binary(combined_inputs_all)

    node_ind_label_probs = F.softmax(logits, dim=1)
    edge_label_probs = F.softmax(combined_logits_all, dim=1)

    edges = graph.edges()
    node_combined_label_probs = []
    for idx in nodes:
        node_edge_probs = edge_label_probs[edges[1][edges[0] == idx]]
        node_edge_probs_agg = node_edge_probs.mean(0)
        pair_to_ind_probs = node_edge_probs_agg.reshape((num_classes, num_classes)).sum(1)
        node_combined_label_probs.append(F.softmax(pair_to_ind_probs, dim=0))

    node_combined_label_probs = torch.vstack(node_combined_label_probs)
    node_total_label_probs = node_ind_label_probs + node_combined_label_probs

    df = pd.DataFrame()

    df['true'] = labels.cpu().numpy()
    df['ind'] = node_ind_label_probs.max(1)[1].cpu().numpy()
    df['combined'] = node_combined_label_probs.max(1)[1].cpu().numpy()
    df['total'] = node_total_label_probs.max(1)[1].cpu().numpy()
    df['ind_+'] = df['ind'] == df['true']
    df['total_+'] = df['total'] == df['true']
    # labels, node_ind_label_probs.max(1)[1],  node_combined_label_probs.max(1)[1], node_total_label_probs.max(1)[1]

    print('Accuracy with combined predictions', df['total_+'].iloc[train_idx].mean(), df['total_+'].iloc[val_idx].mean(), df['total_+'].iloc[test_idx].mean())

Dataset: Texas
Round 0
Best epoch: 111, Best accuracy: [0.7816091775894165, 0.6271186470985413, 0.6216216087341309]
Texas: Mean test accuracy 0.622+-0.000 in 1 rounds
Last iteration accuracy [0.8275862336158752, 0.5423728823661804, 0.45945945382118225]
Accuracy with combined predictions 0.7586206896551724 0.5423728813559322 0.4864864864864865


In [22]:
nodes = graph.nodes().cpu().numpy()
combined_idx_all = get_neighborhood_classes(graph, nodes)
combined_labels_all = combine_labels(labels, label_mapping, combined_idx_all).to(device)
combined_inputs_all = combine_inputs(h, combined_idx_all)
combined_logits_all = mlp_binary(combined_inputs_all)

node_ind_label_probs = F.softmax(logits, dim=1)
edge_label_probs = F.softmax(combined_logits_all, dim=1)

edges = graph.edges()
node_combined_label_probs = []
for idx in nodes:
    node_edge_probs = edge_label_probs[edges[1][edges[0] == idx]]
    node_edge_probs_agg = node_edge_probs.mean(0)
    pair_to_ind_probs = node_edge_probs_agg.reshape((num_classes, num_classes)).sum(1)
    node_combined_label_probs.append(F.softmax(pair_to_ind_probs, dim=0))

node_combined_label_probs = torch.vstack(node_combined_label_probs)
node_total_label_probs = node_ind_label_probs + node_combined_label_probs

In [195]:
get_neighborhood_classes(graph, train_idx).__len__()

199

In [188]:
combined_logits_all[0].shape
[(0,0), (0,1), (0,2), (0,3), (0,4),
(1,0), (1,1), (1,2), (1,3), (1,4),
]

torch.Size([25])

In [154]:
idx = 1909
print(edges[1][edges[0] == idx], Counter(labels[edges[1][edges[0] == idx]].cpu().detach().numpy())) # why 88 node in cora gets index 1 for combined model (it should get 0 or 5)
node_ind_label_probs[idx].cpu().detach().numpy(), node_combined_label_probs[idx].cpu().detach().numpy(), node_total_label_probs[idx].cpu().detach().numpy()

tensor([ 211,  239, 1131, 1171, 1848, 2305, 1909], device='cuda:0') Counter({0: 4, 3: 3})


(array([0.4113, 0.0497, 0.0150, 0.4415, 0.0586, 0.0075, 0.0164],
       dtype=float32),
 array([0.1611, 0.1227, 0.1872, 0.1226, 0.1593, 0.1236, 0.1235],
       dtype=float32),
 array([0.5724, 0.1724, 0.2022, 0.5640, 0.2179, 0.1311, 0.1399],
       dtype=float32))

## TODO
* Chek different datasets with current pipeline
* Try combining heads at train time

In [197]:
qgrid.show_grid(df)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

(0.8706896551724138, 0.7942340305257207, 0.7774927313152044)

In [None]:
(torch.Tensor([(y == pred.max(1)[1]).sum().item()/y.shape[0]])).detach().item())

In [310]:
reverse_mapping[17]

(3.0, 2.0)

In [308]:
node_edge_probs.sum(dim=0).argmax()

tensor(18, device='cuda:0')

In [280]:
node_label_probs.shape, edge_label_probs.shape

(torch.Size([183, 5]), torch.Size([492, 25]))

In [250]:
df = pd.DataFrame(combined_idx_all, columns=['u', 'v'])

In [252]:
df['u_in_train'] = df['u'].apply(lambda x: x in train_idx)
df['v_in_train'] = df['v'].apply(lambda x: x in train_idx)

In [253]:
reverse_mapping = {label: pair for pair, label in label_mapping.items()}

In [254]:
trues = []
preds = []
preds_ind = []
preds_ind_labels = logits.max(1)[1]
preds_labels = combined_logits_all.max(1)[1]
for ix in range(combined_labels_all.shape[0]):
    trues.append(reverse_mapping[combined_labels_all[ix].item()])
    preds.append(reverse_mapping[preds_labels[ix].item()])
    
    u, v = combined_idx_all[ix]
    preds_ind.append((preds_ind_labels[u].item(), preds_ind_labels[v].item()))

In [255]:
df2 = df.join(pd.DataFrame(trues, columns=['u_true', 'v_true']))
df2 = df2.join(pd.DataFrame(preds, columns=['u_pred_combined', 'v_pred_combined']))
df2 = df2.join(pd.DataFrame(preds_ind, columns=['u_pred_ind', 'v_pred_ind']))
df2

Unnamed: 0,u,v,u_in_train,v_in_train,u_true,v_true,u_pred_combined,v_pred_combined,u_pred_ind,v_pred_ind
0,0,58,True,True,3.0,2.0,3.0,2.0,3,3
1,0,121,True,True,3.0,0.0,3.0,0.0,3,0
2,1,80,False,False,0.0,4.0,0.0,0.0,0,0
3,2,8,True,True,2.0,3.0,3.0,3.0,3,3
4,4,66,True,True,4.0,0.0,4.0,0.0,4,0
...,...,...,...,...,...,...,...,...,...,...
487,178,178,False,False,3.0,3.0,4.0,4.0,4,4
488,179,179,True,True,3.0,3.0,3.0,3.0,3,3
489,180,180,False,False,4.0,4.0,0.0,0.0,0,0
490,181,181,True,True,3.0,3.0,3.0,3.0,3,3


In [256]:
df2['u_combined_+'] = df2['u_pred_combined'] == df2['u_true']
df2['v_combined_+'] = df2['v_pred_combined'] == df2['v_true']

df2['u_ind_+'] = df2['u_pred_ind'] == df2['u_true']
df2['v_ind_+'] = df2['v_pred_ind'] == df2['v_true']

In [257]:
new_columns = [col for col in df2.columns if col.startswith('u')] + [col for col in df2.columns if col.startswith('v')]
df2 = df2[new_columns]

In [258]:
df3 = qgrid.show_grid(df2)

In [259]:
df3

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

In [245]:
df3.get_changed_df()

Unnamed: 0,u,u_in_train,u_true,u_pred_combined,u_pred_ind,u_combined_+,u_ind_+,v,v_in_train,v_true,v_pred_combined,v_pred_ind,v_combined_+,v_ind_+
100,30,True,3,2,3,False,True,1358,False,2,2,2,True,True
210,55,True,4,0,4,False,True,2021,False,0,0,0,True,True
550,118,True,6,0,6,False,True,1538,False,5,5,5,True,True
665,146,False,0,3,0,False,True,1907,False,4,3,4,False,True
731,160,False,3,0,3,False,True,277,False,3,0,0,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12712,2156,False,0,5,0,False,True,2156,False,0,5,0,False,True
12809,2253,False,3,4,3,False,True,2253,False,3,4,3,False,True
12886,2330,False,0,1,0,False,True,2330,False,0,1,0,False,True
13157,2601,False,3,1,3,False,True,2601,False,3,1,3,False,True


In [129]:
name='cora'
names = ['Texas', 'Cornell', 'Wisconsin', 'Chameleon', 'Squirrel', 'Wikics', 'Actor', 'cora', 'citeseer', 'pubmed']
for name in names[:1]:
    print(name)
    print('Normal', end=' ')
    metrics_unary_only, best_epoch = run(num_epochs=400, use_combined_loss=False, print_epochs=0, label_repeat=2, num_runs=1, name=name) # For big graphs it takes lot of time to compute binary loss (40x more)
    print('With combined loss', end=' ')
    metrics_unary_only, best_epoch = run(num_epochs=400, use_combined_loss=True, print_epochs=0, label_repeat=2, num_runs=1, name=name) # For big graphs it takes lot of time to compute binary loss (40x more)
    print()

Texas
Normal Round 0
Best epoch: 26, Best accuracy: [0.7356321811676025, 0.6440678238868713, 0.6216216087341309]
Round 1
Best epoch: 85, Best accuracy: [0.7816091775894165, 0.6101694703102112, 0.6216216087341309]
Round 2
Best epoch: 70, Best accuracy: [0.8390804529190063, 0.6101694703102112, 0.5405405163764954]
Round 3
Best epoch: 26, Best accuracy: [0.6436781883239746, 0.6271186470985413, 0.6756756901741028]
Round 4
Best epoch: 22, Best accuracy: [0.6781609058380127, 0.6440678238868713, 0.7027027010917664]
Texas: Mean test accuracy 0.632+-0.056 in 5 rounds
With combined loss Round 0
Best epoch: 27, Best accuracy: [0.7011494040489197, 0.6101694703102112, 0.6216216087341309]
Round 1
Best epoch: 52, Best accuracy: [0.7471264600753784, 0.6101694703102112, 0.6216216087341309]
Round 2
Best epoch: 69, Best accuracy: [0.7471264600753784, 0.6440678238868713, 0.6216216087341309]
Round 3
Best epoch: 41, Best accuracy: [0.6781609058380127, 0.6271186470985413, 0.6216216087341309]
Round 4
Best epoc

Best epoch: 58, Best accuracy: [0.9928571581840515, 0.8040000200271606, 0.8009999990463257]
cora: Mean test accuracy 0.809+-0.011 in 5 rounds
With combined loss Round 0
Best epoch: 206, Best accuracy: [1.0, 0.800000011920929, 0.8090000152587891]
Round 1
Best epoch: 123, Best accuracy: [1.0, 0.8080000281333923, 0.8029999732971191]
Round 2
Best epoch: 77, Best accuracy: [0.9642857313156128, 0.8100000023841858, 0.7919999957084656]
Round 3
Best epoch: 222, Best accuracy: [1.0, 0.7960000038146973, 0.7979999780654907]
Round 4
Best epoch: 122, Best accuracy: [1.0, 0.8040000200271606, 0.7990000247955322]
cora: Mean test accuracy 0.800+-0.006 in 5 rounds

citeseer
Normal Round 0
Best epoch: 17, Best accuracy: [0.925000011920929, 0.734000027179718, 0.7139999866485596]
Round 1
Best epoch: 36, Best accuracy: [0.925000011920929, 0.699999988079071, 0.6990000009536743]
Round 2
Best epoch: 25, Best accuracy: [0.8999999761581421, 0.699999988079071, 0.7039999961853027]
Round 3
Best epoch: 20, Best accur

In [92]:
metrics_full, best_epoch = run(num_epochs=500, use_binary_loss=True, print_epochs=200, label_repeat=2, num_runs=5, name='Texas')

Round 0
Epoch: 0 Loss: 5.484375, Unary loss: 1.8671287298202515, Binary loss: 3.617246389389038 Accuracy: [0.517241358757019, 0.5254237055778503, 0.6216216087341309] Time: 0.02
Epoch: 200 Loss: 2.7386791706085205, Unary loss: 0.9233140349388123, Binary loss: 1.815365195274353 Accuracy: [0.8390804529190063, 0.5254237055778503, 0.5405405163764954] Time: 0.02
Epoch: 400 Loss: 3.5367431640625, Unary loss: 1.198860764503479, Binary loss: 2.3378822803497314 Accuracy: [0.7816091775894165, 0.5593220591545105, 0.5405405163764954] Time: 0.02
Best epoch: 30, Best accuracy: [0.6781609058380127, 0.6101694703102112, 0.6216216087341309]
Round 1
Epoch: 0 Loss: 5.777241230010986, Unary loss: 2.049337148666382, Binary loss: 3.7279040813446045 Accuracy: [0.5287356376647949, 0.5254237055778503, 0.6216216087341309] Time: 0.02
Epoch: 200 Loss: 2.677123785018921, Unary loss: 0.9039499759674072, Binary loss: 1.7731738090515137 Accuracy: [0.8620689511299133, 0.5423728823661804, 0.5135135054588318] Time: 0.02
E

### Next todo: 
- Try to narrow down the neighborhoods for binary loss
- how performance is different per node; is it better classified with more nodes of different class

### Backlog
- Add more datasets
- Try just binary loss and adopt it to predict a class of a single node 
- Try triples of nodes
- Change binary loss to similarity loss so that embeddings of the nodes with the same labels are close to each other

In [93]:
# plot_interactive([metrics_unary_only, metrics_full], ['Train', 'Val', 'Test'], title='Loss', metric_name='loss')
plot_interactive([metrics_unary_only, metrics_full], ['Train', 'Val', 'Test', 'Train-Full', 'Val-Full', 'Test-Full'], title='Accuracy', metric_name='acc')

In [304]:
plot_interactive(metrics, ['Train', 'Val', 'Test'], title='Loss', metric_name='loss')
plot_interactive(metrics, ['Train', 'Val', 'Test'], title='Accuracy', metric_name='acc')

In [241]:
loss, unary_loss, binary_loss

(tensor(5.7202, grad_fn=<AddBackward0>),
 tensor(1.8280, grad_fn=<NllLossBackward>),
 tensor(3.8922, grad_fn=<NllLossBackward>))