In [None]:
import networkx as nx
import numpy as np
from itertools import chain
from collections import Counter
import torch
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pickle
import time

from Graphs import matrix_to_graph, graph_to_matrix, ErdosRenyiGraph, dRegularGraph
from Algorithms import shortestDistances_networkx, shortestDistances_GNN, sampleSets, offlineSketch, shortestDistances_Sarma, shortestDistances_Bourgain
from Models import build,predict,predict_allBatches,run,run_out1

In [None]:
def generateSamples_inner(num_graphs,function,*args,**kwargs):
    """Generate ``num_graphs`` accepted graphs with their sample sets and training tensors.

    ``function(*args, **kwargs)`` is called repeatedly and must return a tuple
    ``(G, directed, weighted)``. Each candidate is reduced to its largest
    connected component (strongly connected if ``G`` is a directed graph),
    relabelled to ``0..num_nodes-1``, and accepted only when that component
    has at least ``max(r, 10)`` nodes, where ``r = floor(sqrt(n))`` and ``n``
    is the node count of the graph as generated.

    For an accepted graph, ``r`` distinct seed nodes are drawn at random.
    Column ``i`` of ``x`` is the one-hot indicator of seed ``i`` and column
    ``i`` of ``y`` is the result of ``shortestDistances_networkx`` for it.

    Parameters
    ----------
    num_graphs : int
        Number of accepted graphs to produce.
    function : callable
        Graph generator returning ``(G, directed, weighted)``.
    *args, **kwargs
        Forwarded unchanged to ``function``.

    Returns
    -------
    graphs : list of tuple
        The accepted ``(G, directed, weighted)`` triples.
    sample_sets : list of tuple
        One 4-tuple per graph: ``sampleSets(graph_info, k)`` for ``k = 1..4``.
    samples : list
        ``[samples_x, samples_y, samples_edge_index, samples_weights]``;
        ``samples_weights`` is only filled for weighted graphs.

    Raises
    ------
    ValueError
        When 10000 candidates have been rejected in total. The most recent
        swallowed exception, if any, is attached as ``__cause__``.
    """
    graphs = []
    # Deliberately NOT named ``sampleSets``: a local with that name would shadow
    # the imported sampleSets() function that is called below.
    sample_sets = []
    samples_x = []
    samples_y = []
    samples_edge_index = []
    samples_weights = []
    k = 0
    n_rejected1 = 0
    n_rejected2 = 0
    last_error = None
    while k < num_graphs:
        try:
            G,directed,weighted = function(*args,**kwargs)
            n = len(G.nodes())
            r = int(np.floor(np.sqrt(n)))
            # networkx only implements strongly_connected_components for directed
            # graphs and connected_components for undirected ones.
            if G.is_directed():
                components = nx.strongly_connected_components(G)
            else:
                components = nx.connected_components(G)
            largest_component = max(components, key=len)
            num_nodes = len(largest_component)
            if num_nodes >= max(r,10):
                G = G.subgraph(largest_component)
                G = nx.relabel_nodes(G, {node: index for index, node in enumerate(G.nodes())})
                graph_info = (G,directed,weighted)
                sample_set = tuple(sampleSets(graph_info,j) for j in (1,2,3,4))
                seeds = np.random.choice(range(num_nodes),size=r,replace=False)
                x = np.zeros((num_nodes,r))
                y = np.zeros((num_nodes,r))
                for i in range(r):
                    u = seeds[i]
                    x[u,i] = 1
                    # NOTE(review): assumes this call yields one distance per node
                    # when ``source`` is given - confirm against Algorithms.
                    y[:,i] = shortestDistances_networkx(graph_info,source=u) ## edit this line to train another type of model
                edge_index = torch.tensor(np.array(list(G.edges())).T).to(torch.int64)
                if weighted:
                    # dict.values() is a view; torch.tensor needs a real sequence.
                    weights = torch.tensor(list(nx.get_edge_attributes(G,'weight').values())).to(torch.float32)
                # Commit only after every step succeeded so the parallel lists
                # never get out of step when a later step raises.
                graphs.append(graph_info)
                sample_sets.append(sample_set)
                samples_x.append(torch.tensor(x.astype(np.float32), requires_grad=True))
                samples_y.append(torch.tensor(y).to(torch.float32))
                samples_edge_index.append(edge_index)
                if weighted:
                    samples_weights.append(weights)
                k += 1
            else:
                n_rejected2 += 1
        except Exception as error:
            # Best-effort: a failing candidate is counted and skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            n_rejected1 += 1
            last_error = error
        if n_rejected1 + n_rejected2 >= 10000:
            raise ValueError('Possibly stuck in an infinite loop.') from last_error
    print('Number of graphs rejected because Bourgain\'s and Sarma\'s algorithms yield errors: ',n_rejected1)
    print('Number of graphs rejected because the largest component has insufficient size: ',n_rejected2)
    return graphs,sample_sets,[samples_x,samples_y,samples_edge_index,samples_weights]

def generateSamples(n_train,n_val,n_test,function,*args,**kwargs):
    """Build the training, validation and test splits, in that order.

    Every split is produced by ``generateSamples_inner`` with the same
    generator ``function`` and the same ``*args`` / ``**kwargs``; only the
    requested number of graphs differs. A progress line is printed before
    each split is generated.

    Returns
    -------
    tuple
        ``(train, val, test)``, each being the value returned by
        ``generateSamples_inner`` for that split.
    """
    plan = (
        ('Generating training data...', n_train),
        ('Generating validation data...', n_val),
        ('Generating test data...', n_test),
    )
    splits = []
    for message, count in plan:
        print(message)
        splits.append(generateSamples_inner(count,function,*args,**kwargs))
    return tuple(splits)

def shortestDistances_Sarma_Bourgain(model,criterion_type,graph_info,sampleSet,actual):
    """Run the Sarma and Bourgain distance estimators in all three modes.

    For every entry ``sampleSet[i]`` six estimators are run, in this row order:

    ====  ==========================  =========================================
    row   function                    extra keyword arguments
    ====  ==========================  =========================================
    0     shortestDistances_Sarma     (none)
    1     shortestDistances_Sarma     method='GNN', model, criterion_type
    2     shortestDistances_Sarma     method='BFS'
    3     shortestDistances_Bourgain  (none)
    4     shortestDistances_Bourgain  method='GNN', model, criterion_type
    5     shortestDistances_Bourgain  method='BFS'
    ====  ==========================  =========================================

    Each call must return ``(dist, d1, d2)``. ``dist`` is compared against
    ``actual`` with ``mean_squared_error``; ``d1`` and ``d2`` are stored as-is
    (presumably timings - confirm in Algorithms).

    Parameters
    ----------
    model, criterion_type
        Forwarded to the ``method='GNN'`` variants only.
    graph_info
        Passed through to every estimator as its first argument.
    sampleSet : sequence
        At most four entries; entry ``i`` fills column ``i`` of the results.
    actual : array-like
        Reference distances used for the mean squared error.

    Returns
    -------
    pred : numpy.ndarray, shape (6, 4), dtype=object
        ``pred[row, i]`` is the ``dist`` value returned by that estimator.
        An object array is used because ``dist`` is itself array-valued and
        cannot be stored in a single float cell. Unfilled cells hold ``0``.
    mse, dur1, dur2 : numpy.ndarray, shape (6, 4), dtype=float
        Mean squared error and the two extra return values per estimator.
        Unfilled cells hold ``0.0``.
    """
    gnn_options = {'method': 'GNN', 'model': model, 'criterion_type': criterion_type}
    bfs_options = {'method': 'BFS'}
    # Row order of the result arrays; must stay in sync with the docstring table.
    variants = (
        (shortestDistances_Sarma, {}),
        (shortestDistances_Sarma, gnn_options),
        (shortestDistances_Sarma, bfs_options),
        (shortestDistances_Bourgain, {}),
        (shortestDistances_Bourgain, gnn_options),
        (shortestDistances_Bourgain, bfs_options),
    )
    pred = np.zeros((6,4),dtype=object)
    mse = np.zeros((6,4))
    dur1 = np.zeros((6,4))
    dur2 = np.zeros((6,4))
    for i in range(len(sampleSet)):
        for row,(estimator,options) in enumerate(variants):
            dist,d1,d2 = estimator(graph_info,sampleSet[i],**options)
            pred[row,i] = dist
            mse[row,i] = mean_squared_error(actual, dist)
            dur1[row,i] = d1
            dur2[row,i] = d2
    return pred,mse,dur1,dur2

def evaluate_all_distances(model,criterion_type,graphs,sampleSets,title=None,display_results=False):
    """Compare every distance estimator against networkx on each graph.

    ``graphs`` and ``sampleSets`` are walked in lockstep. For each pair the
    reference distances come from ``shortestDistances_networkx``, the direct
    GNN prediction from ``shortestDistances_GNN`` and the sketch-based
    predictions from ``shortestDistances_Sarma_Bourgain``. Errors and
    durations are then averaged over the graphs (first axis).

    ``title`` and ``display_results`` are accepted but currently have no effect.

    Returns
    -------
    predictions : list
        ``[all_actual, all_pred_GNN, all_pred]`` - one entry per graph each.
    errors : list
        ``[mean_mse_GNN, mean_mse]``.
    durations : list
        ``[mean_dur_network, mean_dur_GNN, mean_dur1, mean_dur2]``.
    """
    def _mean_over_graphs(per_graph_values):
        # Stack the per-graph values and average along the graph axis.
        return np.mean(np.array(per_graph_values),axis=0)

    references, reference_times = [], []
    gnn_predictions, gnn_errors, gnn_times = [], [], []
    sketch_predictions, sketch_errors = [], []
    sketch_times_a, sketch_times_b = [], []

    paired_inputs = list(zip(graphs,sampleSets))
    for graph_info,sampleSet in paired_inputs:
        actual,dur_network = shortestDistances_networkx(graph_info)
        pred_GNN,dur_GNN = shortestDistances_GNN(model,criterion_type,graph_info)
        pred,mse,dur1,dur2 = shortestDistances_Sarma_Bourgain(model,criterion_type,graph_info,sampleSet,actual)

        references.append(actual)
        reference_times.append(dur_network)
        gnn_predictions.append(pred_GNN)
        gnn_errors.append(mean_squared_error(actual, pred_GNN))
        gnn_times.append(dur_GNN)
        sketch_predictions.append(pred)
        sketch_errors.append(mse)
        sketch_times_a.append(dur1)
        sketch_times_b.append(dur2)

    mean_dur_network = _mean_over_graphs(reference_times)
    mean_mse_GNN = _mean_over_graphs(gnn_errors)
    mean_dur_GNN = _mean_over_graphs(gnn_times)
    mean_mse = _mean_over_graphs(sketch_errors)
    mean_dur1 = _mean_over_graphs(sketch_times_a)
    mean_dur2 = _mean_over_graphs(sketch_times_b)

    predictions = [references,gnn_predictions,sketch_predictions]
    errors = [mean_mse_GNN,mean_mse]
    durations = [mean_dur_network,mean_dur_GNN,mean_dur1,mean_dur2]
    return predictions,errors,durations
    
def evaluate_random_distances(alpha,model,criterion_type,graphs,sampleSets,title=None,display_results=False):
    """Evaluate the estimators on a random subset of node pairs (unfinished).

    For every ``(graph_info, sampleSet)`` pair this draws distinct ordered
    pairs of distinct node indices, then derives a list of seed nodes from
    them. How the seeds are chosen depends on ``graph_info[1]`` (the directed
    flag):

    * directed: the seeds are the distinct first elements of the drawn pairs;
    * otherwise: seeds are picked greedily - the node occurring most often in
      the remaining pairs is taken and every pair containing it is dropped,
      until no pairs remain.

    NOTE(review): this function is work in progress and cannot run as written:

    * the directed branch reads ``num_seeds``, ``actual`` and ``dur_network``,
      none of which is assigned anywhere in this function;
    * the undirected branch computes ``seeds`` and then does nothing with them;
    * there is no ``return`` statement, so the function returns ``None`` and
      the ``all_*`` accumulators are discarded;
    * ``title`` and ``display_results`` are never used.

    Parameters
    ----------
    alpha
        The number of pairs to draw is governed by
        ``(num_nodes*(num_nodes-1)/2) ** (1/alpha)``.
    model, criterion_type
        Forwarded to ``shortestDistances_GNN`` and
        ``shortestDistances_Sarma_Bourgain``.
    graphs, sampleSets
        Iterated in lockstep; each ``graph_info`` is indexed as
        ``graph_info[0]`` (the graph) and ``graph_info[1]`` (directed flag).
    """

    all_actual = []
    all_dur_network = []
    all_pred_GNN = []
    all_mse_GNN = []
    all_dur_GNN = []
    all_pred = []
    all_mse = []
    all_dur1 = []
    all_dur2 = []
    for graph_info,sampleSet in list(zip(graphs,sampleSets)):
        num_nodes = len(graph_info[0].nodes())
        num_pairs = (num_nodes*(num_nodes-1)/2)**(1/alpha)
        k = 0
        nodes = range(num_nodes)
        pairs = []
        pairs_dump = []
        # Rejection-sample distinct ordered pairs; (a, b) and (b, a) count as
        # different pairs. Because the test is ``<=``, floor(num_pairs) + 1
        # pairs are collected.
        # NOTE(review): for alpha < 1 num_pairs can exceed the number of
        # distinct ordered pairs, in which case this loop never terminates.
        while k <= num_pairs:
            pair = tuple(np.random.choice(nodes,size=2,replace=False))
            if pair not in pairs:
                pairs.append(pair)
                pairs_dump.append(pair)
                k += 1

        if graph_info[1]: # if directed
            # Seeds are the distinct source nodes (first element) of the pairs.
            seeds = list(set(np.array(pairs_dump)[:,0]))
            # NOTE(review): ``num_seeds`` is undefined here (len(seeds) was
            # probably intended) and ``dist`` is never read afterwards.
            dist = np.zeros((num_nodes,num_seeds))
            #for s in seeds:
            #actual,dur_network = shortestDistances_networkx(graph_info)

            pred_GNN,dur_GNN = shortestDistances_GNN(model,criterion_type,graph_info,seeds=seeds)

            # NOTE(review): ``actual`` and ``dur_network`` are only assigned in
            # the commented-out lines above, so the statements below raise
            # NameError unless those names exist at module level.
            pred,mse,dur1,dur2 = shortestDistances_Sarma_Bourgain(model,criterion_type,graph_info,sampleSet,actual)
            all_actual.append(actual)
            all_dur_network.append(dur_network)
            all_pred_GNN.append(pred_GNN)
            all_mse_GNN.append(mean_squared_error(actual, pred_GNN))
            all_dur_GNN.append(dur_GNN)
            all_pred.append(pred)
            all_mse.append(mse)
            all_dur1.append(dur1)
            all_dur2.append(dur2)











        else:
            # Greedy cover: keep taking the node that appears in the most
            # remaining pairs and drop every pair that contains it.
            seeds = []
            while len(pairs_dump) > 0:
                flattened = list(chain(*pairs_dump))
                # NOTE(review): the argument of most_common() is the number of
                # entries to return, but a node id (flattened[-1]) is passed.
                # When that id is 0 the result is empty, None is appended and
                # the filter below removes nothing, so the loop never ends;
                # most_common(1) was probably intended.
                most_frequent_item = Counter(flattened).most_common(flattened[-1])
                seeds.append(most_frequent_item[0][0] if most_frequent_item else None)
                pairs_dump = [p for p in pairs_dump if seeds[-1] not in p]
            
        
        


In [None]:
# Graph sizes: 10, 20, ..., 90 followed by 100 * 2**j for j = 0..8.
graph_sizes = list(np.array(range(1,10))*10)+list(2**np.array(range(9))*100)
for n in graph_sizes:
    for lbd in [2,4,6,8]:
        train,val,test = generateSamples(200,50,50,dRegularGraph,n,lbd)
        # Every split is the 3-tuple returned by generateSamples_inner:
        # (graphs, per-graph sample sets, sample tensors). Indexing a split
        # with 3 or 4 is therefore out of range.
        # NOTE(review): the k1..k4 columns below assume each per-graph entry
        # holds the sampleSets(graph, k) results for k = 1..4 in that order -
        # confirm against generateSamples_inner.
        graphs_train, sampleSets_train, samples_train = train
        sampleSets_k1_train = [per_graph[0] for per_graph in sampleSets_train]
        sampleSets_k2_train = [per_graph[1] for per_graph in sampleSets_train]
        sampleSets_k3_train = [per_graph[2] for per_graph in sampleSets_train]
        sampleSets_k4_train = [per_graph[3] for per_graph in sampleSets_train]
        graphs_val, sampleSets_val, samples_val = val
        sampleSets_k1_val = [per_graph[0] for per_graph in sampleSets_val]
        sampleSets_k2_val = [per_graph[1] for per_graph in sampleSets_val]
        sampleSets_k3_val = [per_graph[2] for per_graph in sampleSets_val]
        sampleSets_k4_val = [per_graph[3] for per_graph in sampleSets_val]
        # Bound to graphs_test; the original rebound graphs_val here and lost
        # the validation graphs.
        graphs_test, sampleSets_test, samples_test = test
        sampleSets_k1_test = [per_graph[0] for per_graph in sampleSets_test]
        sampleSets_k2_test = [per_graph[1] for per_graph in sampleSets_test]
        sampleSets_k3_test = [per_graph[2] for per_graph in sampleSets_test]
        sampleSets_k4_test = [per_graph[3] for per_graph in sampleSets_test]
        
        