# Graph Generation

In [None]:
import numpy as np
import networkx as nx
import math
import time
import datetime
import os
import pickle as pkl
import seaborn as sns

import sys
sys.path.insert(0, '../')
import utils
from ahk import AHK_graphon

from matplotlib import pyplot as plt


In [None]:
# Loading training data

def load(name,trainfilepath):
    """Load the train/val/test adjacency matrices and turn them into graphs.

    Parameters
    ----------
    name : str
        Dataset identifier; must be "ego" or "comm". It is only used for
        this sanity check.
    trainfilepath : str
        Path passed to ``np.load``. The loaded object is unpacked into
        exactly three collections of adjacency matrices
        (train, validation, test).

    Returns
    -------
    tuple
        ``(train, val, test)``, each a list of networkx graphs built with
        ``nx.from_numpy_array``.

    Raises
    ------
    AssertionError
        If ``name`` is not one of the supported dataset names.
    """
    assert name in ["ego","comm"]

    # allow_pickle=True permits unpickling object arrays: only load trusted files.
    A_train, A_val, A_test = np.load(trainfilepath,allow_pickle=True)

    def as_graphs(matrices):
        # One graph per adjacency matrix, in the stored order.
        return [nx.from_numpy_array(A) for A in matrices]

    train = as_graphs(A_train)
    val = as_graphs(A_val)
    test = as_graphs(A_test)

    return train,val,test

In [None]:
name="ego"

# The dataset file is expected at ../dataset/<name>/<name>_train_val_test.npy
datadir="../dataset/"+name+"/"
filepath=datadir+name+"_train_val_test.npy"

train_nx,val_nx,test_nx=load(name,filepath)

# Report the number of graphs in each split.
print("{} train {} val {} test graphs".format(*map(len,(train_nx,val_nx,test_nx))))

# Node counts of the training graphs, used only for the summary line below.
sizes=np.array([G.number_of_nodes() for G in train_nx])
print("Max train size: ", sizes.max(), "Min train size: ", sizes.min())

# Convert the training graphs with the project helper.
traindata=utils.batch_nx_to_world(train_nx)

In [None]:
# Sample induced sub-graphs (e.g. for comm data)
#
# For every training graph with at least `minnodes` nodes, draw `numsubgraphs`
# random node subsets and keep the induced sub-graphs. Running this cell
# replaces the `traindata` built from the full training graphs.
# NOTE: sampling uses the global np.random state; no seed is set in the cells
# shown here, so the selection differs between runs.


minnodes=10 # min/max number of nodes selected from each graph (both bounds inclusive)
maxnodes=15
numsubgraphs=2 # number of random induced subgraphs from each original graph

data_sel_nx=[]

for G in train_nx:
    # Graphs with fewer than minnodes nodes are skipped entirely.
    if G.number_of_nodes()>=minnodes:
        nodes=list(G.nodes())
        for j in range(numsubgraphs):
            # np.random.randint excludes `high`; use maxnodes+1 so that maxnodes
            # itself can be drawn (and minnodes==maxnodes does not raise).
            # The draw is capped at the graph size so that sampling without
            # replacement is always possible.
            nn=np.minimum(np.random.randint(minnodes,high=maxnodes+1),G.number_of_nodes())
            nbunch=np.random.choice(nodes,size=nn,replace=False)
            data_sel_nx.append(nx.induced_subgraph(G,nbunch))


#sbmdict={'features':{0:0,1:1}}
sbmdict={}

traindata=utils.batch_nx_to_world(data_sel_nx,sbmdict)


In [None]:
# display random training examples
# (indices are drawn with replacement, so a graph may be shown more than once)
to_display=10

for i in range(to_display):
    # Pick one training example uniformly at random.
    idx=np.random.randint(len(traindata))
    print("Graph ",idx)

    example_nx=traindata[idx].to_nx()
    nx.draw_networkx(example_nx)
    plt.show()

In [None]:
# Learning settings:

# Initialize model
binbounds=utils.uni_bins(1)
learnmodel=AHK_graphon(traindata[0].sig,binbounds)

# All options are collected in one dict that is handed to learnmodel.learn().
settings={
    'num_pi_b':20,
    'batchsize':5,
    'soft':0.001,
    'numepochs':50,
    # Number of epochs with no log-likelihood improvement required for early stopping
    'early_stop':3,
    # the factor by which a bin refinement has improved log-likelihood in order
    # to continue bin refinements
    'bingain':0.01,
    'learn_bins':False,
    'with_trace':False,
    'randombatches':False,
    'adaptbatchsize':False,
    'ubias':0.0,
    'savepath':'../Experiments/DAG/',

    #Adam params:
    'ad_alpha':0.01,
    'ad_beta1':0.9,
    'ad_beta2':0.999,
    'ad_epsilon':1e-8,

    # Seed for the random generator created in the learning cell
    'seed':0,
    'method':"adam",
}

In [None]:
# learn:
# One seeded generator is shared by the random initialisation and the learner.
rng=np.random.default_rng(settings['seed'])
learnmodel.rand_init(rng)

learn_result=learnmodel.learn(
    settings,
    traindata,
    rng,
    exact_gradients=False,
    info_each_epoch=False,
)
# learn() is unpacked as a 3-tuple; the third entry is discarded.
best,loglik,_=learn_result

# Generate graphs

In [None]:
# specify a range of target sample sizes,
# baseline: the empirical size distribution in the test set:
sizes=np.array([G.number_of_nodes() for G in test_nx])

# scale sizes:
scalefactor=1.5
# Node counts must be whole numbers. Flooring gives the same values as the
# int() truncation applied when a size is drawn for sampling, so the
# statistics printed below describe the sizes that are actually generated.
sizes=np.floor(scalefactor*sizes).astype(int)

print("Target sizes: avg:", np.average(sizes), "Min: ", np.min(sizes), "Max: ", np.max(sizes))

In [None]:
# generate and plot:
# Each sample draws a target size from `sizes`, samples a world of that size
# from the learned model, and plots it as a networkx graph.
numsamples=10
for n in range(numsamples):
    target_size=int(rng.choice(sizes))  # int() truncates any fractional size
    w=best.sample_world(target_size,rng)
    w_nx=w.to_nx()
    nx.draw_networkx(w_nx)
    plt.show()