## 1 Counting paths in graphlets

In [6]:
import numpy as np
import igraph as ig

#create the depicted graphs unter 1.2)
g1 = ig.Graph()
g1.add_vertices(4)
g1.add_edges([(0,1),(1,2),(1,3),(2,3)])
#print(g1)

g2 = ig.Graph()
g2.add_vertices(4)
g2.add_edges([(0,1),(1,2),(1,3),(2,3),(0,3)])
#print(g2)

g3 = ig.Graph()
g3.add_vertices(5)
g3.add_edges([(0,2),(0,3),(1,2),(2,3),(3,4),(1,4)])
#print(g3)

g4 = ig.Graph()
g4.add_vertices(5)
g4.add_edges([(0,4),(1,4),(1,3),(1,2),(2,3),(3,4)])
#print(g4)

g5 = ig.Graph()
g5.add_vertices(5)
g5.add_edges([(0,1),(1,2),(2,3),(3,4),(4,0),(0,2),(2,4),(1,3)])
#print(g5)


In [7]:
#1.1)
#implement DFS
def init_algo(g):
    #for every start node, span a new directed tree and sum the count of "complete" paths 
    sum = 0
    for v in g.vs:
        c = ig.Graph() #create a new empty graph c
        c.to_directed() 
        n = 0
        c.add_vertices(1) #add the baase vertex
        c.vs[n]['num']=v.index #label it the its index in graph g
        count_paths(g,c,n)
        
        #get the in-neighboorhood (order = #node if graph g) of each node in graph c and store it in an array.
        #where we have a size of the neighborhood = the node count of graph g: we know that this path connect all nodes
        res = np.where(np.array([len(v) for v in c.neighborhood(c.vs,order=g1.vcount(),mode='in')]) == g1.vcount(),1,0)
        sum += np.sum(res) #get only those "complete" paths 
    return sum
    
def count_paths(g,c,n): 
    #get all predecessors of the current lastly added node and get their labels (not indices!)
    pred = c.neighborhood(c.vs[n],order=g.vcount(),mode="in")
    pred = np.array([c.vs[v]['num'] for v in pred])
    #for each neighbor of the current node in g check whether it was already part of the new path in c.
    for neigh in g.neighbors(g.vs[c.vs[n]['num']]):
        cut = False   
        if neigh in pred: #if it was in the path in c already, cut branch
            cut = True
        if not cut: #otherwise, add it as a new node with the correct label and continue with its neighbors in graph g
            c.add_vertices(1)
            c.add_edges([(n,c.vcount()-1)])        
            c.vs[c.vcount()-1]['num'] = neigh
            count_paths(g,c,c.vcount()-1)

In [8]:
#1.2
#count possible paths:
count = init_algo(g1)
print("g1 path count: ",count)

count = init_algo(g2)
print("g2 path count: ",count)

count = init_algo(g3)
print("g3 path count: ",count)

count = init_algo(g4)
print("g4 path count: ",count)

count = init_algo(g5)
print("g5 path count: ",count)

g1 path count:  4
g2 path count:  12
g3 path count:  20
g4 path count:  20
g5 path count:  56


## 2 Sampled harmonic closeness centrality

In [101]:
#2
import igraph as ig
import numpy as np
import random

# load the graph as undirected
g = ig.Graph.Read_Pickle('ogbn-arxiv.pickle').as_undirected()
subg_idx = np.array([v.index for v in g.vs.select(label_eq=12)])
alpha = np.array([0.001,0.01,0.1,1])
alpha = np.array([0.001])
runs = 30
runs = 1

## I assume he meant slide 40???
#2.1
def SCC(landmarks,g,vidx):
    wdist=0
    for mark in landmarks:
        #print("l: ",mark)
        if mark!=v:
            spath = g.get_shortest_paths(g.vs[vidx], to=g.vs[mark], weights=None, mode='all', output='epath') #indices are those of edges!!
            #print(spath, "- ", len(spath[0]))
            wdist+=1/len(spath[0])
    Cl=1/(np.size(landmarks))*wdist
    return Cl



In [104]:
#2.2
result = np.zeros((np.size(alpha),np.size(subg_idx),runs))
for idx, a in enumerate(alpha):
    #select #random landmarks given alpha
    nodes_L = int(alpha[0]*len(g.vs)) #just convert float to int
    #print(nodes_L)
    #print(range(0, len(g.vs)-1))
    for vidx, v in enumerate(subg_idx):
        #print("basis: ",vidx, "-" ,v)
        for r in np.arange(0,runs):
            #select random landmark node indices - part of graph G 
            landmarks = random.sample(range(0,len(g.vs)-1), nodes_L)
            #Make sure V is not in landmarks (else one node would be missing)
            while vidx in landmarks:
                landmarks = random.sample(range(0,len(g.vs)-1), nodes_L)
            
            #print("landmarks: ",landmarks[0:5])
            res = SCC(landmarks,g,v)
            print("base: ",vidx,"- run: ",r, "- ",res)
            result[idx,vidx,r-1] = res
print("done")            

base:  0 - run:  0 -  0.18355426624657387
base:  1 - run:  0 -  0.17235136658213576
base:  2 - run:  0 -  0.19512069127453735
base:  3 - run:  0 -  0.12333854508410712
base:  4 - run:  0 -  0.16708526516218813
base:  5 - run:  0 -  0.15718533603148974
base:  6 - run:  0 -  0.16421783344860266
base:  7 - run:  0 -  0.15630011014626388
base:  8 - run:  0 -  0.16818608741685656
base:  9 - run:  0 -  0.10424581992629321
base:  10 - run:  0 -  0.21387971003355616
base:  11 - run:  0 -  0.1550511454357607
base:  12 - run:  0 -  0.1647906170095518
base:  13 - run:  0 -  0.13016349199781138
base:  14 - run:  0 -  0.1305019766558227
base:  15 - run:  0 -  0.1814362987439909
base:  16 - run:  0 -  0.160565995477238
base:  17 - run:  0 -  0.20708650324034933
base:  18 - run:  0 -  0.24073938881631204
base:  19 - run:  0 -  0.1549708838170376
base:  20 - run:  0 -  0.1153733265419655
base:  21 - run:  0 -  0.17467169582554187
base:  22 - run:  0 -  0.13685203685203676
base:  23 - run:  0 -  0.1294

In [118]:
# get mean and standard deviation for each alpha
for idx, a in enumerate(alpha):
    mean = np.array([np.mean(result[idx,e,:]) for e,_ in enumerate(subg_idx)])
    std = np.array([np.std(result[idx,e,:]) for e,_ in enumerate(subg_idx)])
    print("alpha: ",a)
    print("Mean: ",mean)
    print("Std: ",std)

alpha:  0.001
Mean:  [0.18355427 0.17235137 0.19512069 0.12333855 0.16708527 0.15718534
 0.16421783 0.15630011 0.16818609 0.10424582 0.21387971 0.15505115
 0.16479062 0.13016349 0.13050198 0.1814363  0.160566   0.2070865
 0.24073939 0.15497088 0.11537333 0.1746717  0.13685204 0.12946419
 0.13617728 0.16933451 0.16619257 0.18109327 0.13112663]
Std:  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0.]


## 3 Degeneracy

In [72]:
import igraph
import numpy as np

In [81]:
# load the graph as undirected
g = igraph.Graph.Read_Pickle('ogbn-arxiv.pickle').as_undirected()

In [82]:
def degeneracy(g):
    # get adjacency list
    adj = g.get_adjlist()
    # create result array
    deg = np.zeros((g.vcount()))
    # create A
    arrA = [[] for _ in range(g.vcount())]
    # create B and init with degreee of each vertex
    arrB = g.degree(range(g.vcount()))
    # fill A with lists of vertices that have each degree
    for i, degree in enumerate(arrB):
        arrA[degree].append(i)
    
    # loop over all degrees 
    k = d_min = 0        
    while d_min < g.vcount():
        # if there are no vertices with degree d_min, consider d_min + 1
        if arrA[d_min] == []:
            d_min += 1
        # otherwise, consider a vertex with degree d_min
        else:
            # remove the vertex from A
            vertex = arrA[d_min].pop()
            # for each neighbor of A
            for neigh in adj[vertex]:
                # find its degree
                neigh_degree = arrB[neigh]
                # move the neighbor in A to its new degree list
                arrA[neigh_degree].remove(neigh)
                arrA[neigh_degree - 1].append(neigh)
                # decrease its degree in B
                arrB[neigh] -= 1
                # remove vertex from the adjacency list of its neighbor
                adj[neigh].remove(vertex)
            # compute the coreness of the vertex
            k = max(k, d_min)
            deg[vertex] = k
            # decrease d_min to consider possible ex-neighbors of vertex
            d_min -= 1
            
    return deg

In [83]:
deg = degeneracy(g)

In [84]:
# check if implementation gives the correct result
is_correct = np.all(np.equal(deg, g.coreness()))
print(f'implementation is correct: {is_correct}')

implementation is correct: True
