In [1]:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
import math

from collections import Counter
a4_dims = (11.69,8.27)
from scipy.stats import poisson, zipf, rv_discrete
from scipy.stats import ks_2samp
import seaborn as sns
from scipy.special import comb

In [2]:
def binom(k,k_sampled,p):
    return comb(k,k_sampled)* np.power(p,k_sampled)*np.power(1-p,k-k_sampled)

In [3]:
def edge_sample(G,p,random_seed=None):
    # optionally seed for reproducibility
    random.seed(random_seed)
    H = nx.Graph()
    for (u,v) in G.edges:
        if random.random()<p:
            H.add_edge(u,v)
    return H

In [4]:
## Basic estimator for degree sequence which does not redistribute any spare or missing links
def deg_MME_basic(k_seq,sample_prob,as_int=False):
    if sample_prob==0.0:
        return np.zeros(shape=len(k_seq), dtype=float)
    estimated = k_seq/sample_prob
    if (as_int):
        return np.array([*map(int,estimated)])
    return estimated

print(deg_MME_basic(np.array([3,2,5,2]),0.5,True))

[ 6  4 10  4]


In [6]:
## Estimator which first obtains MME one then deals with rounding discrepency by redistributing links
def deg_MME(sampled_graph, probability_keep, redistribute_links=True):

    # Number of nodes/links in sampled graph
    N_prime, M_prime = sampled_graph.number_of_nodes(), sampled_graph.number_of_edges()

    ## Estimated number of links from sampled graph
    M_estimated = int(M_prime/p)

    ## Sampled and scaled-up degree distribution
    k_prime = np.array([d for (_,d) in nx.degree(sampled_graph)])
    k_est = deg_MME_basic(k_seq=k_prime,sample_prob=probability_keep,as_int=True)

    if (redistribute_links==False):
        return np.array(k_est)

    ## "Left over" stubs from the rounding process
    k_spare = 2*M_estimated - sum(k_est)

    ## Distribute these random stubs if there are any
    if k_spare>0:
        sampled_nodes = random.sample(range(N_prime),k_spare)
        for node in sampled_nodes:
            k_est[node]+=1

    ## If we have given nodes more connections than there are links, randomly remove some
    if k_spare<0:
        non_isolated_nodes = list(filter(lambda ind: k_est[ind]>0, range(N_prime)))
        sampled_nodes = random.sample(non_isolated_nodes,abs(k_spare))
        for node in sampled_nodes:
            k_est[node]-=1
    
    return np.array(k_est) 

In [19]:
def monte_carlo_degree(sampled_graph,probability_keep):

    # Number of nodes/links in sampled graph
    N_prime, M_prime = sampled_graph.number_of_nodes(), sampled_graph.number_of_edges()

    observed_degree = np.array([d for (_,d) in nx.degree(sampled_graph)])
    estimated_degree = deg_MME(sampled_graph=sampled_graph, probability_keep=probability_keep,redistribute_links=True)

    ## Expected degree of sampled network according to binomial
    sampled_sequence = estimated_degree * probability_keep *1.0
    #print(sampled_sequence)

    ## Sum of squared distances as base quality metric
    ssd_current = sum((sampled_sequence - observed_degree)**2)
    #print(observed_degree)
    
    # Commence Monte Carlo process
    cts_accept=0
    for i in range(15000):
        ## Randomly rewire an edge
        [n1, n2] = random.sample(range(N_prime),2)
        # if i==1:
        #     print(n1,n2)

        ## Ensure we don't leave any isolated nodes
        while(estimated_degree[n1]<=1):
            [n1,n2] = random.sample(range(N_prime),2)
        estimated_degree[n1]-=1
        estimated_degree[n2]+=1

        sampled_sequence = estimated_degree * probability_keep
        ssd_new = sum((sampled_sequence - observed_degree)**2)

        ## Reject step if error is larger:
        if (ssd_new > ssd_current):
            estimated_degree[n1]+=1
            estimated_degree[n2]-=1
        else:
            ssd_current = ssd_new
            cts_accept+=1

    return estimated_degree, ssd_current, cts_accept



In [None]:
def bayes_approx(approx_sequence,observed,prob_retain):
    N_approx = len(approx_sequence)

    ## transform from degree sequence to degree distribution
    deg_counts = Counter(approx_sequence)

    ## max value to use for the degree
    k_max = max(deg_counts.keys)

    ## construct prior from approx sequence
    prior = [deg_counts[k]/N_approx for k in range(k_max+1)]
    posterior = np.zeros(N_approx)

    for i in range(no_nodes):
        k_observed = observed[i]
        k_range = range(k_observed,k_max+1)

        ## lambda function so can be applied to numpy array
        binom_ev = lambda k : binom(k,k_observed,prob_retain)

        denom = np.dot(binom_ev(k_range),prior[k_observed:k_max])
        numer = np.dot(binom_ev(k_range),prior[k_observed:k_max])
    

In [24]:
N,M=1000,5000
G = nx.gnm_random_graph(N,M)
p_range = [random.random() for _ in range(20)]
for p in p_range:
    H = edge_sample(G,p)
    true_degrees = np.array([d for (n,d) in nx.degree(G) if n in H.nodes])
    deg, ssd_error, accepted = monte_carlo_degree(H,p)
    true_error = sum((true_degrees - deg)**2)/len(deg)
    print(p,true_error,accepted)

    

0.521943003820738 26.235412474849095 631
0.43630180748627145 30.01011122345804 606
0.5642903764225362 27.29748743718593 566
0.6848185609546276 22.864 620
0.2459706495284646 43.65033407572383 362
0.8663824914800778 19.574 590
0.8994917925043322 19.19 584
0.8484950794813758 20.83 580
0.6834326970404706 22.896 585
0.9863826270236359 20.272 315
0.7558715973624709 21.105105105105107 584
0.6945178139031419 23.538 629
0.9350640514074872 20.382 586
0.663398347303056 24.299899699097292 535
0.008232888451799925 17272.603174603173 1023
0.936373182229168 20.498 600
0.3216267846122026 37.78630705394191 583
0.9203265167582974 19.326 554
0.5214034449487971 26.857715430861724 637
0.7860667966142867 21.9 610
