In [1]:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
import math

from collections import Counter
a4_dims = (11.69,8.27)
from scipy.stats import poisson, zipf, rv_discrete
from scipy.stats import ks_2samp
import seaborn as sns
from scipy.special import comb

In [2]:
def binom(k,k_sampled,p):
    """Binomial probability of keeping ``k_sampled`` out of ``k`` items.

    Each item is kept independently with probability ``p``; the value
    returned is C(k, k_sampled) * p**k_sampled * (1-p)**(k-k_sampled).
    """
    # Number of ways to choose which items are kept.
    n_ways = comb(k, k_sampled)
    # Probability that the chosen items are all kept ...
    p_kept = np.power(p, k_sampled)
    # ... and that the remaining ones are all dropped.
    p_dropped = np.power(1 - p, k - k_sampled)
    return n_ways * p_kept * p_dropped

In [3]:
def edge_sample(G,p,random_seed=None):
    """Return a new graph holding each edge of ``G`` independently with probability ``p``.

    The module-level ``random`` generator is (re)seeded with ``random_seed``
    on every call; pass a fixed value to make the sample reproducible.
    Nodes of ``G`` that lose all of their edges do not appear in the result.
    """
    random.seed(random_seed)
    # One uniform draw per edge, taken in the iteration order of G.edges.
    kept_edges = [(u, v) for (u, v) in G.edges if random.random() < p]
    H = nx.Graph()
    H.add_edges_from(kept_edges)
    return H

In [4]:
## Basic estimator for degree sequence which does not redistribute any spare or missing links
def deg_MME_basic(k_seq,sample_prob,as_int=False):
    """Scale an observed degree sequence up by the sampling probability.

    Parameters
    ----------
    k_seq : array-like of numbers
        Degrees observed in the sampled graph. Lists and tuples are accepted
        as well as NumPy arrays.
    sample_prob : float
        Probability with which each edge was kept. A value of 0 yields an
        all-zero estimate instead of dividing by zero.
    as_int : bool, optional
        If True, every estimate is truncated with ``int()`` and an integer
        array is returned; otherwise the raw float quotients are returned.

    Returns
    -------
    numpy.ndarray
        ``k_seq / sample_prob``; integer dtype when ``as_int`` is True
        (including the zero-probability and empty-input cases), float
        otherwise.
    """
    # Coerce once so that plain Python sequences support the division below.
    degrees = np.asarray(k_seq)
    if sample_prob == 0.0:
        # Keep the dtype consistent with what the caller asked for.
        return np.zeros(degrees.shape, dtype=int if as_int else float)
    estimated = degrees / sample_prob
    if as_int:
        # int() truncates toward zero; dtype is pinned so an empty input
        # still produces an integer array.
        return np.array([int(value) for value in estimated], dtype=int)
    return estimated

# Quick check: with a keep-probability of 0.5 every observed degree is doubled.
print(deg_MME_basic(k_seq=np.array([3, 2, 5, 2]), sample_prob=0.5, as_int=True))

[ 6  4 10  4]


In [22]:
## Estimator which first obtains the basic MME estimate, then deals with the rounding discrepancy by redistributing links
def deg_MME(sampled_graph, probability_keep, redistribute_links=True):
    """Estimate the original degree of every node of an edge-sampled graph.

    Each observed degree is divided by ``probability_keep`` and truncated to
    an integer (see ``deg_MME_basic``). Truncation makes the estimated
    degrees sum to something other than twice the estimated edge count, so
    the difference is optionally handed out to (or taken from) randomly
    chosen nodes, one unit of degree per node.

    Parameters
    ----------
    sampled_graph : graph
        Sampled graph; must provide ``number_of_nodes()`` and
        ``number_of_edges()`` and work with ``nx.degree``.
    probability_keep : float
        Probability with which each edge was retained; must be positive.
    redistribute_links : bool, optional
        If False, return the truncated estimates without any correction.

    Returns
    -------
    numpy.ndarray
        Estimated degrees, in the iteration order of ``nx.degree(sampled_graph)``.

    Raises
    ------
    ValueError
        If ``probability_keep`` is not positive.
    """
    if probability_keep <= 0:
        raise ValueError("probability_keep must be positive")

    # Number of nodes/links in sampled graph
    N_prime, M_prime = sampled_graph.number_of_nodes(), sampled_graph.number_of_edges()

    ## Estimated number of links from sampled graph.
    ## Uses the function's own argument rather than a global named `p`.
    M_estimated = int(M_prime/probability_keep)

    ## Sampled and scaled-up degree distribution
    k_prime = np.array([d for (_,d) in nx.degree(sampled_graph)])
    k_est = deg_MME_basic(k_seq=k_prime,sample_prob=probability_keep,as_int=True)

    if not redistribute_links:
        return np.array(k_est)

    ## "Left over" stubs from the rounding process (plain int so it can be
    ## used directly as a sample size)
    k_spare = int(2*M_estimated - np.sum(k_est))

    if k_spare>0:
        ## Hand each spare stub to a distinct, randomly chosen node
        for node in random.sample(range(N_prime),k_spare):
            k_est[node]+=1
    elif k_spare<0:
        ## We have given nodes more connections than there are links:
        ## randomly remove some, but only from nodes that still have a stub
        non_isolated_nodes = [ind for ind in range(N_prime) if k_est[ind]>0]
        for node in random.sample(non_isolated_nodes,-k_spare):
            k_est[node]-=1

    return np.array(k_est)

In [37]:
def monte_carlo_degree(sampled_graph,probability_keep,n_steps=15000):
    """Refine the scaled-up degree estimate of a sampled graph by random stub moves.

    Starts from ``deg_MME(sampled_graph, probability_keep)`` and repeatedly
    moves one unit of degree from a randomly chosen node to another. A move
    is kept unless it increases the sum of squared differences (SSD) between
    ``estimated_degree * probability_keep`` and the degrees observed in
    ``sampled_graph``.

    Parameters
    ----------
    sampled_graph : graph
        Sampled graph; must provide ``number_of_nodes()`` and work with
        ``nx.degree``.
    probability_keep : float
        Probability with which each edge was retained.
    n_steps : int, optional
        Number of proposed moves. Defaults to 15000, the value that used to
        be hard-coded.

    Returns
    -------
    tuple
        ``(estimated_degree, ssd)``: the estimated degree per node, in the
        iteration order of ``nx.degree(sampled_graph)``, and the final SSD.
    """
    # Number of nodes in sampled graph
    N_prime = sampled_graph.number_of_nodes()

    observed_degree = np.array([d for (_,d) in nx.degree(sampled_graph)])
    estimated_degree = deg_MME(sampled_graph=sampled_graph, probability_keep=probability_keep,redistribute_links=True)

    def _ssd(degrees):
        ## Squared distance between the expected sampled degrees and the observed ones
        return np.sum((degrees * probability_keep - observed_degree)**2)

    ## Sum of squared distances as base quality metric
    ssd_current = _ssd(estimated_degree)

    ## A move needs two distinct nodes; with fewer there is nothing to refine
    if N_prime < 2:
        return estimated_degree, ssd_current

    # Commence Monte Carlo process
    for _ in range(n_steps):
        ## Without any node of degree > 1 the donor search below would never
        ## terminate, so stop early instead
        if not np.any(estimated_degree > 1):
            break

        ## Pick a donor n1 and a receiver n2; redraw until the donor would
        ## keep at least one stub, so that no node is left isolated
        n1, n2 = random.sample(range(N_prime),2)
        while estimated_degree[n1]<=1:
            n1, n2 = random.sample(range(N_prime),2)
        estimated_degree[n1]-=1
        estimated_degree[n2]+=1

        ssd_new = _ssd(estimated_degree)

        if ssd_new > ssd_current:
            ## Reject step if error is larger: undo the move
            estimated_degree[n1]+=1
            estimated_degree[n2]-=1
        else:
            ssd_current = ssd_new

    return estimated_degree, ssd_current



In [None]:
def bayes_approx(approx_prior,observed,prob_retain):
    """Unfinished stub: normalises a prior and allocates an empty posterior.

    ``observed`` and ``prob_retain`` are accepted but not used yet, and
    nothing is returned.
    """
    # Rescale the supplied prior weights so that they sum to one.
    normaliser = sum(approx_prior)
    prior = approx_prior/normaliser
    # Zero-filled placeholder with one entry per prior entry.
    posterior = np.zeros(len(prior))
    # TODO: the posterior is never filled in or returned.
    return None
    


In [39]:
# Experiment: sample a G(N, M) random graph at a range of keep-probabilities
# and run the Monte Carlo degree estimator on each sample.
N,M=1000,5000
G = nx.gnm_random_graph(N,M)
p_range = np.linspace(0.05,1.0,20)
for p in p_range:
    H = edge_sample(G,p)
    # Look the true degrees up node by node in H's own node order.
    # monte_carlo_degree returns its estimates in that order, whereas
    # iterating nx.degree(G) follows G's node order and would pair each
    # estimate with the wrong node.
    true_degrees = np.array([G.degree(n) for n in H.nodes])
    deg, ssd_error = monte_carlo_degree(H,p)
    # Squared error of the estimate against the true degrees, node by node.
    true_error = np.sum((deg - true_degrees)**2)
    if (p>0.25):
        print(deg)
    

0
0
0
0
0
0
[ 7 37 13 13  3  7 17 10 17 13  3 23 13 10 17 10 27 17 27  7 20 10 17 17
  7  3 23 13 26 20 16 27  3 10  3 17  7 10 10 10 17  7 13  7  6 10 10 10
 13 17 10  7 26 20 10 17 13 10  3 13 13 17  7  7 17 10 10  7 10  3  3 13
 10 10  3 17  3 13  3 16 13 17  7  7 13 20 20 10 17 17 16 14 14 13 13  7
  7 14 10 20  4 10  3  7 10  7  7 20 16  7 10  7 20 10  7 13 10 20 23 17
 10 13  7 17 10  6 13  4 24 13  7 10 10 14 13 23 17 13  3 13 20 13  7 10
 20 10  7 14 10 23  4 20 14 10 13  7 10  7 17 13 10 20 17 13 17 20 23 10
 10 13 14 17 17  3  7 13 20 10 10  6 10 10 13  7 10 10  3 10 13 16 10 13
 13  7 23 17  6  7 10 13  3 10  7 23 20 27 14  7 16 10 13 17 10 10 10  3
  7 17  7 10 10 20  7 13  6 10 13 17 16 10 13 17  3 10 10 16 17  7  7 17
  3 16 17 10  7  3  4  6  7 13 20 10 16  7  3  6 17 14  7 10  7 13 10 13
 17 17 13 10 13  3 10 13 10 10 24  7  7 10 24 13 16 17  3  3  4 13 10  7
 10  6 20  7 14  7 10 17 17  6 10  7 17  3 13  7 14 13 13 17 10  7 17  7
 13  3 16 13 10  3 13 13 23 17 17  3  7