# setup

In [1]:
import numpy as np
import networkx as nx
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rc, rcParams
import scipy
import warnings
import csv
from IPython.display import display
warnings.filterwarnings('ignore')

%matplotlib inline

In [2]:
%load_ext autoreload
%aimport networks
from networks import RAGraph
%aimport log_bin
# helper functions
%aimport utils
font_size = 20
%autoreload 1

# Fixed N, varying m ks comparison

## numerical data sets

In [3]:
N = 10**6
M = [1, 2, 4, 8, 16, 32]
raw_degrees_all = []
for repeat in range(20):
    folder = 'data/ra/deg_dist' + str(repeat+1)
    data = []
    for m in M:
        fn = '{0}/{1}_{2}.txt'.format(folder, N, m)
        with open(fn, 'r') as f:
            reader = csv.reader(f)
            data_as_list = list(reader)
        x = data_as_list[0]
        x = [int(i) for i in x]
        data.append(x)
#     raw_degrees = np.array(data)
    raw_degrees_all.append(data)
raw_degrees_all = np.array(raw_degrees_all)

In [4]:
raw_degrees_all[1][5].max()

506

## synthetic datasets

In [5]:
folder = "data/ra/synthetic"
synthetic = []
for m in M:
    data = []
    for i in range(25):
        fn = "{0}/{1}_{2}_{3}.csv".format(folder, N, m, i)
        with open(fn, 'r') as f:
            reader = csv.reader(f)
            data_as_list = list(reader)
        x = data_as_list[0]
        x = [int(i) for i in x[:-1]]
        data.append(x)
    synthetic.append(data)
synthetic_degrees = np.array(synthetic)

In [6]:
for i, d in enumerate(raw_degrees_all[0]):
    reference = synthetic_degrees[i][0]
    ks, raw_p = scipy.stats.ks_2samp(d, reference)
    print(ks)

0.001024
0.000496
0.000795
0.001498
0.000684
0.000695


In [13]:
# temp = np.array(plist_all)
# pd.DataFrame(temp, columns=M).to_csv('data/ra/ks_test.csv')
# pd.read_csv('data/ba/ks_test.csv', index_col=0)

In [18]:
plist_df = pd.read_csv('data/ba/ks_test.csv', index_col=0)
plist_df.columns = plist_df.columns.astype(int)

In [21]:
plist_df.mean()

1     0.714
2     0.446
4     0.502
8     0.580
16    0.706
32    0.456
dtype: float64

## power law comparison

In [32]:
import plfit_py
import importlib
importlib.reload(plfit_py)
from plfit_py import plfit

In [47]:
for repeat in range(20):
    raw_degrees = raw_degrees_all[repeat]
    for i, d in enumerate(raw_degrees):
        mypl = plfit(np.array(raw_degrees[i]), xmin=M[i], silent=True, verbose=False)
        print(mypl.kstest_(raw_degrees[i])(M[i]))
    break

xmin: 1 n(>xmin): 1000000 alpha: 2.9687 +/- 0.0019687   Log-Likelihood: -830578   ks: 0.499916
0.999999
xmin: 2 n(>xmin): 1000000 alpha: 2.81838 +/- 0.00181838   Log-Likelihood: -1.64514e+06   ks: 0.332901
0.999999
xmin: 4 n(>xmin): 1000000 alpha: 2.74666 +/- 0.00174666   Log-Likelihood: -2.40111e+06   ks: 0.200248
0.999999
xmin: 8 n(>xmin): 1000000 alpha: 2.71116 +/- 0.00171116   Log-Likelihood: -3.12667e+06   ks: 0.124627
0.999999
xmin: 16 n(>xmin): 1000000 alpha: 2.69391 +/- 0.00169391   Log-Likelihood: -3.8359e+06   ks: 0.113165
0.999999
xmin: 32 n(>xmin): 1000000 alpha: 2.68531 +/- 0.00168531   Log-Likelihood: -4.53715e+06   ks: 0.10745
0.999999
