# ER Model Comparison

In [12]:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline

In [13]:
G = nx.read_edgelist('../networkx_format_network.txt', comments='#',
                     create_using=nx.DiGraph(), 
                     delimiter=',', 
                     nodetype=int, 
                     encoding='utf-8')

In [18]:
G_undirected = nx.read_edgelist('../networkx_format_network.txt', comments='#',
                     create_using=nx.Graph(), 
                     delimiter=',', 
                     nodetype=int, 
                     encoding='utf-8')

## Basic Statistics of Network

### Plot Function

In [None]:
#TBD

### Degree (in+out)

In [11]:
N = len(G)
L = G.size()
degrees = [G.degree(node) for node in G]
kmin = min(degrees)
kmax = max(degrees)

In [12]:
print("Number of nodes: ", N)
print("Number of edges: ", L)
print()
print("Average degree: ", 2*L/N)
print("Average degree (alternate calculation)", np.mean(degrees))
print()
print("Minimum degree: ", kmin)
print("Maximum degree: ", kmax)

Number of nodes:  8954
Number of edges:  13998

Average degree:  3.12664730846549
Average degree (alternate calculation) 3.12664730846549

Minimum degree:  1
Maximum degree:  381


### In-Degree

In [12]:
in_degrees = [G.in_degree(node) for node in G]
k_in_min = min(in_degrees)
k_in_max = max(in_degrees)

In [13]:
print("Average In degree (alternate calculation)", np.mean(in_degrees))
print()
print("Minimum degree: ", k_in_min)
print("Maximum degree: ", k_in_max)

Average In degree (alternate calculation) 1.563323654232745

Minimum degree:  0
Maximum degree:  378


### Out-Degree

In [18]:
out_degrees = [G.out_degree(node) for node in G]
k_out_min = min(out_degrees)
k_out_max = max(out_degrees)

In [19]:
print("Average Out degree (alternate calculation)", np.mean(out_degrees))
print()
print("Minimum degree: ", k_out_min)
print("Maximum degree: ", k_out_max)

Average Out degree (alternate calculation) 1.563323654232745

Minimum degree:  0
Maximum degree:  105


## ER Network

In [47]:
ER_G = nx.gnp_random_graph(len(G),0.00036)
# this probability is what gets us the closest average k value

### Basics

In [48]:
N_ER = len(ER_G)
L_ER = ER_G.size()
degrees_er = [ER_G.degree(node) for node in ER_G]
kmin_er = min(degrees_er)
kmax_er = max(degrees_er)

print("Number of nodes: ", N_ER)
print("Number of edges: ", L_ER)
print()
print("Average degree: ", 2*L_ER/N_ER)
print("Average degree (alternate calculation)", np.mean(degrees_er))
print()
print("Minimum degree: ", kmin_er)
print("Maximum degree: ", kmax_er)

Number of nodes:  8954
Number of edges:  14407

Average degree:  3.218003127094036
Average degree (alternate calculation) 3.218003127094036

Minimum degree:  0
Maximum degree:  14


### Global Clustering Coefficient

In [49]:
clustering_coef = nx.average_clustering(ER_G)

### Transitivity

In [50]:
transitivity = nx.transitivity(ER_G)

### Average Path Length

In [51]:
avg_path_length = nx.average_shortest_path_length(ER_G)

NetworkXError: Graph is not connected.

In [23]:
import csv


In [26]:
clustering_coefs= []
transitivity_metrics = []
probability = 0.00036

for i in range(100):   
    print(i)
    ER_G = nx.gnp_random_graph(len(G),probability, directed=True)
    degrees_er = [ER_G.degree(node) for node in ER_G]
    avg_k = np.mean(degrees_er)
    if (avg_k >=3.08 and avg_k <=3.145):
        #Then run the calculate metrics here 
        transitivity_metrics.append(nx.transitivity(ER_G))
        clustering_coefs.append(nx.average_clustering(ER_G))
        # Persist the node degrees to a csv
        degrees = [ER_G.degree(node) for node in ER_G]
        out_degrees = [ER_G.out_degree(node) for node in ER_G]
        in_degrees = [ER_G.in_degree(node) for node in ER_G]
        
        with open('er_degrees.csv', mode='a', newline='') as f:
                csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow(degrees) 
                
        with open('er_in_degrees.csv', mode='a', newline='') as f:
                csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow(out_degrees) 
                
        with open('er_out_degrees.csv', mode='a', newline='') as f:
                csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow(in_degrees) 
                
        
        
    else:
        # Find the ratio 
        ratio_change = 3.12664730846549 / avg_k
        probability = ratio_change * probability


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


In [2]:
import pandas as pd

In [27]:

c  = pd.DataFrame(clustering_coefs)
t = pd.DataFrame(transitivity_metrics)

c.to_csv("er_clustering.csv")
t.to_csv("er_transitivity.csv")

In [4]:
#load the Data in from Pandas and average it
c_df = pd.read_csv("clustering_coef_er_models")
t_df = pd.read_csv("transitivity")

In [6]:
c_df.tail()

Unnamed: 0.1,Unnamed: 0,0
624,624,0.000141
625,625,0.000254
626,626,0.000264
627,627,0.000513
628,628,0.000102


In [7]:
t_df.tail()

Unnamed: 0.1,Unnamed: 0,0
624,624,0.000278
625,625,0.000416
626,626,0.000354
627,627,0.000631
628,628,0.000206


In [10]:
c_df["0"].mean()

0.0002902406586596367

In [11]:
t_df["0"].mean()

0.00034953341610985004

In [15]:
#Actual Graph Transitivity
nx.transitivity(G)

0.020056849493865474

In [19]:
#Actual Graph Cluster Coefficient 
nx.average_clustering(G_undirected)

0.016744035180022453

In [21]:
nx.average_shortest_path_length(G_undirected)

NetworkXError: Graph is not connected.