1. Unweighted graph with randomized edges (degrees not preserved)
2. Graph with the original edges kept fixed (degrees preserved) but with randomized weights

- Original graph measures on the weighted graph (clustering coefficient, degree centrality, eigenvector centrality)

- Calculate clustering and path length (average and standard deviation)

In [2]:
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
from random import seed
import statistics

In [3]:
# Load the node table (indexed by its first column) and the ';'-separated
# edge list.
nodes = pd.read_csv("subreddit_nodes.csv", index_col=0)
edges = pd.read_csv("subreddit_edges.csv", delimiter=';')

# Weighted graph: every node carries its label and category, every edge
# carries the weight given in the edge list.
G_weighted = nx.Graph()
G_weighted.add_nodes_from(
    (node_id, {"name": record['Label'], "category": record['Category']})
    for node_id, record in nodes.iterrows()
)
G_weighted.add_weighted_edges_from(
    (record['Source'], record['Target'], record['Weight'])
    for _, record in edges.iterrows()
)

# Degree sequence of the weighted graph, in node iteration order.
degrees = [degree for _, degree in G_weighted.degree()]

# Unweighted graph: same nodes and edges, but no weight attribute on the edges.
G_unweighted = nx.Graph()
G_unweighted.add_nodes_from(
    (node_id, {"name": record['Label'], "category": record['Category']})
    for node_id, record in nodes.iterrows()
)
G_unweighted.add_edges_from(
    (record['Source'], record['Target'])
    for _, record in edges.iterrows()
)


#Random graph
def randomWeightedGraph(nodes, edges, min_weight=1, max_weight=673):
    """Build a graph with the given nodes and edges but random edge weights.

    The topology is taken unchanged from ``edges``; only the ``weight``
    attribute is replaced by a uniformly drawn integer.

    Args:
        nodes: DataFrame indexed by node id, with 'Label' and 'Category'
            columns.
        edges: DataFrame with 'Source' and 'Target' columns, one row per edge.
        min_weight: Smallest weight that can be drawn (inclusive).
        max_weight: Largest weight that can be drawn (inclusive). The default
            of 673 is the bound that used to be hard-coded here
            (presumably the largest weight in the original data -- confirm).

    Returns:
        An ``nx.Graph`` whose nodes have ``name`` and ``category`` attributes
        and whose edges have a random integer ``weight``.

    Raises:
        ValueError: If ``min_weight`` is greater than ``max_weight``
            (raised by ``random.randint``).
    """
    G = nx.Graph()
    for node_id, row in nodes.iterrows():
        G.add_node(node_id, name=row['Label'], category=row['Category'])

    # One draw per edge row, in row order, so a seeded run is reproducible.
    for _, row in edges.iterrows():
        random_weight = random.randint(min_weight, max_weight)
        G.add_edge(row['Source'], row['Target'], weight=random_weight)

    return G


def getWeights(g):
    """Print and return the mean ``weight`` attribute over all edges of ``g``.

    Args:
        g: A graph whose ``g.edges.data()`` yields ``(u, v, attrs)`` triples
            where ``attrs`` contains a ``'weight'`` entry.

    Returns:
        The mean edge weight, or NaN when the graph has no edges.

    Raises:
        KeyError: If an edge has no ``'weight'`` attribute.
    """
    weights = [attrs['weight'] for _, _, attrs in g.edges.data()]
    if weights:
        mean_weight = sum(weights) / len(weights)
    else:
        # An edgeless graph has no mean weight; report NaN instead of
        # failing with ZeroDivisionError.
        mean_weight = float("nan")
    print(mean_weight)
    return mean_weight
    
    
#for i in range(5):
   # getWeights(randomWeightedGraph(nodes, edges))


In [None]:
# Unweighted graph with randomized edges, generated from the degree sequence
# of the original graph.
# NOTE(review): per the NetworkX docs, expected_degree_graph matches the given
# degrees only in expectation, so individual node degrees are not preserved
# exactly.

# Number of random graphs to generate and the per-run results collected from them.
RUNS = 1000
clustering, path_length = [], []

# One sample random graph (self-loops disabled).
G_unweighted_random = nx.expected_degree_graph(degrees, selfloops=False)


def getAveragePathLength(G):
    """Return the mean shortest-path length over all connected node pairs.

    Each pair of distinct nodes is counted once; pairs with no connecting
    path are left out of the average. Path lengths are hop counts (edge
    weights are not used). Nodes may carry any hashable label, not only
    0..n-1.

    Args:
        G: The graph to measure.

    Returns:
        The mean of the collected shortest-path lengths.

    Raises:
        statistics.StatisticsError: If no two distinct nodes are connected
            (including graphs with fewer than two nodes).
    """
    all_lengths = []
    visited = set()

    for source in G:
        visited.add(source)
        # One breadth-first search per source gives the distance to every
        # reachable node at once; unreachable nodes are simply absent, so no
        # exception handling is needed for disconnected pairs.
        reachable = nx.single_source_shortest_path_length(G, source)
        # Skip already-processed nodes (and the source itself) so that each
        # pair contributes exactly once.
        all_lengths.extend(
            dist for target, dist in reachable.items() if target not in visited
        )

    if not all_lengths:
        raise statistics.StatisticsError(
            "graph has no connected pair of distinct nodes"
        )

    return statistics.mean(all_lengths)
    
# Query the distance from node 0 to itself on the sample random graph.
shortest_path = nx.shortest_path_length(G_unweighted_random, source=0, target=0)

# Generate RUNS independent random graphs from the degree sequence and record
# the average clustering coefficient and average path length of each one.
for i in range(RUNS):
    G = nx.expected_degree_graph(degrees, selfloops=False)
    clustering.append(nx.average_clustering(G))
    path_length.append(getAveragePathLength(G))

# Aggregate the per-run values over all runs and report them.
avg_path_length = statistics.mean(path_length)
avg_clustering = statistics.mean(clustering)
print(f'Average clustering coefficient: {avg_clustering}\nAverage path length: {avg_path_length}\nRuns: {RUNS}')




In [None]:
#Graph with fixed, original edges (degree preserved) but weights are randomized

#G = randomWeightedGraph(nodes, edges)
#G1 = randomWeightedGraph(nodes, edges)
#print(G.degree())

#clustering = []
#path_length = []