# Synthetic Network Community Detection

In [76]:
from collections import Counter
import csv
import build_influence_network as bn
import infomap
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from matplotlib import pylab as pl
import networkx as nx
import numpy as np
import statistics as stat

In [3]:
# A csv.reader can only be iterated once, so we copy its rows into a list.
# The file is opened with newline="" as the csv module requires; otherwise
# newlines embedded inside quoted fields may be interpreted incorrectly.
with open("influence_data.csv", "rt", newline="") as infile:
    reader = csv.reader(infile)
    # Discard the header row: the column labels are not needed.
    next(reader)
    inf_data = list(reader)

In [4]:
# A csv.reader can only be iterated once, so we copy its rows into a list.
# The file is opened with newline="" as the csv module requires; otherwise
# newlines embedded inside quoted fields may be interpreted incorrectly.
with open("data_by_artist.csv", "rt", newline="") as infile:
    reader = csv.reader(infile)
    # Discard the header row: the column labels are not needed.
    next(reader)
    artist_data = list(reader)

In [5]:
# Build the artistic influence network from the two CSV row lists loaded above.
# The call also prints the summary statistics shown in this cell's output.
graph = bn.build_graph(inf_data, artist_data)

Our graph is an acyclic directed graph: False
Name: 
Type: DiGraph
Number of nodes: 5603
Number of edges: 42770
Average in degree:   7.6334
Average out degree:   7.6334
Our artistic influence network has 5603 artists.
Our artistic influence network has 42770 edges out of a possible 31388006 possible edges.
Our network density is: 0.0013626223978675167
Our graph is strongly connected: False
The number of strongly connected components is: 4747
The average clustering coefficient is: 0.09128001728442253


In [6]:
# Keep only the degree values, in the order graph.in_degree() and
# graph.out_degree() yield them; the node identifiers are dropped.
in_degrees = [degree for _node, degree in graph.in_degree()]
out_degrees = [degree for _node, degree in graph.out_degree()]

In [72]:
def findCommunities(G, run_num):
    """
    Partition a network with the Infomap algorithm and summarise the result.

    Every edge returned by ``G.edges()`` is added to a directed, two-level
    Infomap instance configured for 25 trials (``-N 25``). After clustering,
    the size of each detected module is written to
    ``configuration_model_communites_run_<run_num>.csv`` in the current
    working directory and a short summary is printed.

    Parameters
    ----------
    G : graph-like object exposing ``edges()``
        The first two items of every edge must be convertible with ``int``.
        Only nodes that appear in at least one edge are passed to Infomap.
    run_num : int or str
        Run identifier, used in the output file name and the printed summary.

    Returns
    -------
    tuple of (int, int)
        ``(num_comm, largest_comm_count)``: the number of modules found and
        the size of the largest one. ``(0, 0)`` when Infomap reports no nodes.
    """
    im = infomap.Infomap("-N 25 --directed --two-level")

    print("Building network...")
    for edge in G.edges():
        # Convert each endpoint once and reuse it for the node and link calls.
        source_id, target_id = int(edge[0]), int(edge[1])
        im.add_node(source_id)
        im.add_node(target_id)
        im.add_link(source_id, target_id)

    # Cluster network
    im.run()

    # Map module id -> number of nodes assigned to that module.
    comm_dict = Counter(node.module_id for node in im.nodes)

    out_path = "configuration_model_communites_run_" + str(run_num) + ".csv"
    # newline="" stops the csv module's own line terminators from being
    # translated a second time (which produces blank rows on Windows).
    with open(out_path, mode='w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=',', quotechar='|')
        csv_writer.writerow(["Community_Number", "Number_Nodes_in_Community"])
        csv_writer.writerows(comm_dict.items())

    num_comm = len(comm_dict)
    # default=0 keeps an empty partition from raising ValueError in max().
    largest_comm_count = max(comm_dict.values(), default=0)

    print(f"Run number {run_num} found {num_comm} communities in our synthetic network.")
    print(f"The largest community has {largest_comm_count} members.")
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    print("~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~")
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

    return num_comm, largest_comm_count

We generate a directed graph whose in- and out-degree sequences match those of our artistic influence network. This is done via the configuration model of <a href="https://journals.aps.org/pre/abstract/10.1103/PhysRevE.64.026118">Newman et al.</a>
 

Please see the NetworkX documentation for details on how this is implemented in Python:
https://networkx.org/documentation/stable/reference/generated/networkx.generators.degree_seq.directed_configuration_model.html#networkx.generators.degree_seq.directed_configuration_model

Here we use the aforementioned configuration model to generate 100 synthetic networks. We then use the <a href="https://github.com/mapequation/infomap">Python implementation</a> of the Infomap community detection algorithm of <a href="https://link.springer.com/article/10.1140%2Fepjst%2Fe2010-01179-1">Rosvall et al.</a> to detect the number of communities present in each synthetic network. 

The results of each run are stored in configuration_model_communites_run_[x].csv, where x runs from 1 to 100. Each file lists every community detected in that run together with the number of nodes that belong to it, so the number of rows gives the number of communities detected. 

In [75]:
# Generate NUM_RUNS configuration-model networks that share the degree
# sequences of the influence network and record, for each run, the number of
# communities Infomap finds and the size of the largest one.
# NOTE(review): no random seed is set, so individual runs are not reproducible;
# consider passing seed= to directed_configuration_model.
NUM_RUNS = 100
num_comm = []
largest_comm_count = []
for run_num in range(1, NUM_RUNS + 1):
    conf_model_graph = nx.directed_configuration_model(in_degrees, out_degrees)
    # Remove self-loops from the graph that was just generated. The original
    # code referenced an undefined name `D` here. The edges are copied into a
    # list first so the graph is not modified while it is being iterated.
    conf_model_graph.remove_edges_from(list(nx.selfloop_edges(conf_model_graph)))
    communities_found, largest_size = findCommunities(conf_model_graph, run_num)
    num_comm.append(communities_found)
    largest_comm_count.append(largest_size)

Building network...
Run number 1 found 554 communities in our synthetic network.
The largest community has 83 members.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Building network...
Run number 2 found 542 communities in our synthetic network.
The largest community has 79 members.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Building network...
Run number 3 found 560 communities in our synthetic network.
The largest community has 88 members.
~~~~~~~

Building network...
Run number 20 found 540 communities in our synthetic network.
The largest community has 78 members.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Building network...
Run number 21 found 534 communities in our synthetic network.
The largest community has 73 members.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Building network...
Run number 22 found 551 communities in our synthetic network.
The largest community has 64 members.
~~~~

Building network...
Run number 39 found 553 communities in our synthetic network.
The largest community has 80 members.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Building network...
Run number 40 found 562 communities in our synthetic network.
The largest community has 81 members.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Building network...
Run number 41 found 541 communities in our synthetic network.
The largest community has 115 members.
~~~

Building network...
Run number 58 found 515 communities in our synthetic network.
The largest community has 80 members.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Building network...
Run number 59 found 517 communities in our synthetic network.
The largest community has 97 members.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Building network...
Run number 60 found 542 communities in our synthetic network.
The largest community has 67 members.
~~~~

Building network...
Run number 77 found 542 communities in our synthetic network.
The largest community has 70 members.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Building network...
Run number 78 found 553 communities in our synthetic network.
The largest community has 80 members.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Building network...
Run number 79 found 571 communities in our synthetic network.
The largest community has 67 members.
~~~~

Building network...
Run number 96 found 561 communities in our synthetic network.
The largest community has 69 members.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Building network...
Run number 97 found 537 communities in our synthetic network.
The largest community has 75 members.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~~*~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Building network...
Run number 98 found 550 communities in our synthetic network.
The largest community has 85 members.
~~~~

In [81]:
# Summarise the runs. int() truncates each mean towards zero (it does not
# round), and the run count is taken from the data rather than hard-coded.
mean_num_comm = int(stat.mean(num_comm))
mean_largest_comm = int(stat.mean(largest_comm_count))
print(f"The mean number of communities in our {len(num_comm)} runs was found to be {mean_num_comm}.")
print(f"The mean number of members in the largest community was found to be {mean_largest_comm}.")

The mean number of communities in our 100 runs was found to be 543.
The mean numnber of members in the largest community was found to be 80.
