In [None]:
# In the following, we will look at community detection
# We use a dataset widely used in network analysis to illustrate 
# different methods; the dataset describes the friendships of members in a karate club
# We have seen different features of this network from the perspective of basic network characteristics
# Now we look at the possible communities we may find

In [None]:
# We start with the common libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

In [None]:
# In addition to community detection, I want to show one package that can be used as an alternative for visualization:
# pyvis. This is not part of the main anaconda distribution, so we have to install it first
# As before, use anaconda prompt and write
# conda install -c conda-forge pyvis

import pyvis

In [None]:
# We can start by loading the network, as it is shipped as part of networkx

karate_net = nx.karate_club_graph()

In [None]:
# In order to use the visualization tool of pyvis,
# the first step is to create a Network object of the library
# First we initialize the object (empty, with the default settings)

karate_vis = pyvis.network.Network()

In [None]:
# Then we convert the format: the pyvis object is filled from the networkx graph

karate_vis.from_nx(karate_net)

In [None]:
# Finally we visualize the network
# We need to give a file name, as an html file will be created in the folder where we currently work with the notebook
# The created visualization will pop up in a new tab in your browser
# NOTE(review): the behaviour of show() appears to differ between pyvis releases; if this call raises an error,
# try karate_vis.show('karate.html', notebook=False) -- confirm against the installed pyvis version

karate_vis.show('karate.html')

In [None]:
# For another example of what you can do with pyvis, here is the example from the official tutorial page
# There is no need to understand everything in this code; you will not be required to create a similar visualization
# https://pyvis.readthedocs.io/en/latest/tutorial.html#example-visualizing-a-game-of-thrones-character-network

got_net = pyvis.network.Network(height='750px', width='100%', bgcolor='#222222', font_color='white')

# set the physics layout of the network
got_net.barnes_hut()
got_data = pd.read_csv('https://www.macalester.edu/~abeverid/data/stormofswords.csv')

sources = got_data['Source']
targets = got_data['Target']
weights = got_data['Weight']

# Walk through the edge list row by row: register both endpoints as nodes
# (using the name as id, label and hover title), then connect them,
# passing the weight as the value of the edge
for src, dst, w in zip(sources, targets, weights):
    for character in (src, dst):
        got_net.add_node(character, character, title=character)
    got_net.add_edge(src, dst, value=w)

neighbor_map = got_net.get_adj_list()

# add neighbor data to node hover data, and use the number of neighbors as the node value
for node in got_net.nodes:
    adjacent = neighbor_map[node['id']]
    node['title'] += ' Neighbors:<br>' + '<br>'.join(adjacent)
    node['value'] = len(adjacent)

got_net.show('gameofthrones.html')

In [None]:
# Going back to community detection, we can run the edge betweenness algorithm as follows
# In networkx it is actually named after the researchers who created the algorithm (Girvan and Newman)
# In order to avoid lots of typing, we can separately import the subset of networkx focusing on communities

import networkx.algorithms.community as nx_comm

communities = nx_comm.girvan_newman(karate_net)

# As we can see, it is a generator object
# We will not discuss these in detail; you can simply think of them as something that you can iterate over
# A similar object that you are familiar with is range()
# Unlike range, however, a generator can only be iterated over once

communities

In [None]:
# The way we will utilize the outcome is to convert it into a list
# This is similar to what you do with range, for example, if you want to print it and not simply use it in a loop
# A generator can only be consumed once, so we create a fresh one here instead of reusing `communities`:
# this way the cell gives the same result every time it is run
# (converting an already used generator a second time would silently give an empty list)

comm_list = list(nx_comm.girvan_newman(karate_net))

In [None]:
# What we have in the list is one element for each step in running the community detection algorithm
# So the first element in the list is the two subcommunities we obtain once enough edges are removed to split the network
# The second element of the list is the three subcommunities that we obtain by continuing to remove edges from the previous step
# etc.

comm_list

In [None]:
# The focus here is not on visualization, but we can check how to do it
# To visualize two communities, every node needs a color
# that depends on the community it belongs to
# The split into two communities is the first element of the list we created above

comm_2 = comm_list[0]

# Build the list of colors in a single pass over the nodes of the network:
# blue if the node is in the first subcommunity, green otherwise
color_map = ['blue' if node in comm_2[0] else 'green' for node in karate_net]

# After this we can draw the network, passing the colors in the node_color parameter
nx.draw_kamada_kawai(karate_net, node_color=color_map, with_labels=True)

In [None]:
# We can do the same thing with three communities
# The split into three communities is the second element of the list

comm_3 = comm_list[1]

# One color per node, in the order of the nodes of the network:
# blue for the first subcommunity, red for the second one, green for everything else
color_map_3 = [
    'blue' if node in comm_3[0] else 'red' if node in comm_3[1] else 'green'
    for node in karate_net
]

# After this we can draw the network, passing the colors in the node_color parameter
nx.draw_kamada_kawai(karate_net, node_color=color_map_3, with_labels=True)

In [None]:
# How do we determine which community structure to use?
# This is similar to choosing the number of clusters in clustering: there is no definite answer
# One useful measure we can use is modularity
# We calculate the modularity of every community structure in the list

modularity_list = [nx_comm.modularity(karate_net, partition) for partition in comm_list]

# Report each value together with the number of subcommunities it belongs to
for partition, score in zip(comm_list, modularity_list):
    print('The modularity with', len(partition), 'subcommunities is', score)
    

In [None]:
# As we can see from the list, we have the highest modularity with 5 communities
# We can also plot the values
# The x values are taken from the partitions themselves, so the axis shows the actual
# number of communities (2, 3, ...) instead of the position in the list, and the plot
# does not depend on a hard-coded number of steps
n_communities = [len(comm) for comm in comm_list]

ax = sns.lineplot(x=n_communities, y=modularity_list)
ax.set(xlabel='Number of communities', ylabel='Modularity');

In [None]:
# We can also visualize the 5 communities
# This split is the fourth element of the list (index 3)

comm_5 = comm_list[3]

# List of colors, one for each community
colors = ['blue', 'red', 'green', 'yellow', 'orange']

# For every node of the network we go through the 5 possible communities
# and pick the color that has the same position as the community containing the node
color_map_5 = [
    colors[idx]
    for node in karate_net
    for idx, members in enumerate(comm_5)
    if node in members
]

# Finally visualize the network
nx.draw_kamada_kawai(karate_net, node_color=color_map_5, with_labels=True)

In [None]:
# For later use, we can also create a list of membership labels
# e.g. if node n is in community i, then the nth element of the list is i

comm_memb = [
    idx
    for node in karate_net
    for idx, members in enumerate(comm_5)
    if node in members
]

print(comm_memb)

In [None]:
# How can we use this? Let's load some additional data about the club members
# The file is read from the current working folder and uses ';' as the column separator

karate_data = pd.read_csv('karate_data.csv', sep = ';')

# We have information about the gender and age of the members
# And we know how much time they spend in the club per week and for how many days they have been members

karate_data.head()

In [None]:
# We can add the memberships as a new column
# This relies on the rows of karate_data being in the same order as the nodes of the network
# (row n describes node n) -- TODO confirm against the data file

karate_data['memb'] = comm_memb

# Leaving the expression as the last line shows the table in the same way as when we loaded the data
karate_data.head()

In [None]:
# From this point, the analysis becomes the same as we would do in clustering
# We have 5 distinct groups of users, and we want to know whether we can differentiate them
# based on the available information we have
# So we group by the membership and look at the summaries of the different columns

comm_karate = karate_data.groupby('memb')

# We can check gender: this counts how often each value occurs within each community
comm_karate['Gender'].value_counts()

In [None]:
# For the other variables we can compare the average value within each community

summary_columns = ['Hours in club/week', 'Days in club', 'Age']
comm_karate[summary_columns].mean()