In [1]:
import pandas as pd
from wordcloud import WordCloud
from nltk.corpus import stopwords
import nltk
import re
from string import punctuation
import numpy as np
import math
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import community as community_louvain
import networkx as nx
from fa2 import ForceAtlas2
from collections import defaultdict
from nltk.corpus import wordnet as wn
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import seaborn as sns

<p style="font-family:Muro;font-weight:bold;font-size:35px;margin:10px 0px 10px 0px;color:#ADD8E6">Communities</p>

In [11]:
G = nx.readwrite.adjlist.read_adjlist("Data/GCC.gz")
#G.remove_node("nan")

In [12]:
#Using the .best_partition method of the louvain algorithm, randomize is set to false so the results stay uniform
partition = community_louvain.best_partition(G, randomize=False)

#Counting the amount of communities
count = len(set(partition.values()))

#As well as the modularity
modu = community_louvain.modularity(partition, G)

#Printing out the answers to these two questions.
print(f"We found {count} communities in total.")
print(f"The modularity of the partition created is {modu}.")

We found 11 communities in total.
The modularity of the partition created is 0.532073984565235.


In [7]:
communities = defaultdict(list)

# Group characters into their communities 
for character, community in zip(partition.keys(),partition.values()):
    communities[community].append(character)

In [8]:
named_communities = {}

for community in communities.values():
    sorted_com = sorted(community, key = lambda x : G.degree[x], reverse = True)
    name = ", ".join(sorted_com[:3]).replace("_", " ")
    print("----------")
    print(f"Community of {name}:")
    print("----------")
    for character in sorted_com[:10]:
        print(character.replace("_", " "))
    print()

----------
Community of Andy Bernard, List of IT guys, Walter Bernard Jr.:
----------
Andy Bernard
List of IT guys
Walter Bernard Jr.
Walter Bernard Sr.
Ellen Bernard
Nick
Sadiq
Cathy Simms
Tiffany
Unnamed Cousin

----------
Community of Erin Hannon, The Gypsy, Erin's Mother:
----------
Erin Hannon
The Gypsy
Erin's Mother
Erin's Father
Irene
The Killer
Glenn (Florida)
Reed

----------
Community of Dwight Schrute, Angela Martin, Robert Lipton:
----------
Dwight Schrute
Angela Martin
Robert Lipton
Mose Schrute
List of aliases
Garbage
Aunt Shirley
Meredith Palmer
Phillip Schrute
Isabel Poreba

----------
Community of Jim Halpert, Pam Beesly, Cecelia Halpert:
----------
Jim Halpert
Pam Beesly
Cecelia Halpert
Phillip Halpert
Helene Beesly
Betsy Halpert
William Beesly
Gerald Halpert
Penny Beesly
Sylvia

----------
Community of Robert California, Toby Flenderson, Kelly Kapoor:
----------
Robert California
Toby Flenderson
Kelly Kapoor
Darryl Philbin
Gabe Lewis
Todd Packer
Deangelo Vickers
Jo B

In [16]:
labels = {}
for community in communities.values():
    main_char = sorted(community, key = lambda x : G.degree[x], reverse = True)[0]
    labels[main_char] = main_char

In [13]:
# We run Force Atlas 2 to get positions for the nodes

forceatlas2 = ForceAtlas2(
    # Behavior alternatives
    outboundAttractionDistribution=True,  # Dissuade hubs
    linLogMode=False,  # NOT IMPLEMENTED
    adjustSizes=False,  # Prevent overlap (NOT IMPLEMENTED)
    edgeWeightInfluence=1.0,

    # Performance
    jitterTolerance=1.0,  # Tolerance
    barnesHutOptimize=True,
    barnesHutTheta=1.2,
    multiThreaded=False,  # NOT IMPLEMENTED

    # Tuning
    scalingRatio=2.0,
    strongGravityMode=False,
    gravity=1.0,

    # Log
    verbose=True)

positions = forceatlas2.forceatlas2_networkx_layout(G, pos=None, iterations=5000)

100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:13<00:00, 365.05it/s]

BarnesHut Approximation  took  2.54  seconds
Repulsion forces  took  9.95  seconds
Gravitational forces  took  0.13  seconds
Attraction forces  took  0.07  seconds
AdjustSpeedAndApplyForces step  took  0.49  seconds





In [14]:
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap

colors = ["slateblue","maroon", "darkorange", "gold", "lime", "forestgreen", "deepskyblue", "blue", "darkviolet", "hotpink", "red", "teal", "beige"]

newcmp = ListedColormap(colors[:count])

len(colors)

13

In [24]:
plt.figure(figsize=(15, 15))
plt.title("Force Atlas Representation of The Office network, with nodes colored by community", c='black',  y=1.0, pad=-20)
d = dict(G.degree)
nx.draw_networkx(G, positions, nodelist=list(d.keys()), node_size=[v*15 for v in d.values()], node_color = list(partition.values()), with_labels=False, edge_color='grey', cmap=newcmp, with_labels = True)
#nx.draw_networkx_labels(G, positions, labels, font_size=10)
plt.show()

SyntaxError: keyword argument repeated (<ipython-input-24-3e9b170258b5>, line 4)