In [34]:
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from networkx.algorithms.community.centrality import girvan_newman
from networkx import edge_betweenness_centrality
from networkx.algorithms.community import greedy_modularity_communities
import itertools
import community as community_louvain

In [35]:
#loading the dataset
nodes = pd.read_csv("subreddit_nodes.csv", index_col=0)
edges = pd.read_csv("subreddit_edges.csv", delimiter=';')
print(nodes)
print(edges)

            Label       Category
Id                              
0    "depression"     Discussion
1     "NFL_Draft"  Entertainment
2     "RepLadies"     Discussion
3          "ESFP"     Discussion
4           "UBC"    Educational
..            ...            ...
669       "tarot"  Entertainment
670  "activision"  Entertainment
671        "intj"      Lifestyle
672      "Baking"      Lifestyle
673    "shoebots"     Technology

[674 rows x 2 columns]
       Source  Target  Weight
0           0      65       1
1           0     108       1
2           0     126       1
3           0     131       1
4           0     142       1
...       ...     ...     ...
13135     664     669       1
13136     664     671       1
13137     666     671       1
13138     668     671       1
13139     669     671       1

[13140 rows x 3 columns]


In [36]:
# creating the graph
G = nx.Graph()
for index, row in nodes.iterrows():
  G.add_node(index, name=row['Label'], category=row['Category'])

for index, row in edges.iterrows():
    G.add_edge(row['Source'], row['Target'], weight=row['Weight'])

print("there are {} nodes and {} edges".format(len(G.nodes), len(G.edges)))

there are 674 nodes and 13140 edges


In [4]:
# detecting communities in unweighted graph using gready algorithm
greedy_communities_unweighted = list(greedy_modularity_communities(G))
node_groups = []
for com in greedy_communities_unweighted:
  node_groups.append(list(com))

print(node_groups)
print(len(node_groups))

[[5, 518, 7, 519, 520, 522, 11, 524, 525, 13, 527, 16, 17, 21, 533, 536, 25, 537, 28, 30, 543, 33, 34, 545, 37, 40, 555, 557, 559, 47, 560, 50, 566, 58, 59, 572, 63, 64, 575, 579, 68, 581, 583, 72, 585, 587, 75, 77, 81, 83, 85, 600, 601, 603, 604, 92, 605, 96, 100, 105, 106, 621, 109, 113, 629, 119, 632, 633, 121, 128, 139, 141, 143, 144, 146, 660, 662, 663, 156, 157, 158, 160, 672, 163, 166, 174, 176, 183, 185, 189, 195, 201, 203, 204, 214, 229, 230, 231, 232, 234, 235, 238, 247, 248, 250, 251, 255, 257, 263, 267, 269, 270, 273, 277, 278, 279, 281, 286, 294, 296, 297, 299, 300, 302, 303, 311, 313, 314, 320, 322, 325, 326, 331, 332, 338, 340, 341, 342, 356, 358, 363, 366, 367, 372, 378, 381, 384, 388, 393, 397, 398, 399, 407, 408, 411, 412, 415, 422, 426, 429, 436, 441, 460, 461, 465, 472, 475, 477, 483, 485, 490, 498, 500, 504, 505, 507, 510], [512, 513, 260, 4, 261, 6, 266, 15, 532, 22, 538, 26, 282, 541, 32, 289, 546, 35, 548, 38, 552, 43, 45, 46, 306, 563, 569, 571, 318, 319, 580, 

In [5]:
#parsing the community detection results using gready algorithm for unweighted graph
greedy_community_unweighted = []
for node in range(len(G.nodes)):
    for g_index in range(len(node_groups)):
        if node in node_groups[g_index]:
            greedy_community_unweighted.append(g_index)
print(greedy_community_unweighted)
print(len(greedy_community_unweighted))
nodes['greedy_community_unweighted'] = greedy_community_unweighted

[7, 2, 5, 4, 1, 0, 1, 0, 3, 8, 5, 0, 4, 0, 2, 1, 0, 0, 3, 9, 8, 0, 1, 4, 3, 0, 1, 3, 0, 9, 0, 10, 1, 0, 0, 1, 2, 0, 1, 3, 0, 5, 3, 1, 2, 1, 1, 0, 6, 9, 0, 8, 9, 5, 2, 9, 3, 3, 0, 0, 3, 2, 4, 0, 0, 7, 3, 6, 0, 1, 6, 1, 0, 2, 3, 0, 3, 0, 7, 9, 10, 0, 7, 0, 8, 0, 5, 2, 4, 7, 1, 10, 0, 3, 1, 7, 0, 1, 2, 1, 0, 4, 4, 8, 2, 0, 0, 1, 7, 0, 2, 2, 3, 0, 6, 10, 2, 3, 6, 0, 3, 0, 2, 1, 3, 3, 7, 5, 0, 2, 1, 7, 1, 1, 3, 3, 4, 4, 1, 0, 1, 0, 7, 0, 0, 1, 0, 2, 1, 3, 3, 3, 2, 2, 1, 4, 0, 0, 0, 7, 0, 1, 4, 0, 6, 7, 0, 5, 6, 4, 2, 5, 3, 6, 0, 2, 0, 3, 4, 3, 2, 10, 1, 0, 2, 0, 8, 5, 2, 0, 9, 6, 7, 3, 2, 0, 2, 7, 1, 6, 2, 0, 6, 0, 0, 5, 2, 1, 5, 1, 4, 1, 5, 4, 0, 1, 7, 6, 2, 2, 1, 2, 1, 3, 9, 7, 6, 5, 5, 0, 0, 0, 0, 1, 0, 0, 8, 8, 0, 5, 10, 4, 2, 6, 2, 6, 6, 0, 0, 6, 0, 0, 8, 2, 1, 0, 7, 0, 2, 2, 1, 1, 2, 0, 5, 8, 1, 0, 8, 0, 0, 3, 5, 0, 3, 6, 7, 0, 0, 0, 8, 0, 1, 9, 2, 2, 0, 6, 5, 1, 6, 6, 3, 3, 0, 4, 0, 0, 9, 0, 0, 5, 0, 0, 5, 6, 1, 7, 5, 4, 3, 0, 2, 0, 0, 2, 10, 2, 1, 1, 0, 8, 0, 5, 6, 0, 0, 3, 1, 9, 9,

In [6]:
#getting the list of the categories
categories = nodes['Category'].unique()
print(categories)

['Discussion' 'Entertainment' 'Educational' 'Technology' 'Humor' 'Other'
 'Lifestyle' 'Defunct' 'General' 'Hobbies/Occupations' 'Animals' 'NSFW']


In [7]:
# getting the percentage of each category
category_weights = []
total_nodes = len(nodes)
for category in categories:
    weight = len(nodes.loc[nodes['Category'] == category])/total_nodes
    category_weights.append(weight)

In [12]:
# calculating the number and percentage of each category in each community detected using greedy algorithm
community_percentage = pd.DataFrame()
community_percentage_weight = pd.DataFrame()
community_category_count = pd.DataFrame()
for index in range(len(node_groups)):
    com = nodes.loc[nodes['greedy_community_unweighted'] == index]
    print('Result for community {}'.format(index))
    dict_percentage = {}
    dict_community_category_count = {}
    for category in categories:
        percentage = float("{:0.2f}".format(len(com.loc[com['Category'] == category])/len(com)))
        category_count = len(com.loc[com['Category'] == category])
        dict_percentage[category] = percentage
        dict_community_category_count[category] = int(category_count)
        print('Category: {} in Community {}: Percentage = {:0.2f}, {:0.0f}'.format(category, index, percentage, category_count))
    print(dict_percentage)
    community_percentage = community_percentage.append(dict_percentage, ignore_index = True)
    community_category_count = community_category_count.append(dict_community_category_count, ignore_index = True)
    print()
    print()

Result for community 0
Category: Discussion in Community 0: Percentage = 0.07, 13
Category: Entertainment in Community 0: Percentage = 0.32, 57
Category: Educational in Community 0: Percentage = 0.12, 22
Category: Technology in Community 0: Percentage = 0.05, 8
Category: Humor in Community 0: Percentage = 0.08, 14
Category: Other in Community 0: Percentage = 0.10, 17
Category: Lifestyle in Community 0: Percentage = 0.14, 25
Category: Defunct in Community 0: Percentage = 0.01, 2
Category: General in Community 0: Percentage = 0.02, 3
Category: Hobbies/Occupations in Community 0: Percentage = 0.03, 5
Category: Animals in Community 0: Percentage = 0.02, 4
Category: NSFW in Community 0: Percentage = 0.04, 7
{'Discussion': 0.07, 'Entertainment': 0.32, 'Educational': 0.12, 'Technology': 0.05, 'Humor': 0.08, 'Other': 0.1, 'Lifestyle': 0.14, 'Defunct': 0.01, 'General': 0.02, 'Hobbies/Occupations': 0.03, 'Animals': 0.02, 'NSFW': 0.04}


Result for community 1
Category: Discussion in Community 1:

Category: Discussion in Community 9: Percentage = 0.04, 1
Category: Entertainment in Community 9: Percentage = 0.44, 11
Category: Educational in Community 9: Percentage = 0.00, 0
Category: Technology in Community 9: Percentage = 0.00, 0
Category: Humor in Community 9: Percentage = 0.20, 5
Category: Other in Community 9: Percentage = 0.28, 7
Category: Lifestyle in Community 9: Percentage = 0.04, 1
Category: Defunct in Community 9: Percentage = 0.00, 0
Category: General in Community 9: Percentage = 0.00, 0
Category: Hobbies/Occupations in Community 9: Percentage = 0.00, 0
Category: Animals in Community 9: Percentage = 0.00, 0
Category: NSFW in Community 9: Percentage = 0.00, 0
{'Discussion': 0.04, 'Entertainment': 0.44, 'Educational': 0.0, 'Technology': 0.0, 'Humor': 0.2, 'Other': 0.28, 'Lifestyle': 0.04, 'Defunct': 0.0, 'General': 0.0, 'Hobbies/Occupations': 0.0, 'Animals': 0.0, 'NSFW': 0.0}


Result for community 10
Category: Discussion in Community 10: Percentage = 0.16, 3
Category: E

In [13]:
# creating csv files for communities in unweighted graph using gready algorithm
print(community_percentage)
community_percentage.to_csv("greedy_community_unweighted_percentage.csv", sep=',')
community_category_count.to_csv("greedy_community_unweighted_category_count.csv", sep=',')

    Discussion  Entertainment  Educational  Technology  Humor  Other  \
0         0.07           0.32         0.12        0.05   0.08   0.10   
1         0.06           0.15         0.35        0.06   0.00   0.18   
2         0.10           0.18         0.12        0.08   0.04   0.13   
3         0.03           0.38         0.08        0.20   0.06   0.14   
4         0.10           0.31         0.02        0.10   0.02   0.08   
5         0.06           0.21         0.13        0.11   0.02   0.06   
6         0.14           0.08         0.08        0.11   0.08   0.19   
7         0.06           0.29         0.03        0.03   0.06   0.19   
8         0.07           0.43         0.04        0.00   0.18   0.18   
9         0.04           0.44         0.00        0.00   0.20   0.28   
10        0.16           0.00         0.21        0.05   0.05   0.05   

    Lifestyle  Defunct  General  Hobbies/Occupations  Animals  NSFW  
0        0.14     0.01     0.02                 0.03     0.02  0.

In [14]:
# detecting community in weighted graph using gready algorithm
greedy_communities_weighted = list(greedy_modularity_communities(G, weight = 'weight'))
node_groups = []
for com in greedy_communities_weighted:
  node_groups.append(list(com))

print(node_groups)
print(len(node_groups))

[[1, 518, 7, 520, 521, 524, 13, 525, 14, 16, 17, 18, 533, 534, 23, 536, 537, 24, 544, 545, 548, 37, 36, 38, 39, 559, 560, 566, 54, 58, 572, 62, 63, 575, 64, 576, 67, 69, 70, 581, 582, 73, 589, 81, 595, 87, 603, 93, 94, 96, 98, 100, 615, 616, 105, 104, 618, 620, 621, 110, 113, 627, 629, 632, 122, 638, 641, 642, 132, 644, 647, 651, 653, 143, 144, 145, 146, 657, 147, 663, 665, 153, 156, 668, 158, 672, 166, 168, 170, 175, 176, 180, 182, 183, 184, 185, 188, 194, 200, 201, 206, 214, 217, 229, 231, 232, 238, 242, 243, 244, 247, 249, 250, 253, 258, 259, 260, 262, 263, 267, 270, 274, 279, 281, 284, 285, 286, 292, 293, 294, 297, 302, 311, 312, 315, 326, 335, 336, 337, 338, 340, 346, 350, 352, 358, 361, 363, 366, 367, 368, 378, 381, 382, 383, 384, 387, 388, 392, 393, 394, 398, 399, 400, 405, 418, 419, 429, 436, 446, 451, 456, 457, 461, 467, 469, 473, 475, 477, 480, 482, 490, 498, 499, 500, 501, 510], [512, 257, 513, 4, 261, 519, 266, 522, 523, 11, 15, 273, 22, 25, 538, 282, 26, 28, 541, 32, 289, 

In [15]:
# parsing the community detection results using gready algorithm for weighted graph 
greedy_community_weighted = []
for node in range(len(G.nodes)):
    for g_index in range(len(node_groups)):
        if node in node_groups[g_index]:
            greedy_community_weighted.append(g_index)
print(greedy_community_weighted)
print(len(greedy_community_weighted))
nodes['greedy_community_weighted'] = greedy_community_weighted

[7, 0, 5, 6, 1, 5, 10, 0, 2, 4, 5, 1, 6, 0, 0, 1, 0, 0, 0, 9, 4, 4, 1, 0, 0, 1, 1, 2, 1, 9, 4, 8, 1, 3, 5, 1, 0, 0, 0, 0, 3, 5, 2, 1, 1, 1, 1, 3, 2, 9, 3, 4, 9, 5, 0, 9, 2, 2, 0, 3, 2, 3, 0, 0, 0, 7, 2, 0, 3, 0, 0, 1, 1, 0, 2, 3, 2, 3, 7, 9, 8, 0, 7, 3, 4, 1, 5, 0, 6, 7, 5, 8, 1, 0, 0, 7, 0, 1, 0, 1, 0, 6, 6, 4, 0, 0, 4, 8, 7, 3, 0, 3, 2, 0, 1, 8, 3, 2, 2, 4, 2, 4, 0, 10, 2, 2, 7, 5, 3, 3, 8, 7, 0, 8, 2, 2, 6, 6, 1, 3, 8, 4, 7, 0, 0, 0, 0, 0, 1, 2, 2, 2, 1, 0, 1, 6, 0, 3, 0, 7, 3, 1, 6, 5, 2, 7, 0, 5, 0, 6, 0, 5, 2, 2, 4, 0, 0, 2, 6, 2, 0, 8, 0, 0, 0, 0, 4, 5, 0, 3, 9, 2, 7, 2, 0, 3, 1, 7, 1, 2, 0, 0, 1, 3, 4, 5, 0, 10, 5, 1, 6, 1, 5, 6, 0, 1, 7, 0, 3, 3, 10, 3, 1, 2, 9, 7, 2, 5, 5, 0, 1, 0, 0, 8, 3, 1, 4, 4, 0, 5, 8, 6, 0, 0, 0, 2, 2, 0, 4, 0, 0, 3, 4, 0, 1, 3, 7, 1, 0, 0, 0, 1, 0, 0, 5, 4, 1, 0, 4, 3, 0, 2, 5, 1, 0, 2, 7, 4, 3, 0, 4, 0, 1, 9, 0, 0, 0, 2, 5, 1, 2, 1, 0, 0, 0, 6, 4, 0, 9, 3, 4, 5, 0, 3, 5, 2, 10, 7, 5, 6, 2, 0, 0, 1, 1, 0, 8, 1, 1, 1, 3, 4, 3, 5, 2, 4, 0, 2, 1, 9, 9, 3

In [16]:
# calculating the number and percentage of each category in each community detected using greedy algorithm 
community_percentage = pd.DataFrame()
community_percentage_weight = pd.DataFrame()
community_category_count = pd.DataFrame()
for index in range(len(node_groups)):
    com = nodes.loc[nodes['greedy_community_weighted'] == index]
    print('Result for community {}'.format(index))
    dict_percentage = {}
    dict_community_category_count = {}
    for category in categories:
        percentage = float("{:0.2f}".format(len(com.loc[com['Category'] == category])/len(com)))
        category_count = len(com.loc[com['Category'] == category])
        dict_percentage[category] = percentage
        dict_community_category_count[category] = int(category_count)
        print('Category: {} in Community {}: Percentage = {:0.2f}, {:0.0f}'.format(category, index, percentage, category_count))
    print(dict_percentage)
    community_percentage = community_percentage.append(dict_percentage, ignore_index = True)
    community_category_count = community_category_count.append(dict_community_category_count, ignore_index = True)
    print()
    print()

Result for community 0
Category: Discussion in Community 0: Percentage = 0.08, 14
Category: Entertainment in Community 0: Percentage = 0.25, 47
Category: Educational in Community 0: Percentage = 0.10, 19
Category: Technology in Community 0: Percentage = 0.05, 10
Category: Humor in Community 0: Percentage = 0.06, 12
Category: Other in Community 0: Percentage = 0.11, 21
Category: Lifestyle in Community 0: Percentage = 0.19, 36
Category: Defunct in Community 0: Percentage = 0.01, 2
Category: General in Community 0: Percentage = 0.01, 2
Category: Hobbies/Occupations in Community 0: Percentage = 0.11, 20
Category: Animals in Community 0: Percentage = 0.02, 3
Category: NSFW in Community 0: Percentage = 0.00, 0
{'Discussion': 0.08, 'Entertainment': 0.25, 'Educational': 0.1, 'Technology': 0.05, 'Humor': 0.06, 'Other': 0.11, 'Lifestyle': 0.19, 'Defunct': 0.01, 'General': 0.01, 'Hobbies/Occupations': 0.11, 'Animals': 0.02, 'NSFW': 0.0}


Result for community 1
Category: Discussion in Community 1

Category: Humor in Community 10: Percentage = 0.00, 0
Category: Other in Community 10: Percentage = 0.07, 1
Category: Lifestyle in Community 10: Percentage = 0.07, 1
Category: Defunct in Community 10: Percentage = 0.07, 1
Category: General in Community 10: Percentage = 0.00, 0
Category: Hobbies/Occupations in Community 10: Percentage = 0.07, 1
Category: Animals in Community 10: Percentage = 0.00, 0
Category: NSFW in Community 10: Percentage = 0.00, 0
{'Discussion': 0.0, 'Entertainment': 0.33, 'Educational': 0.2, 'Technology': 0.2, 'Humor': 0.0, 'Other': 0.07, 'Lifestyle': 0.07, 'Defunct': 0.07, 'General': 0.0, 'Hobbies/Occupations': 0.07, 'Animals': 0.0, 'NSFW': 0.0}




In [17]:
# creating csv files for communities in weighted graph using gready algorithm
print(community_percentage)
community_percentage.to_csv("greedy_community_weighted_percentage.csv", sep=',')
community_category_count.to_csv("greedy_community_weighted_category_count.csv", sep=',')

    Discussion  Entertainment  Educational  Technology  Humor  Other  \
0         0.08           0.25         0.10        0.05   0.06   0.11   
1         0.11           0.12         0.37        0.03   0.03   0.17   
2         0.07           0.32         0.07        0.20   0.09   0.14   
3         0.09           0.21         0.12        0.11   0.02   0.16   
4         0.04           0.44         0.07        0.00   0.12   0.14   
5         0.06           0.23         0.13        0.10   0.02   0.08   
6         0.10           0.31         0.02        0.10   0.02   0.08   
7         0.07           0.30         0.03        0.03   0.07   0.20   
8         0.12           0.04         0.20        0.08   0.04   0.04   
9         0.04           0.44         0.00        0.00   0.20   0.28   
10        0.00           0.33         0.20        0.20   0.00   0.07   

    Lifestyle  Defunct  General  Hobbies/Occupations  Animals  NSFW  
0        0.19     0.01     0.01                 0.11     0.02  0.

In [27]:
# detecting community in weighted graph using Louvain algorithm
partition = community_louvain.best_partition(G, resolution=1.2, weight = 'weight')
node_groups = []
node_groups = set(list(partition.values()))
print(len(node_groups))
louvain_community_weighted = []
for com in partition.values():
  louvain_community_weighted.append(com)

print(louvain_community_weighted)
print(len(louvain_community_weighted))
nodes['louvain_community_weighted'] = louvain_community_weighted

12
[0, 7, 1, 11, 4, 1, 6, 7, 8, 9, 1, 2, 11, 7, 9, 4, 7, 7, 7, 3, 9, 7, 4, 11, 7, 2, 4, 8, 2, 3, 2, 7, 4, 2, 1, 4, 7, 2, 7, 4, 2, 1, 8, 4, 4, 4, 4, 2, 10, 3, 2, 9, 3, 1, 7, 3, 8, 8, 7, 2, 8, 5, 11, 7, 7, 0, 8, 7, 2, 7, 7, 4, 2, 7, 8, 2, 8, 2, 0, 3, 7, 7, 0, 2, 9, 4, 1, 8, 11, 0, 4, 7, 2, 7, 7, 0, 7, 4, 7, 4, 7, 11, 11, 9, 7, 7, 4, 7, 0, 2, 7, 5, 8, 2, 4, 7, 5, 8, 10, 7, 8, 4, 11, 6, 8, 8, 0, 1, 2, 5, 7, 0, 7, 7, 8, 8, 11, 11, 4, 2, 7, 7, 0, 7, 7, 7, 1, 7, 4, 8, 8, 8, 4, 7, 4, 11, 2, 2, 7, 0, 2, 4, 11, 1, 10, 0, 7, 1, 7, 11, 7, 1, 8, 10, 7, 7, 7, 8, 11, 8, 8, 7, 7, 7, 4, 7, 9, 1, 9, 2, 3, 8, 0, 8, 7, 2, 4, 0, 4, 10, 7, 7, 4, 2, 7, 1, 7, 6, 1, 4, 8, 4, 1, 11, 2, 4, 0, 7, 5, 5, 6, 5, 4, 8, 3, 0, 10, 1, 1, 2, 2, 7, 2, 7, 2, 2, 9, 9, 2, 1, 7, 11, 7, 7, 7, 10, 10, 7, 2, 7, 0, 2, 9, 7, 4, 2, 0, 2, 7, 7, 7, 4, 7, 7, 1, 9, 4, 7, 9, 2, 7, 8, 7, 2, 9, 10, 0, 2, 2, 1, 1, 2, 4, 3, 7, 7, 7, 8, 1, 4, 10, 4, 9, 7, 7, 11, 1, 2, 3, 2, 2, 1, 7, 2, 1, 8, 6, 0, 7, 11, 8, 7, 7, 2, 2, 7, 7, 4, 4, 4, 2, 9, 2,

In [28]:
# calculating the number and percentage of each category in each community detected using Louvain algorithm 
community_percentage = pd.DataFrame()
community_percentage_weight = pd.DataFrame()
community_category_count = pd.DataFrame()
for index in range(len(node_groups)):
    com = nodes.loc[nodes['louvain_community_weighted'] == index]
    print('Result for community {}'.format(index))
    dict_percentage = {}
    dict_community_category_count = {}
    for category in categories:
        percentage = float("{:0.2f}".format(len(com.loc[com['Category'] == category])/len(com)))
        category_count = len(com.loc[com['Category'] == category])
        dict_percentage[category] = percentage
        dict_community_category_count[category] = int(category_count)
        print('Category: {} in Community {}: Percentage = {:0.2f}, {:0.0f}'.format(category, index, percentage, category_count))
    print(dict_percentage)
    community_percentage = community_percentage.append(dict_percentage, ignore_index = True)
    community_category_count = community_category_count.append(dict_community_category_count, ignore_index = True)
    print()
    print()

Result for community 0
Category: Discussion in Community 0: Percentage = 0.06, 2
Category: Entertainment in Community 0: Percentage = 0.29, 9
Category: Educational in Community 0: Percentage = 0.03, 1
Category: Technology in Community 0: Percentage = 0.03, 1
Category: Humor in Community 0: Percentage = 0.10, 3
Category: Other in Community 0: Percentage = 0.19, 6
Category: Lifestyle in Community 0: Percentage = 0.10, 3
Category: Defunct in Community 0: Percentage = 0.00, 0
Category: General in Community 0: Percentage = 0.03, 1
Category: Hobbies/Occupations in Community 0: Percentage = 0.13, 4
Category: Animals in Community 0: Percentage = 0.03, 1
Category: NSFW in Community 0: Percentage = 0.00, 0
{'Discussion': 0.06, 'Entertainment': 0.29, 'Educational': 0.03, 'Technology': 0.03, 'Humor': 0.1, 'Other': 0.19, 'Lifestyle': 0.1, 'Defunct': 0.0, 'General': 0.03, 'Hobbies/Occupations': 0.13, 'Animals': 0.03, 'NSFW': 0.0}


Result for community 1
Category: Discussion in Community 1: Percenta

In [29]:
# creating csv files for communities in weighted graph using louvain algorithm
print(community_percentage)
community_percentage.to_csv("louvain_community_weighted_percentage.csv", sep=',')
community_category_count.to_csv("louvain_community_weighted_category_count.csv", sep=',')

    Discussion  Entertainment  Educational  Technology  Humor  Other  \
0         0.06           0.29         0.03        0.03   0.10   0.19   
1         0.06           0.28         0.09        0.09   0.02   0.09   
2         0.05           0.33         0.09        0.04   0.13   0.11   
3         0.04           0.41         0.00        0.00   0.26   0.26   
4         0.11           0.05         0.37        0.03   0.00   0.21   
5         0.15           0.00         0.23        0.23   0.00   0.23   
6         0.00           0.33         0.20        0.20   0.00   0.07   
7         0.07           0.24         0.13        0.06   0.02   0.10   
8         0.05           0.38         0.03        0.25   0.08   0.15   
9         0.09           0.47         0.09        0.00   0.15   0.12   
10        0.14           0.10         0.14        0.05   0.10   0.14   
11        0.10           0.27         0.04        0.08   0.02   0.08   

    Lifestyle  Defunct  General  Hobbies/Occupations  Animals  

In [30]:
# creating csv files for final community result
nodes.to_csv("Community.csv", sep=',')

In [37]:
# get the number of connected components
G.remove_node(185)
connected_component = nx.number_connected_components(G)
print(connected_component)
print(nx.is_connected(G))

10
False


In [40]:
#get the size of the components
component_size = [len(c) for c in sorted(nx.connected_components(G), key=len, reverse=True)]
print(component_size)

[653, 5, 3, 3, 2, 2, 2, 1, 1, 1]
