In [1]:
import networkx as nx
import pandas as pd
import networkx.algorithms.community as c
from community import community_louvain
from collections import Counter

In [2]:
# Load the edge list and the node attribute table for this account.
edges = pd.read_csv("../out/nikhilarundesai/edges.csv")
nodes = pd.read_csv("../out/nikhilarundesai/nodes.csv")

# Map each node Id to its screen_name. Zipping the two columns directly avoids
# DataFrame.iterrows(), which materialises a Series per row and does not
# preserve column dtypes. As before, a duplicated Id keeps its last screen_name.
lookup_table = nodes[["Id", "screen_name"]]
id_lookup = dict(zip(lookup_table["Id"], lookup_table["screen_name"]))

# Build the graph from the Source/Target columns (networkx's default graph type).
G = nx.from_pandas_edgelist(edges, source="Source", target="Target")

In [3]:
# Louvain community detection on G; the result maps each node to a community id.
# NOTE(review): random_state is pinned so reruns should give the same partition,
# and resolution=2.0 departs from the library default — confirm it is intentional.
partition = community_louvain.best_partition(
    G,
    resolution=2.0,
    random_state=0,
)

In [4]:
# Community sizes, largest first, as (community id, member count) pairs.
cluster_sizes = Counter(partition.values())
cluster_sizes.most_common()

[(0, 860),
 (3, 529),
 (10, 482),
 (6, 458),
 (19, 350),
 (13, 333),
 (4, 326),
 (30, 246),
 (12, 214),
 (23, 166),
 (1, 165),
 (27, 156),
 (9, 147),
 (28, 140),
 (26, 110),
 (2, 80),
 (8, 78),
 (16, 29),
 (22, 18),
 (38, 17),
 (29, 12),
 (34, 10),
 (17, 9),
 (14, 4),
 (32, 4),
 (41, 4),
 (21, 3),
 (36, 3),
 (40, 3),
 (42, 3),
 (18, 2),
 (20, 2),
 (25, 2),
 (31, 2),
 (43, 2),
 (44, 2),
 (45, 2),
 (49, 2),
 (7, 1),
 (11, 1),
 (24, 1),
 (33, 1),
 (35, 1),
 (37, 1),
 (39, 1),
 (46, 1),
 (47, 1),
 (48, 1),
 (50, 1),
 (5, 1),
 (15, 1)]

In [5]:
# Group screen names by community id: {community id -> set of screen names}.
#
# Every id in 0..max is pre-seeded with an empty set so the dict has exactly the
# same keys as a per-id scan would produce; `default=-1` makes an empty partition
# yield an empty dict instead of raising ValueError from max().
clusters = {
    cluster_id: set()
    for cluster_id in range(max(partition.values(), default=-1) + 1)
}

# Single pass over the partition (previously the whole partition was rescanned
# once per community id). Loop names are spelled out so they do not reuse `c`,
# the alias of the networkx.algorithms.community import.
# Assumes community ids are the non-negative integers 0..max, as produced by
# community_louvain.best_partition — TODO confirm if partition comes from elsewhere.
for node_id, cluster_id in partition.items():
    clusters[cluster_id].add(id_lookup[node_id])

In [6]:
def print_cluster(cluster_dict, cluster_id):
    """Print the members of one cluster, one per line, in sorted order.

    Args:
        cluster_dict: Mapping from cluster id to an iterable of member names.
        cluster_id: Key of the cluster to print.

    Raises:
        KeyError: If ``cluster_id`` is not a key of ``cluster_dict``.
    """
    members_in_order = sorted(cluster_dict[cluster_id])
    for member in members_in_order:
        print(member)

In [7]:
# Report every community from largest to smallest: a header line with its size,
# its sorted member list, then a separator line.
size_ranking = Counter(partition.values()).most_common()
separator = "-----" * 20
for cluster_id, count in size_ranking:
    print(f"Cluster {cluster_id}: {count} members")
    print_cluster(clusters, cluster_id)
    print(separator)

Cluster 0: 860 members
13pt
20under20
2plus2make5
3blue1brown
50an6xy06r6n
5harad
80000Hours
Aaroth
AdityaVis
Aelkus
AgBioWorld
AlecRad
AlgebraFact
AmanQA
AnalysisFact
Andrea__M
AndrewLBeam
AndrewYNg
AngeBassa
AnjanKatta
AnjneyMidha
AnshulSamar
ApacheSpark
ArtemisAgTech
AviBagla
AvicennaSJMA
Ayasdi
BMarcusMcCann
BenFriedmann
BetsyOgburn
BillGates
Bottlenecked_Y
BrendanShilling
CERN
CS90si
CapitalOneTech
CatalinVoss
ChrisRackauckas
CodeXStanford
CompSciFact
Compoundarxiv
DLdotHub
DShankar
DaniloJRezende
DaphneKoller
DataInstituteSF
DataIsBeautiful
DataJunkie
DataKind
DataKindSF
DavidDuvenaud
DeanHach
DeepMind
DegenRolf
Dharma_HQ
DiptyDesai
DistrictDataLab
DominoDataLab
DrDabholkar
DrDanielGillis
EALTurner
EAltruist
ECorner
ESYudkowsky
EdwardTufte
Elijah_Meeks
EricaKlarreich
FHIOxford
FieldsInstitute
Foone
FreedomeVPN
FunctorFact
GSVC
GabriellaG439
GaetanBurgio
GautamAtWork
GiveWell
GoMobileFirst
GoogleAI
GretchenAMcC
HackerNewsOnion
HardcoreHistory
HarlanH
HaskellOrg
HazyResearch
Helena