In [1]:
from collections import Counter

import networkx as nx
import networkx.algorithms.community as c
import pandas as pd
from community import community_louvain

In [2]:
# Load the edge list; its "Source" and "Target" columns are what the graph is built from.
edges = pd.read_csv("../out/nikhilarundesai/edges.csv")

In [3]:
# Load the node table; its "Id" and "screen_name" columns feed the id -> name lookup.
nodes = pd.read_csv("../out/nikhilarundesai/nodes.csv")

In [4]:
# Map each node Id to its screen name so graph nodes can be reported by name.
# Built column-wise with zip() rather than DataFrame.iterrows(): iterrows creates
# a Series per row (slow) and does not preserve dtypes across the row.
# If an Id appears more than once, the last row wins (same as the dict
# comprehension this replaces).
lookup_table = nodes[["Id", "screen_name"]]
id_lookup = dict(zip(lookup_table["Id"], lookup_table["screen_name"]))

In [5]:
# Build the graph from the edge list, one edge per (Source, Target) row.
# No create_using is passed, so networkx falls back to its default graph type
# (an undirected nx.Graph per the networkx docs).
G = nx.from_pandas_edgelist(edges, source="Source", target="Target")

In [6]:
# Louvain community detection. The result is a dict keyed by node with the
# community id as value. random_state is pinned so re-runs are reproducible.
partition = community_louvain.best_partition(G, random_state=0)

In [7]:
# Sanity check: number of nodes that received a community assignment.
len(partition)

4988

In [8]:
# Confirm the container type returned by best_partition.
type(partition)

dict

In [9]:
# Community sizes: how many nodes were assigned to each community id.
Counter(partition.values())

Counter({4: 1180, 6: 1020, 5: 577, 0: 322, 2: 876, 1: 671, 3: 342})

In [10]:
# Group screen names by community id: {community_id: {screen_name, ...}}.
# Done in a single pass over the partition, keyed by the community ids that
# actually occur, instead of assuming exactly 7 communities (range(7)) and
# rescanning the whole partition once per community.
# A node id missing from id_lookup still raises KeyError, as before, so a
# mismatch between edges.csv and nodes.csv is not silently hidden.
clusters = {}
for node_id, community_id in partition.items():
    clusters.setdefault(community_id, set()).add(id_lookup[node_id])

# Re-key in ascending community-id order, matching the previous 0..k-1 layout.
clusters = {community_id: clusters[community_id] for community_id in sorted(clusters)}

In [11]:
def print_cluster(cluster_dict, cluster_id):
    """Print the members of one cluster, one per line, in sorted order.

    Args:
        cluster_dict: Mapping from cluster id to a collection of member names.
        cluster_id: Key of the cluster to print.

    Raises:
        KeyError: If ``cluster_id`` is not a key of ``cluster_dict``.
    """
    members = list(cluster_dict[cluster_id])
    members.sort()  # default ordering, same as sorted()
    position = 0
    while position < len(members):
        print(members[position])
        position += 1

In [12]:
# Manual label for community 6: politicians, think tanks, US news reporters.
print_cluster(clusters, 6)

1a
2020Senate
538politics
AECFNews
AEWK
AFLCIO
AGMarkHerring
AJentleson
ASlavitt
AaronBlake
AaronDFordNV
Acosta
AdamKFetcher
Adorably_Nerdy
AftabPureval
AislingMcDL
AlanGreenblatt
AlecMacGillis
AlexConant
AlexPappas
AlexParkerDC
AlexRowell
Alex_Roarty
Alexruoff
AliLapp
AlyssaMastro44
Alyssafarah
AmericasVoice
Ameya_Pawar_IL
AndreaBozek
AndrewCPEAR
AndyBerke
AnnBeesonTX
AnnetteMagnus
AntBejarano
AnthonyMKreis
AriBerman
AshLeeStrong
AshleyRParker
AudreyLPerry
BCAppelbaum
BFriedmanDC
BPC_Bipartisan
B_RadWill
Bakari_Sellers
Bbrock67Brock
BeauWillimon
BenMarter
BenMcAdams
BenMcAdamsUT
BenWinslow
Bencjacobs
BenjySarlin
Bennett_R_Smith
BetoORourke
BettyMcCollum04
BillCassidy
BillClinton
BillHalter
BilldeBlasio
BlueCollarDems
BobGreensteinDC
BobSalera
BobbyCalvan
BobbyScott
BoutrousTed
BowlesSimpson
BrendanBuck
BrennanCenter
BrianCDeese
BrookingsGov
BrunetJagoda
BryanLowry3
BudgetHawks
Burgwinkle
ByFernandaS
CEdwardsTX
CMSgovPress
CPADisclosure
CQnow
CREWcrew
CV0
CabouJ
CaitGalla
Carrasquillo


In [13]:
# Manual label for community 5: climate, energy, geoscience, outdoors, nature.
print_cluster(clusters, 5)

350
350_bangladesh
50Reefs
AGU_Eos
AP_Oddities
AarneGranlund
AaronPomerantz
Al_Humphreys
AlanaMGrech
AlaskaWx
AlexHonnold
AlexSteffen
Allochthonous
Alluvium_AU
AlongsideWild
Amardevsingh
AmyAHarder
AndrewDessler
AngieLCarter
AntarcticaNZ
ArcticCouncil
ArcticDeeply
AriPeskoe
AstroKatie
AtticaMelbourne
BD_Stew
BLMOregon
BLMca
Backcountry_H_A
BadlandsNPS
BayAreaClimate
BenLong1967
BhamlaLab
BittuSahgal
BloombergNEF
BlueBizCouncil
BrendaEkwurzel
BrucejDuncan
BryanCranston
BuzzFeedStorm
ByIanJames
CCLSanFrancisco
CFigueres
CPPGeophysics
CSClapp
CaitlinInMaine
CarbonBrief
CarbonWrangler
CasinaPioIV
Cataranea
CenterForBioDiv
ChnEnergyPortal
ChristinaToms
ChristopherWr11
CinderBDT907
Cipotato
CleanGridView
ClimSciDefense
ClimateAdam
ClimateCoLab
ClimateDesk
ClimateNPS
ClimateOfGavin
ClimatePower
ClimateReality
ClimateWeave
Climate_Center
Climatologist49
CocoNell2
ConservAmerica
ConservationLF
CoralMDavenport
CostaSamaras
CourtECarpenter
CraigAWelch
CraterLakeNPS
DByers21
DalaiLama
DanBarber
Da

In [14]:
# Manual label for community 4: catch-all (scientists, people the author knows
# personally, institutional accounts).
# TODO: this community needs to be decomposed further.
print_cluster(clusters, 4)

13pt
18F
20under20
2plus2make5
3blue1brown
50an6xy06r6n
538forecastbot
5harad
80000Hours
A_L
AaronJ254
Aaroth
AdamJKucharski
AdityaVis
Aelkus
AfiaZKhan
AgBioWorld
AlecRad
AlekaGurel
AlgebraFact
AmanQA
AnalysisFact
Andrea__M
AndrewJohnMarks
AndrewLBeam
AndrewYNg
AndreyOstrovsky
AngeBassa
AnjanKatta
AnjneyMidha
AnshulSamar
AnupamBJena
ApacheSpark
ArtemisAgTech
Ashoka
Atul_Gawande
AviBagla
AvicennaSJMA
Ayasdi
BCPPres
BMarcusMcCann
BarackObama
BellarmineBells
BenFriedmann
BetsyOgburn
BillGates
Bob_Wachter
Bottlenecked_Y
BradSpahn
BrendanShilling
CALandscapeBot
CAViewshedBot
CCARE
CDCgov
CERN
CS90si
CWakulchik
CapitalOneTech
CatalinVoss
CensusReporter
Cezary
ChicagoCDO
ChicagoCIO
ChrisRackauckas
CodeXStanford
CompSciFact
Compoundarxiv
CookingIssues
DLdotHub
DMcC222
DShankar
DaniloJRezende
DaphneKoller
DataDhrumil
DataInstituteSF
DataIsBeautiful
DataJunkie
DataKind
DataKindSF
DataMadeCo
DaveEvansPhD
DavidCGrabowski
DavidDuvenaud
DeanHach
DeepMind
DegenRolf
DeniceRoss46
Dereklowe
Dharma_HQ
Di

In [15]:
# Manual label for community 3: right-wing politics.
print_cluster(clusters, 3)

ABartonHinkle
ACTBrigitte
ASI
AceofSpadesHQ
AdamPerkinsPhD
AdelleNaz
AjitPai
AkashJC
AlanMCole
AlexEpstein
AmirSariaslan
AmyOtto8
AndrewCQuinn
AndrewMullins
AndyGrewal
AppellateDaily
ArthurSchwartz
Avi_Tuschman
Avik
BDOH
BLS_gov
B_M_Finnigan
BenSasse
BerinSzoka
BjornLomborg
BlueBoxDave
BrCo1981
BrendanCarrFCC
BrentScher
BretWeinstein
ByronYork
CEI_Michaels
CHSommers
CandiceMalcolm
CardinalJWTobin
CarolineGlick
CaseyMattox_
CatoOnCampus
CharlesFLehman
CharlesHurt
ChuckRossDC
CityGOP
ClaremontInst
ConsWahoo
CoryBMorgan
Czambul
DamonLinker
DanielJHannan
DavidLampo
DavidMMcintosh
David_Boaz
David_J_Bier
DevinNunes
DrewSpringer
Drewbueno
DustinStockton
ERLC
EbooPatel
EconTalker
EdAsante77
EdWhelanEPPC
ElectionsLawyer
Enopoletus
EricaGrieder
EsotericCD
FBillMcMorris
FakeNassimTaleb
FedSoc
FederalistPitch
FirstMondaysFM
FredLSmithJr
GKCdaily
Gryffix
HMDatMI
HadleyHeath
HarrisonKey
HarvLRev
HashtagGriswold
HdxAcademy
Heminator
HeritageOTH
HvonSpakovsky
IldefonsoOrtiz
IncidentalEcon
Isegoria
Iz

In [16]:
# Manual label for community 2: South Asia, international news and policy, finance.
print_cluster(clusters, 2)

1843mag
38NorthNK
9DashLine
ABarnardNYT
ADAMPLOW
AJRElectionMaps
ANI
ANINewsUP
APDiploWriter
APHClarkson
APjoshgoodman
ARobertsjourno
AdityaMenon22
AidaAlami
Aidan_Regan
AkhiPill
AlbertoNardelli
AlexandraUlmer
AliTahmizian
Aliide_N
AlirezaEshraghi
AllanJClarke
AllisonLMcManus
Allison_Good1
AmarAmarasingam
Amazing_Maps
AmericaElige
AndrejNkv
AndresSchipani
AndrewBeatty
AndrewSparrow
AnshelPfeffer
AnthonyBoadle
AntonLaGuardia
AnupKaphle
ArchaeoNomad
AricToler
ArmsControlWonk
ArtofLiving
AsiaElects
AtulHatwal
AuraSalla
AyresAlyssa
AzzamAmeen
BBAIndia
BBCHamedani
BBCInOurTime
BBCKasraNaji
BBCMonitoring
BBCPaulAdams
BChappatta
BLAsia_Africa
BNODesk
Babken
BahmanKalbasi
BallouxFrancois
BankersUmbrella
Billwhiteford
BillyEhrenberg
Birdyword
BloombergQuint
BoehlerPatrick
BondHack
BrankoMilan
BrazilPolitics
BritainElects
BrookingsInst
CEQuirk
CER_Grant
CLEUCl
CSEP_Org
CanadianPM
CardiffGarcia
CarlZha
CasMudde
CatSmithMP
Charles_Lister
Charlton_AB
ChassNews
ChemiShalev
ChiOnwurah
ChitnisPurva
Ch

In [17]:
# Manual label for community 1: left/outsider politics, journalists, other.
print_cluster(clusters, 1)

33unitehere
6oldberg
ACLU
AOC
AROCBayArea
Abbas_Muntaqim
AbbyMartin
AbdulElSayed
AbiWilks
AdamSerwer
AdamWeinstein
Adbusters
AidanKingVT
Al_Letson
AlexEmmons
AlexisCoe
AliAbunimah
Ali_Gharib
AlleenBrown
AnandWrites
AndrewDFish
AnnieLowrey
AnnieWaldman
AntiNateSilver
AriRabinHavt
ArmyStrang
ArunChaud
AsInMarx
AsherLangton
AsterZephyrIsis
AthertonKD
Atrios
Audible_Feast
AyeshaASiddiqi
AyoCaesar
BRANDONWARDELL
BananaKarenina
BayAreaIntifada
BenjaminFogel
BenjaminNorton
BernieSanders
Bernstein
BigMeanInternet
BillMoyers
BisforBerkshire
BlanksSlate
BlueSpaceCanary
BobbyBigWheel
BrandNew535
BrandonOfTX
BrandyLJensen
BriHReed
BrotherCooper89
ByYourLogic
CHAPOTRAPHOUSE
C_Stroop
CarlBeijer
Cato_of_Utica
CharESilver
ChaseMadar
ClaraJeffery
ClickHole
ColoradoCareYES
CoreyRobin
DSAEastBay
DSA_SF
DaleBeran
DanRiffle
Dan_E_Solo
DataProgress
DaveZuckermanVT
DavidKlion
Delo_Taylor
DemSocialists
DesisRisingUp
Destiny_Lopez16
DiEM_25
DiaKayyali
Dick_Florida
Dis_Con
DonnyBridges
DoualyX
ENPancotti
Econ_M

In [18]:
# Manual label for community 0: California, Bay Area, and housing.
print_cluster(clusters, 0)

1KatieOrr
2ndharvest
826National
8thGenCA
ACCFB
ACLU_CalAction
ACLU_NorCal
ACSOSheriffs
ACVOTE
Able_is
AccelaSoftware
AdamNMayer
AlamedaCoDem
AlamedaCoFire
AlamedaCounty
AlamedaCountyDA
AlexCVassar
AlexPadilla4CA
Aliyah_JM
AmyMQuinton
AnthonyCannella
Ash_Kalra
AsmBillQuirk
AsmEGarciaAD56
AsmRepublicans
BayAreaCouncil
BigadShaban
BillHarrisonCPA
BobWieckowskiCA
BorensteinDan
BreitbartCA
BrianDahleCA
CAFireScanner
CAIRSFBA
CAL_FIRE
CASOSvote
CATargetBot
CATeachersGR
CA_120
CA_DWR
CA_DaveJones
CAgovernor
CHPSanJose
CHP_GoldenGate
CHP_HQ
CIRonline
CPMLEGAL
CWaterC
CaWaterBoards
CalBudgetCenter
CalChamber
CalMatters
CalPolicyCenter
Cal_OES
CaliforniaLabor
Caltrain
CapitolAlert
CenterPovIneq
ChadMayes
ChrisMegerian
ChroniclesOfAzu
CityLab
ColusaBrown
ConorDougherty
ConsumerWD
DanCALmatters
DanMSiegel
DanReevo
DarwinBondGraha
Dave_Bonaccorsi
DavidCamposSF
DavidChiu
DrewTumaABC7
EBRPD
EastBayExpress
EskSF
Evan_Low
FaridaJhabvala
FedcourtJunkie
FlashReport
FmtStreetEats
FrankBigelowCA
Fremont4B