In [1]:
import networkx as nx
import pandas as pd
import networkx.algorithms.community as c
from community import community_louvain
from collections import Counter

In [2]:
# Load the edge list; the graph-building cell reads its "Source" and "Target" columns.
edges = pd.read_csv(filepath_or_buffer="../out/nikhilarundesai/edges.csv")

In [3]:
# Load the node table; the lookup cell reads its "Id" and "screen_name" columns.
nodes = pd.read_csv(filepath_or_buffer="../out/nikhilarundesai/nodes.csv")

In [4]:
# Map each node Id to its screen name so community members can be shown by handle.
# Built column-wise instead of with DataFrame.iterrows(), which constructs a Series
# for every row; .tolist() yields native Python values, and for duplicate Ids the
# last row still wins, exactly as with the row-by-row version.
lookup_table = nodes[["Id", "screen_name"]]
id_lookup = dict(
    zip(lookup_table["Id"].tolist(), lookup_table["screen_name"].tolist())
)

In [5]:
# Build the graph from the edge list: one edge per (Source, Target) row.
# No create_using is passed, so networkx's default graph type is used.
G = nx.from_pandas_edgelist(df=edges, source="Source", target="Target")

In [6]:
# Louvain community detection on G. The result maps node id -> community id;
# random_state is pinned so reruns on the same graph are repeatable.
partition = community_louvain.best_partition(graph=G, random_state=0)

In [7]:
# Number of nodes that received a community label (one entry per node).
len(partition)

4988

In [8]:
# Sanity check on the return type: a plain dict keyed by node id.
type(partition)

dict

In [9]:
# Community sizes: how many nodes were assigned to each community id.
Counter(partition.values())

Counter({4: 1180, 6: 1020, 5: 577, 0: 322, 2: 876, 1: 671, 3: 342})

In [10]:
# Group screen names by Louvain community id.
# Community ids are taken from the partition itself rather than a hard-coded
# range(7), so no community is silently dropped if a rerun produces a different
# number of them, and the partition is scanned once instead of once per community.
# Keys are inserted in ascending id order, matching the previous 0..6 ordering.
clusters = {cluster_id: set() for cluster_id in sorted(set(partition.values()))}
for node_id, cluster_id in partition.items():
    # Raises KeyError if a node id in the partition is absent from id_lookup.
    clusters[cluster_id].add(id_lookup[node_id])

In [11]:
# Community 6 (manual label): politicians, think tanks, US news reporters.
clusters[6]

{'SenatePress',
 'jamie_raskin',
 'BilldeBlasio',
 'jonlovett',
 'shobawadhia',
 'RRHElections',
 'ylanmui',
 'rubycramer',
 'RepHuffman',
 'VP',
 'JNicholsonInDC',
 'sadieweiner',
 'CraigJ_tweets',
 'tyzlaw',
 'karynbruggeman',
 'TimAlberta',
 'DrewSav',
 'joepileggi',
 'entrepreneurguy',
 'jasonnobleIA',
 'SubodhChandra',
 'costareports',
 'stuartpstevens',
 'CortezMasto',
 'lilybatch',
 'JerryMarkon',
 'AlexRowell',
 'PatrickMurphyPA',
 'SvanteMyrick',
 'SenBrianSchatz',
 'ChrisVanHollen',
 'PeteWilliamsNBC',
 'electionline',
 'PeterWelch',
 'abmorgan',
 'electionland',
 'briantashman',
 'M_KleppeWellde',
 'KeenanPontoni',
 'Indivisible510',
 'mirandayaver',
 'B_RadWill',
 'JoshStein_',
 'SenateFloor',
 'Mitch_Stewart',
 'MitchLandrieu',
 'MattJonesRadio',
 'zhunterDC',
 'jessieopie',
 'alexhalperin',
 'ChrisMurphyCT',
 'RepAdamSmith',
 'KateDavidson',
 'SenGaryPeters',
 'jessicameyers',
 'ChandraKus',
 'AEWK',
 'MarkMellman',
 'KirkPWatson',
 'ewarren',
 'BenMarter',
 'nickgourevit

In [12]:
# Community 5 (manual label): climate, energy, geoscience, outdoors, nature.
clusters[5]

{'350',
 '350_bangladesh',
 '50Reefs',
 'AGU_Eos',
 'AP_Oddities',
 'AarneGranlund',
 'AaronPomerantz',
 'Al_Humphreys',
 'AlanaMGrech',
 'AlaskaWx',
 'AlexHonnold',
 'AlexSteffen',
 'Allochthonous',
 'Alluvium_AU',
 'AlongsideWild',
 'Amardevsingh',
 'AmyAHarder',
 'AndrewDessler',
 'AngieLCarter',
 'AntarcticaNZ',
 'ArcticCouncil',
 'ArcticDeeply',
 'AriPeskoe',
 'AstroKatie',
 'AtticaMelbourne',
 'BD_Stew',
 'BLMOregon',
 'BLMca',
 'Backcountry_H_A',
 'BadlandsNPS',
 'BayAreaClimate',
 'BenLong1967',
 'BhamlaLab',
 'BittuSahgal',
 'BloombergNEF',
 'BlueBizCouncil',
 'BrendaEkwurzel',
 'BrucejDuncan',
 'BryanCranston',
 'BuzzFeedStorm',
 'ByIanJames',
 'CCLSanFrancisco',
 'CFigueres',
 'CPPGeophysics',
 'CSClapp',
 'CaitlinInMaine',
 'CarbonBrief',
 'CarbonWrangler',
 'CasinaPioIV',
 'Cataranea',
 'CenterForBioDiv',
 'ChnEnergyPortal',
 'ChristinaToms',
 'ChristopherWr11',
 'CinderBDT907',
 'Cipotato',
 'CleanGridView',
 'ClimSciDefense',
 'ClimateAdam',
 'ClimateCoLab',
 'ClimateDes

In [13]:
# Community 4 (manual label): catch-all -- scientists, people the author knows
# personally, institutional accounts.
# TODO: this community is too broad and needs to be decomposed further.
clusters[4]

{'ycombinator',
 'trijeetm',
 'enigma_data',
 'pembient',
 'annawmathews',
 'apurvazen',
 'myklebest',
 'rosenstein',
 'wycats',
 'janexwang',
 'humanutility',
 'AnjneyMidha',
 'HazyResearch',
 'fullyfuzuli',
 'SusanPotter',
 'josephreisinger',
 'dohyoungpark',
 'SymSysSociety',
 'DrTomFrieden',
 'calvinling626',
 'AaronJ254',
 'jllord',
 'mafintosh',
 'JoinUnion',
 'enlitic',
 'thefreemanlab',
 'tqbf',
 'bayesimpact',
 'earino',
 'briankrebs',
 'HHSvaccines',
 'ethereum',
 'christophamarks',
 'Qhotsokoane',
 'mayli',
 'sama',
 'HelloCTCL',
 'BillGates',
 'lindsey',
 '3blue1brown',
 'simonmar',
 'cuttingforstone',
 'ideo',
 'IbisData',
 'StanfordDeptMed',
 'PrakashLab',
 'dnag09',
 'mchillakanti',
 'evertedsphere',
 'GaetanBurgio',
 'Sarasti',
 'slashML',
 'PracheeAC',
 'CookingIssues',
 'siddjagadish',
 'coffeephoenix',
 '2plus2make5',
 'Compoundarxiv',
 'eugene_burmako',
 'eigenrobot',
 'SpaceX',
 'medriscoll',
 'm__petes',
 'DaphneKoller',
 'RanjitJhala',
 'tanay_tandon',
 'tarunchi

In [14]:
# Community 3 (manual label): right-wing politics.
clusters[3]

{'ABartonHinkle',
 'ACTBrigitte',
 'ASI',
 'AceofSpadesHQ',
 'AdamPerkinsPhD',
 'AdelleNaz',
 'AjitPai',
 'AkashJC',
 'AlanMCole',
 'AlexEpstein',
 'AmirSariaslan',
 'AmyOtto8',
 'AndrewCQuinn',
 'AndrewMullins',
 'AndyGrewal',
 'AppellateDaily',
 'ArthurSchwartz',
 'Avi_Tuschman',
 'Avik',
 'BDOH',
 'BLS_gov',
 'B_M_Finnigan',
 'BenSasse',
 'BerinSzoka',
 'BjornLomborg',
 'BlueBoxDave',
 'BrCo1981',
 'BrendanCarrFCC',
 'BrentScher',
 'BretWeinstein',
 'ByronYork',
 'CEI_Michaels',
 'CHSommers',
 'CandiceMalcolm',
 'CardinalJWTobin',
 'CarolineGlick',
 'CaseyMattox_',
 'CatoOnCampus',
 'CharlesFLehman',
 'CharlesHurt',
 'ChuckRossDC',
 'CityGOP',
 'ClaremontInst',
 'ConsWahoo',
 'CoryBMorgan',
 'Czambul',
 'DamonLinker',
 'DanielJHannan',
 'DavidLampo',
 'DavidMMcintosh',
 'David_Boaz',
 'David_J_Bier',
 'DevinNunes',
 'DrewSpringer',
 'Drewbueno',
 'DustinStockton',
 'ERLC',
 'EbooPatel',
 'EconTalker',
 'EdAsante77',
 'EdWhelanEPPC',
 'ElectionsLawyer',
 'Enopoletus',
 'EricaGrieder'

In [15]:
# Community 2 (manual label): South Asia, international news and policy, finance.
clusters[2]

{'1843mag',
 '38NorthNK',
 '9DashLine',
 'ABarnardNYT',
 'ADAMPLOW',
 'AJRElectionMaps',
 'ANI',
 'ANINewsUP',
 'APDiploWriter',
 'APHClarkson',
 'APjoshgoodman',
 'ARobertsjourno',
 'AdityaMenon22',
 'AidaAlami',
 'Aidan_Regan',
 'AkhiPill',
 'AlbertoNardelli',
 'AlexandraUlmer',
 'AliTahmizian',
 'Aliide_N',
 'AlirezaEshraghi',
 'AllanJClarke',
 'AllisonLMcManus',
 'Allison_Good1',
 'AmarAmarasingam',
 'Amazing_Maps',
 'AmericaElige',
 'AndrejNkv',
 'AndresSchipani',
 'AndrewBeatty',
 'AndrewSparrow',
 'AnshelPfeffer',
 'AnthonyBoadle',
 'AntonLaGuardia',
 'AnupKaphle',
 'ArchaeoNomad',
 'AricToler',
 'ArmsControlWonk',
 'ArtofLiving',
 'AsiaElects',
 'AtulHatwal',
 'AuraSalla',
 'AyresAlyssa',
 'AzzamAmeen',
 'BBAIndia',
 'BBCHamedani',
 'BBCInOurTime',
 'BBCKasraNaji',
 'BBCMonitoring',
 'BBCPaulAdams',
 'BChappatta',
 'BLAsia_Africa',
 'BNODesk',
 'Babken',
 'BahmanKalbasi',
 'BallouxFrancois',
 'BankersUmbrella',
 'Billwhiteford',
 'BillyEhrenberg',
 'Birdyword',
 'BloombergQuint

In [16]:
# Community 1 (manual label): left/outsider politics, journalists, other.
clusters[1]

{'33unitehere',
 '6oldberg',
 'ACLU',
 'AOC',
 'AROCBayArea',
 'Abbas_Muntaqim',
 'AbbyMartin',
 'AbdulElSayed',
 'AbiWilks',
 'AdamSerwer',
 'AdamWeinstein',
 'Adbusters',
 'AidanKingVT',
 'Al_Letson',
 'AlexEmmons',
 'AlexisCoe',
 'AliAbunimah',
 'Ali_Gharib',
 'AlleenBrown',
 'AnandWrites',
 'AndrewDFish',
 'AnnieLowrey',
 'AnnieWaldman',
 'AntiNateSilver',
 'AriRabinHavt',
 'ArmyStrang',
 'ArunChaud',
 'AsInMarx',
 'AsherLangton',
 'AsterZephyrIsis',
 'AthertonKD',
 'Atrios',
 'Audible_Feast',
 'AyeshaASiddiqi',
 'AyoCaesar',
 'BRANDONWARDELL',
 'BananaKarenina',
 'BayAreaIntifada',
 'BenjaminFogel',
 'BenjaminNorton',
 'BernieSanders',
 'Bernstein',
 'BigMeanInternet',
 'BillMoyers',
 'BisforBerkshire',
 'BlanksSlate',
 'BlueSpaceCanary',
 'BobbyBigWheel',
 'BrandNew535',
 'BrandonOfTX',
 'BrandyLJensen',
 'BriHReed',
 'BrotherCooper89',
 'ByYourLogic',
 'CHAPOTRAPHOUSE',
 'C_Stroop',
 'CarlBeijer',
 'Cato_of_Utica',
 'CharESilver',
 'ChaseMadar',
 'ClaraJeffery',
 'ClickHole',
 '

In [17]:
# Community 0 (manual label): California, Bay Area, and housing.
clusters[0]

{'1KatieOrr',
 '2ndharvest',
 '826National',
 '8thGenCA',
 'ACCFB',
 'ACLU_CalAction',
 'ACLU_NorCal',
 'ACSOSheriffs',
 'ACVOTE',
 'Able_is',
 'AccelaSoftware',
 'AdamNMayer',
 'AlamedaCoDem',
 'AlamedaCoFire',
 'AlamedaCounty',
 'AlamedaCountyDA',
 'AlexCVassar',
 'AlexPadilla4CA',
 'Aliyah_JM',
 'AmyMQuinton',
 'AnthonyCannella',
 'Ash_Kalra',
 'AsmBillQuirk',
 'AsmEGarciaAD56',
 'AsmRepublicans',
 'BayAreaCouncil',
 'BigadShaban',
 'BillHarrisonCPA',
 'BobWieckowskiCA',
 'BorensteinDan',
 'BreitbartCA',
 'BrianDahleCA',
 'CAFireScanner',
 'CAIRSFBA',
 'CAL_FIRE',
 'CASOSvote',
 'CATargetBot',
 'CATeachersGR',
 'CA_120',
 'CA_DWR',
 'CA_DaveJones',
 'CAgovernor',
 'CHPSanJose',
 'CHP_GoldenGate',
 'CHP_HQ',
 'CIRonline',
 'CPMLEGAL',
 'CWaterC',
 'CaWaterBoards',
 'CalBudgetCenter',
 'CalChamber',
 'CalMatters',
 'CalPolicyCenter',
 'Cal_OES',
 'CaliforniaLabor',
 'Caltrain',
 'CapitolAlert',
 'CenterPovIneq',
 'ChadMayes',
 'ChrisMegerian',
 'ChroniclesOfAzu',
 'CityLab',
 'ColusaB