In [1]:
import pandas as pd
import numpy as np 
import re
import networkx as nx 
from community import community_louvain

In [2]:
def find_rt(text): 
    try: 
        res  = re.search( r'(?<=RT \@)[A-Za-z0-9_]+',text )
    except: 
        return np.nan
    if res is not None: 
        return res.group(0)
    else: 
        return np.nan
    
def centralization(G):
    alln=G.order()
    degr_list = [e[1] for e in list(G.degree())]
    indegr_max = max(degr_list)
    centralization = float((alln*indegr_max - sum(degr_list)))/(alln-1)**2
    return centralization

In [3]:
#df = pd.read_csv('/mnt/mass_drive/Dropbox/Dropbox/Momolbox/paper/dmo_comparative/data/kenoshashooting_en.csv')
df = pd.read_csv('/mnt/mass_drive/Dropbox/Dropbox/Momolbox/paper/dmo_comparative/data/kenoshashooting_en.csv')

In [4]:
df.columns

Index(['author_id', 'created_at', 'text', 'geo', 'lang', 'public_metrics',
       'id', 'user_scr_name', 'username', 'retweeted_status', 'reply_count',
       'quote_count', 'like_count'],
      dtype='object')

In [5]:
df['rt_author'] = df['text'].apply(find_rt)

In [6]:
df['date'] = pd.to_datetime(df['created_at'])

In [7]:
df['rt_author'] = df['text'].apply(find_rt)

In [8]:
df.columns

Index(['author_id', 'created_at', 'text', 'geo', 'lang', 'public_metrics',
       'id', 'user_scr_name', 'username', 'retweeted_status', 'reply_count',
       'quote_count', 'like_count', 'rt_author', 'date'],
      dtype='object')

## network construction 

In [9]:
edges=[list(r) for r in df[['user_scr_name', 'rt_author']].to_numpy()]
    

G=nx.Graph()
for edge in edges: 
        G.add_edge(edge[0],edge[1])

In [10]:
Gd = nx.DiGraph()
for edge in edges: 
        Gd.add_edge(edge[0],edge[1])

## node attributes

In [11]:
#inserirebetwenness centrality etc

In [12]:
degrees_df = pd.DataFrame(list(Gd.in_degree()))
    

  
od = [int(o[1]) for o in  list(Gd.out_degree())]
    
degrees_df['outdegree'] = od
degrees_df.fillna(0,inplace=True)
degrees_df.rename(columns={0:'username',1:'indegree'},inplace=True)

In [13]:
degrees_df

Unnamed: 0,username,indegree,outdegree
0,PharmacyCrystal,0,1
1,wisconsin_now,8,1
2,MfolTorrance,0,1
3,po_murray,6,2
4,JJHobbs12,0,1
...,...,...,...
35459,katylied67,0,1
35460,Sonia_hm04,0,1
35461,Sunsplashsun,0,1
35462,MICAMARADAELCHE,0,1


In [14]:
part = community_louvain.best_partition(G)

In [15]:
communities =pd.DataFrame.from_dict(part,orient='index', columns=['community'])

In [16]:
communities.reset_index(inplace=True)#


In [17]:
communities.rename(columns={'index':'username'},inplace=True) 

In [18]:
communities

Unnamed: 0,username,community
0,PharmacyCrystal,0
1,wisconsin_now,0
2,MfolTorrance,1
3,po_murray,1
4,JJHobbs12,72
...,...,...
35459,katylied67,88
35460,Sonia_hm04,46
35461,Sunsplashsun,72
35462,MICAMARADAELCHE,46


In [19]:
node_attrs_df = pd.merge(degrees_df,communities, left_on='username',right_on='username')

In [20]:
node_attrs_df

Unnamed: 0,username,indegree,outdegree,community
0,PharmacyCrystal,0,1,0
1,wisconsin_now,8,1,0
2,MfolTorrance,0,1,1
3,po_murray,6,2,1
4,JJHobbs12,0,1,72
...,...,...,...,...
35458,katylied67,0,1,88
35459,Sonia_hm04,0,1,46
35460,Sunsplashsun,0,1,72
35461,MICAMARADAELCHE,0,1,46


In [21]:
communities.community.value_counts()

27     6937
72     6455
14     1857
12     1607
48     1408
       ... 
57        2
318       1
315       1
302       1
303       1
Name: community, Length: 319, dtype: int64

In [22]:
top_5_communities  = communities.community.value_counts().head(5).index.tolist()#.reset_index().community.tolist()
top_5_communities

[27, 72, 14, 12, 48]

In [23]:
for comm in top_5_communities: 
    community_df = node_attrs_df[node_attrs_df['community']==comm]
    print(community_df.sort_values(by='indegree',ascending=False).head(10))

              username  indegree  outdegree  community
279     davidmweissman      7952          2         27
21966     amylmiller12         2          0         27
17300    Mel4Americans         2          1         27
8529       YesYesBooks         2          1         27
12514  TylerBlackwell4         1          0         27
10951       wolffe2020         1          2         27
18694          cocozoe         0          1         27
18693  sistersunshine6         0          1         27
18727       Krazy4feet         0          1         27
18724  DefiantGuerilla         0          1         27
              username  indegree  outdegree  community
33825        MAGAThing         5          2         72
11814     JoshMargolin         4          1         72
1007     RonaldTooTall         4          1         72
9131    SorrentinoSean         3          1         72
11241    WeirwoodRaven         3          1         72
8834    DissonantSubLT         3          1         72
8821   Ski

In [24]:
## graph level measures

In [25]:
mod=community_louvain.modularity(part,G)
dens = nx.density(G)
centr= centralization(G)

In [26]:
mod

0.8293686734541768

In [27]:
dens

6.19181737530439e-05

In [28]:
centr

0.2242345098832925

In [30]:
bc = [int(bc[1]) for bc in  list(nx.betwenness_centrality())]

AttributeError: 'Graph' object has no attribute 'betwenness_centrality'

In [32]:
nx.betweenness_centrality(G)

KeyboardInterrupt: 