## Prepare Data 

- Read Tweet Retweet Network
- Filter for all news tweets, real news tweets and fake news tweets 

In [1]:
#import libraries
import pandas as pd
import igraph as ig
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# read tweet retweet network

df = pd.read_csv('../tweet_retweet_network.csv')
df

Unnamed: 0,tweet_id,user_id_retweet_df,user_id_tweet_df,news_id,is_fake_news
0,1033706162695356417,7.873112e+08,4.335085e+07,politifact99,0.0
1,1033706162695356417,7.873112e+08,4.335085e+07,politifact340,0.0
2,1035580865160638464,3.338247e+09,1.629771e+07,politifact99,0.0
3,1035580865160638464,3.338247e+09,1.629771e+07,politifact340,0.0
4,934206237708865537,7.543102e+17,5.820642e+06,politifact99,0.0
...,...,...,...,...,...
573632,813192381583466496,,3.001448e+09,politifact13600,1.0
573633,812814063763918848,,5.256945e+08,politifact13600,1.0
573634,918510122363744257,,1.525687e+09,politifact14621,1.0
573635,917522953390223366,,7.889922e+17,politifact14621,1.0


In [3]:
# filter for fake and real news

df_fake = df[df['is_fake_news'] == 1.0]
df_real = df[df['is_fake_news'] == 0.0]

In [4]:
# clean edge lists

df_net_all = df[['user_id_retweet_df', 'user_id_tweet_df']].dropna()
df_net_fake = df_fake[['user_id_retweet_df', 'user_id_tweet_df']].dropna()
df_net_real = df_real[['user_id_retweet_df', 'user_id_tweet_df']].dropna()

In [5]:
# all news edge list

df_net_all

Unnamed: 0,user_id_retweet_df,user_id_tweet_df
0,7.873112e+08,4.335085e+07
1,7.873112e+08,4.335085e+07
2,3.338247e+09,1.629771e+07
3,3.338247e+09,1.629771e+07
4,7.543102e+17,5.820642e+06
...,...,...
89022,1.496571e+09,7.960258e+17
89023,9.816488e+08,7.848257e+17
89024,7.200451e+08,2.435832e+07
89025,8.990504e+08,1.342402e+08


In [6]:
# fake news edge list

df_net_fake

Unnamed: 0,user_id_retweet_df,user_id_tweet_df
4125,8.856920e+17,3.974499e+07
4161,1.917218e+09,8.277065e+07
4235,2.559283e+08,2.147477e+07
4329,8.222716e+17,8.247972e+17
4379,1.759702e+07,1.109180e+08
...,...,...
89022,1.496571e+09,7.960258e+17
89023,9.816488e+08,7.848257e+17
89024,7.200451e+08,2.435832e+07
89025,8.990504e+08,1.342402e+08


In [7]:
# real news edge list

df_net_real

Unnamed: 0,user_id_retweet_df,user_id_tweet_df
0,7.873112e+08,43350851.0
1,7.873112e+08,43350851.0
2,3.338247e+09,16297707.0
3,3.338247e+09,16297707.0
4,7.543102e+17,5820642.0
...,...,...
68198,2.163463e+07,860585294.0
68199,3.227421e+07,48608766.0
68200,8.013800e+05,18251414.0
68201,2.404211e+08,18251414.0


### Graph Creation

- Create graph for all news, fake news and real news.
- Create dictionary which stores network metrics for the graphs created. Chosen metrics are degree, pagerank, hub score, authority score and betweenness.

In [92]:
# create all news graph

g_all = ig.Graph.TupleList(df_net_all.itertuples(index=False), directed=True)

In [9]:
# create real news graph

g_real = ig.Graph.TupleList(df_net_real.itertuples(index=False), directed=True)

In [14]:
# sort all news nodes by degree

df_all_degree = pd.DataFrame({'name': list(g_all.vs['name']), 'degree':g_all.degree(), 'pagerank':g_all.pagerank(), 'hub_score':g_all.hub_score(), 'authority_score':g_all.authority_score(), 'betweenness':g_all.betweenness()} )


Unnamed: 0,name,degree,pagerank,hub_score,authority_score,betweenness
0,8.714661e+17,1177,0.001512,2.514437e-13,1.0,0.0
1,17112880.0,400,0.000721,8.545592e-14,0.0002307037,0.0
2,1367531.0,350,0.000902,7.480449e-14,1.346904e-07,0.0
3,2973759000.0,331,6e-06,1.0,0.0,0.0
4,2597754000.0,241,6e-06,0.7280967,0.0,0.0
5,9.000407e+17,238,0.000914,5.08475e-14,1.331196e-10,0.0
6,28785490.0,210,0.000717,4.484079e-14,5.831914e-06,0.0
7,54039050.0,177,0.000331,3.780386e-14,0.0,0.0
8,39975530.0,161,0.000515,3.439889e-14,0.0,0.0
9,1911303000.0,156,0.000654,3.335121e-14,0.0,0.0


In [10]:
# create fake news graph

g_fake = ig.Graph.TupleList(df_net_fake.itertuples(index=False), directed=True)

In [85]:
df_all_degree = df_all_degree.sort_values('betweenness', ascending = False).reset_index(drop = True)
df_all_degree.head(10)

Unnamed: 0,name,degree,pagerank,hub_score,authority_score,betweenness
0,3271255000.0,136,0.000435,1.742001e-08,1.34032e-10,976.0
1,16190480.0,17,0.000184,3.634147e-15,5.765948e-06,705.0
2,36700660.0,11,0.000184,2.513345e-15,0.0,608.0
3,346257900.0,40,2.3e-05,8.556069e-15,0.0,311.333333
4,4184335000.0,31,2.8e-05,6.65278e-15,0.0,251.5
5,7.327676e+17,15,8e-06,3.206343e-15,0.0,233.333333
6,58869220.0,28,8.5e-05,8.333437e-15,0.0,224.0
7,450941700.0,63,0.000188,1.743691e-08,4.739218e-08,219.0
8,543780300.0,22,8.5e-05,4.694925e-15,9.069482e-14,210.0
9,617619900.0,19,2.1e-05,5.524339e-15,0.0,210.0


In [86]:
df_all_degree.mean()

name               2.146357e+17
degree             1.462080e+00
pagerank           8.439816e-06
hub_score          3.001362e-05
authority_score    8.451216e-06
betweenness        1.982344e-01
dtype: float64

In [15]:
# sort fake news nodes by degree

df_fake_degree = pd.DataFrame({'name': list(g_fake.vs['name']), 'degree':g_fake.degree() } )
df_fake_degree = df_fake_degree.sort_values('degree', ascending = False).reset_index(drop = True)
df_fake_degree.head(10)

Unnamed: 0,name,degree
0,1911303000.0,153
1,21032570.0,119
2,224653800.0,83
3,2417844000.0,56
4,14294850.0,50
5,2767681000.0,44
6,1355218000.0,42
7,8953122.0,38
8,2309297000.0,37
9,52572720.0,33


## Leading Eigenvector Community Detection

For fake news network:

- Detect communities with Leading Eigenvector Algorithm
- Calculate modularity
- Find communities with the largest number of nodes

In [16]:
#leading eigenvector algorithm

comm_leading_eigenvector= g_fake.community_leading_eigenvector()

  membership, _, q = GraphBase.community_leading_eigenvector(


In [17]:
# calculate modularity with LEA algorithm

modularity = g_fake.modularity(comm_leading_eigenvector)
print(modularity)

0.9946281435098392


In [18]:
# community dataframe

df_leading_eigenvector = pd.DataFrame({'name': list(g_fake.vs['name']), 'community':comm_leading_eigenvector.membership } )
df_leading_eigenvector

Unnamed: 0,name,community
0,8.856920e+17,0
1,3.974499e+07,0
2,1.917218e+09,1
3,8.277065e+07,1
4,2.559283e+08,2
...,...,...
31504,3.995662e+09,1339
31505,6.957517e+17,11622
31506,9.816488e+08,8781
31507,7.200451e+08,10428


In [19]:
# Find largest communities

df_leading_eigenvector['community'].value_counts()

11642    386
11622    287
214      203
11627    198
11641    163
        ... 
9605       2
6340       2
4293       2
2246       2
2039       2
Name: community, Length: 11647, dtype: int64

## Get the network metrics for each community

For each community:

- Prepare a DataFrame to store the calculated metrics for each node.
- For each node, 5 network metrics will be calculated: Degree, PageRank, Hub Score, Authority Score and Betweenness.

### Community 1

In [20]:
# get node list for community 1 (11642)

df_le_com1 = df_leading_eigenvector[df_leading_eigenvector['community']==11642]
df_le_com1 

Unnamed: 0,name,community
638,1.440617e+09,11642
639,3.317085e+07,11642
678,3.453434e+08,11642
679,3.760202e+08,11642
990,3.817328e+09,11642
...,...,...
31283,3.381218e+09,11642
31287,3.805432e+08,11642
31296,3.845993e+08,11642
31337,2.159220e+09,11642


In [21]:
# convert node list to list of nodes

le_com1_list = df_le_com1['name'].tolist()

In [22]:
# create edge list

df_fake_com1 = df_net_fake[df_net_fake['user_id_retweet_df'].isin(le_com1_list)]
df_fake_com1 = df_fake_com1[df_fake_com1['user_id_tweet_df'].isin(le_com1_list)]
df_fake_com1

Unnamed: 0,user_id_retweet_df,user_id_tweet_df
68209,1.440617e+09,3.317085e+07
68230,3.453434e+08,3.760202e+08
68402,3.817328e+09,1.341686e+09
68496,5.028776e+07,7.631806e+17
68551,2.287367e+09,5.329788e+07
...,...,...
88825,3.845993e+08,1.584306e+07
88869,2.159220e+09,9.389488e+08
88925,1.921811e+07,5.831230e+08
88926,4.898620e+07,9.617064e+08


In [23]:
# get node and id for graphframe format

com1_node_list = list(set(df_fake_com1['user_id_retweet_df'].tolist() + df_fake_com1['user_id_tweet_df'].tolist()))

com1_nodes = pd.DataFrame({'id':com1_node_list, 'node':com1_node_list})
com1_nodes

Unnamed: 0,id,node
0,9.388038e+17,9.388038e+17
1,9.337378e+17,9.337378e+17
2,7.197561e+17,7.197561e+17
3,7.872964e+17,7.872964e+17
4,8.232031e+17,8.232031e+17
...,...,...
381,1.711509e+07,1.711509e+07
382,2.755170e+07,2.755170e+07
383,1.322291e+08,1.322291e+08
384,5.553970e+07,5.553970e+07


In [24]:
# get edge list as src and dst

df_fake_com1 = df_fake_com1.rename(columns = {'user_id_retweet_df':'dst', 'user_id_tweet_df':'src'})
df_fake_com1 = df_fake_com1[['src','dst']]
df_fake_com1

Unnamed: 0,src,dst
68209,3.317085e+07,1.440617e+09
68230,3.760202e+08,3.453434e+08
68402,1.341686e+09,3.817328e+09
68496,7.631806e+17,5.028776e+07
68551,5.329788e+07,2.287367e+09
...,...,...
88825,1.584306e+07,3.845993e+08
88869,9.389488e+08,2.159220e+09
88925,5.831230e+08,1.921811e+07
88926,9.617064e+08,4.898620e+07


In [26]:
# view community degrees

df_com1_degree = pd.DataFrame({'name': list(g_com1.vs['name']), 'degree':g_com1.degree(), 'pagerank':g_com1.pagerank(), 'hub_score':g_com1.hub_score(), 'authority_score':g_com1.authority_score(), 'betweenness':g_com1.betweenness()} )

Unnamed: 0,name,degree,pagerank,hub_score,authority_score,betweenness
0,33170850.0,3,0.002017,3.253434e-09,0.0,0.0
1,1440617000.0,1,0.002588,1.0416760000000001e-17,1.084478e-09,0.0
2,376020200.0,1,0.002017,9.548694e-18,0.0,0.0
3,345343400.0,4,0.007455,4.1667030000000006e-17,1.631702e-16,0.0
4,1341686000.0,3,0.002017,0.01240374,0.0,0.0
5,3817328000.0,3,0.003518,3.472252e-17,0.004186507,0.0
6,7.631806e+17,3,0.002017,8.630296e-05,0.0,0.0
7,50287760.0,1,0.002588,1.0416760000000001e-17,2.876765e-05,0.0
8,53297880.0,12,0.002017,0.01374569,0.0,0.0
9,2287367000.0,2,0.003874,2.083351e-17,0.004609962,0.0


In [80]:
df_com1_degree = df_com1_degree.sort_values('betweenness', ascending = False).reset_index(drop = True)
df_com1_degree.head(10)

Unnamed: 0,name,degree,pagerank,hub_score,authority_score,betweenness
0,199768200.0,15,0.008401,1.805571e-16,0.3532105,66.0
1,1035115000.0,8,0.003731,0.006695756,3.263405e-16,14.0
2,905374200.0,7,0.007932,6.944505e-17,0.3442019,14.0
3,52989820.0,6,0.003731,0.006612192,2.373385e-16,12.0
4,2326604000.0,6,0.003743,5.555604000000001e-17,0.3353751,8.0
5,632378500.0,5,0.008616,0.006163169,2.670058e-16,8.0
6,4085136000.0,3,0.002744,0.006286631,2.680274e-05,6.0
7,583123000.0,5,0.005778,5.555604000000001e-17,0.004668621,4.0
8,86796380.0,5,0.00236,7.788944e-05,2.373385e-16,4.0
9,1529809000.0,2,0.003731,2.083351e-17,8.158512e-17,3.0


In [87]:
df_com1_degree.mean()

name               1.286129e+17
degree             2.062176e+00
pagerank           2.590674e-03
hub_score          3.152038e-03
authority_score    1.267733e-01
betweenness        4.378238e-01
dtype: float64

In [27]:
#com1_nodes.to_csv('le_com1_nodes.csv', index = False)
#df_fake_com1.to_csv('le_com1_edges.csv', index = False)

### Community 2

In [28]:
# get node list for communtiiy 2 (11622)

df_le_com2 = df_leading_eigenvector[df_leading_eigenvector['community']==11622]
df_le_com2 

Unnamed: 0,name,community
648,7.551275e+17,11622
649,2.371403e+08,11622
731,1.935895e+08,11622
732,2.611067e+09,11622
885,3.400552e+09,11622
...,...,...
31194,4.768710e+07,11622
31257,2.784632e+09,11622
31319,3.311945e+09,11622
31383,1.371917e+08,11622


In [29]:
# get list of nodes

le_com2_list = df_le_com2['name'].tolist()

In [30]:
# get edge list

df_fake_com2 = df_net_fake[df_net_fake['user_id_retweet_df'].isin(le_com2_list)]
df_fake_com2 = df_fake_com2[df_fake_com2['user_id_tweet_df'].isin(le_com2_list)]
df_fake_com2

Unnamed: 0,user_id_retweet_df,user_id_tweet_df
68215,7.551275e+17,2.371403e+08
68257,1.935895e+08,2.611067e+09
68338,3.400552e+09,1.578096e+09
68403,4.668486e+07,1.364870e+09
68410,3.740733e+09,2.197834e+09
...,...,...
88735,8.531855e+08,4.768710e+07
88794,2.784632e+09,2.611067e+09
88852,3.311945e+09,1.265464e+09
88902,1.371917e+08,1.330052e+08


In [31]:
# get name and id for graphframes

com2_node_list = list(set(df_fake_com2['user_id_retweet_df'].tolist() + df_fake_com2['user_id_tweet_df'].tolist()))

com2_nodes_df = pd.DataFrame({'id':com2_node_list, 'node':com2_node_list})
com2_nodes_df

Unnamed: 0,id,node
0,7.551275e+17,7.551275e+17
1,7.461340e+17,7.461340e+17
2,7.980380e+17,7.980380e+17
3,7.972981e+17,7.972981e+17
4,8.990013e+17,8.990013e+17
...,...,...
282,1.147704e+08,1.147704e+08
283,1.689912e+09,1.689912e+09
284,2.430505e+08,2.430505e+08
285,5.209651e+07,5.209651e+07


In [32]:
# get src and dst for graphframes

df_fake_com2 = df_fake_com2.rename(columns = {'user_id_retweet_df':'dst', 'user_id_tweet_df':'src'})
df_fake_com2 = df_fake_com2[['src','dst']]
df_fake_com2

Unnamed: 0,src,dst
68215,2.371403e+08,7.551275e+17
68257,2.611067e+09,1.935895e+08
68338,1.578096e+09,3.400552e+09
68403,1.364870e+09,4.668486e+07
68410,2.197834e+09,3.740733e+09
...,...,...
88735,4.768710e+07,8.531855e+08
88794,2.611067e+09,2.784632e+09
88852,1.265464e+09,3.311945e+09
88902,1.330052e+08,1.371917e+08


In [34]:
# view degrees for com2

df_com2_degree = pd.DataFrame({'name': list(g_com2.vs['name']), 'degree':g_com2.degree(), 'pagerank':g_com2.pagerank(), 'hub_score':g_com2.hub_score(), 'authority_score':g_com2.authority_score(), 'betweenness':g_com2.betweenness()} )

Unnamed: 0,name,degree,pagerank,hub_score,authority_score,betweenness
0,3772895000.0,28,0.002648,1.0,0.0,0.0
1,24544800.0,23,0.002648,0.320588,0.0,0.0
2,7.900192e+17,14,0.002648,0.02155475,0.0,0.0
3,42685470.0,14,0.002648,0.1317462,0.0,0.0
4,550956000.0,14,0.002648,0.1030595,0.0,0.0
5,3387188000.0,12,0.002648,0.1362586,0.0,0.0
6,133005200.0,11,0.00293,0.001148401,0.030643,10.0
7,853185500.0,9,0.013728,4.882761e-16,1.0,0.0
8,3271255000.0,8,0.002648,0.04622376,0.0,0.0
9,2841715000.0,8,0.002648,0.05432048,0.0,0.0


In [76]:
df_com2_degree = df_com2_degree.sort_values('betweenness', ascending = False).reset_index(drop = True)
df_com2_degree.head(10)

Unnamed: 0,name,degree,pagerank,hub_score,authority_score,betweenness
0,133005200.0,11,0.00293,0.001148401,0.030643,10.0
1,361003800.0,7,0.00506,1.213344e-16,0.060166,10.0
2,2611067000.0,7,0.005349,0.04212567,0.0,10.0
3,1265464000.0,5,0.00715,5.716664e-05,0.0,6.0
4,3415287000.0,5,0.006775,1.128497e-16,0.037599,6.0
5,257097600.0,4,0.005349,9.750393000000001e-17,0.003122,4.0
6,749828100.0,3,0.00498,1.039163e-16,0.583798,2.0
7,40708920.0,3,0.003179,1.012713e-16,0.586988,2.0
8,7.284201e+17,3,0.003559,1.039163e-16,0.060318,2.0
9,4185510000.0,3,0.005181,1.039163e-16,0.031712,2.0


In [88]:
df_com2_degree.mean()

name               2.512130e+17
degree             2.083624e+00
pagerank           3.484321e-03
hub_score          9.665793e-03
authority_score    8.610526e-02
betweenness        2.160279e-01
dtype: float64

## Community 3

In [36]:
# get node list for community 3 (214)

df_le_com3 = df_leading_eigenvector[df_leading_eigenvector['community']==214]
df_le_com3 

Unnamed: 0,name,community
466,1.426710e+09,214
467,1.411902e+09,214
851,6.400884e+07,214
852,6.265030e+08,214
1458,1.605804e+09,214
...,...,...
31011,3.313742e+09,214
31013,2.303501e+07,214
31071,8.931119e+17,214
31082,5.685204e+08,214


In [37]:
# get list of nodes for community 3

le_com3_list = df_le_com3['name'].tolist()

In [38]:
# get edge list for community 3

df_fake_com3 = df_net_fake[df_net_fake['user_id_retweet_df'].isin(le_com3_list)]
df_fake_com3 = df_fake_com3[df_fake_com3['user_id_tweet_df'].isin(le_com3_list)]
df_fake_com3

Unnamed: 0,user_id_retweet_df,user_id_tweet_df
49655,1.426710e+09,1.411902e+09
68321,6.400884e+07,6.265030e+08
68663,1.605804e+09,3.886379e+07
68789,5.880898e+07,1.986011e+07
68901,2.498183e+07,2.922631e+08
...,...,...
88750,2.336081e+07,1.106813e+08
88761,5.538136e+07,4.592016e+07
88848,1.053184e+09,5.797912e+08
88919,2.589525e+07,2.002245e+07


In [39]:
# get name and id for graphframe format

com3_node_list = list(set(df_fake_com3['user_id_retweet_df'].tolist() + df_fake_com3['user_id_tweet_df'].tolist()))

com3_nodes_df = pd.DataFrame({'id':com3_node_list, 'node':com3_node_list})
com3_nodes_df

Unnamed: 0,id,node
0,7.631303e+17,7.631303e+17
1,1.057778e+18,1.057778e+18
2,9.660159e+17,9.660159e+17
3,9.972780e+17,9.972780e+17
4,9.308387e+17,9.308387e+17
...,...,...
198,2.623114e+08,2.623114e+08
199,1.498574e+09,1.498574e+09
200,5.451284e+08,5.451284e+08
201,3.003809e+09,3.003809e+09


In [40]:
# get src and dst for graphframes format

df_fake_com3 = df_fake_com3.rename(columns = {'user_id_retweet_df':'dst', 'user_id_tweet_df':'src'})
df_fake_com3 = df_fake_com3[['src','dst']]
df_fake_com3

Unnamed: 0,src,dst
49655,1.411902e+09,1.426710e+09
68321,6.265030e+08,6.400884e+07
68663,3.886379e+07,1.605804e+09
68789,1.986011e+07,5.880898e+07
68901,2.922631e+08,2.498183e+07
...,...,...
88750,1.106813e+08,2.336081e+07
88761,4.592016e+07,5.538136e+07
88848,5.797912e+08,1.053184e+09
88919,2.002245e+07,2.589525e+07


In [42]:
# get degrees for nodes in community 3

df_com3_degree = pd.DataFrame({'name': list(g_com3.vs['name']), 'degree':g_com3.degree(), 'pagerank':g_com3.pagerank(), 'hub_score':g_com3.hub_score(), 'authority_score':g_com3.authority_score(), 'betweenness':g_com3.betweenness()} )
df_com3_degree = df_com3_degree.sort_values('degree', ascending = False).reset_index(drop = True)
df_com3_degree.head(10)

Unnamed: 0,name,degree,pagerank,hub_score,authority_score,betweenness
0,52572720.0,31,0.003673,1.0,0.0,0.0
1,7.537769e+17,18,0.003673,0.258586,0.0,0.0
2,38863790.0,11,0.003673,0.003731,0.0,0.0
3,317224000.0,10,0.003673,0.000833,0.0,0.0
4,626503000.0,9,0.003673,0.030997,0.0,0.0
5,829735600.0,9,0.003673,0.030688,0.0,0.0
6,34991030.0,9,0.003673,0.0,0.0,0.0
7,1426815000.0,8,0.009916,0.024818,0.0,8.0
8,292263100.0,7,0.003673,0.060092,0.0,0.0
9,2848049000.0,6,0.00831,0.0,1.0,0.0


In [89]:
df_com3_degree.mean()

name               2.551062e+17
degree             2.118227e+00
pagerank           4.926108e-03
hub_score          1.131382e-02
authority_score    5.545804e-02
betweenness        1.133005e-01
dtype: float64

### Community 4

In [44]:
# get node list for community 4 (11627)

df_le_com4 = df_leading_eigenvector[df_leading_eigenvector['community']==11627]
df_le_com4 

Unnamed: 0,name,community
984,5.066415e+08,11627
985,1.904357e+07,11627
1011,9.015851e+17,11627
1012,3.401255e+09,11627
1211,4.079558e+08,11627
...,...,...
30944,2.815371e+08,11627
31079,8.737537e+17,11627
31440,1.792504e+08,11627
31452,3.483230e+08,11627


In [45]:
# get list of nodes for community 4

le_com4_list = df_le_com4['name'].tolist()

In [46]:
# get edge list for community 4

df_fake_com4 = df_net_fake[df_net_fake['user_id_retweet_df'].isin(le_com4_list)]
df_fake_com4 = df_fake_com4[df_fake_com4['user_id_tweet_df'].isin(le_com4_list)]
df_fake_com4

Unnamed: 0,user_id_retweet_df,user_id_tweet_df
68399,5.066415e+08,1.904357e+07
68413,9.015851e+17,3.401255e+09
68518,1.904357e+07,4.079558e+08
68570,3.286796e+09,6.343304e+08
68604,3.286796e+09,1.251284e+08
...,...,...
88889,1.655766e+09,9.841634e+07
88908,9.841634e+07,1.655766e+09
88966,3.483230e+08,8.066953e+17
88975,1.792504e+08,3.481966e+09


In [47]:
# get node and id for graphframe format

com4_node_list = list(set(df_fake_com4['user_id_retweet_df'].tolist() + df_fake_com4['user_id_tweet_df'].tolist()))

com4_nodes_df = pd.DataFrame({'id':com4_node_list, 'node':com4_node_list})
com4_nodes_df

Unnamed: 0,id,node
0,9.015851e+17,9.015851e+17
1,7.154743e+17,7.154743e+17
2,7.541154e+17,7.541154e+17
3,7.767937e+17,7.767937e+17
4,8.379208e+17,8.379208e+17
...,...,...
193,7.550310e+08,7.550310e+08
194,8.193971e+17,8.193971e+17
195,5.270098e+08,5.270098e+08
196,5.401528e+08,5.401528e+08


In [48]:
# get src and dst for graphframe format

df_fake_com4 = df_fake_com4.rename(columns = {'user_id_retweet_df':'dst', 'user_id_tweet_df':'src'})
df_fake_com4 = df_fake_com4[['src','dst']]
df_fake_com4

Unnamed: 0,src,dst
68399,1.904357e+07,5.066415e+08
68413,3.401255e+09,9.015851e+17
68518,4.079558e+08,1.904357e+07
68570,6.343304e+08,3.286796e+09
68604,1.251284e+08,3.286796e+09
...,...,...
88889,9.841634e+07,1.655766e+09
88908,1.655766e+09,9.841634e+07
88966,8.066953e+17,3.483230e+08
88975,3.481966e+09,1.792504e+08


In [50]:
# get degrees for all nodes in community 4

df_com4_degree = pd.DataFrame({'name': list(g_com4.vs['name']), 'degree':g_com4.degree(), 'pagerank':g_com4.pagerank(), 'hub_score':g_com4.hub_score(), 'authority_score':g_com4.authority_score(), 'betweenness':g_com4.betweenness()} )

Unnamed: 0,name,degree,pagerank,hub_score,authority_score,betweenness
0,3291557000.0,19,0.003844,1.0,0.0,0.0
1,2406060000.0,12,0.003844,3.340512e-16,0.0,0.0
2,634330400.0,10,0.003844,1.4653e-16,5.743289e-18,0.0
3,125128400.0,10,0.003844,1.305161e-16,5.743289e-18,0.0
4,3481966000.0,10,0.003844,1.59046e-16,5.743289e-18,0.0
5,19043570.0,7,0.004498,8.4251e-17,0.0,8.0
6,46455460.0,7,0.004171,2.885783e-16,0.0,4.0
7,4521331000.0,7,0.018686,5.57327e-17,0.0,0.0
8,8.066953e+17,7,0.003844,7.756907e-17,0.0,0.0
9,275276100.0,6,0.003844,8.452305000000001e-17,0.0,0.0


In [68]:
df_com4_degree = df_com4_degree.sort_values('betweenness', ascending = False).reset_index(drop = True)
df_com4_degree.head(10)

Unnamed: 0,name,degree,pagerank,hub_score,authority_score,betweenness
0,19043570.0,7,0.004498,8.4251e-17,0.0,8.0
1,98416340.0,5,0.006698,6.08692e-17,0.0,6.0
2,7.126901e+17,5,0.008745,5.2713440000000007e-17,0.0,6.0
3,851059700.0,5,0.009448,5.693894e-17,0.595078,5.0
4,81438840.0,5,0.007111,0.06519281,0.0,5.0
5,915137500.0,3,0.004481,3.204229e-17,0.0,4.0
6,46455460.0,7,0.004171,2.885783e-16,0.0,4.0
7,1655766000.0,5,0.005742,5.992598e-17,0.0,4.0
8,7.110044e+17,2,0.004389,2.042031e-17,0.0,2.0
9,425418900.0,2,0.007574,2.042031e-17,0.0,2.0


In [90]:
df_com4_degree.mean()

name               2.586208e+17
degree             2.050505e+00
pagerank           5.050505e-03
hub_score          6.361267e-03
authority_score    5.066530e-02
betweenness        2.878788e-01
dtype: float64

### Community 5

In [52]:
# get node list for community 5 (11641)

df_le_com5 = df_leading_eigenvector[df_leading_eigenvector['community']==11641]
df_le_com5 

Unnamed: 0,name,community
1377,3.020456e+09,11641
1378,4.236584e+09,11641
1678,8.749723e+08,11641
1679,3.145369e+08,11641
2279,8.019919e+17,11641
...,...,...
30794,3.151825e+09,11641
30848,5.693754e+07,11641
31005,7.801012e+07,11641
31051,7.855410e+17,11641


In [53]:
# get list of nodes

le_com5_list = df_le_com5['name'].tolist()

In [54]:
# get edge list for community 5

df_fake_com5 = df_net_fake[df_net_fake['user_id_retweet_df'].isin(le_com5_list)]
df_fake_com5 = df_fake_com5[df_fake_com5['user_id_tweet_df'].isin(le_com5_list)]
df_fake_com5

Unnamed: 0,user_id_retweet_df,user_id_tweet_df
68613,3.020456e+09,4.236584e+09
68791,8.749723e+08,3.145369e+08
69148,8.019919e+17,2.341270e+08
69309,3.020456e+09,4.781576e+09
69472,7.179114e+17,4.063986e+09
...,...,...
88471,5.693754e+07,3.145369e+08
88588,2.207787e+09,7.801012e+07
88626,2.207787e+09,7.855410e+17
88971,2.140461e+08,1.289705e+09


In [55]:
# get id and node for graphframe format

com5_node_list = list(set(df_fake_com5['user_id_retweet_df'].tolist() + df_fake_com5['user_id_tweet_df'].tolist()))

com5_nodes_df = pd.DataFrame({'id':com5_node_list, 'node':com5_node_list})
com5_nodes_df

Unnamed: 0,id,node
0,8.019919e+17,8.019919e+17
1,7.179114e+17,7.179114e+17
2,8.003314e+17,8.003314e+17
3,8.831161e+17,8.831161e+17
4,3.020456e+09,3.020456e+09
...,...,...
158,3.175582e+08,3.175582e+08
159,9.068559e+08,9.068559e+08
160,1.526871e+08,1.526871e+08
161,2.481152e+08,2.481152e+08


In [56]:
# get src and dst for graphframe format

df_fake_com5 = df_fake_com5.rename(columns = {'user_id_retweet_df':'dst', 'user_id_tweet_df':'src'})
df_fake_com5 = df_fake_com5[['src','dst']]
df_fake_com5

Unnamed: 0,src,dst
68613,4.236584e+09,3.020456e+09
68791,3.145369e+08,8.749723e+08
69148,2.341270e+08,8.019919e+17
69309,4.781576e+09,3.020456e+09
69472,4.063986e+09,7.179114e+17
...,...,...
88471,3.145369e+08,5.693754e+07
88588,7.801012e+07,2.207787e+09
88626,7.855410e+17,2.207787e+09
88971,1.289705e+09,2.140461e+08


In [58]:
# get degrees for all nodes in community 5

df_com5_degree = pd.DataFrame({'name': list(g_com5.vs['name']), 'degree':g_com5.degree(), 'pagerank':g_com5.pagerank(), 'hub_score':g_com5.hub_score(), 'authority_score':g_com5.authority_score(), 'betweenness':g_com5.betweenness()} )
df_com5_degree = df_com5_degree.sort_values('degree', ascending = False).reset_index(drop = True)
df_com5_degree.head(10)

Unnamed: 0,name,degree,pagerank,hub_score,authority_score,betweenness
0,55312750.0,16,0.004474,1.0,6.880624e-17,0.0
1,314536900.0,13,0.004474,0.000139,1.6329550000000003e-17,0.0
2,234127000.0,13,0.004474,0.334853,1.6329550000000003e-17,0.0
3,16634540.0,8,0.004474,0.000385,3.440312e-17,0.0
4,4781576000.0,8,0.004474,0.004478,3.440312e-17,0.0
5,3518487000.0,7,0.004474,0.0,4.032976e-17,0.0
6,109623400.0,7,0.024632,0.0,0.1576935,0.0
7,40988390.0,6,0.016652,0.0,0.0004419022,0.0
8,2790293000.0,6,0.021402,0.0,0.3070627,0.0
9,1093983000.0,5,0.004474,0.128227,1.242088e-17,0.0


In [91]:
df_com5_degree.mean()

name               2.326058e+17
degree             2.012270e+00
pagerank           6.134969e-03
hub_score          1.501535e-02
authority_score    8.963015e-02
betweenness        1.656442e-01
dtype: float64