## Prepare Data 

In [1]:
#import libraries
import pandas as pd
import igraph as ig
import numpy as np

In [2]:
# Load the tweet/retweet table. In the displayed frame both user-id columns are
# floats with NaN gaps; float64 cannot hold every integer above 2**53, so very
# large ids may be rounded -- TODO confirm that distinct users stay distinct.
NETWORK_CSV = 'tweet_retweet_network.csv'
df = pd.read_csv(NETWORK_CSV)
df

Unnamed: 0,tweet_id,user_id_retweet_df,user_id_tweet_df,news_id,is_fake_news
0,1033706162695356417,7.873112e+08,4.335085e+07,politifact99,0.0
1,1033706162695356417,7.873112e+08,4.335085e+07,politifact340,0.0
2,1035580865160638464,3.338247e+09,1.629771e+07,politifact99,0.0
3,1035580865160638464,3.338247e+09,1.629771e+07,politifact340,0.0
4,934206237708865537,7.543102e+17,5.820642e+06,politifact99,0.0
...,...,...,...,...,...
573632,813192381583466496,,3.001448e+09,politifact13600,1.0
573633,812814063763918848,,5.256945e+08,politifact13600,1.0
573634,918510122363744257,,1.525687e+09,politifact14621,1.0
573635,917522953390223366,,7.889922e+17,politifact14621,1.0


In [3]:
# Split the table on the is_fake_news flag: 1.0 goes to df_fake, 0.0 to df_real.
# Rows whose flag equals neither value (e.g. NaN) end up in neither frame.
fake_mask = df['is_fake_news'].eq(1.0)
real_mask = df['is_fake_news'].eq(0.0)
df_fake = df.loc[fake_mask]
df_real = df.loc[real_mask]

In [4]:
# Edge lists: keep only the two endpoint columns and drop every row that is
# missing either endpoint.
EDGE_COLUMNS = ['user_id_retweet_df', 'user_id_tweet_df']
df_net_all = df.loc[:, EDGE_COLUMNS].dropna()
df_net_fake = df_fake.loc[:, EDGE_COLUMNS].dropna()
df_net_real = df_real.loc[:, EDGE_COLUMNS].dropna()

In [5]:
# Show the edge list built from all news items (rows with a missing endpoint removed).
df_net_all

Unnamed: 0,user_id_retweet_df,user_id_tweet_df
0,7.873112e+08,4.335085e+07
1,7.873112e+08,4.335085e+07
2,3.338247e+09,1.629771e+07
3,3.338247e+09,1.629771e+07
4,7.543102e+17,5.820642e+06
...,...,...
89022,1.496571e+09,7.960258e+17
89023,9.816488e+08,7.848257e+17
89024,7.200451e+08,2.435832e+07
89025,8.990504e+08,1.342402e+08


In [6]:
# Show the edge list restricted to rows flagged is_fake_news == 1.0.
df_net_fake

Unnamed: 0,user_id_retweet_df,user_id_tweet_df
4125,8.856920e+17,3.974499e+07
4161,1.917218e+09,8.277065e+07
4235,2.559283e+08,2.147477e+07
4329,8.222716e+17,8.247972e+17
4379,1.759702e+07,1.109180e+08
...,...,...
89022,1.496571e+09,7.960258e+17
89023,9.816488e+08,7.848257e+17
89024,7.200451e+08,2.435832e+07
89025,8.990504e+08,1.342402e+08


In [7]:
# Show the edge list restricted to rows flagged is_fake_news == 0.0.
df_net_real

Unnamed: 0,user_id_retweet_df,user_id_tweet_df
0,7.873112e+08,43350851.0
1,7.873112e+08,43350851.0
2,3.338247e+09,16297707.0
3,3.338247e+09,16297707.0
4,7.543102e+17,5820642.0
...,...,...
68198,2.163463e+07,860585294.0
68199,3.227421e+07,48608766.0
68200,8.013800e+05,18251414.0
68201,2.404211e+08,18251414.0


### Graph Creation and metrics

In [8]:
# Directed graph over all news: every row of df_net_all becomes one edge from
# its user_id_retweet_df value to its user_id_tweet_df value. Duplicate rows
# (e.g. rows 0 and 1 of the table) are not removed before building the graph.
all_edges = df_net_all.itertuples(index=False)
g_all = ig.Graph.TupleList(all_edges, directed=True)

In [9]:
# Directed graph over real-news rows: one edge per row of df_net_real, from the
# user_id_retweet_df value to the user_id_tweet_df value.
real_edges = df_net_real.itertuples(index=False)
g_real = ig.Graph.TupleList(real_edges, directed=True)

In [10]:
# Directed graph over fake-news rows: one edge per row of df_net_fake, from the
# user_id_retweet_df value to the user_id_tweet_df value.
fake_edges = df_net_fake.itertuples(index=False)
g_fake = ig.Graph.TupleList(fake_edges, directed=True)

In [11]:
# Report Graph.diameter() (default arguments) for each of the three networks.
for label, graph in (('all', g_all), ('real', g_real), ('fake', g_fake)):
    print(f'diameter for {label} news retweet network: ', graph.diameter())

diameter for all news retweet network:  9
diameter for real news retweet network:  9
diameter for fake news retweet network:  5


In [12]:
# Report the global clustering coefficient (transitivity_undirected) of each
# network. A single format string keeps the three labels spelled identically;
# the fake-network label previously read "coeffiencent".
for label, graph in (('all', g_all), ('real', g_real), ('fake', g_fake)):
    print(f'clustering coefficient for {label} news retweet network: ',
          graph.transitivity_undirected())

clustering coefficient for all news retweet network:  0.0005758266101118929
clustering coefficient for real news retweet network:  0.0005978739601975375
clustering coeffiencent for fake news retweet network:  0.000514478906364839


In [13]:
# Report the mean of Graph.degree() over all vertices for each network.
for label, graph in (('all', g_all), ('real', g_real), ('fake', g_fake)):
    print(f'average degree for {label} news retweet network: ', np.mean(graph.degree()))

average degree for all news retweet network:  1.4620799081748055
average degree for real news retweet network:  1.4227065873417177
average degree for fake news retweet network:  1.3010251039385572


In [14]:
# Rank the vertices of the fake-news graph by degree (highest first) and show
# the ten best-connected ones.
fake_vertex_names = list(g_fake.vs['name'])
fake_vertex_degrees = g_fake.degree()
df_fake_degree = (
    pd.DataFrame({'name': fake_vertex_names, 'degree': fake_vertex_degrees})
    .sort_values('degree', ascending=False)
    .reset_index(drop=True)
)
df_fake_degree.head(10)

Unnamed: 0,name,degree
0,1911303000.0,153
1,21032570.0,119
2,224653800.0,83
3,2417844000.0,56
4,14294850.0,50
5,2767681000.0,44
6,1355218000.0,42
7,8953122.0,38
8,2309297000.0,37
9,52572720.0,33


## Leading Eigenvector Community Detection

In [14]:
# Run igraph's leading-eigenvector community detection on the fake-news graph
# with default arguments; the result is a clustering whose .membership is used below.
# NOTE(review): the saved output contains a truncated warning emitted by this
# call -- re-run to read the full message before relying on the result.
comm_leading_eigenvector= g_fake.community_leading_eigenvector()

  membership, _, q = GraphBase.community_leading_eigenvector(


In [15]:
# One row per vertex of the fake-news graph: its name and the label of the
# community it was assigned to.
vertex_names = list(g_fake.vs['name'])
community_labels = comm_leading_eigenvector.membership
df_leading_eigenvector = pd.DataFrame({'name': vertex_names, 'community': community_labels})
df_leading_eigenvector

Unnamed: 0,name,community
0,8.856920e+17,0
1,3.974499e+07,0
2,1.917218e+09,1
3,8.277065e+07,1
4,2.559283e+08,2
...,...,...
31504,3.995662e+09,1339
31505,6.957517e+17,11622
31506,9.816488e+08,8781
31507,7.200451e+08,10428


In [16]:
# Community sizes (number of vertices per community label), largest first.
df_leading_eigenvector['community'].value_counts()

11642    386
11622    287
214      203
11627    198
11641    163
        ... 
4476       2
4477       2
4478       2
4479       2
11614      2
Name: community, Length: 11647, dtype: int64

In [17]:
# Diameter of the largest detected community.
# The label comes from the membership table (idxmax of the community sizes)
# rather than a hard-coded id, since labels are only valid for the run that
# produced them; in the saved run it resolves to 11642.
# subgraph(idx) builds only the requested community, whereas subgraphs()
# materialises one graph per community just to index a single entry.
largest_community_id = int(df_leading_eigenvector['community'].value_counts().idxmax())
comm_leading_eigenvector.subgraph(largest_community_id).diameter()

3

In [18]:
# Modularity score of the leading-eigenvector partition, evaluated on the fake-news graph.
modularity = g_fake.modularity(comm_leading_eigenvector)
print(modularity)

0.9946281435098392


In [19]:
# Summary statistics of the membership table. Both columns hold identifiers
# (vertex name, community label), so only count/min/max are directly meaningful.
df_leading_eigenvector.describe()

Unnamed: 0,name,community
count,31509.0,31509.0
mean,2.18716e+17,5893.927989
std,3.747241e+17,3769.656414
min,767.0,0.0
25%,180151200.0,2414.0
50%,1220386000.0,5757.0
75%,7.065833e+17,9258.0
max,1.074002e+18,11646.0


In [29]:
# 99.9th percentile of the community-size distribution.
community_size_frame = df_leading_eigenvector['community'].value_counts().to_frame()
community_size_frame.quantile(0.999)

community    107.062
Name: 0.999, dtype: float64

## Prepare Data for Graphframes and Robustness Disruption

### Community 1

In [None]:
# Members of the largest detected community.
# The label is looked up from the community sizes (value_counts sorts largest
# first) instead of being hard-coded, because labels are only meaningful for
# the run that produced them. In the saved run this resolves to 11642.
community_sizes = df_leading_eigenvector['community'].value_counts()
com1_id = community_sizes.index[0]
df_le_com1 = df_leading_eigenvector[df_leading_eigenvector['community'] == com1_id]
df_le_com1

In [None]:
# Vertex names belonging to community 1, as a plain list for the isin() filters below.
le_com1_list = df_le_com1['name'].tolist()

In [None]:
# Keep only the fake-news edges whose two endpoints both belong to community 1.
com1_retweeter_in = df_net_fake['user_id_retweet_df'].isin(le_com1_list)
com1_tweeter_in = df_net_fake['user_id_tweet_df'].isin(le_com1_list)
df_fake_com1 = df_net_fake[com1_retweeter_in & com1_tweeter_in]
df_fake_com1

In [None]:
# Distinct endpoints appearing in the community-1 edges. The set removes
# duplicates, so the resulting order is arbitrary.
com1_endpoints = df_fake_com1['user_id_retweet_df'].tolist()
com1_endpoints += df_fake_com1['user_id_tweet_df'].tolist()
com1_node_list = list(set(com1_endpoints))

# Node table with the same value in both the 'id' and 'node' columns.
com1_nodes_df = pd.DataFrame({'id': com1_node_list, 'node': com1_node_list})
com1_nodes_df

In [None]:
# Relabel the endpoints as src/dst: user_id_tweet_df becomes 'src' and
# user_id_retweet_df becomes 'dst', with 'src' placed first.
com1_edge_names = {'user_id_retweet_df': 'dst', 'user_id_tweet_df': 'src'}
df_fake_com1 = df_fake_com1.rename(columns=com1_edge_names).loc[:, ['src', 'dst']]
df_fake_com1

In [None]:
# Directed graph of community 1 (edges run src -> dst), then draw it.
com1_edges = df_fake_com1.itertuples(index=False)
g_com1 = ig.Graph.TupleList(com1_edges, directed=True)

ig.plot(g_com1)

In [None]:
# Per-user tweet/retweet counts; the merge below reads its user_id,
# percentage_fake and tweet_retweet_fake_count columns.
METRICS_CSV = 'tweet_retweet_counts.csv'
tweet_retweet_metrics = pd.read_csv(METRICS_CSV)
tweet_retweet_metrics

In [None]:
# Order the community-1 nodes by percentage_fake, then tweet_retweet_fake_count
# (both descending). The metric columns are only used for sorting and are
# dropped afterwards, leaving the original 'id'/'node' columns.
com1_metric_columns = ['user_id', 'percentage_fake', 'tweet_retweet_fake_count']
com1_metrics = com1_nodes_df.merge(
    tweet_retweet_metrics[com1_metric_columns],
    how='left',
    left_on='node',
    right_on='user_id',
)
com1_metrics = (
    com1_metrics
    .sort_values(['percentage_fake', 'tweet_retweet_fake_count'], ascending=False)
    .drop(columns=com1_metric_columns)
    .reset_index(drop=True)
)
com1_metrics

In [None]:
# Alias for the sorted node table (same object, no copy is made).
com1_nodes = com1_metrics
# CSV export of the community-1 nodes and edges is currently disabled.
#com1_nodes.to_csv('le_com1_nodes.csv', index = False)
#df_fake_com1.to_csv('le_com1_edges.csv', index = False)

### Community 2

In [None]:
# Members of the second-largest detected community.
# The label is looked up from the community sizes (value_counts sorts largest
# first) instead of being hard-coded, because labels are only meaningful for
# the run that produced them. In the saved run this resolves to 11622.
community_sizes = df_leading_eigenvector['community'].value_counts()
com2_id = community_sizes.index[1]
df_le_com2 = df_leading_eigenvector[df_leading_eigenvector['community'] == com2_id]
df_le_com2

In [None]:
# Vertex names belonging to community 2, as a plain list for the isin() filters below.
le_com2_list = df_le_com2['name'].tolist()

In [None]:
# Keep only the fake-news edges whose two endpoints both belong to community 2.
com2_retweeter_in = df_net_fake['user_id_retweet_df'].isin(le_com2_list)
com2_tweeter_in = df_net_fake['user_id_tweet_df'].isin(le_com2_list)
df_fake_com2 = df_net_fake[com2_retweeter_in & com2_tweeter_in]
df_fake_com2

In [None]:
# Distinct endpoints appearing in the community-2 edges. The set removes
# duplicates, so the resulting order is arbitrary.
com2_endpoints = df_fake_com2['user_id_retweet_df'].tolist()
com2_endpoints += df_fake_com2['user_id_tweet_df'].tolist()
com2_node_list = list(set(com2_endpoints))

# Node table with the same value in both the 'id' and 'node' columns.
com2_nodes_df = pd.DataFrame({'id': com2_node_list, 'node': com2_node_list})
com2_nodes_df

In [None]:
# Relabel the endpoints as src/dst: user_id_tweet_df becomes 'src' and
# user_id_retweet_df becomes 'dst', with 'src' placed first.
com2_edge_names = {'user_id_retweet_df': 'dst', 'user_id_tweet_df': 'src'}
df_fake_com2 = df_fake_com2.rename(columns=com2_edge_names).loc[:, ['src', 'dst']]
df_fake_com2

In [None]:
# Directed graph of community 2 (edges run src -> dst), then draw it.
com2_edges = df_fake_com2.itertuples(index=False)
g_com2 = ig.Graph.TupleList(com2_edges, directed=True)

ig.plot(g_com2)

In [None]:
#com2_nodes_df.to_csv('le_com2_nodes.csv', index = False)
#df_fake_com2.to_csv('le_com2_edges.csv', index = False)

### Community 3

In [None]:
# Members of the third-largest detected community.
# The label is looked up from the community sizes (value_counts sorts largest
# first) instead of being hard-coded, because labels are only meaningful for
# the run that produced them. In the saved run this resolves to 214.
community_sizes = df_leading_eigenvector['community'].value_counts()
com3_id = community_sizes.index[2]
df_le_com3 = df_leading_eigenvector[df_leading_eigenvector['community'] == com3_id]
df_le_com3

In [None]:
# Vertex names belonging to community 3, as a plain list for the isin() filters below.
le_com3_list = df_le_com3['name'].tolist()

In [None]:
# Keep only the fake-news edges whose two endpoints both belong to community 3.
com3_retweeter_in = df_net_fake['user_id_retweet_df'].isin(le_com3_list)
com3_tweeter_in = df_net_fake['user_id_tweet_df'].isin(le_com3_list)
df_fake_com3 = df_net_fake[com3_retweeter_in & com3_tweeter_in]
df_fake_com3

In [None]:
# Distinct endpoints appearing in the community-3 edges. The set removes
# duplicates, so the resulting order is arbitrary.
com3_endpoints = df_fake_com3['user_id_retweet_df'].tolist()
com3_endpoints += df_fake_com3['user_id_tweet_df'].tolist()
com3_node_list = list(set(com3_endpoints))

# Node table with the same value in both the 'id' and 'node' columns.
com3_nodes_df = pd.DataFrame({'id': com3_node_list, 'node': com3_node_list})
com3_nodes_df

In [None]:
# Relabel the endpoints as src/dst: user_id_tweet_df becomes 'src' and
# user_id_retweet_df becomes 'dst', with 'src' placed first.
com3_edge_names = {'user_id_retweet_df': 'dst', 'user_id_tweet_df': 'src'}
df_fake_com3 = df_fake_com3.rename(columns=com3_edge_names).loc[:, ['src', 'dst']]
df_fake_com3

In [None]:
# Directed graph of community 3 (edges run src -> dst), then draw it.
com3_edges = df_fake_com3.itertuples(index=False)
g_com3 = ig.Graph.TupleList(com3_edges, directed=True)
ig.plot(g_com3)

In [None]:
#com3_nodes_df.to_csv('le_com3_nodes.csv', index = False)
#df_fake_com3.to_csv('le_com3_edges.csv', index = False)

### Community 4

In [None]:
# Members of the fourth-largest detected community.
# The label is looked up from the community sizes (value_counts sorts largest
# first) instead of being hard-coded, because labels are only meaningful for
# the run that produced them. In the saved run this resolves to 11627.
community_sizes = df_leading_eigenvector['community'].value_counts()
com4_id = community_sizes.index[3]
df_le_com4 = df_leading_eigenvector[df_leading_eigenvector['community'] == com4_id]
df_le_com4

In [None]:
# Vertex names belonging to community 4, as a plain list for the isin() filters below.
le_com4_list = df_le_com4['name'].tolist()

In [None]:
# Keep only the fake-news edges whose two endpoints both belong to community 4.
com4_retweeter_in = df_net_fake['user_id_retweet_df'].isin(le_com4_list)
com4_tweeter_in = df_net_fake['user_id_tweet_df'].isin(le_com4_list)
df_fake_com4 = df_net_fake[com4_retweeter_in & com4_tweeter_in]
df_fake_com4

In [None]:
# Distinct endpoints appearing in the community-4 edges. The set removes
# duplicates, so the resulting order is arbitrary.
com4_endpoints = df_fake_com4['user_id_retweet_df'].tolist()
com4_endpoints += df_fake_com4['user_id_tweet_df'].tolist()
com4_node_list = list(set(com4_endpoints))

# Node table with the same value in both the 'id' and 'node' columns.
com4_nodes_df = pd.DataFrame({'id': com4_node_list, 'node': com4_node_list})
com4_nodes_df

In [None]:
# Relabel the endpoints as src/dst: user_id_tweet_df becomes 'src' and
# user_id_retweet_df becomes 'dst', with 'src' placed first.
com4_edge_names = {'user_id_retweet_df': 'dst', 'user_id_tweet_df': 'src'}
df_fake_com4 = df_fake_com4.rename(columns=com4_edge_names).loc[:, ['src', 'dst']]
df_fake_com4

In [None]:
# Directed graph of community 4 (edges run src -> dst), then draw it.
com4_edges = df_fake_com4.itertuples(index=False)
g_com4 = ig.Graph.TupleList(com4_edges, directed=True)
ig.plot(g_com4)

In [None]:
#com4_nodes_df.to_csv('le_com4_nodes.csv', index = False)
#df_fake_com4.to_csv('le_com4_edges.csv', index = False)

### Community 5

In [None]:
# Members of the fifth-largest detected community.
# The label is looked up from the community sizes (value_counts sorts largest
# first) instead of being hard-coded, because labels are only meaningful for
# the run that produced them. In the saved run this resolves to 11641.
community_sizes = df_leading_eigenvector['community'].value_counts()
com5_id = community_sizes.index[4]
df_le_com5 = df_leading_eigenvector[df_leading_eigenvector['community'] == com5_id]
df_le_com5

In [None]:
# Vertex names belonging to community 5, as a plain list for the isin() filters below.
le_com5_list = df_le_com5['name'].tolist()

In [None]:
# Keep only the fake-news edges whose two endpoints both belong to community 5.
com5_retweeter_in = df_net_fake['user_id_retweet_df'].isin(le_com5_list)
com5_tweeter_in = df_net_fake['user_id_tweet_df'].isin(le_com5_list)
df_fake_com5 = df_net_fake[com5_retweeter_in & com5_tweeter_in]
df_fake_com5

In [None]:
# Distinct endpoints appearing in the community-5 edges. The set removes
# duplicates, so the resulting order is arbitrary.
com5_endpoints = df_fake_com5['user_id_retweet_df'].tolist()
com5_endpoints += df_fake_com5['user_id_tweet_df'].tolist()
com5_node_list = list(set(com5_endpoints))

# Node table with the same value in both the 'id' and 'node' columns.
com5_nodes_df = pd.DataFrame({'id': com5_node_list, 'node': com5_node_list})
com5_nodes_df

In [None]:
# Relabel the endpoints as src/dst: user_id_tweet_df becomes 'src' and
# user_id_retweet_df becomes 'dst', with 'src' placed first.
com5_edge_names = {'user_id_retweet_df': 'dst', 'user_id_tweet_df': 'src'}
df_fake_com5 = df_fake_com5.rename(columns=com5_edge_names).loc[:, ['src', 'dst']]
df_fake_com5

In [None]:
# Directed graph of community 5 (edges run src -> dst), then draw it.
com5_edges = df_fake_com5.itertuples(index=False)
g_com5 = ig.Graph.TupleList(com5_edges, directed=True)
ig.plot(g_com5)

In [None]:
#com5_nodes_df.to_csv('le_com5_nodes.csv', index = False)
#df_fake_com5.to_csv('le_com5_edges.csv', index = False)