In [295]:
import numpy as np
import pandas as pd
import snap
import math

import json

import networkx as nx
from igraph import Graph

# import pyspark.sql.functions as F
# from pyspark.sql import SparkSession, Window
# from pyspark.sql.types import IntegerType

import matplotlib.pyplot as plt
%matplotlib inline

import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
import plotly.express as px
from plotly.graph_objs import Layout

import cufflinks
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)


from bokeh.io import output_notebook, show, save
from bokeh.plotting import output_file
from bokeh.plotting import figure
from bokeh.plotting import from_networkx
from bokeh.models import Range1d, Circle, ColumnDataSource, MultiLine
from bokeh.palettes import Viridis8, Spectral8
from bokeh.transform import linear_cmap

In [272]:
def network_plot(G, title, number_to_adjust_by = 0, layout = nx.circular_layout):
    '''
    Render an interactive Bokeh plot of a networkx graph inline in the notebook.

    Nodes are sized and coloured by (degree + number_to_adjust_by); hovering a
    node shows its name and its degree.

    input:
    =======================
    G: networkx object. NOTE: it is modified in place -- the node attributes
       'degree' and 'adjusted_node_size' are (re)written on it.
    title: figure title
    number_to_adjust_by: constant added to every node degree to obtain the
       glyph size / colour value (may be negative to shrink the nodes)
    layout: networkx layout function, passed to bokeh's from_networkx together
       with scale=10 and center=(0, 0)
    -----------------------
    output:
    =======================
    None. The plot is displayed via output_notebook()/show(); nothing is
    written to disk.
    '''
    # Compute the degrees once and reuse them for both node attributes.
    degrees = dict(nx.degree(G))
    adjusted_node_size = {node: degree + number_to_adjust_by for node, degree in degrees.items()}
    nx.set_node_attributes(G, name='degree', values=degrees)
    nx.set_node_attributes(G, name='adjusted_node_size', values=adjusted_node_size)
    size_by_this_attribute = 'adjusted_node_size'
    color_by_this_attribute = 'adjusted_node_size'

    # Pick a color palette — Blues8, Reds8, Purples8, Oranges8, Viridis8
    color_palette = Spectral8

    # Establish which categories will appear when hovering over each node
    HOVER_TOOLTIPS = [("User", "@index"),("Degree", "@degree")]

    # Create a plot — set dimensions, toolbar, and title
    plot = figure(tooltips = HOVER_TOOLTIPS,tools="pan,wheel_zoom,save,reset", active_scroll='wheel_zoom',
                  x_range=Range1d(-10.1, 10.1), y_range=Range1d(-10.1, 10.1), title=title,  width=1200, height=900)

    # Create a network graph renderer positioned by the requested layout
    network_graph = from_networkx(G, layout, scale=10, center=(0, 0))

    # Set node sizes and colors according to the adjusted degree
    # (color is mapped linearly across the palette between the observed min and max)
    color_values = network_graph.node_renderer.data_source.data[color_by_this_attribute]
    minimum_value_color = min(color_values)
    maximum_value_color = max(color_values)
    network_graph.node_renderer.glyph = Circle(size=size_by_this_attribute, fill_color=linear_cmap(color_by_this_attribute, color_palette, minimum_value_color, maximum_value_color))

    # Set edge opacity and width
    network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=1)

    plot.renderers.append(network_graph)
    output_notebook()
    show(plot, notebook_handle=True)

In [4]:
# Load the raw tweet table from the CSV export.
covid_vaccine_df = pd.read_csv(
    filepath_or_buffer="data/twitter_vaccine.csv",
)

In [6]:
# Inspect the dtype pandas inferred for every column of the raw tweet table.
covid_vaccine_df.dtypes

id                   int64
conversation_id      int64
created_at          object
date                object
time                object
timezone             int64
user_id              int64
username            object
name                object
place               object
tweet               object
language            object
mentions            object
urls                object
photos              object
replies_count        int64
retweets_count       int64
likes_count          int64
hashtags            object
cashtags            object
link                object
retweet            float64
quote_url           object
video                int64
thumbnail           object
near               float64
geo                float64
source             float64
user_rt_id         float64
user_rt            float64
retweet_id         float64
reply_to            object
retweet_date       float64
translate          float64
trans_src          float64
trans_dest         float64
dtype: object

In [16]:
# Row count and the date range covered by the data set.
print(f"Size : {len(covid_vaccine_df)}")

tweet_dates = covid_vaccine_df['date']
print(f"Min : {tweet_dates.min()}")
print(f"Max : {tweet_dates.max()}")


Size : 209929
Min : 2020-02-12
Max : 2020-10-22


In [20]:
# Boolean mask: True where `reply_to` does NOT contain the literal text 'user_id': None.
~(
    covid_vaccine_df["reply_to"]
    .str.contains(pat="'user_id': None", regex=False)
)

0          True
1         False
2         False
3          True
4         False
          ...  
209924     True
209925     True
209926     True
209927    False
209928     True
Name: reply_to, Length: 209929, dtype: bool

In [30]:
# Pull the replied-to user's id and username out of the stringified `reply_to` field.
# The username group is `[^']+` rather than a greedy `.+`, so it stops at the closing
# quote of the first username instead of running on to the last `'}` in the string
# (which would swallow any further dicts). Braces are escaped so they are
# unambiguously literal. Rows that do not match yield NaN in both columns.
reply_to_info = covid_vaccine_df["reply_to"].str.extract(
    r"\{'user_id': '(?P<reply_to_id>\d+)', 'username': '(?P<reply_to_username>[^']+)'\}"
)
reply_to_info

Unnamed: 0,reply_to_id,reply_to_username
0,355989081,ANI
1,,
2,,
3,1105377223,MisseeMonis
4,,
...,...,...
209924,299273962,Laurie_Garrett
209925,580312540,girlsreallyrule
209926,53561335,LeighNapier
209927,,


In [33]:
# Attach the extracted reply-to columns to the tweets, matching rows by index.
new_covid_vaccine_df = covid_vaccine_df.join(reply_to_info, how="left")

In [36]:
# One edge per reply tweet: replying user -> user replied to, each with weight 1.
# Built with .loc + .assign so a new frame is produced; adding the `width` column to
# a chained-indexing slice would raise pandas' SettingWithCopyWarning.
# NOTE(review): in the displayed rows `username` is lower-case while
# `reply_to_username` keeps its original casing (e.g. hemagazineindia ->
# HEmagazineIndia), so graphs built directly from this frame treat such pairs as
# different nodes -- confirm whether that is intended.
reply_to_edges = (
    new_covid_vaccine_df.loc[
        new_covid_vaccine_df["reply_to_id"].notnull(),
        ["username", "reply_to_username"],
    ]
    .assign(width=1)
)
reply_to_edges

Unnamed: 0,username,reply_to_username,width
0,to_fly_to_live,ANI,1
3,bak_sahil,MisseeMonis,1
10,clivebennett,theJeremyVine,1
21,raquelquefois,jim_dickinson,1
22,hemagazineindia,HEmagazineIndia,1
...,...,...,...
209920,zkusovac,admirim,1
209924,derek_linders,Laurie_Garrett,1
209925,allnbowtane,girlsreallyrule,1
209926,leighnapier,LeighNapier,1


In [37]:
# Directed reply graph: edges point from the replying user to the user replied to.
covid_vaccine_reply_graph = nx.from_pandas_edgelist(
    reply_to_edges,
    source="username",
    target="reply_to_username",
    edge_attr="width",
    create_using=nx.DiGraph(),
)

In [46]:
# Degree centrality of the reply graph, ranked from highest to lowest.
reply_centrality = nx.degree_centrality(covid_vaccine_reply_graph)
sorted_reply_centrality = {
    name: reply_centrality[name]
    for name in sorted(reply_centrality, key=reply_centrality.get, reverse=True)
}
sorted_reply_centrality_df = pd.DataFrame(
    data=list(sorted_reply_centrality.items()),
    columns=['username', 'centrality'],
)
sorted_reply_centrality_df.head(50)

Unnamed: 0,username,centrality
0,realDonaldTrump,0.018609
1,jcho710,0.010723
2,JoeBiden,0.002838
3,CNN,0.002584
4,narendramodi,0.002131
5,WhiteHouse,0.001922
6,thehill,0.001568
7,MattHancock,0.001557
8,harleysnhotrods,0.001303
9,NYGovCuomo,0.001226


In [39]:
# Out-degree centrality of the reply graph (edges run from the replying user to the
# user replied to), ranked from highest to lowest; show the top 20.
out_reply_centrality = nx.out_degree_centrality(covid_vaccine_reply_graph)
sorted_out_reply_centrality = {
    name: out_reply_centrality[name]
    for name in sorted(out_reply_centrality, key=out_reply_centrality.get, reverse=True)
}
list(sorted_out_reply_centrality.items())[:20]

[('jcho710', 0.010601994500215354),
 ('emilyle30611735', 0.0011043744271057659),
 ('covidvaxine', 0.00103811196147942),
 ('scottand67', 0.00103811196147942),
 ('drtessat', 0.0010160244729373047),
 ('post1113a', 0.0007730620989740361),
 ('martina_ant79', 0.0006405371677213442),
 ('aiexpert14', 0.0006405371677213442),
 ('assocdesign', 0.0005853184463660559),
 ('blloydblloyd', 0.0005521872135528829),
 ('rob_miller12345', 0.0005411434692818253),
 ('jpatte8007', 0.0005300997250107676),
 ('innbioresearch', 0.00051905598073971),
 ('asemhamdy55', 0.000452793515113364),
 ('katapult_in', 0.000452793515113364),
 ('lulu111593', 0.0004417497708423064),
 ('threadreaderapp', 0.0004307060265712487),
 ('gerald_weaver_', 0.00040861853802913337),
 ('muaythaiguy44', 0.00036444356094490277),
 ('margare86411004', 0.00036444356094490277)]

In [56]:
# Collapse repeated (replier, replied-to) pairs into one row whose width is the
# number of replies, heaviest pairs first.
grouped_reply_to_edges = (
    reply_to_edges
    .groupby(["username", "reply_to_username"])
    .agg({"width" : "sum"})
    .reset_index()
    .sort_values(by="width", ascending=False)
)
grouped_reply_to_edges.head(20)

Unnamed: 0,username,reply_to_username,width
28630,jcho710,jcho710,1432
31662,jurg_ames,realDonaldTrump,64
20958,freedomgirl2011,freedomgirl2011,54
24676,hollywdhealth,HollywdHealth,47
27953,jcho710,ChuckCallesto,41
31110,jpatte8007,KLoeffler,40
50656,rnaianalyst,RNAiAnalyst,31
17711,dukemargolis,DukeMargolis,27
1691,activistbowen2,ActivistBowen2,25
4612,apifeelgood,ApiFeelGood,24


In [54]:
# Weighted directed reply graph built from the aggregated edge list.
# Edges point from the replying user (`username`) to the user replied to
# (`reply_to_username`), matching the direction used for
# `covid_vaccine_reply_graph`; the previous version had source and target swapped,
# which reversed every edge. Total degree centrality is unaffected by direction,
# but in-/out-degree based measures are.
grouped_covid_vaccine_reply_graph = nx.from_pandas_edgelist(
    df=grouped_reply_to_edges,
    source="username",
    target="reply_to_username",
    create_using=nx.DiGraph(),
    edge_attr="width",
)

In [55]:
# Degree centrality of the aggregated reply graph, ranked from highest to lowest.
grouped_reply_centrality = nx.degree_centrality(grouped_covid_vaccine_reply_graph)
sorted_grouped_reply_centrality = {
    name: grouped_reply_centrality[name]
    for name in sorted(grouped_reply_centrality, key=grouped_reply_centrality.get, reverse=True)
}
sorted_grouped_reply_centrality_df = pd.DataFrame(
    data=list(sorted_grouped_reply_centrality.items()),
    columns=['username', 'centrality'],
)
sorted_grouped_reply_centrality_df.head(20)

Unnamed: 0,username,centrality
0,realDonaldTrump,0.018609
1,jcho710,0.010723
2,JoeBiden,0.002838
3,CNN,0.002584
4,narendramodi,0.002131
5,WhiteHouse,0.001922
6,thehill,0.001568
7,MattHancock,0.001557
8,harleysnhotrods,0.001303
9,NYGovCuomo,0.001226


In [121]:
# Preview the author and the stringified mention list of every tweet.
covid_vaccine_df.loc[:, ["username", "mentions"]]

Unnamed: 0,username,mentions
0,to_fly_to_live,['ani']
1,utkarshsinha07,[]
2,batolebazi,[]
3,bak_sahil,['misseemonis']
4,ivibhatweedy,[]
...,...,...
209924,derek_linders,['laurie_garrett']
209925,allnbowtane,['girlsreallyrule']
209926,leighnapier,"['caumontsimone', 'covid_19news', 'youtube']"
209927,p_anatacio,[]


In [123]:
# (username, mentions-string) pairs, one per tweet.
original_list = list(zip(covid_vaccine_df["username"], covid_vaccine_df["mentions"]))

In [142]:
# Expand every tweet's mention list into (author, mentioned user, weight=1) tuples.
# The mentions column holds Python-style list literals with single quotes, so the
# quotes are swapped for double quotes before parsing the text as JSON.
mentions_list = []
for username, mentions_str in original_list:
    parsed_mentions = json.loads(mentions_str.replace("'", '"'))
    mentions_list.extend((username, mentioned, 1) for mentioned in parsed_mentions)

In [143]:
# Mention edge list: one row per (author, mentioned user) occurrence.
mentions_df = pd.DataFrame(
    data=mentions_list,
    columns=['username', 'mention', 'width'],
)

In [144]:
# Display the mention edge list (author, mentioned user, width).
mentions_df

Unnamed: 0,username,mention,width
0,to_fly_to_live,ani,1
1,bak_sahil,misseemonis,1
2,adarshshastri,incindia,1
3,clivebennett,thejeremyvine,1
4,paulhannon29,wsj,1
...,...,...,...
172135,allnbowtane,girlsreallyrule,1
172136,leighnapier,caumontsimone,1
172137,leighnapier,covid_19news,1
172138,leighnapier,youtube,1


In [305]:
# Mentions made by the account jcho710.
mentions_df.query(
    expr="username == 'jcho710'",
)

Unnamed: 0,username,mention,width
12539,jcho710,arabbitorduck,1
12565,jcho710,kamalaharris,1
13028,jcho710,oregongovbrown,1
13034,jcho710,tonyrobbins,1
13045,jcho710,joebiden,1
...,...,...,...
108384,jcho710,krugermacro,1
133207,jcho710,nancy_notpelosi,1
133208,jcho710,patton6966,1
139638,jcho710,concept211,1


In [306]:
# Mentions of the account jcho710 made by other rows' authors.
mentions_df.query(
    expr="mention == 'jcho710'",
)

Unnamed: 0,username,mention,width
9521,bamm_redpilled,jcho710,1
40622,threadreaderapp,jcho710,1
45260,kayrofl,jcho710,1
48756,diamondlife_3,jcho710,1
52268,schilke_60,jcho710,1
53459,fateorveritas,jcho710,1
58564,kellyk84471553,jcho710,1
82577,texascheetah,jcho710,1
111766,rancexx_,jcho710,1
111867,the_crypto_ent,jcho710,1


In [145]:
# Display the reply edge list (replying user, user replied to, width).
reply_to_edges

Unnamed: 0,username,reply_to_username,width
0,to_fly_to_live,ANI,1
3,bak_sahil,MisseeMonis,1
10,clivebennett,theJeremyVine,1
21,raquelquefois,jim_dickinson,1
22,hemagazineindia,HEmagazineIndia,1
...,...,...,...
209920,zkusovac,admirim,1
209924,derek_linders,Laurie_Garrett,1
209925,allnbowtane,girlsreallyrule,1
209926,leighnapier,LeighNapier,1


In [307]:
# Replies written by the account jcho710.
reply_to_edges.query(
    expr="username == 'jcho710'",
)

Unnamed: 0,username,reply_to_username,width
15801,jcho710,arabbitorduck,1
15839,jcho710,jcho710,1
16339,jcho710,jcho710,1
16343,jcho710,jcho710,1
16366,jcho710,jcho710,1
...,...,...,...
147986,jcho710,jcho710,1
161012,jcho710,jcho710,1
168821,jcho710,Nancy_NotPelosi,1
170706,jcho710,jcho710,1


In [308]:
# Replies addressed to the account jcho710.
reply_to_edges.query(
    expr="reply_to_username == 'jcho710'",
)

Unnamed: 0,username,reply_to_username,width
11808,bamm_redpilled,jcho710,1
15839,jcho710,jcho710,1
16339,jcho710,jcho710,1
16343,jcho710,jcho710,1
16366,jcho710,jcho710,1
...,...,...,...
147986,jcho710,jcho710,1
161012,jcho710,jcho710,1
166065,blloydblloyd,jcho710,1
167383,ritalit39113743,jcho710,1


In [332]:
# The five accounts that received the most reply tweets.
top_5_reply_to_accounts = (
    reply_to_edges
    .groupby("reply_to_username")
    .agg({"width":"sum"})
    .reset_index()
    .sort_values(by="width", ascending=False)
    .rename(columns={"width":"count", "reply_to_username": "username"})
    .head(5)
)
top_5_reply_to_accounts

Unnamed: 0,username,count
36794,realDonaldTrump,2011
31149,jcho710,1442
10618,JoeBiden,307
3670,CNN,258
34953,narendramodi,206


In [341]:
# Horizontal bar chart of the five most replied-to accounts, smallest bar first.
top_5_reply_sorted = top_5_reply_to_accounts.sort_values('count')

# Highlight jcho710 by name rather than by a hard-coded bar position, so the
# highlight stays on the right bar if the ranking changes.
colors = [
    'crimson' if name == 'jcho710' else 'lightslategray'
    for name in top_5_reply_sorted['username']
]

fig = px.bar(top_5_reply_sorted, x='count', y='username', orientation='h', title="Top 5 users replied by other accounts")
# Pin the category order to the sorted frame so bars and colors line up.
fig.update_layout(yaxis={'categoryorder':'array', 'categoryarray':top_5_reply_sorted['username'].tolist()})

fig.update_traces(marker_color=colors)

fig.show()

In [343]:
# The five accounts that made the most mentions of other accounts.
top_5_mention_accounts = (
    mentions_df
    .groupby("username")
    .agg({"width":"sum"})
    .reset_index()
    .sort_values(by="width", ascending=False)
    .rename(columns={"width":"count"})
    .head(5)
)
top_5_mention_accounts

Unnamed: 0,username,count
27729,jcho710,3458
6280,barbarajdurkin,852
40290,mrfunnyfff,511
11193,cheese24k,395
13015,covidvaxine,318


In [None]:
# This cell repeated the aggregation of the preceding cell verbatim; the frame is
# already built there, so it is only displayed here instead of being recomputed.
top_5_mention_accounts

In [345]:
# Horizontal bar chart of the five accounts that mention others most, smallest bar first.
top_5_mention_sorted = top_5_mention_accounts.sort_values('count')

# Highlight jcho710 by name rather than by a hard-coded bar position, so the
# highlight stays on the right bar if the ranking changes.
colors = [
    'crimson' if name == 'jcho710' else 'lightslategray'
    for name in top_5_mention_sorted['username']
]

fig = px.bar(top_5_mention_sorted, x='count', y='username', orientation='h', title="Top 5 users mentioned other accounts")
# Pin the category order to the sorted frame so bars and colors line up.
fig.update_layout(yaxis={'categoryorder':'array', 'categoryarray':top_5_mention_sorted['username'].tolist()})

fig.update_traces(marker_color=colors)

fig.show()

In [349]:
# Accounts mentioned most often by jcho710.
# NOTE: despite the `top_5_` prefix, this keeps the ten most-mentioned accounts.
top_5_mention_accounts_by_j = (
    mentions_df
    .query("username == 'jcho710'")
    .groupby(["username", "mention"])
    .agg({"width":"sum"})
    .reset_index()
    .sort_values(by="width", ascending=False)
    .rename(columns={"width":"count"})
    .head(10)
)
top_5_mention_accounts_by_j

Unnamed: 0,username,mention,count
695,jcho710,joebiden,144
242,jcho710,chuckcallesto,98
749,jcho710,kamalaharris,83
1037,jcho710,nygovcuomo,76
479,jcho710,gavinnewsom,61
1058,jcho710,oregongovbrown,56
310,jcho710,danscavino,47
919,jcho710,matthancock,43
603,jcho710,ingrahamangle,42
743,jcho710,justintrudeau,42


In [350]:
# Accounts jcho710 replied to most often.
# NOTE: despite the `top_5_` prefix, this keeps the ten most replied-to accounts.
top_5_reply_to_edges_by_j = (
    reply_to_edges
    .query("username == 'jcho710'")
    .groupby(["username", "reply_to_username"])
    .agg({"width":"sum"})
    .reset_index()
    .sort_values(by="width", ascending=False)
    .rename(columns={"width":"count"})
    .head(10)
)
top_5_reply_to_edges_by_j

Unnamed: 0,username,reply_to_username,count
776,jcho710,jcho710,1432
99,jcho710,ChuckCallesto,41
452,jcho710,OregonGovBrown,23
300,jcho710,JoeBiden,19
743,jcho710,glamelegance,17
220,jcho710,GovMurphy,16
424,jcho710,NYGovCuomo,15
877,jcho710,patton6966,15
392,jcho710,MayorOfLA,14
721,jcho710,dougducey,13


In [148]:
# Bring both edge lists onto a common (from, to, width) schema ...
new_mentions_df = mentions_df.rename(
    columns={"username" : "from", "mention" : "to"},
)
new_reply_to_edges = reply_to_edges.rename(
    columns={"username" : "from", "reply_to_username" : "to"},
)

# ... and stack them: mentions first, then replies (original row labels are kept).
whole_network_edges = pd.concat(objs=[new_mentions_df, new_reply_to_edges])

In [153]:
# Lower-case both endpoints so the same account spelled with different casing
# collapses to a single name. This overwrites the columns of `whole_network_edges`
# in place.
whole_network_edges["from"] = whole_network_edges["from"].str.lower()
whole_network_edges["to"] = whole_network_edges["to"].str.lower()
whole_network_edges

Unnamed: 0,from,to,width
0,to_fly_to_live,ani,1
1,bak_sahil,misseemonis,1
2,adarshshastri,incindia,1
3,clivebennett,thejeremyvine,1
4,paulhannon29,wsj,1
...,...,...,...
209920,zkusovac,admirim,1
209924,derek_linders,laurie_garrett,1
209925,allnbowtane,girlsreallyrule,1
209926,leighnapier,leighnapier,1


In [172]:
# Collapse repeated (from, to) pairs into one weighted edge, heaviest first.
grouped_whole_network_edges = (
    whole_network_edges
    .groupby(["from", "to"])
    .agg({"width": "sum"})
    .reset_index()
    .sort_values(by="width", ascending=False)
)
grouped_whole_network_edges

Unnamed: 0,from,to,width
68689,jcho710,jcho710,1432
75951,jurg_ames,realdonaldtrump,186
68732,jcho710,joebiden,163
68276,jcho710,chuckcallesto,139
68786,jcho710,kamalaharris,92
...,...,...,...
66256,james__mckenna_,4cocacoladave,1
66255,james__mckenna_,2x2andrew,1
66253,james52cute,10newsfirst,1
66251,james48495938,alexamorevip,1


In [321]:
# Total incoming edge weight (mentions + replies received) per account, largest first.
(
    whole_network_edges
    .groupby("to")
    .agg({"width":"sum"})
    .reset_index()
    .sort_values(by="width", ascending=False)
)

Unnamed: 0,to,width
56670,realdonaldtrump,7130
75727,youtube,1957
32803,jcho710,1460
34305,joebiden,1279
48221,narendramodi,1199
...,...,...
48458,naushadbijapur,1
18793,directorskims,1
48460,nav_manjari,1
18792,directorcdc,1


In [174]:
# Directed graph built from the raw (un-aggregated) combined edge list.
covid_vaccine_whole_graph = nx.from_pandas_edgelist(
    whole_network_edges,
    source="from",
    target="to",
    edge_attr="width",
    create_using=nx.DiGraph(),
)
# Directed graph built from the aggregated edge list, where width is the summed count.
grouped_covid_vaccine_whole_graph = nx.from_pandas_edgelist(
    grouped_whole_network_edges,
    source="from",
    target="to",
    edge_attr="width",
    create_using=nx.DiGraph(),
)

In [175]:
# Degree centrality of the combined mention + reply graph, ranked from highest to lowest.
whole_centrality = nx.degree_centrality(covid_vaccine_whole_graph)
sorted_whole_centrality = {
    name: whole_centrality[name]
    for name in sorted(whole_centrality, key=whole_centrality.get, reverse=True)
}
sorted_whole_centrality_df = pd.DataFrame(
    data=list(sorted_whole_centrality.items()),
    columns=['username', 'centrality'],
)
sorted_whole_centrality_df

Unnamed: 0,username,centrality
0,realdonaldtrump,0.030653
1,youtube,0.011926
2,jcho710,0.011118
3,narendramodi,0.005903
4,joebiden,0.005290
...,...,...
133653,army52hz,0.000007
133654,miguelsmother,0.000007
133655,cbjfan13_71_22,0.000007
133656,fionaapplsauce,0.000007


In [176]:
# Degree centrality of the aggregated combined graph, ranked from highest to lowest.
grouped_whole_centrality = nx.degree_centrality(grouped_covid_vaccine_whole_graph)
grouped_sorted_whole_centrality = {
    name: grouped_whole_centrality[name]
    for name in sorted(grouped_whole_centrality, key=grouped_whole_centrality.get, reverse=True)
}
grouped_sorted_whole_centrality_df = pd.DataFrame(
    data=list(grouped_sorted_whole_centrality.items()),
    columns=['username', 'centrality'],
)
grouped_sorted_whole_centrality_df

Unnamed: 0,username,centrality
0,realdonaldtrump,0.030653
1,youtube,0.011926
2,jcho710,0.011118
3,narendramodi,0.005903
4,joebiden,0.005290
...,...,...
133653,bacchaus351,0.000007
133654,4cocacoladave,0.000007
133655,2x2andrew,0.000007
133656,alexamorevip,0.000007


In [283]:
# Manually assigned category for each of the top-20 accounts. Keyed by username so
# each label stays attached to the right account even if the ranking changes; the
# earlier positional list depended on the exact row order and on there being
# exactly 20 rows.
account_types = {
    "realdonaldtrump": "Government / Organization",
    "youtube": "Company",
    "jcho710": "Others",
    "narendramodi": "Government / Organization",
    "joebiden": "Government / Organization",
    "who": "Government / Organization",
    "pmoindia": "Government / Organization",
    "cnn": "Media",
    "mailonline": "Media",
    "potus": "Government / Organization",
    "billgates": "Celebrity",
    "matthancock": "Government / Organization",
    "whitehouse": "Government / Organization",
    "mohfw_india": "Government / Organization",
    "nytimes": "Media",
    "kamalaharris": "Government / Organization",
    "us_fda": "Government / Organization",
    "drharshvardhan": "Government / Organization",
    "cdcgov": "Government / Organization",
    "nygovcuomo": "Government / Organization",
}

df_for_c_plot = grouped_sorted_whole_centrality_df.head(20).copy()
# Accounts without a manual label fall back to "Others".
df_for_c_plot["type"] = df_for_c_plot["username"].map(account_types).fillna("Others")
df_for_c_plot

Unnamed: 0,username,centrality,type
0,realdonaldtrump,0.030653,Government / Organization
1,youtube,0.011926,Company
2,jcho710,0.011118,Others
3,narendramodi,0.005903,Government / Organization
4,joebiden,0.00529,Government / Organization
5,who,0.004826,Government / Organization
6,pmoindia,0.004362,Government / Organization
7,cnn,0.004108,Media
8,mailonline,0.003696,Media
9,potus,0.003434,Government / Organization


In [304]:
# Horizontal bar chart of the top-20 centrality scores, coloured by account type.
fig = px.bar(
    df_for_c_plot.sort_values('centrality'),
    x='centrality',
    y='username',
    color="type",
    orientation='h',
    title="Top 20 Centrality",
)
# Keep the bars in centrality order and place an untitled horizontal legend
# above the plot area, right-aligned.
fig.update_layout(
    legend_title_text='',
    yaxis={'categoryorder':'array', 'categoryarray':df_for_c_plot.sort_values('centrality')['username'].tolist()},
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
    ),
)

fig.show()

In [288]:
# Usernames of the plotted accounts, in rank order.
list(df_for_c_plot['username'])

['realdonaldtrump',
 'youtube',
 'jcho710',
 'narendramodi',
 'joebiden',
 'who',
 'pmoindia',
 'cnn',
 'mailonline',
 'potus',
 'billgates',
 'matthancock',
 'whitehouse',
 'mohfw_india',
 'nytimes',
 'kamalaharris',
 'us_fda',
 'drharshvardhan',
 'cdcgov',
 'nygovcuomo']

In [275]:
# Node count, then edge count, of the aggregated combined graph.
print(len(grouped_covid_vaccine_whole_graph))
print(grouped_covid_vaccine_whole_graph.size())

133658
158360


In [177]:
# Work on independent copies so the original graphs keep their self-loops.
cleaned_covid_vaccine_whole_graph = covid_vaccine_whole_graph.copy(as_view=False)
grouped_cleaned_covid_vaccine_whole_graph = grouped_covid_vaccine_whole_graph.copy(as_view=False)

In [178]:
# Drop self-loops (accounts replying to / mentioning themselves) from both copies.
# The loop edges are materialised into a list before removal starts.
cleaned_covid_vaccine_whole_graph.remove_edges_from(
    list(nx.selfloop_edges(cleaned_covid_vaccine_whole_graph))
)
grouped_cleaned_covid_vaccine_whole_graph.remove_edges_from(
    list(nx.selfloop_edges(grouped_cleaned_covid_vaccine_whole_graph))
)

In [163]:
# 3-core of the self-loop-free combined graph.
k_core_data = nx.k_core(
    cleaned_covid_vaccine_whole_graph,
    k=3,
)
k_core_data

<networkx.classes.digraph.DiGraph at 0x7fc873a63c18>

In [204]:
# 7-core of the self-loop-free aggregated graph.
grouped_k_core_data = nx.k_core(
    grouped_cleaned_covid_vaccine_whole_graph,
    k=7,
)
grouped_k_core_data

<networkx.classes.digraph.DiGraph at 0x7fc856e40898>

In [182]:
# Edge list of the 3-core as a DataFrame, heaviest edges first.
k_core_df = (
    nx.to_pandas_edgelist(k_core_data)
    .sort_values(by="width", ascending=False)
)
k_core_df

Unnamed: 0,source,target,width
0,opalessense,ajenglish,1
22470,wakeupfortruth,gorillastyle,1
22468,wakeupfortruth,medwoman1,1
22467,wakeupfortruth,bitchute,1
22466,wakeupfortruth,ingrahamangle,1
...,...,...,...
11225,thetakeov,kingjames,1
11224,thetakeov,nba,1
11223,kcnickerson,microsoft,1
11222,kcnickerson,nealbrowning,1


In [205]:
# Edge list of the 7-core as a DataFrame, heaviest edges first.
grouped_k_core_df = (
    nx.to_pandas_edgelist(grouped_k_core_data)
    .sort_values(by="width", ascending=False)
)
grouped_k_core_df

Unnamed: 0,source,target,width
1831,jurg_ames,realdonaldtrump,186
5568,jcho710,joebiden,163
5569,jcho710,kamalaharris,92
5570,jcho710,nygovcuomo,91
5571,jcho710,gavinnewsom,72
...,...,...,...
2441,jayajiban,pib_india,1
2442,jayajiban,mygovindia,1
2443,jayajiban,mohfw_india,1
2444,jayajiban,mib_india,1


In [273]:
# Build a networkx graph (default graph type) from the 7-core edge list,
# keeping `width` as an edge attribute.
net_obj = nx.from_pandas_edgelist(
    grouped_k_core_df,
    source='source',
    target='target',
    edge_attr='width',
)
title = 'Posters and @tags network'
# Detailed network visualisation: node sizes are degree - 6, positions come from
# the Kamada-Kawai layout.
network_plot(net_obj, title, number_to_adjust_by=-6, layout=nx.kamada_kawai_layout)

In [228]:
# Export the 7-core edge list (source, target, width) without the row index.
grouped_k_core_df.to_csv(
    path_or_buf="data/k_core_7_edges.csv",
    index=False,
)

In [264]:
# Collect every account that appears as either endpoint of a 7-core edge.
source_df = grouped_k_core_df[["source"]].copy()
target_df = grouped_k_core_df[["target"]].rename(columns={"target" : "source"})
all_nodes = pd.concat([source_df, target_df])

# One row per distinct account, sorted alphabetically with a fresh 0..n-1 index.
# Replaces the earlier groupby().size() round-trip, whose counts were discarded.
unique_nodes = (
    all_nodes
    .drop_duplicates(subset="source")
    .sort_values("source")
    .reset_index(drop=True)
)
unique_nodes

Unnamed: 0,source
0,100dianne
1,1americangirl
2,22vetsaday1
3,3montmonty
4,4pp34
...,...
722,zeenews
723,zephyr999999999
724,ziggystardad
725,zooeraina


In [265]:
# Export the 7-core node list (single `source` column) without the row index.
unique_nodes.to_csv(
    path_or_buf="data/k_core_7_nodes.csv",
    index=False,
)