# Twitter Graphs
## Table of Contents
* [Visualizations](#1)
* [Global Statistics](#2)
* [PageRank - Demo on subset](#3)

In [None]:
!pip install pyvis

In [None]:
# packages

# standard
import numpy as np
import pandas as pd
import time

# plots
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

# graph
import networkx as nx
from pyvis.network import Network

In [None]:
# Load the edge list. header=None tells pandas not to treat the first row as
# column names, so the two columns are named explicitly.
# The load is timed with perf_counter(): it is a monotonic, high-resolution
# clock intended for measuring durations, while time.time() follows the wall
# clock and can jump if the system time is adjusted.
t1 = time.perf_counter()
df = pd.read_csv('../input/twitter-edge-nodes/Twitter-dataset/data/edges.csv',
                 header=None, names=['Follower','Target'])
t2 = time.perf_counter()
print('Elapsed time [s]: ', np.round(t2-t1,2))

### Each row is a directed edge: (1,2) means that the user with id "1" (the follower) follows the user with id "2" (the target).

In [None]:
# preview the edge list: each row is one (Follower, Target) pair
df

<a id='1'></a>
# Visualizations

### Filter by follower

In [None]:
# pick one follower id and keep only the edges that start at that account
follower = 1
df_select = df.loc[df['Follower'] == follower]
df_select

In [None]:
# directed graph of the selected edges (Follower -> Target)
G = nx.from_pandas_edgelist(
    df_select,
    source='Follower',
    target='Target',
    create_using=nx.DiGraph(),
)
# draw first (this creates the axes), then put the title on those axes
nx.draw(G, with_labels=True, node_size=1000, alpha=0.5, arrows=True)
plt.title(f'Targets that {follower} follows')
plt.show()

### Get all followers of a target

In [None]:
# pick one target id and keep only the edges that end at that account
target = 3
df_select = df.loc[df['Target'] == target]
df_select

In [None]:
# directed graph of the selected edges (Follower -> Target)
G = nx.from_pandas_edgelist(
    df_select,
    source='Follower',
    target='Target',
    create_using=nx.DiGraph(),
)
# draw first (this creates the axes), then put the title on those axes
nx.draw(G, with_labels=True, node_size=1000, alpha=0.5, arrows=True)
plt.title(f'Followers of target {target}')
plt.show()

### Bidirectional

In [None]:
user = 100
# edges that end at the user (accounts following it) ...
df_in = df.loc[df['Target'] == user]
# ... and edges that start at the user (accounts it follows)
df_out = df.loc[df['Follower'] == user]
# incoming edges first, then outgoing ones
df_select = pd.concat([df_in, df_out])
df_select

In [None]:
# directed graph of the user's incoming and outgoing edges
G = nx.from_pandas_edgelist(
    df_select,
    source='Follower',
    target='Target',
    create_using=nx.DiGraph(),
)
# circular layout; draw first (this creates the axes), then add the title
nx.draw_circular(G, with_labels=True, node_size=1000, alpha=0.5, arrows=True)
plt.title(f'Bidirectional view of user {user}')
plt.show()

#### Interactive Plot:

In [None]:
# interactive pyvis view of the graph G built in the previous cell;
# directed=True makes the edges render with arrow heads
net = Network(height='500px', width='800px', directed=True, notebook=True)
net.from_nx(G)
# writes the visualization to example.html and displays it in the notebook
net.show('example.html')

<a id='2'></a>
# Global Statistics

### Most active followers

In [None]:
# number of edges per follower id, as a two-column table (Follower, Frequency)
f_counts = (
    df['Follower']
    .value_counts()
    .rename_axis('Follower')
    .reset_index(name='Frequency')
)

# first ten rows = the ten most active followers
f_counts.head(10)

In [None]:
# summary statistics of the number of targets per follower
f_counts['Frequency'].describe()

#### Interpretation: On average, a follower has 9.73 targets.

In [None]:
# histogram of log10(1 + count) with 100 bins; the y axis is logarithmic too
plt.hist(np.log10(f_counts['Frequency'] + 1), bins=100)
plt.yscale('log')
plt.grid()
plt.xlabel('log10(1+counts)')
plt.title('Distribution of following count')
plt.show()

### Most followed targets

In [None]:
# number of edges per target id, as a two-column table (Target, Frequency)
t_counts = (
    df['Target']
    .value_counts()
    .rename_axis('Target')
    .reset_index(name='Frequency')
)

# first ten rows = the ten most followed targets
t_counts.head(10)

In [None]:
# summary statistics of the number of followers per target
t_counts['Frequency'].describe()

#### Interpretation: On average, a target has 12.88 followers.

In [None]:
# Histogram of log10(1 + count) with 100 bins and a logarithmic y axis.
# t_counts.Frequency holds the number of followers per target, so the plot is
# titled "follower count" (the counterpart of the "following count" plot
# drawn from f_counts).
plt.hist(np.log10(1+t_counts.Frequency),100)
plt.yscale('log')
plt.grid()
plt.title('Distribution of follower count')
plt.xlabel('log10(1+counts)')
plt.show()

<a id='3'></a>
# PageRank - Demo on subset

### The full graph would be too big for the 16GB memory environment, so we only run a demo on a small random subset of the edges.

In [None]:
# draw 100000 rows (edges) at random; the fixed seed keeps the subset reproducible
df_sub = df.sample(n=100000, random_state=987)

In [None]:
# directed graph (Follower -> Target) of the sampled edges
G = nx.from_pandas_edgelist(
    df_sub,
    source='Follower',
    target='Target',
    create_using=nx.DiGraph(),
)

In [None]:
# compute the pagerank of every node, turn the node -> score mapping into a
# one-column data frame indexed by node id, and order it by descending score
pageranks = (
    pd.DataFrame.from_dict(nx.pagerank(G), orient='index', columns=['Pagerank'])
    .sort_values(by='Pagerank', ascending=False)
)

In [None]:
# ten highest-ranked nodes (the frame is already sorted by descending pagerank)
pageranks.head(10)

### Compare with simple frequency count:

In [None]:
# follower counts per target within the sampled edges, top ten rows shown
t_counts_sub = (
    df_sub['Target']
    .value_counts()
    .rename_axis('Target')
    .reset_index(name='Frequency')
)
t_counts_sub.head(10)