# **Network Visualization**


## **Imports**


In [None]:
import os
import sys
from warnings import filterwarnings
import matplotlib.pyplot as plt
from pathlib import Path
from dotenv import load_dotenv

%matplotlib inline
%load_ext autotime

load_dotenv()
filterwarnings("ignore")

# Put the directory above the notebook on the import path; presumably this is
# what lets the `utils` package imported in the next cell resolve.
parent_dir = os.path.dirname(os.getcwd())
module_path = os.path.abspath("../")
sys.path.extend(entry for entry in [module_path] if entry not in sys.path)

In [None]:
import tweepy
import numpy as np
import pandas as pd
import dask.dataframe as dd
import ast
from scipy.stats import gmean
import networkx as nx
import seaborn as sns
from utils.graph import get_ego_graph, draw_nx_graph, draw_plotly_graph

PATH = os.getcwd()
PROJECT = str(Path(PATH).parents[0])

# Credentials and storage location are read from the environment
# (`load_dotenv()` is called in the first cell).
TWITTER_USERNAME = os.getenv("TWITTER_USERNAME")
TWITTER_API_KEY = os.getenv("TWITTER_API_KEY")
TWITTER_API_SECRET = os.getenv("TWITTER_API_SECRET")
TWITTER_API_BEARER_TOKEN = os.getenv("TWITTER_API_BEARER_TOKEN")
CLOUD_STORAGE_BUCKET = os.getenv("CLOUD_STORAGE_BUCKET")

# Fail fast: `os.getenv` returns None for unset variables, which would otherwise
# surface much later as paths like "None/measures/..." or an opaque API error.
# Only the variables this notebook actually uses are enforced.
_missing_env = [
    name
    for name, value in (
        ("TWITTER_USERNAME", TWITTER_USERNAME),
        ("TWITTER_API_BEARER_TOKEN", TWITTER_API_BEARER_TOKEN),
        ("CLOUD_STORAGE_BUCKET", CLOUD_STORAGE_BUCKET),
    )
    if not value
]
if _missing_env:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_env)
    )

# `wait_on_rate_limit=True` is passed so tweepy waits out rate limits.
client = tweepy.Client(TWITTER_API_BEARER_TOKEN, wait_on_rate_limit=True)

## **Read**


### **Node Centrality (Subset)**


In [None]:
# Read the node-measures CSV from the bucket with dask, then materialise it.
node_measures_uri = f"{CLOUD_STORAGE_BUCKET}/measures/node/node_measures.csv"
df_cen = dd.read_csv(node_measures_uri).compute()
print(df_cen.shape)
df_cen.head()

### **Node Features**


In [None]:
# Read every node-features shard matching the glob and materialise the result.
# `withheld` is pinned to object dtype (presumably to avoid dask dtype-inference
# mismatches between shards -- TODO confirm).
node_features_glob = f"{CLOUD_STORAGE_BUCKET}/features/node/node_features*.csv"
nodes = dd.read_csv(node_features_glob, dtype={'withheld': 'object'}).compute()
print(nodes.shape)
nodes.head()

### **Graph**


In [None]:
# Load all tie shards; `following` holds a stringified Python list per row,
# which is parsed and then exploded to one row per (user, followed account).
ties_glob = f"{CLOUD_STORAGE_BUCKET}/ties/ties*.csv"
df = dd.read_csv(ties_glob).compute()
df["following"] = df["following"].map(ast.literal_eval)
df = df.explode('following')

# if you need a full graph including the user
user_response = client.get_user(username=TWITTER_USERNAME, user_fields=["id"])
user = user_response.data.id
df_following = df.dropna()
# One row per distinct account in the `user` column, with the notebook owner as source.
followed_accounts = df_following.user.unique()
df_user = pd.DataFrame({"user": user, "following": followed_accounts})
df_with_user = pd.concat([df_user, df_following])

# create a graph
edges = df_with_user.dropna()
edges.columns = ['source', 'target']
edges = edges.astype({'source': int, 'target': int}).assign(weight=1)
print(df.shape, edges.shape)

G = nx.from_pandas_edgelist(edges, create_using=nx.DiGraph(), edge_attr=True)
print(f"Nodes: {len(G.nodes())}, Edges: {len(G.edges())}")
print(f"Average Clustering: {round(nx.average_clustering(G), 3)}")

## **Explore Images**


In [None]:
!pip install imageio

In [None]:
import urllib.request as urllib2
from imageio import imread
import matplotlib.pyplot as plt

# Sanity check: fetch and display the profile picture of one random node.
url = nodes["profile_image_url"].sample(1).iloc[0]  # _400x400
print(url)
a = imread(url, format='jpg')
plt.imshow(a, interpolation='nearest')
plt.show()

In [None]:
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import glob
from numpy import sqrt
import glob

path = ''  # not used in this cell; kept in case another cell reads it

# Number of random profiles to try to download for the demo.
N = 10
files = list(nodes.sample(N).profile_image_url)
img = []

img_size = 200
for f in files:
    # Swap the "_normal" suffix in the URL for an explicit <size>x<size> variant.
    f = f.replace('_normal', f'_{img_size}x{img_size}')
    try:
        img.append(imread(f))
    except Exception as exc:
        # Best effort: one unreadable avatar must not abort the demo, but say so
        # instead of swallowing everything (a bare `except:` also traps Ctrl-C).
        print(f"skipping {f}: {exc}")
N = len(img)
if N == 0:
    raise RuntimeError("No profile image could be loaded; nothing to draw.")

# generate graph
# NOTE: this toy graph deliberately gets its own name. Binding it to `G` would
# overwrite the Twitter graph built in the "Graph" section, which the later
# `get_ego_graph(G, ...)` cells depend on.
# k is capped because watts_strogatz_graph rejects k larger than the node count
# (older networkx versions also reject k == n).
G_img = nx.watts_strogatz_graph(N, min(4, N - 1), 0.2)
pos = nx.spring_layout(G_img, k=3/sqrt(N))

# draw with images on nodes
nx.draw_networkx(G_img, pos, width=3, edge_color="r", alpha=0.6)

ax = plt.gca()
fig = plt.gcf()

fig.set_size_inches(20, 10)
trans = ax.transData.transform                 # data -> display (pixel) coordinates
trans2 = fig.transFigure.inverted().transform  # display -> figure-fraction coordinates
imsize = 0.1  # side of each avatar, as a fraction of the figure
for n in G_img.nodes():
    (x, y) = pos[n]
    xx, yy = trans((x, y))      # display coordinates
    xa, ya = trans2((xx, yy))   # figure-fraction coordinates, as plt.axes expects
    img_ax = plt.axes([xa - imsize/2.0, ya - imsize/2.0, imsize, imsize])
    img_ax.imshow(img[n])       # node ids 0..N-1 index straight into `img`
    img_ax.set_aspect('equal')
    img_ax.axis('off')
plt.savefig('./save.png')

## **Explore**


In [None]:
# Ego network of the notebook owner; only the first element of the result is
# used (indexing instead of unpacking into `_` also avoids overwriting IPython's
# last-output variable).
G_s = get_ego_graph(G, node=user)[0]

node_labels_dict = dict(zip(nodes['id'], nodes['name']))
nx.set_node_attributes(G_s, node_labels_dict, "name")
# One label per node, in graph order. Reading the labels back from the "name"
# attribute would skip nodes that have no entry in `nodes`, leaving a list that
# is shorter than, and misaligned with, `G_s.nodes()`.
node_labels = [node_labels_dict.get(n, str(n)) for n in G_s.nodes()]

print(f"{user}")
print(f"Nodes: {len(G_s.nodes())}, Edges: {len(G_s.edges())}")
# print(f"Average Clustering: {round(nx.average_clustering(G_s), 3)}")
draw_nx_graph(
    G_s,
    fig_size=(20, 20),
    font_size=10,
    node_size=500,
    linewidths=3,
    width=0.5,
    alpha=0.8,
)

## **Explore Pyvis**


In [None]:
from pyvis import network as py_net

# Convert the current sub-graph to a pyvis network and write it to graph.html,
# exposing the node / edge / physics control panels.
control_panels = ['nodes', 'edges', 'physics']
g = py_net.Network(notebook=True)
g.from_nx(G_s)
g.show_buttons(filter_=control_panels)
g.show('graph.html')

## **Explore Plotly**


In [None]:
# Inspect the feature rows of a few specific accounts, selected by id.
ids_to_inspect = [440506882, 16163627, 3257368988, 1720046887, 110445334]
nodes.loc[nodes["id"].isin(ids_to_inspect)].head()

In [None]:
n_ = nodes.set_index('id').copy()
# `public_metrics` arrives from CSV as a string (presumably a dict literal).
# Parse it with ast.literal_eval -- as already done for `following` -- instead of
# eval, which would execute arbitrary expressions found in the data.
n_.public_metrics = n_.public_metrics.map(ast.literal_eval)
# Expand the parsed records into one column per key in a single constructor call
# (row-wise `.apply(pd.Series)` builds a Series per row and is far slower).
n_ = pd.DataFrame(n_['public_metrics'].tolist(), index=n_.index)

# Hub scores per node, joined with the metrics above and ranked highest first.
t_ = df_cen[df_cen.measure_name == 'hubs'].set_index('node')
t_ = t_.merge(n_, left_index=True, right_index=True).sort_values(by='measure_value', ascending=False)
t_.head()

In [None]:
import networkx as nx
import plotly.graph_objects as go
def draw_plotly_graph(
    graph=None,
    edge_labels=None,
    node_labels=None,
    pos=None,
    node_colors=None,
    node_sizes=None,
    write_html=False,
    title=None,
    fig_size_px=(800, 800),
    hide_color_axis=True,
    highlight_nodes=None,
    highlight_color="black",
):
    """Draw ``graph`` as an interactive Plotly figure and display it.

    Defining this function in the notebook replaces the ``draw_plotly_graph``
    imported from ``utils.graph`` in the imports cell.

    Args:
        graph: networkx graph to draw (required).
        edge_labels: accepted for interface compatibility; currently unused.
        node_labels: sequence of hover texts, one per node in ``graph.nodes()``
            order. When empty or None, "adjacent connections: <n>" is shown.
        pos: optional ``{node: (x, y)}`` layout. When None, a rescaled spring
            layout is computed. (Previously this argument was silently ignored.)
        node_colors: ``"default"`` to colour by 100 x eigenvector centrality, a
            sequence with one colour/number per node, a single colour, or None.
        node_sizes: ``"default"`` to size by 100 x eigenvector centrality, a
            sequence with one size per node, a single number, or None.
        write_html: when True, also write ``first_degree_test.html`` and open it.
        title: figure title.
        fig_size_px: ``(width, height)`` of the figure in pixels.
        hide_color_axis: when True, the marker colour bar is not shown.
        highlight_nodes: optional iterable (or dict keyed by node) of nodes to
            paint in ``highlight_color``. This replaces the undefined global
            ``e_users_p`` the function used to read, which raised NameError.
        highlight_color: colour applied to ``highlight_nodes``.

    Returns:
        None. The figure is shown via ``fig.show()``; it is not returned, so a
        call on the last line of a cell does not render the figure twice.

    Raises:
        ValueError: if ``graph`` is None.
    """
    if graph is None:
        raise ValueError("draw_plotly_graph() requires a networkx graph")

    def _is_default(value):
        # isinstance guard: comparing a numpy array to "default" is elementwise
        # and has no single truth value.
        return isinstance(value, str) and value == "default"

    node_order = list(graph.nodes())

    if pos is None:
        pos = nx.rescale_layout_dict(
            nx.spring_layout(graph, threshold=1e-4, iterations=100)
        )

    # Eigenvector centrality is only needed for the "default" colour/size modes;
    # skip it otherwise (it is costly and can fail to converge).
    scaled_centrality = None
    if _is_default(node_colors) or _is_default(node_sizes):
        centrality = nx.eigenvector_centrality(graph)
        scaled_centrality = [1e2 * centrality[n] for n in node_order]

    # One trace per edge so every edge is drawn as its own spline segment.
    edge_traces = [
        dict(
            type="scatter",
            x=[pos[u][0], pos[v][0], None],
            y=[pos[u][1], pos[v][1], None],
            mode="lines",
            line=dict(width=0.05, color="#888", smoothing=0.2, simplify=True, shape="spline"),
        )
        for u, v in graph.edges()
    ]

    # hover labels
    if node_labels is not None and len(node_labels) > 0:
        _node_labels = list(node_labels)
    else:
        _node_labels = [
            "adjacent connections: " + str(len(neighbours))
            for _, neighbours in graph.adjacency()
        ]

    # node colours -- always work on a copy so the caller's list is never mutated
    if _is_default(node_colors):
        _node_colors = list(scaled_centrality)
    elif node_colors is None or isinstance(node_colors, str):
        _node_colors = node_colors
    else:
        _node_colors = list(node_colors)

    if highlight_nodes is not None:
        highlighted = set(highlight_nodes)
        if highlighted:
            if _node_colors is None or isinstance(_node_colors, str):
                # Highlighting needs one colour per node; expand a single (or
                # missing) colour into a list, using a neutral base if none given.
                base_color = _node_colors if _node_colors is not None else "#636efa"
                _node_colors = [base_color] * len(node_order)
            for idx, node in enumerate(node_order):
                if node in highlighted:
                    _node_colors[idx] = highlight_color

    # node sizes
    _node_sizes = scaled_centrality if _is_default(node_sizes) else node_sizes

    node_trace = go.Scatter(
        x=[pos[n][0] for n in node_order],
        y=[pos[n][1] for n in node_order],
        mode="markers",
        hoverinfo="text",
        text=_node_labels,
        marker=dict(
            # The colour bar belongs to this marker, not to a layout coloraxis,
            # so `fig.update_coloraxes(showscale=False)` could never hide it.
            showscale=not hide_color_axis,
            colorscale="YlOrRd",
            reversescale=False,
            color=_node_colors,
            size=_node_sizes,
            # `title.side` is the current spelling of the deprecated `titleside`.
            colorbar=dict(thickness=5, xanchor="left", title=dict(side="right")),
            line_width=0.0,
        ),
    )

    # create plot
    fig = go.Figure(
        data=edge_traces + [node_trace],
        layout=go.Layout(
            title={
                "text": title,
                "font_size": 16,
                "x": 0.5,
                "xanchor": "center",
                "yanchor": "top",
            },
            showlegend=False,
            hovermode="closest",
            margin=dict(b=20, l=5, r=5, t=40),
            width=fig_size_px[0],
            height=fig_size_px[1],
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        ),
    )

    fig.show()

    if write_html:
        fig.write_html("first_degree_test.html", auto_open=True)

In [None]:
# Ids of the ten highest-ranked hubs (`t_` is sorted by measure_value, descending).
e_users = list(t_.index[:10])
e_users

In [None]:
# Map node id -> display name once; reused for the printout and the hover labels.
node_labels_dict = dict(zip(nodes['id'], nodes['name']))

# Nodes with at most this many incident edges are dropped from the merged ego
# network (presumably to keep the plot readable -- tune as needed).
MAX_DANGLER_DEGREE = 10

e_sub_graphs = []
for i in e_users:
    # Print the plain name; formatting `nodes[nodes.id==i].name` prints the
    # whole Series repr (index, dtype, ...) rather than the name.
    print(f"{i}: {node_labels_dict.get(i, '<unknown>')}")
    e_sub_graphs.append(get_ego_graph(G, node=i)[0])

G_s = nx.compose_all(e_sub_graphs)

dangler_nodes = [node for node, degree in G_s.degree() if degree <= MAX_DANGLER_DEGREE]
G_s.remove_nodes_from(dangler_nodes)

nx.set_node_attributes(G_s, node_labels_dict, "name")
# One label per node, in graph order. Reading the labels back from the "name"
# attribute skips nodes that have no entry in `nodes`, which shifts every later
# hover label onto the wrong node in the plot.
node_labels = [node_labels_dict.get(n, str(n)) for n in G_s.nodes()]

print(f"Nodes: {len(G_s.nodes())}, Edges: {len(G_s.edges())}")
draw_plotly_graph(
    G_s,
    node_sizes='default',
    node_labels=node_labels,
    node_colors='default',
    title='Twitter Ego Network',
)

## **Communities**

In [None]:
from community import community_louvain, generate_dendrogram, best_partition, induced_graph, modularity, partition_at_level
# Louvain best partition of the undirected ego network, with a fixed seed.
G_s_undirected = G_s.to_undirected()
community_dict = community_louvain.best_partition(G_s_undirected, random_state=42)
n_communities = len(set(community_dict.values()))
print(f"Communities at best partition: {n_communities}")

# Base palette; np.resize repeats or truncates it to exactly one colour per community.
base_palette = [
    'rgb(44,105,176)',
    'rgb(240,39,32)',
    'rgb(172,97,60)',
    'rgb(107,163,214)',
    'rgb(234,107,115)',
    'rgb(65,68,81)',
]
custom_colors = np.resize(base_palette, n_communities)

In [None]:
# Store each node's community on the graph, then translate the community ids
# (read back in node order) into palette colours.
nx.set_node_attributes(G_s, community_dict, "community")
community_by_node = nx.get_node_attributes(G_s, "community")
node_community_map = list(community_by_node.values())
node_colors = []
for community_id in node_community_map:
    node_colors.append(custom_colors[community_id])

In [None]:
# Ego network with nodes coloured by Louvain community and a fixed marker size.
draw_plotly_graph(
    G_s,
    node_labels=node_labels,
    node_colors=node_colors,
    node_sizes=12,
    title='Twitter Ego Network',
    hide_color_axis=False,
)

In [None]:
# Build the undirected copy once; every community routine below works on it.
G_s_undirected = G_s.to_undirected()

# Seed the dendrogram like the partitions elsewhere in this notebook; without
# `random_state` its result changes from run to run.
dendrogram = generate_dendrogram(G_s_undirected, random_state=42)
# Partition at the last (top) level of the dendrogram.
G_den = partition_at_level(dendrogram, len(dendrogram) - 1)

G_part = best_partition(G_s_undirected, random_state=42)
# Graph with one node per community, induced by the partition.
G_s_c = induced_graph(G_part, G_s_undirected)
print(modularity(G_part, G_s_undirected))

In [None]:
# nodes_exp = nodes.copy()
# nodes_exp['community'] = nodes_exp['id'].apply(lambda x: G_part.get(x))
# TWITTER_NAME = nodes_exp[nodes_exp.username==TWITTER_USERNAME].name.unique()[0]
# nodes_exp['name'] = nodes_exp['name'].apply(lambda x: "<obf: name>" if x==TWITTER_NAME else str(x).lower())

# for n in range(n_communities):
#     print(f"\ncommunity {n}:")
#     comm_node_names = nodes_exp[nodes_exp.community==n].sort_values(by=['followers_count','tweet_count'], ascending=False).name.tolist()
#     print(*comm_node_names,sep='\n')

In [None]:
# Largest line width used when drawing the community graph.
scale_factor = 50

# One row per edge of the community graph: (level_0, level_1, weight).
e_weights_data = (
    pd.DataFrame(nx.get_edge_attributes(G_s_c, "weight"), index=[0])
    .T
    .rename(columns={0: 'weight'})
    .reset_index()
)

# Express each weight as a share of the total over edges with the same first
# endpoint, then rescale so the largest share maps to `scale_factor`.
first_endpoint_total = e_weights_data.groupby('level_0')['weight'].transform('sum')
e_weights_data['total_weight'] = first_endpoint_total
relative_weight = np.power(e_weights_data['weight'] / e_weights_data['total_weight'], 1)
e_weights_data['weight'] = scale_factor * (relative_weight / relative_weight.max())

edge_weights = e_weights_data['weight'].tolist()
comm_labels = {0: 'misc', 1: 'ai', 2: 'political_outcasts', 3: 'space_cowboys', 4: 'spread_the_love_authors'}

# Relabel community ids with the hand-assigned names above and draw.
G_s_c_rl = nx.relabel_nodes(G_s_c, comm_labels, copy=True)
draw_nx_graph(
    G_s_c_rl,
    node_labels=True,
    width=edge_weights,
    node_label_font_color='black',
    node_size=4000,
    font_size=20,
    alpha=1,
)

In [None]:
# Community-level graph: one node per community, coloured with the palette.
community_palette = list(custom_colors)
draw_plotly_graph(
    G_s_c,
    node_colors=community_palette,
    node_sizes=20,
)