In [None]:
%matplotlib inline

import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx

In [None]:
# Build the directed graph G from the edge list stored in "hamster.edgelist".
# The file is opened in binary mode and the open handle is passed to networkx;
# the `with` block closes it once parsing has finished.
with open("hamster.edgelist", mode='rb') as fh:
    G = nx.read_edgelist(
        fh,
        create_using=nx.DiGraph(),
    )

In [None]:
def pagerank(G):
    """Compute PageRank for graph G and return it as a pandas DataFrame.

    The mapping returned by ``nx.pagerank(G)`` is loaded with its keys as
    the row index, and the resulting value column (labelled 0 by pandas) is
    renamed to 'pagerank'.
    """
    scores = nx.pagerank(G)
    table = pd.DataFrame.from_dict(scores, orient='index')
    return table.rename(columns={0: 'pagerank'})

In [None]:
def visualize(df):
    """Plot the distribution of the 'pagerank' column of ``df``.

    Two 100-bin histograms are shown one after the other: the raw values,
    then their natural logarithm. A short heading is printed before each.
    """
    scores = df['pagerank']

    # Histogram of the raw values.
    print('Pagerank distribution')
    raw_axes = scores.plot.hist(bins=100)
    raw_axes.set_title('PageRank Distribution')
    plt.show()

    # Same histogram after taking the natural log of every value.
    print('Log scale')
    log_axes = np.log(scores).plot.hist(bins=100)
    log_axes.set_title('PageRank Distribution (log scale)')
    plt.show()

In [None]:
def sort_by_pagerank(df):
    """Return a copy of ``df`` ordered by its 'pagerank' column, highest first.

    The input frame is not modified.
    """
    ranked = df.sort_values('pagerank', ascending=False)
    return ranked

In [None]:
def display_rank(df):
    """Show the ten highest- and ten lowest-ranked rows of ``df``.

    ``df`` is first ordered with ``sort_by_pagerank``; the first ten and the
    last ten rows of that ordering are then rendered with ``display``, each
    preceded by a printed heading.
    """
    ranking = sort_by_pagerank(df)

    sections = (
        ("Top 10:\n", ranking.head(10)),
        ("Lowest 10:\n", ranking.tail(10)),
    )
    for heading, rows in sections:
        print(heading)
        display(rows)

In [None]:
def compute_error(base, df):
    """Sort both PageRank tables; no error value is computed yet.

    Both arguments are passed to ``sort_by_pagerank``, so each is expected to
    have a 'pagerank' column.

    NOTE(review): this function looks unfinished. The two sorted frames are
    bound to locals and never used, and the function implicitly returns None.
    The actual comparison between ``base`` and ``df`` still has to be written.
    """
    br = sort_by_pagerank(base)  # first argument, highest PageRank first
    nr = sort_by_pagerank(df)  

#### Original graph

In [None]:
# PageRank of the unmodified graph G, kept in `base` for later cells.
base = pagerank(G)
# The plotting helper is defined as `visualize`; the previous spelling
# `vizualize` raised a NameError.
visualize(base)
display_rank(base)

#### Graph with random edges removed

In [None]:
# Work on a copy of G and remove 20% of its edges (count rounded down),
# drawn at random without replacement. G itself is not modified.
# No seed is set in this cell, so the sample differs between runs unless
# `random` has been seeded elsewhere.
Gx = G.copy()
# random.sample requires a sequence; G.edges() is a view in current networkx
# and is rejected by Python 3.11+, so it is materialised as a list first.
Gx.remove_edges_from(random.sample(list(G.edges()), 20*G.number_of_edges()//100))
pr = pagerank(Gx)
# The plotting helper is defined as `visualize`; the previous spelling
# `vizualize` raised a NameError.
visualize(pr)
display_rank(pr)