In [None]:
import os

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from neo4j import GraphDatabase

%matplotlib inline

In [None]:
# Connection settings are taken from the environment when present so that real
# credentials do not have to be committed with the notebook. The fallbacks are
# the local demo values this notebook originally hard-coded.
NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
NEO4J_USERNAME = os.environ.get('NEO4J_USERNAME', 'neo4j')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD', 'demo')

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

In [None]:
def query_to_pandas(driver, query, database="powerlaws"):
    """Run ``query`` in a session on ``driver`` and return the result as a DataFrame.

    Parameters
    ----------
    driver
        Object whose ``session(database=...)`` call returns a context-managed
        session exposing ``run(query)`` (in this notebook, the Neo4j driver).
    query : str
        Query text handed unchanged to ``session.run``.
    database : str, optional
        Database name forwarded to ``driver.session``. Defaults to
        ``"powerlaws"``, the value that used to be hard-coded here.

    Returns
    -------
    pandas.DataFrame
        Frame constructed from ``result.data()``.
    """
    with driver.session(database=database) as session:
        # Pull the records out while the session is still open; the frame is
        # built from plain Python data afterwards.
        records = session.run(query).data()
    return pd.DataFrame(records)

In [None]:
# Cypher: for every Page, count incoming LINKS_TO relationships (OPTIONAL MATCH
# keeps pages with none, giving a count of 0), then count how many pages share
# each (p.p, incomingLinks) pair. p.p is returned under the alias "probability".
link_count_query = (
    "MATCH (p:Page)\n"
    "OPTIONAL MATCH (p)<-[:LINKS_TO]-(s)\n"
    "WITH p, count(s) AS incomingLinks\n"
    "RETURN p.p AS probability, incomingLinks, count(p) AS nodeCount\n"
    "ORDER BY probability, incomingLinks"
)

In [None]:
# One row per (probability, incomingLinks) pair returned by the link-count query.
link_count_df = query_to_pandas(driver=driver, query=link_count_query)

In [None]:
# Preview the first five rows as a sanity check on the query result.
link_count_df.head(n=5)

In [None]:
# String label per row; used as the hue/column key in the plots below.
probability_text = link_count_df['probability'].astype(str)
link_count_df['prob_str'] = probability_text + ' probability'

In [None]:
# Shift the link counts by one; this shifted column is the x variable of the
# log-scaled plots further down.
link_count_df['incomingLinksPlusOne'] = link_count_df['incomingLinks'].add(1)

In [None]:
# Linear-axis overview: node count against incoming-link count, coloured by
# the probability label.
sns.scatterplot(
    x="incomingLinks",
    y="nodeCount",
    hue="prob_str",
    data=link_count_df,
)

In [None]:
# One scatter panel per probability label, all on log-log axes.
# NOTE: despite its name, `ax` holds whatever sns.relplot returns (a
# figure-level grid object), not a single matplotlib Axes.
ax = sns.relplot(
    kind="scatter",
    data=link_count_df,
    x="incomingLinksPlusOne",
    y="nodeCount",
    hue="prob_str",
    col="prob_str",
)
ax.set(xscale="log", yscale="log")

In [None]:
# All probabilities overlaid on a single 7x7 log-log axes.
f, ax = plt.subplots(figsize=(7, 7))
ax.set_xscale("log")
ax.set_yscale("log")
sns.scatterplot(
    x="incomingLinksPlusOne",
    y="nodeCount",
    hue="prob_str",
    data=link_count_df,
    ax=ax,  # the axes created above, which is also the current axes
)

In [None]:
# Cypher: same per-page incoming-link count and (p.p, incomingLinks) grouping
# as the link-count query, but instead of returning the groups directly it
# collects, per probability, the ordered incomingLinks values and their node
# counts into two parallel lists. For each list position `rowNumber` it then
# returns the sum of nodeCounts from that position to the end of the list
# together with the link count at that position.
# Assuming collect() keeps the preceding ORDER BY order (TODO confirm), that
# sum is the number of pages with at least `linkCount` incoming links.
# NOTE(review): apoc.coll.sum presumably requires the APOC library on the
# server - confirm it is installed.
long_tail_query = """MATCH (p:Page)
OPTIONAL MATCH (p)<-[:LINKS_TO]-(s)
WITH p, count(s) AS incomingLinks
WITH p.p AS probability, incomingLinks, count(p) AS nodeCount
ORDER BY probability, incomingLinks
WITH probability, collect(incomingLinks) AS linkCounts, collect(nodeCount) as nodeCounts
UNWIND RANGE(0, size(linkCounts)-1) as rowNumber
RETURN probability, 
apoc.coll.sum(nodeCounts[rowNumber..]) as nodeCount, 
linkCounts[rowNumber] as linkCount"""

In [None]:
# One row per (probability, linkCount) with the summed node count from the
# long-tail query.
long_tail_df = query_to_pandas(driver=driver, query=long_tail_query)

In [None]:
f, ax = plt.subplots(figsize=(7,7))

# Row filter for the plot. Each comparison is wrapped in its own parentheses
# because `&` binds tighter than `==` / `>`. Previously the closing parenthesis
# sat before `> 0`, so the raw linkCount column was AND-ed into the mask and
# the `> 0` test was applied to the combined result instead of to linkCount.
long_tail_mask = (
    (long_tail_df['probability'] == 0.5)
    & (long_tail_df['nodeCount'] > 10)
    & (long_tail_df['linkCount'] > 0)
)

# Draw on the axes created above (this is also the current axes).
sns.lineplot(data=long_tail_df[long_tail_mask], y="linkCount", x="nodeCount", ax=ax)