In [133]:
import json
from py2neo import Graph
from py2neo.data import Node, Relationship

# Need to get authentication working, currently NEO4J_AUTH=none
graph = Graph("bolt://neo4j:7687")
print("Connected to graph database with {:,} nodes and {:,} relationships!".format
     (graph.database.primitive_counts['NumberOfNodeIdsInUse'], 
      graph.database.primitive_counts['NumberOfRelationshipIdsInUse']))

Connected to graph database with 19,999,440 nodes and 13,449,435 relationships!


In [132]:
# FIRST ATTEMPT AT A QUERY THAT WRITES pagerank PROPERTY TO THE ENTIRE GRAPH
print("Running PageRank on the entire graph...", end=" ", flush=True)
query = """
CALL algo.pageRank('Quanta','CITES',{iterations:3, dampingFactor:0.85, write: true, writeProperty:'pagerank'});
"""
graph.run(query).evaluate()
print("Done.")

Running PageRank on the entire graph... 

ClientError: Procedure Call Failed: Failed to invoke procedure `algo.pageRank`: Caused by: java.lang.IndexOutOfBoundsException: Index: 0, Size: 0

In [130]:
# Run PageRank on each year from 1800 to 1805

import pandas as pd

start_year, end_year = 1900, 1950
dfs = []
for year in range(start_year, end_year+1):

    print("Running PageRank on works from <= {}...".format(year), end=" ")
    query = """
    CALL algo.pageRank(
    'MATCH (p:Quanta) WHERE p.year <= {} RETURN id(p) as id',
    'MATCH (p1:Quanta)-[:CITES]->(p2:Quanta) RETURN id(p1) as source, id(p2) as target',
    {{graph:'cypher', writeProperty:'pageRank', iterations:5, write: true}});
    """.format(year)
    graph.run(query).evaluate()
    
    print("Pulling out and saving results...", end=" ")
    query = """
    MATCH (a:Quanta) 
    WHERE a.year <= {} 
    RETURN id(a), a.title, a.pageRank""".format(year)
    df = graph.run(query).to_data_frame()
    df['year'] = year
    dfs.append(df)
    print("Done.")
    
result = pd.concat(dfs).pivot_table(index='a.title', columns='year', values='a.pageRank')    

Running PageRank on works from <= 1900... Pulling out and saving results... Done.
Running PageRank on works from <= 1901... Pulling out and saving results... Done.
Running PageRank on works from <= 1902... Pulling out and saving results... Done.
Running PageRank on works from <= 1903... Pulling out and saving results... Done.
Running PageRank on works from <= 1904... Pulling out and saving results... Done.
Running PageRank on works from <= 1905... Pulling out and saving results... Done.
Running PageRank on works from <= 1906... Pulling out and saving results... Done.
Running PageRank on works from <= 1907... Pulling out and saving results... Done.
Running PageRank on works from <= 1908... Pulling out and saving results... Done.
Running PageRank on works from <= 1909... Pulling out and saving results... Done.
Running PageRank on works from <= 1910... Pulling out and saving results... Done.
Running PageRank on works from <= 1911... Pulling out and saving results... Done.
Running PageRank

In [136]:
dfs

[      a.pageRank                                            a.title   id(a)  \
 0           0.15       Ueber die Constitution der spanischen Cortes     132   
 1           0.15  De Jongste verbeteringen in het beheer van ban...     184   
 2           0.15  CORRESPONDENCE. HARBOURS AND ESTUARIES ON SAND...     392   
 3           0.15  REPORT OF THE PROCEEDINGS OF THE SOCIETY AT IT...     864   
 4           0.15                               Plants of Madagascar     959   
 5           0.15                  Hydroxylamin aus Knallquecksilber     992   
 6           0.15                 Ostdeutsches Athenäum, 1855, No 17    1085   
 7           0.15              Elements and ephemeris of comet 1895a    1217   
 8           0.15               Letter from Toenga Te Poki to McLean    1329   
 9           0.15                                  Dear Old Broadway    1339   
 10          0.15  REPORT OF A CASE OF OBSTRUCTION OF THE BOWELS....    1439   
 11          0.15  Differenciálny sloven

In [134]:
# Write result to CSV

file_path = '/tmp/data/result/impact_20M_{}-{}.csv'.format(start_year, end_year)
print("Writing results to {}...".format(file_path), end=" ")
result.index = result.index.str.replace(",","")
result.to_csv(path_or_buf=file_path, sep=",", header=True, index=True)
print("Done.")

Writing results to /tmp/data/result/impact_20M_1900-1950.csv... Done.


In [None]:
# Graph loading (notes; do not execute)
# // Load graph
# CALL algo.graph.load('my-graph','Label','REL_TYPE',{graph:'heavy',..other config...})
#   YIELD name, graph, direction, undirected, sorted, nodes, loadMillis, alreadyLoaded,
#         nodeWeight, relationshipWeight, nodeProperty, loadNodes, loadRelationships;

# // Info on loaded graph
# CALL algo.graph.info('my-graph')
#   YIELD name, type, exists, removed, nodes;

# // Use graph
# CALL algo.pageRank(null,null,{graph:'my-graph',...})


# // Remove graph
# CALL algo.graph.remove('my-graph')
#   YIELD name, type, exists, removed, nodes;