In [None]:
import json
import os
import time

import numpy as np
import pandas as pd
from py2neo import Graph, Node, Relationship

In [None]:
# Connection settings. Each value can be overridden through an environment
# variable so credentials do not have to be edited into (or shared with) the
# notebook; the defaults are the values that were previously hard-coded here.
neo4j_uri = os.environ.get("NEO4J_URI", "bolt://neo4j-quanta:7687")
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD", "myneo")
graph = Graph(neo4j_uri, auth=(neo4j_user, neo4j_password))

# Print the node and relationship id counts reported by the database as a
# quick check that the connection works.
primitive_counts = graph.database.primitive_counts
n_nodes = primitive_counts['NumberOfNodeIdsInUse']
n_relationships = primitive_counts['NumberOfRelationshipIdsInUse']
print("Connected to graph database with {:,} nodes and {:,} relationships!".format(
    n_nodes, n_relationships))

In [None]:
# Run ts-PageRank over time
#
# For every cutoff year the loop issues up to three statements against `graph`:
#   1. (optional) write standard PageRank scores to the property tspr<year-1>
#      for Quanta with year < cutoff,
#   2. read the mean and standard deviation of tspr<year-1> over the Quanta
#      whose year equals cutoff-1,
#   3. overwrite tspr<year-1> on every Quanta with year < cutoff with
#      ABS(score - mean) / stdev.
start_year, end_year, step = 2008, 2008, 1

# The PageRank call was commented out in this cell, so it stays disabled by
# default. Step 3 overwrites the same property that step 2 reads, so with the
# write disabled a second run operates on values that were already scaled;
# set this to True to recompute the raw scores first.
write_pagerank = False

start_time = time.time()
for year in range(start_year+1, end_year+2, step):
    print("Calculating ts-PageRank for all Quanta from < {}".format(year))

    # First, write standard PageRank scores to each node
    query = """
    CALL algo.pageRank(
    'MATCH (p:Quanta) WHERE p.year < {} RETURN id(p) as id',
    'MATCH (p1:Quanta)-[:CITES]->(p2:Quanta) RETURN id(p1) as source, id(p2) as target',
    {{graph:'cypher', iterations:35, write:true, writeProperty:"tspr{}"}});
    """.format(year, year-1)
    if write_pagerank:
        print("\tWriting standard PageRank scores...", end=" ")
        query_start_time = time.time()
        graph.run(query)
        print("Finished ({:.2f} min).".format((time.time()-query_start_time)/60))
    else:
        # Say so explicitly instead of reporting a write that never happened.
        print("\tSkipping standard PageRank write (write_pagerank is False).")

    # Second, calculate the mean and stdev of the PageRank scores
    print("\tCalculating mean and stdev of PageRank scores...", end=" ")
    query_start_time = time.time()
    query = """
    MATCH (b:Quanta) WHERE b.year = {}
    RETURN AVG(b.tspr{}) as avgpr, stDev(b.tspr{}) as stdevpr
    """.format(year-1, year-1, year-1)
    df = graph.run(query).to_data_frame()
    avg_pagerank = df['avgpr'].iloc[0]
    stdev_pagerank = df['stdevpr'].iloc[0]
    print("Finished ({:.2f} min).".format((time.time()-query_start_time)/60))

    # The statistics are formatted straight into the Cypher text below. A
    # missing value would be written as `None`/`nan`, and a zero stdev would
    # divide every score by zero, so skip the rewrite rather than send either.
    if pd.isna(avg_pagerank) or pd.isna(stdev_pagerank) or stdev_pagerank == 0:
        print("\tSkipping time-scaling for tspr{}: mean={}, stdev={}.".format(
            year-1, avg_pagerank, stdev_pagerank))
        continue

    # Third, normalize the standard PageRank scores to ts-PageRank
    print("\tTime-scaling scores...", end=" ")
    query_start_time = time.time()
    query = """
    CALL apoc.periodic.iterate(
    "MATCH (p:Quanta) WHERE p.year < {} RETURN p",
    "SET p.tspr{} = ABS(p.tspr{}-{})/{}",
    {{batchSize:10000, parallel:true}})
    """.format(year, year-1, year-1, avg_pagerank, stdev_pagerank)
    print(query)
    graph.run(query)
    print("Finished ({:.2f} min).".format((time.time()-query_start_time)/60))


print("Finished all calculations in {:.2f} minutes.".format((time.time()-start_time)/60))

In [None]:
# Debug aid: show the last Cypher statement assigned to `query` by the
# ts-PageRank loop cell.
# NOTE(review): in the visible cells `query` is only assigned inside that
# loop, so this cell depends on the loop cell having been run first.
print(query)

In [None]:
def normalize_and_sort(df, top_n=10000):
    """Standardize ``score`` within each ``year`` and return the top rows.

    Two columns are added to ``df`` in place:

    * ``scores_minus_mean`` -- ``score`` minus the mean score of the row's year.
    * ``scaled_scores`` -- ``scores_minus_mean`` divided by the standard
      deviation of the scores of the row's year.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``year`` column and a numeric ``score`` column.
    top_n : int, default 10000
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` ordered by ``scaled_scores`` descending, truncated
        to ``top_n`` rows. Rows whose ``scaled_scores`` is NaN (for example a
        year with a single row, whose standard deviation is undefined) are
        placed last by ``sort_values``.
    """
    scores_by_year = df.groupby('year')['score']

    # transform() returns one value per original row (the statistic of that
    # row's group), so the results align with df's index and can be combined
    # with the score column element-wise.
    df['scores_minus_mean'] = df['score'] - scores_by_year.transform('mean')
    df['scaled_scores'] = df['scores_minus_mean'] / scores_by_year.transform('std')

    return df.sort_values(['scaled_scores'], ascending=False).head(top_n)

In [None]:
# NOTE(review): the only `df` assigned in the visible cells is the
# avgpr/stdevpr frame built inside the ts-PageRank loop, which has neither the
# 'year' nor the 'score' column that normalize_and_sort reads -- confirm that
# `df` is meant to be loaded with per-node scores before this cell runs.
results = normalize_and_sort(df)