In [None]:
import json
import os
import time
import warnings
from functools import reduce

import numpy as np
import pandas as pd

warnings.simplefilter(action='ignore', category=FutureWarning)
from tqdm.autonotebook import tqdm

from py2neo import Graph, Node, Relationship

In [None]:
# Connection settings are read from the environment so the notebook can be
# pointed at another Neo4j instance without editing code. The fallbacks are the
# values that were previously hard-coded in this cell.
# NOTE(review): the fallback password is still committed in this file; drop the
# default once NEO4J_PASSWORD is provisioned wherever this notebook runs.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://dev_neo4j:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "myneo")

graph = Graph(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

# Read the store counters once and reuse the same snapshot for both figures.
primitive_counts = graph.database.primitive_counts
n_nodes = primitive_counts['NumberOfNodeIdsInUse']
n_relationships = primitive_counts['NumberOfRelationshipIdsInUse']
print("Connected to graph database with {:,} nodes and {:,} relationships!".format(
    n_nodes, n_relationships))

def run_query(query, graph, print_query=False, run_query=True,
              print_only=False, to_df=False, verbose=True):
    """Print and/or execute a Cypher query against ``graph``.

    Parameters
    ----------
    query : str
        Cypher text, passed unchanged to ``graph.run``.
    graph : object
        Connection exposing ``run(query)``. When ``to_df`` is True the object
        returned by ``run`` must offer ``to_data_frame()``.
    print_query : bool
        Echo ``query`` to stdout before (optionally) executing it.
    run_query : bool
        Execute the query. Note that this parameter shadows the function's own
        name inside the body.
    print_only : bool
        Dry-run shortcut: forces ``print_query=True`` and ``run_query=False``.
    to_df : bool
        Return the result of ``to_data_frame()`` instead of the sentinel.
    verbose : bool
        Print the elapsed time after a query has been executed.

    Returns
    -------
    The value of ``graph.run(query).to_data_frame()`` when the query was
    executed with ``to_df=True``; otherwise the integer sentinel ``1``
    (including every dry run).
    """
    if print_only:
        print_query, run_query = True, False
    if print_query:
        print(query)

    # Sentinel kept for callers that ignore the return value or compare it.
    result = 1
    if not run_query:
        # Nothing was sent to the database, so printing a "completed" line
        # with a timing would misreport what happened.
        return result

    start_time = time.time()
    cursor = graph.run(query)
    if to_df:
        result = cursor.to_data_frame()
    minutes_elapsed = (time.time() - start_time) / 60
    if verbose:
        print("Query completed in {:.2f} minutes.".format(minutes_elapsed))
    return result

In [None]:
min_year, max_year = 1900, 2020

# Dry-run switch. True reproduces the mode this cell was written in: every
# query is only printed and nothing is sent to Neo4j. Set to False to execute.
DRY_RUN = True


def _build_case_expression(stats, column):
    """Render ``CASE q.year WHEN <year> THEN <value> ... END`` from ``stats``.

    One WHEN clause is emitted per row, pairing the row's ``year`` with its
    value in ``column``.
    """
    clauses = ' '.join(
        'WHEN {:.0f} THEN {}'.format(year, value)
        for year, value in zip(stats['year'], stats[column])
    )
    return 'CASE q.year ' + clauses + ' END'


for y in tqdm(range(min_year, max_year+1)):

    print("{}...".format(y))

    # Run deepWalk over Quanta published in years <= y that both cite and are
    # cited, writing the result to the `temporary` property.
    # NOTE(review): the steps below aggregate `temporary` with avg()/stdev()
    # and store it as `timeScaledPageRank`, which presumes a scalar score —
    # confirm that deepWalk's written value is compatible with that.
    query = """
    CALL embedding.dl4j.deepWalk('
    MATCH (q:Quanta)-[:PUBLISHED_IN]->(z:Year)
    WHERE z.year <= {} AND ((:Quanta)-[:CITES]->(q)-[:CITES]->(:Quanta))
    RETURN id(q) as id
    ','
    MATCH (q1:Quanta)-[:CITES]->(q2:Quanta)
    RETURN id(q1) AS source, id(q2) AS target
    ',{{graph:'cypher', write:true, writeProperty:"temporary"}});
    """.format(y)
    run_query(query, graph, print_only=DRY_RUN)

    # Reference-set statistics for every year <= y, using a +/-1 year window.
    # NOTE(review): the original mixed `y.year` with `y.yearvalue` in the lower
    # window bound and in ORDER BY; unified on `y.year`, the property every
    # other query in this cell uses — confirm against the Year node schema.
    query = """
    MATCH (y:Year)
    WHERE y.year <= {}
    MATCH (q:Quanta)-[:PUBLISHED_IN]->(z:Year)
    WHERE (z.year<=y.year+1) AND (z.year>=y.year-1)
    RETURN y.year as year, avg(q.temporary) as avg, stdev(q.temporary) AS std
    ORDER BY y.year DESC;
    """.format(y)
    df = run_query(query, graph, to_df=True, print_only=DRY_RUN)

    # run_query returns the sentinel 1 (not a DataFrame) on a dry run, and an
    # executed query may return no rows; neither can be turned into WHEN
    # clauses, so handle both explicitly instead of crashing.
    if isinstance(df, pd.DataFrame) and not df.empty:
        # Null aggregates would be rendered as `nan`/`None`, which are not
        # valid Cypher literals; without a WHEN clause the CASE yields null
        # for those years instead.
        stats = df.dropna(subset=['avg', 'std'])
    else:
        stats = None

    # Normalize score for each Quanta from year <= y
    # NOTE(review): the CASE switches on `q.year`; this assumes Quanta nodes
    # carry their own `year` property — confirm.
    if stats is not None and not stats.empty:
        avg_case_str = _build_case_expression(stats, 'avg')
        std_case_str = _build_case_expression(stats, 'std')
    elif DRY_RUN:
        # No statistics exist without executing; print a readable placeholder.
        avg_case_str = 'CASE q.year WHEN <year> THEN <avg> END'
        std_case_str = 'CASE q.year WHEN <year> THEN <std> END'
    else:
        print("No reference statistics for years <= {}; skipping.".format(y))
        continue

    query = """
    CALL apoc.periodic.iterate('
    MATCH (q:Quanta)-[:PUBLISHED_IN]->(z:Year) 
    WHERE z.year <= {}
    RETURN q
    ','
    SET q.temporary = ABS(q.temporary-{})/{}
    ',{{batchSize:10000, parallel:true}});
    """.format(y, avg_case_str, std_case_str)
    run_query(query, graph, print_only=DRY_RUN)

    # Write scores to METRICS_IN relationship
    # NOTE(review): with parallel:true every batch MERGEs a relationship onto
    # the same Year node, which may cause lock contention — confirm this is
    # safe or switch to parallel:false.
    query = """
    CALL apoc.periodic.iterate('
    MATCH (q:Quanta)-[:PUBLISHED_IN]->(z:Year) 
    WHERE z.year <= {}
    RETURN q
    ','
    MATCH (y:Year)
    WHERE y.year = {}
    MERGE (q)-[m:METRICS_IN]->(y)
    SET m.timeScaledPageRank = q.temporary
    ',{{batchSize:10000, parallel:true}});
    """.format(y,y)
    run_query(query, graph, print_only=DRY_RUN)


In [None]:
# Export (source, target) pairs of internal node ids to /import/quanta.edgelist
# through APOC, space-delimited, unquoted and without a header row.
# The cell used to contain bare Cypher, which is not valid Python; it now goes
# through run_query like the other cells. print_only=True keeps it a dry run —
# set it to False to actually perform the export.
# NOTE(review): `(q:Quanta)--(other)` matches any relationship type in either
# direction, so the output is not limited to CITES edges between Quanta —
# confirm that is the intended edge set.
query = """
CALL apoc.export.csv.query('
MATCH (q:Quanta)--(other)
RETURN id(q) AS source, id(other) AS target
','/import/quanta.edgelist',
{quotes:false, d:' ', delimiter:' ', header:false});
"""
run_query(query, graph, print_only=True);

In [None]:
# Preview the first row of quanta.emb: the query splits each space-separated
# line into its first field (returned as nodeId) and the remaining fields
# (returned as embedding).
# The cell used to contain bare Cypher, which is not valid Python; it now goes
# through run_query like the other cells. print_only=True keeps it a dry run
# (the call then returns the sentinel 1) — set it to False to load the row
# into a DataFrame.
query = """
CALL apoc.load.csv('quanta.emb',{sep:" "}) YIELD list RETURN head(list) as nodeId, tail(list) as embedding LIMIT 1
"""
run_query(query, graph, to_df=True, print_only=True)