In [28]:
import json
import time
from functools import reduce

import pandas as pd
import numpy as np

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from tqdm.autonotebook import tqdm

from py2neo import Graph, Node, Relationship

In [29]:
graph = Graph("bolt://dev_neo4j:7687", auth=('neo4j','myneo'))

n_nodes = graph.database.primitive_counts['NumberOfNodeIdsInUse']
n_relationships = graph.database.primitive_counts['NumberOfRelationshipIdsInUse']
print("Connected to graph database with {:,} nodes and {:,} relationships!".format
     (n_nodes, n_relationships))

def run_query(query, graph, print_query=False, run_query=True, 
              print_only=False, to_df=False, verbose=True):
    df = 1
    if print_only: 
        print_query = True
        run_query = False
    start_time = time.time()
    if print_query:
        print(query)
    if run_query:
        if to_df:
            df = graph.run(query).to_data_frame()
        else:
            graph.run(query)
    end_time = time.time()
    minutes_elapsed = (end_time-start_time)/60
    if verbose:
        print("Query completed in {:.2f} minutes.".format(minutes_elapsed))
    return df

Connected to graph database with 370,269,897 nodes and 220,155,390 relationships!


In [48]:
query = """
CALL apoc.periodic.iterate("
MATCH (q:Quanta)-[:PUBLISHED_IN]->(y:Year)
RETURN [q,y] AS ns
","
WITH head(ns) AS q, last(ns) AS y
MATCH (z:Year) WHERE z.value >= y.value
MERGE (q)-[m:METRICS_IN]->(z)
SET m.cites = size((z)<-[:PUBLISHED_IN]-(:Quanta)-[:CITES]->(q))",
{batchSize:5000, iterateList:true, parallel:false});
"""
run_query(query, graph)

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

Query completed in 0.68 minutes.
Query completed in 0.03 minutes.
Query completed in 0.48 minutes.
Query completed in 0.57 minutes.
Query completed in 0.02 minutes.
Query completed in 0.25 minutes.

