In [2]:
import json
import time
from functools import reduce

import pandas as pd
import numpy as np

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from tqdm.autonotebook import tqdm

from py2neo import Graph, Node, Relationship

In [3]:
graph = Graph("bolt://dev_neo4j:7687", auth=('neo4j','myneo'))

n_nodes = graph.database.primitive_counts['NumberOfNodeIdsInUse']
n_relationships = graph.database.primitive_counts['NumberOfRelationshipIdsInUse']
print("Connected to graph database with {:,} nodes and {:,} relationships!".format
     (n_nodes, n_relationships))

def run_query(query, graph, print_query=False, run_query=True, 
              print_only=False, to_df=False, verbose=True):
    df = 1
    if print_only: 
        print_query = True
        run_query = False
    start_time = time.time()
    if print_query:
        print(query)
    if run_query:
        if to_df:
            df = graph.run(query).to_data_frame()
        else:
            graph.run(query)
    end_time = time.time()
    minutes_elapsed = (end_time-start_time)/60
    if verbose:
        print("Query completed in {:.2f} minutes.".format(minutes_elapsed))
    return df

Connected to graph database with 370,269,897 nodes and 220,155,390 relationships!


In [11]:
# Write community detection
min_year, max_year = 1900, 2020
for y in tqdm(range(min_year, max_year+1)):
    print("{}...".format(y))
        
    # Write edge list for graph at year=y
    query = """
    CALL algo.labelPropagation('
    MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(y:Year)
    WHERE y.value<={year} AND ((a)-[:COAUTHOR]-(:Author)) AND id(a)<>0
    RETURN id(a) AS id
    ','
    MATCH (a1:Author)-[r:COAUTHOR]-(a2:Author)
    WHERE id(a1)<>0 AND id(a2) <> 0
    RETURN id(a1) AS source, id(a2) AS target, r.strength AS weight 
    ',{{graph:'cypher', direction:'BOTH', write:true, writeProperty:'temporary'}})
//    YIELD nodes, communityCount, iterations, didConverge, 
//        loadMillis, computeMillis, writeMillis, 
//        write, weightProperty, writeProperty,
//        p1, p5, p10, p25, p50, p75, p90, p95, p99, p100;
    """.format(year=y)
    run_query(query, graph, print_only=True)

HBox(children=(IntProgress(value=0, max=121), HTML(value='')))

1900...

    CALL algo.labelPropagation('
    MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(y:Year)
    WHERE y.value<=1900 AND ((a)-[:COAUTHOR]-(:Author)) AND id(a)<>0
    RETURN id(a) AS id
    ','
    MATCH (a1:Author)-[r:COAUTHOR]-(a2:Author)
    WHERE id(a1)<>0 AND id(a2) <> 0
    RETURN id(a1) AS source, id(a2) AS target, r.strength AS weight 
    ',{graph:'cypher', direction:'BOTH', write:true, writeProperty:'temporary'})
//    YIELD nodes, communityCount, iterations, didConverge, 
//        loadMillis, computeMillis, writeMillis, 
//        write, weightProperty, writeProperty,
//        p1, p5, p10, p25, p50, p75, p90, p95, p99, p100;
    
Query completed in 0.00 minutes.
1901...

    CALL algo.labelPropagation('
    MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(y:Year)
    WHERE y.value<=1901 AND ((a)-[:COAUTHOR]-(:Author)) AND id(a)<>0
    RETURN id(a) AS id
    ','
    MATCH (a1:Author)-[r:COAUTHOR]-(a2:Author)
    WHERE id(a1)<>0 AND id(a2) <> 0
   

In [8]:
for years_post_pub in range(4):
    query = """
    CALL apoc.periodic.iterate("
    MATCH (q:Quanta)-[:PUBLISHED_IN]->(y:Year)
    WITH DISTINCT q, y.value as year 
    MATCH (p:Quanta)-[:CITES]->(q:Quanta) 
    WHERE p.year = year+{}
    WITH DISTINCT p, q, year
    RETURN [p,q,year] as l
    ","
    WITH l[0] AS p, l[1] AS q, l[2] as year
    MATCH (a:Author)-[:AUTHORED]->(p) 
    WITH q, year+{} as p1, apoc.coll.toSet(collect(a.name)) AS alist 
    MERGE (q)-[m1:METRICS_IN]->(a:Year {{value: p1}})
    SET m1.earlyAdopters = size(alist)
    ",{{batchSize:5000, iterateList:true, parallel:false}});
    """.format(years_post_pub, years_post_pub)
    print(query)


    CALL apoc.periodic.iterate("
    MATCH (q:Quanta)-[:PUBLISHED_IN]->(y:Year)
    WITH DISTINCT q, y.value as year 
    MATCH (p:Quanta)-[:CITES]->(q:Quanta) 
    WHERE p.year = year+0
    WITH DISTINCT p, q, year
    RETURN [p,q,year] as l
    ","
    WITH l[0] AS p, l[1] AS q, l[2] as year
    MATCH (a:Author)-[:AUTHORED]->(p) 
    WITH q, year+0 as p1, apoc.coll.toSet(collect(a.name)) AS alist 
    MERGE (q)-[m1:METRICS_IN]->(a:Year {value: p1})
    SET m1.earlyAdopters = size(alist)
    ",{batchSize:5000, iterateList:true, parallel:false});
    

    CALL apoc.periodic.iterate("
    MATCH (q:Quanta)-[:PUBLISHED_IN]->(y:Year)
    WITH DISTINCT q, y.value as year 
    MATCH (p:Quanta)-[:CITES]->(q:Quanta) 
    WHERE p.year = year+1
    WITH DISTINCT p, q, year
    RETURN [p,q,year] as l
    ","
    WITH l[0] AS p, l[1] AS q, l[2] as year
    MATCH (a:Author)-[:AUTHORED]->(p) 
    WITH q, year+1 as p1, apoc.coll.toSet(collect(a.name)) AS alist 
    MERGE (q)-[m1:METRICS_IN]->(a:Yea