In [1]:
import json
import time
from functools import reduce

import pandas as pd
import numpy as np

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from tqdm.autonotebook import tqdm

from py2neo import Graph, Node, Relationship

In [2]:
public_address = '54.174.175.98'
graph = Graph('bolt://{}:7687'.format(public_address), auth=('neo4j','myneo'))

def run_query(query, graph, print_query=False, run_query=True, 
              print_only=False, to_df=False, verbose=True):
    df = 1
    if print_only: 
        print_query = True
        run_query = False
    start_time = time.time()
    if print_query:
        print(query)
    if run_query:
        if to_df:
            df = graph.run(query).to_data_frame()
        else:
            graph.run(query)
    end_time = time.time()
    minutes_elapsed = (end_time-start_time)/60
    if verbose:
        print("Query completed in {:.2f} minutes.".format(minutes_elapsed))
    return df

In [6]:
# Write community detection
min_year, max_year = 1900, 2020
for y in tqdm(range(min_year, max_year+1)):
    print("{}...".format(y))
        
    # Write edge list for graph at year=y
    query = """
    CALL algo.labelPropagation('
    MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(y:Year)
    WHERE y.year<={year}
    RETURN id(a) AS id
    ','
    MATCH (a1:Author)-[r:COAUTHOR]-(a2:Author)
    RETURN id(a1) AS source, id(a2) AS target, r.strength AS weight 
    ',{{graph:'cypher', direction:'BOTH', iterations:5,
        write:true, writeProperty:'temporary'}});
    """.format(year=y)
    run_query(query, graph, print_only=True)
    
    # Write results to METRICS_IN relationship
    query = """
    CALL apoc.periodic.iterate('
    MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(y:Year)
    WHERE y.year<={year}
    RETURN a
    ','
    WITH a
    MATCH (y:Year)
    WHERE y.year = {}
    MERGE (a)-[m:METRICS_IN]->(y)
    SET m.labelPropagation = q.temporary
    ',{{batchSize:10000, parallel:true}});
    """.format(y,y)
    run_query(query, graph, print_only=False)   
    
    
    
    
    
    
    
    
    
    

HBox(children=(IntProgress(value=0, max=121), HTML(value='')))

1900...

    CALL algo.labelPropagation('
    MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(y:Year)
    WHERE y.year<=1900
    RETURN id(a) AS id
    ','
    MATCH (a1:Author)-[r:COAUTHOR]-(a2:Author)
    RETURN id(a1) AS source, id(a2) AS target, r.strength AS weight 
    ',{graph:'cypher', direction:'BOTH', write:true, writeProperty:'temporary'});
    
Query completed in 0.00 minutes.
1901...

    CALL algo.labelPropagation('
    MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(y:Year)
    WHERE y.year<=1901
    RETURN id(a) AS id
    ','
    MATCH (a1:Author)-[r:COAUTHOR]-(a2:Author)
    RETURN id(a1) AS source, id(a2) AS target, r.strength AS weight 
    ',{graph:'cypher', direction:'BOTH', write:true, writeProperty:'temporary'});
    
Query completed in 0.00 minutes.
1902...

    CALL algo.labelPropagation('
    MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(y:Year)
    WHERE y.year<=1902
    RETURN id(a) AS id
    ','
    MATCH (a1:Author)-[r:COAUT