In [1]:
import json, time, os
from functools import reduce

import pandas as pd
import numpy as np

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from tqdm.autonotebook import tqdm

from py2neo import Graph, Node, Relationship

In [None]:
# Neo4j connection settings.
# Each value can be overridden through an environment variable so the host and
# credentials do not have to be edited (or committed) in the notebook. The
# fallbacks keep the values this notebook has always used.
# TODO: drop the fallback password once NEO4J_PASSWORD is set everywhere.
public_address = os.environ.get('NEO4J_HOST', '54.174.175.98')
neo4j_user = os.environ.get('NEO4J_USER', 'neo4j')
neo4j_password = os.environ.get('NEO4J_PASSWORD', 'myneo')

# Bolt connection on port 7687, shared by every query cell below.
graph = Graph('bolt://{}:7687'.format(public_address),
              auth=(neo4j_user, neo4j_password))

def run_query(query, graph, print_query=False, run_query=True,
              print_only=False, to_df=False, verbose=True):
    """Optionally print and/or execute a Cypher query, reporting elapsed time.

    Parameters
    ----------
    query : str
        Query text passed unchanged to ``graph.run``.
    graph : object
        Connection object exposing ``run(query)``; when ``to_df`` is set the
        value returned by ``run`` must expose ``to_data_frame()``.
    print_query : bool
        Print the query text before doing anything else.
    run_query : bool
        Execute the query. (This parameter shadows the function name inside
        the body; it is kept for backward compatibility.)
    print_only : bool
        Shortcut for ``print_query=True, run_query=False``.
    to_df : bool
        Return the query result converted with ``to_data_frame()``.
    verbose : bool
        Print the elapsed time after the query has been executed.

    Returns
    -------
    The result of ``to_data_frame()`` when the query was executed with
    ``to_df=True``; otherwise the placeholder value ``1``, which existing
    callers may rely on.
    """
    if print_only:
        print_query, run_query = True, False

    if print_query:
        print(query)

    result = 1  # legacy placeholder returned when no DataFrame is produced
    if not run_query:
        # Nothing was executed, so there is no timing to report.
        return result

    start_time = time.time()
    cursor = graph.run(query)
    if to_df:
        result = cursor.to_data_frame()
    minutes_elapsed = (time.time() - start_time) / 60

    if verbose:
        print("Query completed in {:.2f} minutes.".format(minutes_elapsed))
    return result

In [None]:
# Export one cumulative citation edge list per year.
min_year, max_year = 1900, 2020

# Cypher statement template. For a given {year} it selects every CITES edge
# whose citing and cited Quanta were both published in or before that year,
# and has APOC write the internal node-id pairs as unquoted CSV to {out}.
edgelist_export_template = """
    CALL apoc.export.csv.query('
    MATCH (z:Year)<-[:PUBLISHED_IN]-(a:Quanta)-[:CITES]->(b:Quanta)-[:PUBLISHED_IN]->(y:Year)
    WHERE z.year<={year} AND y.year<={year}
    RETURN id(a) as source, id(b) as target
    ','{out}',
    {{quotes:false}});
    """

for y in tqdm(range(min_year, max_year + 1)):
    print("{}...".format(y))

    # Destination file for the graph snapshot at year=y
    out_path = '/import/embeddings/quanta.{}.edgelist'.format(y)
    query = edgelist_export_template.format(year=y, out=out_path)

    # NOTE(review): print_only=True prints the statement without executing it,
    # so this cell does not create the files by itself - confirm that is intended.
    run_query(query, graph, print_only=True)

In [None]:
# Rewrite every exported edge list under the current directory tree in place,
# turning the comma separators into single spaces
# (presumably the whitespace-separated format the node2vec binary reads).
find . -name 'quanta.*.edgelist' -exec sed -i 's/,/ /g' {} +

In [None]:
# Drop the first line of every edge list in the current directory
# (presumably the header row written by the CSV export).
# NOTE: not idempotent - every run removes one more line, so run this exactly
# once per export.
for filename in quanta.*.edgelist; do
     # When nothing matches, the glob stays a literal string; skip it instead
     # of creating a junk file named after the pattern.
     [ -e "$filename" ] || continue
     # Replace the original only if tail succeeded, so a failed or partial
     # write never overwrites the edge list.
     tail -n +2 "$filename" > "$filename.tmp" && mv -f "$filename.tmp" "$filename"
done

In [None]:
# Compute node2vec embeddings for every edge list in the current directory.
# Each result is written next to its input as <edgelist>.emb.
# Flags, per the SNAP node2vec command line (confirm against the binary in use):
#   -d:12  embedding dimensions    -l:80  walk length
#   -dr    treat graph as directed -v     verbose output
for edgelist in quanta.*.edgelist; do
     embedding="$edgelist.emb"
     ./node2vec -i:"$edgelist" -o:"$embedding" -d:12 -l:80 -dr -v
done

In [None]:
# Import the node2vec embeddings into the graph, one year at a time.
# NOTE(review): this range stops at 1990 while the export cell covers up to
# 2020 - confirm whether the remaining years still need to be imported.
min_year, max_year = 1900, 1990
for y in tqdm(range(min_year, max_year+1)):
    print("{}...".format(y))

    # Edge list exported for year=y; node2vec wrote its output to <edgelist>.emb
    edgelist = '/import/embeddings/quanta.{}.edgelist'.format(y)

    # Load the embedding file for year=y and store each vector on a
    # METRICS_IN relationship between the node and that Year.
    # Each row is "<node id> <v1> ... <vd>", separated by spaces.
    # skip:1 drops the first line of the file: node2vec output starts with a
    # "<node count> <dimensions>" header, which would otherwise be read as a
    # node id and written to an unrelated node as a bogus embedding.
    query = """
    CALL apoc.load.csv('{emb}',
        {{sep:" ", header:false, skip:1}})
    YIELD list
    WITH apoc.convert.toInteger(head(list)) as nodeId,
        [x IN tail(list) | apoc.convert.toFloat(x)] AS embedding
    MATCH (n) WHERE id(n)=nodeId
    MATCH (y:Year) WHERE y.year={year}
    MERGE(n)-[r:METRICS_IN]->(y)
    SET r.node2vec=embedding
    """.format(emb=edgelist + '.emb', year=y)
    run_query(query, graph, print_only=False)