In [1]:
import time, json, glob, os

import pandas as pd
import numpy as np

from py2neo import Graph, Node, Relationship
from tqdm import tqdm
from __future__ import print_function

from sklearn.datasets import fetch_mldata
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

import seaborn as sns

In [6]:
# Connection settings can be overridden through environment variables so the
# notebook can target another Neo4j instance without editing this cell.
# The fallbacks reproduce the values this notebook originally hardcoded.
# NOTE(review): the password fallback keeps a credential in the notebook --
# export NEO4J_PASSWORD and drop the fallback once every environment sets it.
neo4j_uri = os.environ.get("NEO4J_URI", "bolt://neo4j-magone:7687")
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD", "myneo")
graph = Graph(neo4j_uri, auth=(neo4j_user, neo4j_password))

# Report the size of the connected graph as a quick sanity check.
n_nodes = len(graph.nodes)
n_edges = len(graph.relationships)
print("Connected to graph with {:,} nodes and {:,} edges!".format(n_nodes, n_edges))

Connected to graph with 278,076,495 nodes and 1,842,505,331 edges!


In [7]:
def run_query(query, graph, print_query=False, run_query=True,
              print_only=False, to_df=False):
    """Optionally print and/or execute a query against ``graph``, timing the execution.

    Args:
        query: Query text, passed unchanged to ``graph.run``.
        graph: Object exposing ``run(query)``; when ``to_df`` is set, the value
            it returns must provide ``to_data_frame()``.
        print_query: Print the query text before anything else happens.
        run_query: Execute the query. (Shadows the function name inside the
            body; the name is kept because callers may pass it by keyword.)
        print_only: Shortcut that forces ``print_query=True`` and
            ``run_query=False``.
        to_df: Return the result of ``to_data_frame()`` instead of the
            placeholder value.

    Returns:
        The data frame when the query ran with ``to_df=True``; otherwise the
        integer ``1``, a placeholder preserved for backward compatibility.
    """
    if print_only:
        print_query, run_query = True, False
    if print_query:
        print(query)

    result = 1
    if not run_query:
        # Nothing was executed, so reporting "Query completed" would be misleading.
        return result

    start_time = time.time()
    if to_df:
        result = graph.run(query).to_data_frame()
    else:
        graph.run(query)
    minutes_elapsed = (time.time() - start_time) / 60
    print("Query completed in {:.2f} minutes.".format(minutes_elapsed))
    return result

In [None]:
# Number of consecutive year offsets (0 .. n_years_to_track - 1) that get
# their own citation-count column in the query below.
n_years_to_track = 15


def citation_fun(x):
    """Build the Cypher column ``c<x>`` for year offset ``x``.

    The expression takes the size of the pattern of Quanta nodes whose
    ``year`` equals ``q.year + x`` and that have a CITES relationship to ``q``.
    """
    column_template = 'size((:Quanta {{year:q.year+{}}})-[:CITES]->(q)) as c{}'
    return column_template.format(x, x)


# One column expression per offset, each on its own line of the RETURN clause.
citation_string = '\n' + ',\n'.join(
    citation_fun(offset) for offset in range(n_years_to_track)
)

# Keep the 1000 Quanta with the highest overall citation count, then return
# their name, id and the per-offset citation columns.
query_template = """
MATCH (q:Quanta) 
WITH q, size((:Quanta)-[:CITES]->(q)) as numCitations
ORDER BY numCitations DESC
LIMIT 1000
RETURN q.name, q.id, {}"""
query = query_template.format(citation_string)

df = run_query(query, graph, to_df=True)