In [None]:
import json
import os
import time

import pandas as pd
from py2neo import Graph, Node, Relationship

In [None]:
# Connection settings can be overridden through the environment
# (NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD) so credentials do not have to be
# edited into the notebook. The fallbacks are the values that were previously
# hard-coded here, so behaviour is unchanged when no variables are set.
neo4j_uri = os.environ.get("NEO4J_URI", "bolt://neo4j-quanta:7687")
neo4j_auth = (
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "myneo"),
)
graph = Graph(neo4j_uri, auth=neo4j_auth)

# Fetch the store counters once and reuse them for both numbers in the message.
primitive_counts = graph.database.primitive_counts
print("Connected to graph database with {:,} nodes and {:,} relationships!".format(
    primitive_counts['NumberOfNodeIdsInUse'],
    primitive_counts['NumberOfRelationshipIdsInUse']))

## Citations

In [None]:
# Find papers whose per-year citation counts repeatedly jump by a fixed
# multiple in the years following publication.

# Optional venue restriction. It is spliced in FRONT of the other WHERE
# predicates, so a non-empty value must be a complete boolean expression
# followed by " AND ". Leave it empty to search every venue. Example:
# """(q.venue="CA: A Cancer Journal for Clinicians" OR q.venue="The New England Journal of Medicine" OR q.venue="The Lancet" OR q.venue="Chemical Reviews" OR q.venue="Nature Reviews Drug Discovery" OR q.venue="JAMA" OR q.venue="Nature Reviews Cancer" OR q.venue="Nature Reviews Immunology" OR q.venue="Nature" OR q.venue="Nature Reviews Genetics" OR q.venue="Science" OR q.venue="Chemical Society Reviews" OR q.venue="Nature Materials" OR q.venue="Nature Nanotechnology" OR q.venue="Lancet Oncology" OR q.venue="Reviews of Modern Physics" OR q.venue="Nature Biotechnology" OR q.venue="Nature Reviews Molecular Cell Biology" OR q.venue="Nature Reviews Neuroscience" OR q.venue="Nature Medicine" OR q.venue="Nature Photonics" OR q.venue="Nature Reviews Microbiology" OR q.venue="Cell" OR q.venue="Advances in Physics" OR q.venue="Energy and Environmental Science" OR q.venue="World Psychiatry") AND """
venue = ""
min_total_citations = 100  # q.n_citation must be strictly greater than this
starting_year = 2003       # inclusive lower bound on q.year
ending_year = 2008         # inclusive upper bound on q.year
n_years = 5                # yearly counts c1..c<n_years> are computed
cite_multiple = 2          # c<i+1> must exceed cite_multiple * c<i> to count as a jump
min_multiple_count = 4     # minimum number of jumps required (out of n_years - 1)

# c<i>: number of CITES relationships into q from Quanta nodes whose year is q.year + i.
cite_count_string = "\t"+",\n\t".join(["SIZE((q)<-[:CITES]-(:Quanta {{year: q.year+{}}})) as c{}".format(i,i) for i in range(1,n_years+1)])
# b<i>: 1 when c<i+1> > cite_multiple * c<i>, otherwise 0.
cite_multiple_string = "\t"+",\n\t".join(["CASE WHEN (c{} > {}*c{}) THEN 1 ELSE 0 END as b{}".format(i+1,cite_multiple,i,i) for i in range(1,n_years)])
# Column list "c1, c2, ..." for the RETURN clause.
c_string = ", ".join(["c{}".format(i) for i in range(1,n_years+1)])
# Sum expression "b1+b2+..." giving the number of jumps.
b_string = "+".join(["b{}".format(i) for i in range(1,n_years)])

# Named fields keep the template readable and make it impossible to pass the
# arguments in the wrong order. The venue fragment goes first in the WHERE
# clause so that its trailing "AND" joins the remaining predicates; placing it
# last (as before) produced invalid Cypher whenever the filter was enabled.
query = """
MATCH (q:Quanta)
WHERE {venue_filter}EXISTS(q.fos) AND (q.n_citation>{min_total_citations}) AND (q.year>={starting_year} AND q.year <= {ending_year})
WITH q.title as title, q.venue as venue, q.year as year, q.n_citation as n_citation, q.pageRank_2018 as pagerank,
    {cite_counts}
WITH *,
    {cite_flags}
WHERE ({flag_sum})>={min_multiple_count}
RETURN title, venue, year, n_citation, {count_columns}, pagerank, {flag_sum} as sum_bool
LIMIT 50
""".format(
    venue_filter=venue,
    min_total_citations=min_total_citations,
    starting_year=starting_year,
    ending_year=ending_year,
    cite_counts=cite_count_string,
    cite_flags=cite_multiple_string,
    flag_sum=b_string,
    min_multiple_count=min_multiple_count,
    count_columns=c_string,
)

# Echo the generated Cypher, then run it and report the wall-clock time.
print(query)
query_start_time = time.time()
df = graph.run(query).to_data_frame()
query_end_time = time.time()
print("Done in ({:.2f} minutes).".format((query_end_time-query_start_time)/60))

In [None]:
# Show the DataFrame returned by the query in the previous cell
# (that query caps its result with LIMIT 50).
df

In [None]:
# Get citation counts for papers of interest
n_years = 6
# c<i>: number of CITES relationships into q from Quanta nodes whose year is q.year + i.
cite_count_string = "\t"+",\n\t".join(["SIZE((q)<-[:CITES]-(:Quanta {{year: q.year+{}}})) as c{}".format(i,i) for i in range(1,n_years+1)])
# Column list "c1, c2, ..." for the RETURN clause.
c_string = ", ".join(["c{}".format(i) for i in range(1,n_years+1)])

title = "Fog computing and its role in the internet of things"
# The title is sent as a Cypher parameter ($title) instead of being spliced
# into the query text, so titles containing double quotes or backslashes
# cannot break or alter the statement.
query = """
MATCH (q:Quanta {{title: $title}})
WITH q.title as title, q.venue as venue, q.year as year, q.n_citation as n_citation, q.pageRank_2018 as pagerank,
{}
RETURN title, n_citation, {}, pagerank
""".format(cite_count_string, c_string)
df = graph.run(query, title=title).to_data_frame()
df.head()

In [None]:
# Average per-year citation counts for Nature and Science papers
# published between 1950 and 2005 (inclusive).
n_years = 11

# One "SIZE(...) as c<k>" expression per year offset k = 1..n_years; each one
# counts CITES relationships into q from Quanta nodes dated q.year + k.
cite_count_string = "\t" + ",\n\t".join(
    "SIZE((q)<-[:CITES]-(:Quanta {{year: q.year+{}}})) as c{}".format(k, k)
    for k in range(1, n_years + 1)
)
# Plain column list "c1, c2, ..." (not used by the query below).
c_string = ", ".join(
    "c{}".format(k) for k in range(1, n_years + 1)
)
# Aggregated column list "avg(c1), avg(c2), ...".
avg_c_string = ", ".join(
    "avg(c{})".format(k) for k in range(1, n_years + 1)
)

query = """
MATCH (q:Quanta) 
WHERE (q.venue="Nature" OR q.venue="Science") AND (q.year <= 2005 AND q.year >=1950)
WITH q.year as year, q.n_citation as n_citation, q.pageRank_2018 as pagerank,
{}
RETURN avg(year), avg(n_citation), avg(pagerank), {}
""".format(
    cite_count_string,
    avg_c_string,
)

# Run the aggregate query and preview the result.
result_cursor = graph.run(query)
df = result_cursor.to_data_frame()
df.head()

In [None]:
# Show the full DataFrame assigned to `df` by the previous cell's query.
df