# Research Graph - Papers Graph database example
Load JSON data from files and populate the Neo4j graph database using the APOC `apoc.load.json` procedure.

Aland Astudillo - 2023-10-03

In [1]:
from neo4j import GraphDatabase
from neo4j.exceptions import Neo4jError

# Connection settings for the local Neo4j instance (Bolt protocol).
# NOTE(review): the credentials are hard-coded in the notebook; consider
# loading them from the environment before sharing this file.
NEO4J_URI = "bolt://localhost:7687"
NEO4J_AUTH = ("neo4j", "clavecita")

# Driver object from which the sessions used below are opened.
driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)

In [2]:
# Open a single session on the driver; the following cells run all of their
# queries through it.
# NOTE(review): the session is not closed anywhere in the visible cells.
session = driver.session()

In [3]:
# Single-file smoke test: load paper_3.json with apoc.load.json and MERGE one
# PAPER node from it. Both `name` and `doi` are set from value.doi, `code`
# from value.id and `url` from value.url; the node is returned for inspection.
query_paper = """
CALL apoc.load.json("file:///paps/paper_3.json")
YIELD value
WITH value
MERGE (paper:PAPER {name: value.doi, code: value.id, doi: value.doi, url: value.url})
RETURN paper
"""

In [4]:
# Run the single-paper test query on the open session.
results = session.run(query_paper)

In [5]:
# Fetch the records as a list of dicts so the merged PAPER node is shown as
# the cell output.
results.data()

[{'paper': {'code': '10.1142/s0218213019400086',
   'name': '10.1142/s0218213019400086',
   'url': 'http://dx.doi.org/10.1142/s0218213019400086',
   'doi': '10.1142/s0218213019400086'}}]

In [None]:
# Bulk import: for every file paps/paper_<n>.json (n = 1..niter) create the
# PAPER node, then its ORGANISATION and AUTHOR nodes and the relationships
# (PAPER)-[:WRITTEN_BY]->(AUTHOR) and (AUTHOR)-[:IS_PART_OF]->(ORGANISATION).
niter = 501629  # number of paper files to import; use e.g. 2000 for a test run

# The file URL is passed as the $url query parameter instead of being
# formatted into the Cypher text. The query strings are therefore constant:
# they are built once here rather than on every iteration, and the server
# receives the same query text each time (which lets it reuse its plan).

# Create (or match) the PAPER node for one file.
query_paper = """
CALL apoc.load.json($url)
YIELD value
WITH value
MERGE (paper:PAPER {name: value.doi, code: value.id, doi: value.doi, url: value.url})
RETURN paper
"""

# Return only the `author` field so Python can decide whether to go on.
query_author_check = """
CALL apoc.load.json($url)
YIELD value
WITH value.author AS author
RETURN author
"""

# Create (or match) one ORGANISATION per author affiliation.
query_organisation = """
CALL apoc.load.json($url)
YIELD value
WITH value.author AS authors
UNWIND authors AS au
UNWIND au.affiliation as affiliation
MERGE (o:ORGANISATION {name: affiliation.name})
RETURN o
"""

# Create (or match) each AUTHOR, keyed on "given,family", and link it to the
# paper and to its organisation. The PAPER is matched on name = value.id.
query_authors = """
CALL apoc.load.json($url)
YIELD value
WITH value.author AS authors, value.id as code
UNWIND authors AS au
UNWIND au.affiliation as affiliation
MERGE (a:AUTHOR {name: COALESCE(au.given ,"") + ',' + COALESCE(au.family ,"")}) ON CREATE SET a.given = au.given, a.family = au.family, a.affiliation = affiliation.name
MERGE (p:PAPER {name: code})
MERGE (o:ORGANISATION {name: affiliation.name})
MERGE (p)-[:WRITTEN_BY]->(a)
MERGE (a)-[:IS_PART_OF]->(o)
RETURN a, p, o
"""

for i in range(niter):
    file_no = i + 1  # files are numbered from 1
    params = {"url": "file:///paps/paper_{}.json".format(file_no)}

    # add paper
    # consume() runs the query to completion, so a failure is raised here
    # and not at some later call on the same session.
    session.run(query_paper, params).consume()

    # check if author is "no"
    # (the string 'no' appears to be the placeholder used when a paper has
    # no author list - TODO confirm against the data files)
    data = session.run(query_author_check, params).data()
    if not data or data[0]['author'] == 'no':
        # Nothing to link for this paper; an empty result is skipped as well
        # instead of raising IndexError on data[0].
        continue

    # add organisations
    try:
        # Organisations without a name (affiliation.name = null) make the
        # MERGE fail; such papers are skipped.
        session.run(query_organisation, params).consume()
    except Neo4jError:
        # Only server-side query errors are treated as "skip this paper".
        # Connection failures and KeyboardInterrupt propagate, so the loop
        # can be stopped and does not spin on a dead server.
        continue

    # add authors
    try:
        # Authors whose organisation has no name are skipped for the same
        # reason (affiliation.name = null).
        session.run(query_authors, params).consume()
    except Neo4jError:
        # Continue to next iteration.
        continue

    # Progress line, kept in the original output format and overwritten in
    # place via the carriage return.
    print('Iteration {}, File: CALL apoc.load.json("{}")'.format(file_no, params["url"]), end='\r')

Iteration 163159, File: CALL apoc.load.json("file:///paps/paper_163159.json")