# Batches 2 Neo4j
Load batches of JSON data and populate the Neo4j graph database using the APOC `apoc.load.json` procedure.

Aland Astudillo - 2023-10-05

In [5]:
import os

from neo4j import GraphDatabase

# Connection settings come from the environment (NEO4J_URI, NEO4J_USER,
# NEO4J_PASSWORD, NEO4J_DATABASE) so credentials do not have to be edited into
# the notebook. The fallbacks reproduce the original local development setup.
# TODO: drop the password fallback once NEO4J_PASSWORD is set wherever this runs.
driver = GraphDatabase.driver(
    uri=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "clavecita"),
    ),
    database=os.environ.get("NEO4J_DATABASE", "papers3"),
)

In [6]:
# Open a session on the driver; the query cells below call session.run() on it.
session = driver.session()

In [7]:
# Sample load of a single batch file into PAPER nodes.
# The node is merged on `code` only and the remaining fields are SET afterwards:
# MERGE raises an error when a property in its pattern is null, and listing
# title/doi/url in the pattern would create a second node for the same id
# whenever one of those fields differs.
query_paper = """
CALL apoc.load.json("file:///batches/batch_51.json")
YIELD value
WITH value
MERGE (paper:PAPER {code: value.id})
SET paper.name = value.title, paper.doi = value.doi, paper.url = value.url
RETURN paper LIMIT 30
"""

In [4]:
# Run the sample paper query on the open session to load a sample of papers.
# `results` holds whatever session.run() returns for that query.
results = session.run(query_paper)

In [5]:
# Display the returned records; each row is a dict with a single 'paper' entry.
results.data()

[{'paper': {'code': '10.1145/22339.22340',
   'name': ['The information lens: an intelligent system for information sharing in organizations'],
   'url': 'http://dx.doi.org/10.1145/22339.22340',
   'doi': '10.1145/22339.22340'}},
 {'paper': {'code': '10.1108/imds-05-2017-0214',
   'name': ['Understanding adoption of intelligent personal assistants'],
   'url': 'http://dx.doi.org/10.1108/imds-05-2017-0214',
   'doi': '10.1108/imds-05-2017-0214'}},
 {'paper': {'code': '10.3897/biss.2.25268',
   'name': ['Supporting citizen scientists with automatic species identification using deep learning image recognition models'],
   'url': 'http://dx.doi.org/10.3897/biss.2.25268',
   'doi': '10.3897/biss.2.25268'}},
 {'paper': {'code': '10.1108/tqm-10-2019-0250',
   'name': ['New perspectives from technology adoption in senior cohousing facilities'],
   'url': 'http://dx.doi.org/10.1108/tqm-10-2019-0250',
   'doi': '10.1108/tqm-10-2019-0250'}},
 {'paper': {'code': '10.17223/22220836/39/6',
   'name'

In [8]:
# go through the JSON file batches

# Batch window: the loop below loads batch_{startpoint+1}.json up to and
# including batch_{niter}.json, so niter has to match the number of batch files.
startpoint = 21
niter = 51

# Each query_pN below is the tail of a statement; the loop prepends the
# `CALL apoc.load.json(...)` line for the batch file being processed.

# query to add papers
# PAPER nodes are merged on `code` only and the other fields are SET afterwards:
# MERGE raises an error when a property in its pattern is null, and the author
# query further down also identifies papers by `code` alone.
query_p2 = """
YIELD value
WITH value
MERGE (paper:PAPER {code: value.id})
SET paper.name = value.title, paper.doi = value.doi, paper.url = value.url
RETURN paper LIMIT 30
"""

# query to add organisations
# Author entries equal to the string "no" are filtered out, and affiliations
# without a name are skipped, before one ORGANISATION per name is merged.
query_p3 = """
YIELD value
WITH value.author AS authors
UNWIND authors AS au
WITH au
WHERE au<>"no"
UNWIND au.affiliation as affiliation
WITH affiliation.name as name
WHERE name IS NOT NULL
MERGE (o:ORGANISATION {name: name})
RETURN o LIMIT 30
"""

# query to add authors
# Merges an AUTHOR keyed on "given,family", links PAPER -[:WRITTEN_BY]-> AUTHOR,
# and, when the affiliation has a name, AUTHOR -[:IS_PART_OF]-> ORGANISATION.
# NOTE(review): UNWIND over an empty or missing affiliation list yields no rows,
# so such authors would not be merged at all - confirm this is intended.
query_p4 = """
YIELD value
WITH value.author AS authors, value.id as code
UNWIND authors AS au
WITH au, code
WHERE au<>"no"
UNWIND au.affiliation as affiliation
MERGE (a:AUTHOR {name: COALESCE(au.given ,"") + ',' + COALESCE(au.family ,"")}) ON CREATE SET a.given = au.given, a.family = au.family, a.affiliation = affiliation.name
MERGE (p:PAPER {code: code})
MERGE (p)-[:WRITTEN_BY {score: 1}]->(a)
WITH affiliation.name as affiname, p, a
WHERE affiname IS NOT NULL
MERGE (o:ORGANISATION {name: affiname})
MERGE (a)-[:IS_PART_OF {score: 1}]->(o)
RETURN a, p, o LIMIT 30
"""

for i in range(startpoint, niter):
    # Batch files are numbered from 1, hence the offset from the loop index.
    batch_number = i + 1
    query_p1 = 'CALL apoc.load.json("file:///batches/batch_{}.json")'.format(batch_number)
    print('Start Iteration ' + str(batch_number) + ', File: ' + query_p1)

    # add papers
    query_paper = query_p1 + query_p2
    results1 = session.run(query_paper)
    print('Papers ready')

    # add organisations (organisations without a name are skipped)
    query_organisation = query_p1 + query_p3
    results2 = session.run(query_organisation)
    print('Organisations ready')

    # add authors (the organisation link is skipped when the organisation has no name)
    query_authors = query_p1 + query_p4
    results3 = session.run(query_authors)
    print('Authors ready')

    print('End Iteration ' + str(batch_number) + ', File: ' + query_p1)



Iteration 4, File: CALL apoc.load.json("file:///batches/batch_4.json")
Iteration 5, File: CALL apoc.load.json("file:///batches/batch_5.json")
Iteration 6, File: CALL apoc.load.json("file:///batches/batch_6.json")
Iteration 7, File: CALL apoc.load.json("file:///batches/batch_7.json")
Iteration 8, File: CALL apoc.load.json("file:///batches/batch_8.json")
Iteration 9, File: CALL apoc.load.json("file:///batches/batch_9.json")
Iteration 10, File: CALL apoc.load.json("file:///batches/batch_10.json")
Iteration 11, File: CALL apoc.load.json("file:///batches/batch_11.json")
Iteration 12, File: CALL apoc.load.json("file:///batches/batch_12.json")
Iteration 13, File: CALL apoc.load.json("file:///batches/batch_13.json")
Iteration 14, File: CALL apoc.load.json("file:///batches/batch_14.json")
Iteration 15, File: CALL apoc.load.json("file:///batches/batch_15.json")
Iteration 16, File: CALL apoc.load.json("file:///batches/batch_16.json")
Iteration 17, File: CALL apoc.load.json("file:///batches/batch_

KeyboardInterrupt: 

In [16]:
# Display the records of `results1`, i.e. the paper query result from the most
# recent `results1 = session.run(...)` executed in the batch loop.
results1.data()

[{'paper': {'code': '10.1108/jhtt-12-2018-0118',
   'name': ['Artificial intelligence and big data in tourism: a systematic literature review'],
   'url': 'http://dx.doi.org/10.1108/jhtt-12-2018-0118',
   'doi': '10.1108/jhtt-12-2018-0118'}},
 {'paper': {'code': '10.2118/204672-ms',
   'name': ['Geomechanical Properties Estimation Utilizing Artificial Intelligence Prediction Tool'],
   'url': 'http://dx.doi.org/10.2118/204672-ms',
   'doi': '10.2118/204672-ms'}},
 {'paper': {'code': '10.20965/ijat.2023.p0091',
   'name': ['Special Issue on Application of Artificial Intelligence Techniques in Production Engineering'],
   'url': 'http://dx.doi.org/10.20965/ijat.2023.p0091',
   'doi': '10.20965/ijat.2023.p0091'}},
 {'paper': {'code': '10.1142/s0219265921430192',
   'name': ['Research on the Control Method of Unmanned Helicopter Under the Background of Artificial Intelligence'],
   'url': 'http://dx.doi.org/10.1142/s0219265921430192',
   'doi': '10.1142/s0219265921430192'}},
 {'paper': {'c

In [None]:
# Close the session opened earlier in the notebook before shutting down the
# driver, so the session is released explicitly rather than left open.
session.close()
driver.close()  # close the driver object