In [39]:
from py2neo import Graph, Node, Relationship
import glob, os, time
 
graph = Graph('bolt://neo4j-magtwo:7687', auth=('neo4j','myneo'))
#graph = Graph('bolt://localhost:7687', auth=('neo4j', 'password'))

n_nodes = graph.database.primitive_counts['NumberOfNodeIdsInUse']
n_relationships = graph.database.primitive_counts['NumberOfRelationshipIdsInUse']
print("Connected to graph database with {:,} nodes and {:,} relationships!".format
     (n_nodes, n_relationships))

Connected to graph database with 439,366,337 nodes and 648,637,424 relationships!


In [None]:
#graph.delete_all()

## Setup Schema

### Add uniqueness constraints

In [41]:
contraint_node_property_pairs = [('n:Quanta', 'n.id'),
                                 ('t:Tag', 't.name'),
                                 ('a:Author', 'a.id'),
                                 ('o:Organization', 'o.name'),
                                 ('v:Venue', 'v.id'),
                                ('v:Venue', 'v.name')]
for n, p in contraint_node_property_pairs:
    query = "CREATE CONSTRAINT ON ({}) ASSERT {} IS UNIQUE;".format(n,p)
    print(query)
    graph.run(query)

CREATE CONSTRAINT ON (n:Quanta) ASSERT n.id IS UNIQUE;
CREATE CONSTRAINT ON (t:Tag) ASSERT t.name IS UNIQUE;
CREATE CONSTRAINT ON (a:Author) ASSERT a.id IS UNIQUE;
CREATE CONSTRAINT ON (o:Organization) ASSERT o.name IS UNIQUE;
CREATE CONSTRAINT ON (v:Venue) ASSERT v.id IS UNIQUE;
CREATE CONSTRAINT ON (v:Venue) ASSERT v.name IS UNIQUE;


### Create indices

In [5]:
indices_to_create = [':Quanta(year)', ':Quanta(lang)', ':Quanta(keywords)', 
                     ':Quanta(title)', ':Quanta(venue)', ':Quanta(doctype)',
                     ':Venue(normalizedName)', ':Year(value)',
                     ':Author(name)', ':Author(normalizedName)']
for index in indices_to_create:
    query = "CREATE INDEX ON {};".format(index)
    print(query)
    graph.run(query)

CREATE INDEX ON :Quanta(year);
CREATE INDEX ON :Quanta(lang);
CREATE INDEX ON :Quanta(keywords);
CREATE INDEX ON :Quanta(title);
CREATE INDEX ON :Quanta(venue);
CREATE INDEX ON :Quanta(doctype);
CREATE INDEX ON :Venue(name);
CREATE INDEX ON :Year(value);
CREATE INDEX ON :Author(name);
CREATE INDEX ON :Author(normalizedName);


## Import Data

### Setup directories etc

In [37]:
# Set data directory. In Neo4j 3.5+, the directory is automatically set to /import
data_dir = '' 
#data_dir = 'file:/Users/timholdsworth/code/scaling-science/notebooks/data/'

venues_file = data_dir + 'magtwo/mag_venues.txt'
papers_files = [data_dir + 'magtwo/mag_papers_{}.txt'.format(i) for i in range(11)]
authors_files = [data_dir + 'magtwo/mag_authors_{}.txt'.format(i) for i in range(13)]

v1_papers_files = [data_dir + 'magone/mag_papers_{}.txt'.format(i) for i in range(167)]
v1_papers_files_clean = [data_dir + 'magone/mag_papers_{}_clean.txt'.format(i) for i in range(167)]

print("Data directory set to `{}`.".format(data_dir))

def run_query(query, graph, print_query=False, run_query=True, print_only=False):
    if print_only: 
        print_query = True
        run_query = False
    start_time = time.time()
    if print_query:
        print(query)
    if run_query:
        graph.run(query)
    end_time = time.time()
    seconds_elapsed = end_time-start_time
    minutes_elapsed = (end_time-start_time)/60
    print("Query completed in {:.2f} seconds.".format(seconds_elapsed))

Data directory set to ``.


### Import venues as nodes


In [42]:
query = """ 
CALL apoc.periodic.iterate(
"CALL apoc.load.json('{}') YIELD value AS q RETURN q",
"CREATE (v:Venue {{id:q.id, journalId:q.JournalId, conferenceId:q.ConferenceId,
    name:q.DisplayName, normalizedName:q.NormalizedName}})", 
{{batchSize:10000, iterateList:true, parallel:true}});
""".format(venues_file)

run_query(query, graph)

Query completed in 2.22 seconds.


### Create year nodes

In [8]:
query = """
UNWIND range(1800, 2020) as yr
MERGE (y:Year {value: yr})
"""
run_query(query, graph)

Query completed in 0.44 seconds.


### Import quanta

In [9]:
#for file_name in papers_files:
for file_name in papers_files:
    print('Importing {}'.format(file_name))
    query = """
    CALL apoc.periodic.iterate(
    "CALL apoc.load.json('{}') YIELD value AS q",
    "CREATE (p:Quanta {{id:q.id, title:q.title, year:q.year, keywords:q.keywords,
        numCitations:q.n_citation, docType:q.doc_type, language:q.lang, 
        publisher:q.publisher, doi:q.doi, pdf:q.pdf, abstract:q.abstract}})",
    {{batchSize:10000, iterateList:true, parallel:true}})
    """.format(file_name)
    run_query(query, graph)
    
# CREATE (p:Quanta {{id:q.id, title:q.title, year:q.year, keywords:q.keywords,
#        numCitations:q.n_citation, docType:q.doc_type, language:q.lang, 
#        publisher:q.publisher, doi:q.doi, pdf:q.pdf, abstract:q.abstract

#MERGE (p:Quanta {{id:q.id}}) 
#        ON CREATE SET title=q.title, year=q.year, keywords=q.keywords,
#        numCitations=q.n_citation, docType=q.doc_type, language=q.lang, 
#        publisher=q.publisher, doi=q.doi, pdf=q.pdf, abstract=q.abstract

Importing magtwo/mag_papers_0.txt
Query completed in 627.13 seconds.
Importing magtwo/mag_papers_1.txt
Query completed in 686.22 seconds.
Importing magtwo/mag_papers_2.txt
Query completed in 688.79 seconds.
Importing magtwo/mag_papers_3.txt
Query completed in 692.98 seconds.
Importing magtwo/mag_papers_4.txt
Query completed in 674.89 seconds.
Importing magtwo/mag_papers_5.txt
Query completed in 906.51 seconds.
Importing magtwo/mag_papers_6.txt
Query completed in 789.81 seconds.
Importing magtwo/mag_papers_7.txt
Query completed in 878.28 seconds.
Importing magtwo/mag_papers_8.txt
Query completed in 1607.13 seconds.
Importing magtwo/mag_papers_9.txt
Query completed in 1362.97 seconds.
Importing magtwo/mag_papers_10.txt
Query completed in 45.50 seconds.


### Create relationships between quanta and year

In [None]:
for file_name in papers_files:
    query = """
    CALL apoc.periodic.iterate(
    "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
    "MATCH (p:Quanta {{id:q.id}})
    WITH p
    MATCH (y:Year {{value: p.year}})
    CREATE (p)-[:PUBLISHED_IN_YEAR]->(y)",
    {{batchSize:10000, iterateList:true, parallel:true}})
    """.format(file_name)
    run_query(query, graph)
    
# Changing q to p in value:p.year because year value on the node should be same
# as the year value on corresponding node in the map

### Create relationships between Quanta and Venue

In [43]:
for file_name in papers_files:
    print('Importing {}'.format(file_name))
    query = """
    CALL apoc.periodic.iterate(
    "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
    "MATCH (p:Quanta {{id:q.id}})
    WITH q, p
    UNWIND q.venue as venue
    WITH distinct q, p, venue
    MERGE (v:Venue {{name: venue.raw}})
    WITH p, v
    CREATE (p)-[:PUBLISHED_IN_VENUE]->(v)",
    {{batchSize:10000, iterateList:true, parallel:true}})
    """.format(file_name)
    run_query(query, graph, print_only=False)
    

Importing magtwo/mag_papers_0.txt

    CALL apoc.periodic.iterate(
    "CALL apoc.load.json('magtwo/mag_papers_0.txt') YIELD value AS q RETURN q",
    "MATCH (p:Quanta {id:q.id})
    WITH q, p
    UNWIND q.venue as venue
    WITH distinct q, p, venue
    MERGE (v:Venue {name: venue.raw})
    WITH p, v
    CREATE (p)-[:PUBLISHED_IN_VENUE]->(v)",
    {batchSize:10000, iterateList:true, parallel:true})
    
Query completed in 0.00 seconds.
Importing magtwo/mag_papers_1.txt

    CALL apoc.periodic.iterate(
    "CALL apoc.load.json('magtwo/mag_papers_1.txt') YIELD value AS q RETURN q",
    "MATCH (p:Quanta {id:q.id})
    WITH q, p
    UNWIND q.venue as venue
    WITH distinct q, p, venue
    MERGE (v:Venue {name: venue.raw})
    WITH p, v
    CREATE (p)-[:PUBLISHED_IN_VENUE]->(v)",
    {batchSize:10000, iterateList:true, parallel:true})
    
Query completed in 0.00 seconds.
Importing magtwo/mag_papers_2.txt

    CALL apoc.periodic.iterate(
    "CALL apoc.load.json('magtwo/mag_papers_2.txt')

In [23]:
query = """
call apoc.periodic.iterate(
  'MATCH (q:Quanta)-[r:PUBLISHED_IN_VENUE]-(v:Venue)
   RETURN r',
  'DELETE r',
  {batchSize:100000, iterateList:true})
"""

run_query(query, graph)

Query completed in 1468.85 seconds.


### Import Authors

In [11]:
# updated version (experimental)
for file_name in authors_files: 
    print('Importing {}'.format(file_name))
    query = """
    CALL apoc.periodic.iterate(
    "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
    "MERGE (a:Author {{id:q.id, name:q.name}})
    SET a.normalizedName=q.normalized_name, a.position=q.position, a.lastAffiliation=q.org,
            a.numCitations=q.n_citation, a.numPublications=q.n_pubs, a.hIndex=q.h_index",
    {{batchSize:10000, iterateList:true, parallel:true}});
    """.format(file_name)
    run_query(query, graph)

Importing magtwo/mag_authors_0.txt
Query completed in 774.70 seconds.
Importing magtwo/mag_authors_1.txt
Query completed in 876.17 seconds.
Importing magtwo/mag_authors_2.txt
Query completed in 846.06 seconds.
Importing magtwo/mag_authors_3.txt
Query completed in 885.65 seconds.
Importing magtwo/mag_authors_4.txt
Query completed in 907.23 seconds.
Importing magtwo/mag_authors_5.txt
Query completed in 862.75 seconds.
Importing magtwo/mag_authors_6.txt
Query completed in 938.40 seconds.
Importing magtwo/mag_authors_7.txt
Query completed in 930.07 seconds.
Importing magtwo/mag_authors_8.txt
Query completed in 947.97 seconds.
Importing magtwo/mag_authors_9.txt
Query completed in 1157.62 seconds.
Importing magtwo/mag_authors_10.txt
Query completed in 1295.23 seconds.
Importing magtwo/mag_authors_11.txt
Query completed in 1342.64 seconds.
Importing magtwo/mag_authors_12.txt
Query completed in 1094.48 seconds.


In [12]:
# New - create authored relationships
for file_name in authors_files: 
    print('Creating authored relationships for {}'.format(file_name))
    query = """
    CALL apoc.periodic.iterate(
    "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
    "MATCH (a:Author {{id:q.id}})
    WITH q, a
    UNWIND q.pubs as pubs
    WITH a, pubs
    MATCH (p:Quanta {{id:pubs.i}})
    WITH a, p, pubs
    MERGE (a)-[r:AUTHORED {{order: pubs.r}}]->(p)",
    {{batchSize:10000, iterateList:true, parallel:false}})
    """.format(file_name)
    run_query(query, graph)
    #graph.run(query)


Creating authored relationships for magtwo/mag_authors_0.txt
Query completed in 12384.75 seconds.
Creating authored relationships for magtwo/mag_authors_1.txt
Query completed in 5023.32 seconds.
Creating authored relationships for magtwo/mag_authors_2.txt
Query completed in 3520.30 seconds.
Creating authored relationships for magtwo/mag_authors_3.txt
Query completed in 3387.49 seconds.
Creating authored relationships for magtwo/mag_authors_4.txt
Query completed in 3514.79 seconds.
Creating authored relationships for magtwo/mag_authors_5.txt
Query completed in 2603.89 seconds.
Creating authored relationships for magtwo/mag_authors_6.txt
Query completed in 2428.73 seconds.
Creating authored relationships for magtwo/mag_authors_7.txt
Query completed in 2322.33 seconds.
Creating authored relationships for magtwo/mag_authors_8.txt
Query completed in 1601.37 seconds.
Creating authored relationships for magtwo/mag_authors_9.txt
Query completed in 1805.25 seconds.
Creating authored relationshi

In [52]:
query = """
MATCH (a:Author {id:'1000007673'}) RETURN (a)
""" 
a = graph.run(query).to_data_frame()
a.iloc[0].head()

a    {'name': '阚乃庆', 'id': '1000007673', 'normalize...
Name: 0, dtype: object

In [None]:
# Old
for file_name in papers_files:
    query = """
    CALL apoc.periodic.iterate(
    "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
    "MATCH (p:Quanta {{id:q.id}})
    WITH p
    MATCH (y:Year {{value: p.year}})
    CREATE (p)-[:PUBLISHED_IN_YEAR]->(y)",
    {{batchSize:10000, iterateList:true, parallel:true}})
    """.format(file_name)
    run_query(query, graph)
    
# Changing q to p in value:p.year because year value on the node should be same
# as the year value on corresponding node in the map

In [None]:
## USE TO UPDATE STUFF THAT WAS WRONG

for file_name in authors_files: 
    query = """
    CALL apoc.periodic.iterate(
    "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
    "MATCH (a:Author {{id:q.id}})
    SET a.lastAffiliation=q.org, a.name:q.name, a.normalizedName=q.normalized_name",
    {{batchSize:10000, iterateList:true, parallel:true}});
    """.format(file_name)
    run_query(query, graph)

### Create relationships between Authors and Quanta

In [4]:
# updated version (experimental)
for file_name in authors_files: 
    query = """
    CALL apoc.periodic.iterate(
    "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
    "MATCH (a:Author {{id:q.id}})
    WITH q, a
    UNWIND q.pubs as pubs
    MATCH (p:Quanta {{id:pubs.i}})
    MERGE (a)-[r:AUTHORED]->(p)
    ON CREATE SET r.order = CASE WHEN pubs.r IS NULL THEN NULL ELSE pubs.r END",
    {{batchSize:10000, iterateList:true, parallel:true}});
    """.format(file_name)
    run_query(query, graph)

Query completed in 458.34 seconds.
Query completed in 188.45 seconds.
Query completed in 182.63 seconds.
Query completed in 230.54 seconds.
Query completed in 318.06 seconds.
Query completed in 298.09 seconds.
Query completed in 292.88 seconds.
Query completed in 176.00 seconds.
Query completed in 134.22 seconds.
Query completed in 216.26 seconds.
Query completed in 223.89 seconds.
Query completed in 120.51 seconds.


In [None]:
# VERSION 2
for file_name in papers_files[0:1]:
    query = """
    CALL apoc.periodic.iterate(
    "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
    "MATCH (p:Quanta {{id:q.id}})
    WITH p, q, range(0, size(q.authors)) as is
    UNWIND is as i
    MATCH (a:Author)
    WHERE 
        CASE WHEN ((a.id IS NULL) OR (q.authors[i].id IS NULL))
        THEN (a.name=q.authors[i].name)
        ELSE (a.id=q.authors[i].id)
        END
    WITH p, a, i
    MERGE (p)-[r:AUTHORED]->(a)
    SET r.order = i",
    {{batchSize:10000, iterateList:true, parallel:true}})
    """.format(file_name)
    run_query(query, graph)

### Import Organizations

In [None]:
# Add organization nodes
for file_name in authors_files: 
    query = """
    CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
        "WHERE q.org is not null
        MERGE (o:Organization {{name: q.org}})",
        {{batchSize:10000, iterateList:true, parallel:true}});
    """.format(data_dir + file_name)
    run_query(query, graph)

### Add relationships between Authors and Org

In [None]:
# Add relationships between authors and organizations
for file_name in authors_files: 
    query = """
    CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
        "MATCH (a:Author {{id: q.id}})
        MATCH (o:Organization {{name: q.org}})
        WITH q, a, o
        WHERE q.org is not null
        MATCH (o:Organization {{name: q.org}})
        MERGE (a)-[:AFFILIATED_WITH]->(o)",
        {{batchSize:10000, iterateList:true, parallel:true}});
    """.format(data_dir + file_name)
    run_query(query, graph)

### Import Tags

In [None]:
# Add tags as nodes
for file_name in authors_files: 
    query = """
    CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
        "UNWIND q.tags as tags
        MERGE (t:Tag {{name: tags.t}})",
        {{batchSize:10000, iterateList:true, parallel:true}})
    """.format(data_dir + file_name)
    run_query(query, graph)

### Add relationships between Authors and Tags

In [None]:
# Add relationships between authors and tags
for file_name in authors_files: 
    query = """
    CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
        "UNWIND q.tags as tags
        WITH q, tags
        MATCH (a:Author {{id: q.id}})
        MATCH (t:Tag {{name: tags.t}})
        WITH a, t, i
        MERGE (a)-[r:HAS_TAG]->(t)
        SET r.weight = a.weight",
        {{batchSize:10000, iterateList:true, parallel:true}});
    """.format(data_dir + file_name)
    run_query(query, graph)

### Import citation data from MAGv1

In [38]:
# Edit this so that it matches the ids from magv1 with the id properties on existing 
for file_name in v1_papers_files_clean:
    query = """
    CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
        "MATCH (p:Quanta {{title: q.title}})
        UNWIND q.refs as ref
        WITH p, ref
        MATCH (b:Quanta {{title: ref}})
        CREATE (p)-[:CITES]->(b)",
        {{batchSize:10000, iterateList:true, parallel:true}});
    """.format(data_dir + file_name)
    
    run_query(query, graph)

ClientError: Procedure Call Failed: Failed to invoke procedure `apoc.periodic.iterate`: Caused by: com.fasterxml.jackson.core.JsonParseException: Unexpected character ('e' (code 101)): was expecting comma to separate Array entries
 at [Source: (apoc.export.util.CountingInputStream); line: 15, column: 3441]

### Add coauthor relationships

In [None]:
query = """
CALL apoc.periodic.iterate(
    "MATCH (q:Quanta) WHERE size((q)<-[:AUTHORED]-()) > 1 RETURN q",
    "WITH [(q)<-[:AUTHORED]-(a) | a] as coAuthors
    UNWIND coAuthors as first
    UNWIND coAuthors as second
    WITH first, second
    WHERE id(first) < id(second)
    MERGE (first)-[r:COAUTHOR]-(second)
    SET r.strength = CASE WHEN r.strength IS NULL THEN 1 ELSE r.strength + 1 END",
{batchSize:10000, iterateList:true, parallel:true});
"""
run_query(query, graph, print_only=False)

# OLD IMPORTS BELOW HERE

### Import quanta

In [None]:
#for file_name in papers_files:
for file_name in papers_files:
    print('Importing {}'.format(file_name))
    query = """
    CALL apoc.periodic.iterate(
    "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
    "MERGE (p:Quanta {{id:q.id, title:q.title, year:q.year, keywords:q.keywords,
        numCitations:q.n_citation, docType:q.doc_type, language:q.lang, 
        publisher:q.publisher, doi:q.doi, pdf:q.pdf, abstract:q.abstract}})",
    {{batchSize:10000, iterateList:true, parallel:true}})
    """.format(file_name)
    run_query(query, graph)
    

### Delete relationships between Quanta and Venue

In [None]:
query = """
MATCH (:Quanta)-[r:PUBLISHED_IN_VENUE]-(:Venue) 
DELETE r
"""
run_query(query, graph)

### Import quanta, authors, and add relationships to author and venue nodes

In [None]:
for file_name in papers_files:
    query = """
    CALL apoc.periodic.iterate(
    "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
    "UNWIND q.id as id
    CREATE (p:Quanta {{id:id, title:q.title, year:q.year, keywords:q.keywords,
        numCitations:q.n_citation, docType:q.doc_type, language:q.lang, 
        publisher:q.publisher, doi:q.doi, pdf:q.pdf, abstract:q.abstract}})
    WITH q, p
    MATCH (y:Year {{value: q.year}})
    CREATE (p)-[:PUBLISHED_IN_YEAR]->(y)
    WITH q, p
    UNWIND q.venue as venue
    MATCH (v:Venue {{name: venue.raw }})
    CREATE (p)-[:PUBLISHED_IN_VENUE]->(v)
    WITH q, p
    UNWIND range(0, size(q.authors)-1) as i
    MERGE (a:Author {{id:q.authors[i].id, name:q.authors[i].name}})
    CREATE (a)-[:AUTHORED {{rank:i}}]->(p)",
    {{batchSize:10000, iterateList:true, parallel:false}});
    """.format(file_name)
    run_query(query, graph, print_only=False)

### Import Author, Tag, and Organization nodes and relationships between them

In [None]:
# Import authors and make relationships between authors and quanta
for file_name in authors_files: 
    query = """
    CALL apoc.periodic.iterate(
    "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
    "MERGE (a:Author {{id:q.id, name:q.name}})
        SET a.normalizedName=q.normalized_name, a.position=q.position, 
            a.numCitations=q.n_citation, a.numPublications=q.n_pubs, a.hIndex=q.h_index)
    WITH q, a
    UNWIND q.pubs as pubs
    MATCH (p:Quanta {{id:pubs.i}})
    MERGE (a)-[r:AUTHORED]->(p)
    SET  rrank=pubs.r",
    {{batchSize:10000, iterateList:true, parallel:true}});
    """.format(data_dir + file_name)
    run_query(query, graph)

In [None]:
# Add organization nodes
for file_name in authors_files: 
    query = """
    CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
        "CREATE (o:Organization {{name: q.org}})",
        {{batchSize:10000, iterateList:true, parallel:true}});
    """.format(data_dir + file_name)
    run_query(query, graph)

In [None]:
# Add relationships between authors and organizations
for file_name in authors_files: 
    query = """
    CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
        "MATCH (a:Author {{normalizedName: q.normalized_name}})
        MATCH (o:Organization {{name: q.org}})
        WHERE q.org is not null
        CREATE (a)-[:AFFILIATED_WITH]->(o)",
        {{batchSize:10000, iterateList:true, parallel:true}});
    """.format(data_dir + file_name)
    run_query(query, graph)

In [None]:
# Add tags as nodes
for file_name in authors_files: 
    query = """
    CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
        "UNWIND q.tags as tags
        MERGE (t:Tag {{name: tags.t}})",
        {{batchSize:10000, iterateList:true, parallel:true}})
    """.format(data_dir + file_name)
    run_query(query, graph)

In [None]:
# Add relationships between authors and tags
for file_name in authors_files: 
    query = """
    CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
        "UNWIND q.tags as tags
        MATCH (a:Author {{normalizedName: q.normalized_name}})
        MATCH (t:Tag {{name: tags.t}})
        CREATE (a)-[:HAS_TAG {{weight: tags.weight}}]->(t)",
        {{batchSize:10000, iterateList:true, parallel:true}});
    """.format(data_dir + file_name)
    run_query(query, graph)

### Import Citation Data from MAGv1 Data

In [None]:
# Edit this so that it matches the ids from magv1 with the id properties on existing 
for file_name in v1_papers_files:
    query = """
    CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
        "MATCH (a:Quanta {{title: q.title}})
        WITH a
        UNWIND q.refs as ref
        MATCH (b:Quanta {{title: ref}})
        CREATE (a)-[:CITES]->(b)",
        {{batchSize:10000, iterateList:true, parallel:true}});
    """.format(data_dir + file_name)
    
run_query(query, graph)

### Add Coauthor Relationships

In [None]:
query = """
CALL apoc.periodic.iterate(
    "MATCH (q:Quanta) WHERE size((q)<-[:AUTHORED]-()) > 1 RETURN q",
    "WITH [(q)<-[:AUTHORED]-(a) | a] as coAuthors
    UNWIND coAuthors as first
    UNWIND coAuthors as second
    WITH first, second
    WHERE id(first) < id(second)
    MERGE (first)-[r:COAUTHOR]-(second)
    SET r.strength = CASE WHEN r.strength IS NULL THEN 1 ELSE r.strength + 1 END",
{batchSize:10000, iterateList:true, parallel:true});
"""
run_query(query, graph, print_only=False)