In [67]:
import numpy as np

coordinates = np.ones([1000, 2])

def translate (x,y):
    dx = 5
    dy = 5
    return x + dx, y + dy

for i in range(0, 100):
    for j in range(0, 100):
        (x, y) = translate(i, j)
        np.append(coordinates, translate(x, y), axis=None)

print(coordinates)

[[ 1.  1.]
 [ 1.  1.]
 [ 1.  1.]
 ..., 
 [ 1.  1.]
 [ 1.  1.]
 [ 1.  1.]]


In [None]:
co

## Setup

## Connect to graph

In [3]:
from py2neo import Graph, Node, Relationship
import glob, os, time
 
graph = Graph('bolt://dev_neo4j:7687', auth=('neo4j','myneo'))

n_nodes = graph.database.primitive_counts['NumberOfNodeIdsInUse']
n_relationships = graph.database.primitive_counts['NumberOfRelationshipIdsInUse']
print("Connected to graph database with {:,} nodes and {:,} relationships!".format
     (n_nodes, n_relationships))

Connected to graph database with 255,334,318 nodes and 12,397,883 relationships!


## Setup workspace

In [68]:
## In Neo4j 3.5+, the directory is automatically set to /import
data_dir = '' 
scipy_data_dir = '/tmp/data/'

print("Neo4j data directory set to `{}`.".format(data_dir))
print("SciPy data directory set to `{}`.".format(scipy_data_dir))

# MAG files
mag_venues_file = data_dir + 'magtwo/mag_venues.txt'

n_mag_papers_files = len(glob.glob(scipy_data_dir + 'magtwo/mag_papers_*.txt'))
mag_papers_files = [data_dir + 'magtwo/mag_papers_{}.txt'.format(i) for i in range(n_mag_papers_files)]

n_mag_authors_files = len(glob.glob(scipy_data_dir + 'magtwo/mag_authors_*.txt'))
mag_authors_files = [data_dir + 'magtwo/mag_authors_{}.txt'.format(i) for i in range(n_mag_authors_files)]

n_magv1_papers_files = len(glob.glob(scipy_data_dir + 'magone/mag_papers_*.txt'))
mag_v1_papers_files = [data_dir + 'magone/mag_papers_{}.txt'.format(i) for i in range(n_magv1_papers_files)]

print("\nMAG files:")
print("\tVenue files: {}".format(1))
print("\tPapers files: {}".format(n_mag_papers_files))
print("\tAuthors files: {}".format(n_mag_authors_files))
print("\tMAGv1 papers files: {}".format(n_magv1_papers_files))

# AMner files
aminer_venues_file = data_dir + 'aminertwo/aminer_venues.txt'

n_aminer_papers_files = len(glob.glob(scipy_data_dir + 'aminertwo/aminer_papers_*.txt'))
aminer_papers_files = [data_dir + 'aminertwo/aminer_papers_{}.txt'.format(i) for i in range(n_aminer_papers_files)]

n_aminer_authors_files = len(glob.glob(scipy_data_dir + 'aminertwo/aminer_authors_*.txt'))
aminer_authors_files = [data_dir + 'aminertwo/aminer_authors_{}.txt'.format(i) for i in range(n_aminer_authors_files)]

n_aminer_v1_papers_files = len(glob.glob(scipy_data_dir + 'aminerone/aminer_papers_*.txt'))
aminer_v1_papers_files = [data_dir + 'aminerone/aminer_papers_{}.txt'.format(i) for i in range(n_aminer_v1_papers_files)]
      
print("\nAMiner files:")
print("\tVenue files: {}".format(1))
print("\tPapers files: {}".format(n_aminer_papers_files))
print("\tAuthors files: {}".format(n_aminer_authors_files))
print("\tAMinerv1 papers files: {}".format(n_aminer_v1_papers_files))

# Linking pairs
venue_linking_pairs = 'magtwo/venue_linking_pairs.txt'
paper_linking_pairs = 'magtwo/paper_linking_pairs.txt'
author_linking_pairs = 'magtwo/author_linking_pairs.txt'

def run_query(query, graph, print_query=False, run_query=True, print_only=False):
    if print_only: 
        print_query = True
        run_query = False
    start_time = time.time()
    if print_query:
        print(query)
    if run_query:
        graph.run(query)
    end_time = time.time()
    seconds_elapsed = end_time-start_time
    minutes_elapsed = (end_time-start_time)/60
    print("Query completed in {:.2f} seconds.".format(seconds_elapsed))

top_5 = ['Cell', 'Nature', 'Nature Biotechnology','Proceedings of the National Academy of Sciences of the United States of America','Science']
top_10 = ['Cell', 'Nature', 'Nature Biotechnology','Proceedings of the National Academy of Sciences of the United States of America','Science', 'Journal of the American Chemical Society', 'JAMA', 'The New England Journal of Medicine', 'Nature Genetics', 'Neuron']
top_42 = ['Angewandte Chemie','Blood','Cancer Cell','Cancer Discovery','Cancer Research','Cell','Cell Host & Microbe','Cell Metabolism','Cell Stem Cell','Chemistry & Biology','The EMBO Journal','Genes & Development','Immunity','Journal of Neurology','Journal of the American Chemical Society','JAMA','Journal of Biological Chemistry','Journal of Cell Biology','Journal of Clinical Investigation','Journal of Experimental Medicine','Journal of Medicinal Chemistry','The Lancet','Nature Cell Biology','Nature Chemical Biology','Nature Chemistry','Nature Medicine','Nature Methods','Nature','Nature Biotechnology','The New England Journal of Medicine','Neuron','Nature Genetics','Nature Immunology','Nature Neuroscience','Nature Structural & Molecular Biology','PLOS Biology','PLOS Genetics','PLOS Pathogens','Proceedings of the National Academy of Sciences of the United States of America','Science Signaling','Science Translational Medicine','Science']
top_42_mag_ids = ['172573765','168778273','3880285','103870658','49861241','200071133','106963461','139253143','151741590','175814247','67393510','2486275289','41063453','24207032','177533077','96410829','110447773','2298632','203256638','202193212','154343897','45757444','162030435','62468778','128124174','2004986','199671312','85134406','111155417','125754415','127916151','140251998','207313999','137905309','168522863','114430552','25671836','51309854','155937366','22947807','127827428','137773608']
top_42_aminer_ids = ['5451a5c4e0cf0b02b5f3821a','5451a5c5e0cf0b02b5f396be','5451a5c6e0cf0b02b5f3984f','5451a5c4e0cf0b02b5f38892','5451a5c4e0cf0b02b5f38092','5451a5c5e0cf0b02b5f39630','5451a5c7e0cf0b02b5f3ac13','5451a57ae0cf0b02b5f2dd9f','5451a5c4e0cf0b02b5f380b3','5451a5c4e0cf0b02b5f3935a','5451a5c4e0cf0b02b5f3844b','5451a5c6e0cf0b02b5f3986c','5451a5c5e0cf0b02b5f396fe','5451a5c4e0cf0b02b5f381da','5451a5c4e0cf0b02b5f38910','5451a5c4e0cf0b02b5f38093','5451a5c9e0cf0b02b5f3bcf1','5451a5c4e0cf0b02b5f388fa','5451a5c6e0cf0b02b5f3977e','5451a5c4e0cf0b02b5f38030','5451a5c4e0cf0b02b5f39329','5451a5c4e0cf0b02b5f38771','5451a5c4e0cf0b02b5f37fc6','5451a5c5e0cf0b02b5f3967a','5451a5c4e0cf0b02b5f384ad','5451a5c4e0cf0b02b5f39454','5451a5c4e0cf0b02b5f38097','5451a5c4e0cf0b02b5f3827c','5451a5c6e0cf0b02b5f39776','5451a5c4e0cf0b02b5f37c20','5451a57ae0cf0b02b5f2de43','5451a5c6e0cf0b02b5f3978e','5451a5c4e0cf0b02b5f3886a','5451a5c4e0cf0b02b5f37fc0','5451a5c5e0cf0b02b5f3970e','5451a5c5e0cf0b02b5f396c9','5451a5c5e0cf0b02b5f3973f','5451a5c4e0cf0b02b5f384c5','5451a5c6e0cf0b02b5f39887','5451a5c4e0cf0b02b5f387a9','5451a5c4e0cf0b02b5f38091','5451a5c4e0cf0b02b5f37dd5']
top_42_ids = top_42_mag_ids + top_42_aminer_ids

Neo4j data directory set to ``.
SciPy data directory set to `/tmp/data/`.

MAG files:
	Venue files: 1
	Papers files: 11
	Authors files: 13
	MAGv1 papers files: 167

AMiner files:
	Venue files: 1
	Papers files: 15
	Authors files: 20
	AMinerv1 papers files: 155


# Import functions

## Constraints and indices

In [None]:
def add_constraints(print_only=False):
    contraint_pairs = [('n:Quanta', 'n.id'),
                       ('n:Quanta', 'n.idv1'), 
                       ('a:Author', 'a.id'),
                       ('v:Venue', 'v.id')]
    
    for n, p in contraint_pairs:
        query = "CREATE CONSTRAINT ON ({}) ASSERT {} IS UNIQUE;".format(n,p)
        run_query(query, graph, print_only=print_only)
        
def add_indices(print_only=False):
    indices = [':Quanta(cleanName)',
               ':Venue(cleanName)', 
               ':Venue(matchingId)',
               ':Year(value)',
               ':Author(cleanName)',
               ':Organization(cleanName)']
    
    for index in indices:
        query = "CREATE INDEX ON {};".format(index)
        run_query(query, graph, print_only=print_only)

## (:Year)

In [None]:
def add_years(min_year, max_year, print_only=False):
    query = """
    UNWIND range({}, {}) as yr
    MERGE (y:Year {{value: yr}})
    """.format(min_year, max_year)
    run_query(query, graph, print_only=print_only)

## (:Venue)

In [None]:
def add_venues(venues_file, print_only=False):
    query = """ 
    CALL apoc.load.json('{}') YIELD value AS q
    CREATE (v:Venue {{id:q.id}})
        SET v.journalId = q.JournalId, 
            v.conferenceId = q.ConferenceId, 
            v.matchingId = coalesce(q.JournalId, q.ConferenceId, q.id),
            v.name = q.DisplayName, 
            v.normalizedName = q.NormalizedName,
            v.cleanName = apoc.text.clean(q.DisplayName);
    """.format(venues_file)
    run_query(query, graph, print_only=print_only)

## (:Quanta)

In [None]:

def add_quanta(papers_files, print_only=False):
    for file_name in papers_files:
        print('Importing {}'.format(file_name))
        query = """
        CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
        "WHERE 
            (q IS NOT NULL) AND 
            (size(q.title) <= 3000) AND
            exists(q.venue.id) AND
            (q.venue.id IN {})
         MERGE (p:Quanta:Aminer {{id:q.id}})
            SET p.title = q.title, 
                p.cleanName = apoc.text.clean(q.title),
                p.year = q.year, 
                p.numCitations = q.n_citation, 
                p.docType = q.doc_type, 
                p.language = q.lang, 
                p.publisher = q.publisher, 
                p.doi = q.doi, 
                p.pdf = q.pdf, 
                p.abstract = q.abstract;",
        {{batchSize:10000, iterateList:true, parallel:true}});
        """.format(file_name, top_42_aminer_ids)

        run_query(query, graph, print_only=print_only)

## (:Quanta {idv1})

In [None]:
def add_v1_ids(v1_papers_files, print_only=False):
    for file_name in v1_papers_files:
        query = """
        CALL apoc.periodic.iterate(
            "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
            "MATCH (p:Quanta:Aminer {{cleanName: apoc.text.clean(q.title)}})
             SET p.idv1 = q.id",
            {{batchSize:10000, iterateList:true, parallel:true}});
        """.format(data_dir + file_name, top_42)
        run_query(query, graph, print_only=print_only)

In [None]:
def merge_quanta_on_cleanname(print_only=False):
    query = """
    CALL apoc.periodic.iterate(
        "MATCH (q:Quanta:Aminer) 
         WHERE EXISTS(q.cleanName)
         WITH 
             q.cleanName as cleanName, 
             q.year as year, 
             q.venue as venue,
             COLLECT(q) as nodeList, 
             count(*) as count
         WHERE count>=2
         RETURN nodeList",
        "CALL apoc.refactor.mergeNodes(nodeList, 
         {properties:'discard', mergeRels:true})
         YIELD node
         RETURN 'none'",
        {batchSize:10000, iterateList:true, parallel:false});
    """
    run_query(query, graph, print_only=print_only)

## (:Quanta)-[:PUBLISHED_IN]->(:Year)

In [None]:
def add_quanta_year_edges(papers_files, print_only=False):
    for file_name in papers_files:
        query = """
        CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
        "WHERE 
            (q IS NOT NULL) AND 
            (size(q.title) <= 3000) AND
            exists(q.venue.id) AND
            (q.venue.id IN {})
         MATCH (p:Quanta:Aminer {{id:q.id}})
         MATCH (y:Year {{value: q.year}})
         CREATE (p)-[:PUBLISHED_IN]->(y)",
        {{batchSize:10000, iterateList:true, parallel:false}})
        """.format(file_name, top_42_ids)
        run_query(query, graph, print_only=print_only)

## (:Quanta)-[:PUBLISHED_IN]->(:Venue)

In [None]:
def add_quanta_venue_edges(papers_files, print_only=False):
    for file_name in papers_files:
        print('Processing {}'.format(file_name))

        # First run on all venues WITH id's 
        query = """
        CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') 
         YIELD value AS q 
         RETURN q",
        "WHERE 
            (q IS NOT NULL) AND 
            (size(q.title) <= 3000) AND
            exists(q.venue.id) AND
            (q.venue.id IN {})
         MATCH (p:Quanta:Aminer {{id:q.id}} )
         MATCH (v:Venue {{matchingId:q.venue.id}} )
         CREATE (p)-[:PUBLISHED_IN]->(v)",
        {{batchSize:10000, iterateList:true, parallel:false}})        
        """.format(file_name, top_42_ids)
        run_query(query, graph, print_only=print_only)
        
#         # Second, run on all venues WITHOUT id's
#         # This means that the :Venue is not in the graph
#         # So we want to MERGE it in
#         query = """
#         CALL apoc.periodic.iterate(
#         "CALL apoc.load.json('{}') 
#          YIELD value AS q 
#          RETURN q",
#         "WHERE not(exists(q.venue.id)) AND exists(q.venue.raw) 
#          MATCH (p:Quanta {{id:q.id}} )
#          MERGE (v:Venue {{cleanName:apoc.text.clean(q.venue.raw)}} )
#          CREATE (p)-[:PUBLISHED_IN]->(v)",
#         {{batchSize:5000, iterateList:true, parallel:false}})        
#         """.format(file_name)            
#         run_query(query, graph, print_only=print_only)

## (:Author)

In [None]:
def add_authors(authors_files, print_only=False):
    for file_name in authors_files: 
        print('Importing {}'.format(file_name))
        query = """
        CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
        "MERGE (a:Author:Aminer {{id:q.id}})
        SET a.name = q.name, 
            a.normalizedName = q.normalized_name, 
            a.cleanName = apoc.text.clean(q.normalized_name),
            a.position = q.position, 
            a.lastAffiliation = q.org,
            a.numCitations = q.n_citation, 
            a.numPublications = q.n_pubs, 
            a.hIndex=q.h_index",
        {{batchSize:10000, iterateList:true, parallel:true}});
        """.format(file_name)
        run_query(query, graph, print_only=print_only)

## (:Author)-[:AUTHORED]->(:Quanta)

In [None]:
def add_author_quanta_edges(authors_files, print_only=False):
    for file_name in authors_files: 
        print('Creating authored relationships for {}'.format(file_name))
        query = """
        CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
        "MATCH (a:Author:Aminer {{id:q.id}})
        UNWIND q.pubs as pub
        MATCH (p:Quanta:Aminer {{id:pub.i}})
        MERGE (a)-[r:AUTHORED]->(p)
        ON CREATE SET r.order = CASE WHEN pub.r IS NULL THEN NULL ELSE pub.r END",
        {{batchSize:10000, iterateList:true, parallel:false}});
        """.format(file_name)
        run_query(query, graph, print_only=print_only)

## (:Organization)

In [1]:
def add_organizations(authors_files, print_only=False):
    for file_name in authors_files: 
        query = """
        CALL apoc.periodic.iterate(
            "CALL apoc.load.json('{}') 
             YIELD value AS q              
             RETURN q",
            "WHERE q.org is not null
             MATCH (:Author:Aminer {{id:q.id}})
             MERGE (o:Organization:Aminer {{cleanName: apoc.text.clean(q.org)}})
             ON CREATE SET o.name = q.org",
            {{batchSize:5000, iterateList:true, parallel:false}});
        """.format(data_dir + file_name)
        run_query(query, graph, print_only=print_only)

## (:Author)-[:AFFILIATED_WITH]->(:Organization)

In [6]:
def add_author_organization_edges(authors_files, print_only=False):
    for file_name in authors_files: 
        query = """
        CALL apoc.periodic.iterate(
            "CALL apoc.load.json('{}') 
             YIELD value AS q 
             RETURN q",
            "WHERE q.org IS NOT NULL
             MATCH (a:Author:Aminer {{id: q.id}})
             MATCH (o:Organization:Aminer {{cleanName: apoc.text.clean(q.org)}})
             MERGE (a)-[:AFFILIATED_WITH]->(o)",
            {{batchSize:5000, iterateList:true, parallel:false}});
        """.format(data_dir + file_name)
        run_query(query, graph, print_only=print_only)

## (:Tag)

In [None]:
def add_tags(authors_files, print_only=False):
    for file_name in authors_files: 
        query = """
        CALL apoc.periodic.iterate(
            "CALL apoc.load.json('{}') 
             YIELD value AS q 
             RETURN q",
            "WHERE exists(q.tags)
             MATCH (a:Author {{id: q.id}})
             UNWIND q.tags as tag
             MERGE (t:Tag {{cleanName: apoc.text.clean(tag.t)}})
             ON CREATE SET t.name = tag.t",
            {{batchSize:5000, iterateList:true, parallel:true}})
        """.format(data_dir + file_name)
        run_query(query, graph, print_only=print_only)

## (:Author)-[:HAS_TAG]->(:Tag)

In [None]:
def add_author_tag_edges(authors_files, print_only=False):
    for file_name in authors_files: 
        print("Processing {}...".format(file_name))    
        
        query = """
        CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') 
         YIELD value AS q 
         RETURN q",
        "WHERE q.tags IS NOT NULL 
         MATCH (a:Author {{id:q.id}})
         UNWIND q.tags as tag
         MATCH (t:Tag {{cleanName:apoc.text.clean(tag.t)}})
         MERGE (a)-[:HAS_TAG]->(t)",
        {{batchSize:10000, iterateList:true, parallel:false}});
        """.format(file_name)
        run_query(query, graph, print_only=print_only)

## (:Quanta)-[:HAS_TAG]->(:Tag)

In [None]:
def add_mag_quanta_tag_edges(papers_files, print_only=False):
    for file_name in authors_files: 
        print("Processing {}...".format(file_name))    
        query = """
        CALL apoc.periodic.iterate(
        "CALL apoc.load.json('{}') 
         YIELD value AS q 
         RETURN q",
        "WHERE q.keywords IS NOT NULL 
         MATCH (q:Quanta {{id:q.idv1}})
         UNWIND q.keywords as tag
         MERGE (t:Tag {{cleanName:apoc.text.clean(tag.t)}})
         MERGE (q)-[:HAS_TAG]->(t)",
        {{batchSize:5000, iterateList:true, parallel:false}});
        """.format(file_name)
        run_query(query, graph, print_only=print_only)

## (:Quanta)-[:CITES]->(:Quanta)

In [None]:
def add_quanta_quanta_edges(v1_papers_files, print_only=False):
    for file_name in v1_papers_files:
        print("Processing {}...".format(file_name))
        query = """
        CALL apoc.periodic.iterate(
            "CALL apoc.load.json('{}') YIELD value AS q RETURN q",
            "MATCH (p:Quanta {{idv1: q.id}})
            UNWIND q.references as ref
            WITH p, ref
            MATCH (b:Quanta {{idv1: ref}})
            MERGE (p)-[:CITES]->(b)",
            {{batchSize:5000, iterateList:true, parallel:false}});
        """.format(data_dir + file_name)
        run_query(query, graph, print_only=print_only)

## (:Author)-[:COAUTHOR]-(:Author)

In [None]:
def add_author_author_edges(print_only=False):
    query = """
    CALL apoc.periodic.iterate(
        "MATCH (q:Quanta:Aminer) WHERE size((q)<-[:AUTHORED]-()) > 1 RETURN q",
        "WITH [(q)<-[:AUTHORED]-(a) | a] as coAuthors
        UNWIND coAuthors as first
        UNWIND coAuthors as second
        WITH first, second
        WHERE id(first) < id(second)
        MERGE (first)-[r:COAUTHOR]-(second)
        SET r.strength = CASE WHEN r.strength IS NULL THEN 1 ELSE r.strength + 1 END",
    {batchSize:5000, iterateList:true, parallel:false});
    """
    run_query(query, graph, print_only=print_only)

# Other updates

In [None]:
def cull_isolated_quanta(print_only=False):
    query = """
    CALL apoc.periodic.iterate(
        "MATCH (q:Quanta) WHERE size((q)-[:CITES]-())=0 RETURN q",
        "WITH [(q)<-[:AUTHORED]-(a) | a] as coAuthors
        UNWIND coAuthors as first
        UNWIND coAuthors as second
        WITH first, second
        WHERE id(first) < id(second)
        MERGE (first)-[r:COAUTHOR]-(second)
        SET r.strength = CASE WHEN r.strength IS NULL THEN 1 ELSE r.strength + 1 END",
    {batchSize:10000, iterateList:true, parallel:false});
    """
    run_query(query, graph, print_only=print_only)

## Merge nodes that occur in both MAG and AMiner databases

In [None]:
def merge_mag_aminer_venues(print_only=False):
    query = """
    CALL apoc.periodic.iterate(
        "CALL apoc.load.json('magtwo/venue_linking_pairs.txt') 
         YIELD value as q
         RETURN q",
        "MATCH (m:Venue {id:q.mid})
         MATCH (a:Venue {id:q.aid})
         CALL apoc.refactor.mergeNodes(collect([m, a]), 
             {{properties:'combine', mergeRels:true}}) YIELD node
         RETURN 'none'",
        {{batchSize:10000, iterateList:true, parallel:true}});
        """ 
    run_query(query, graph, print_only=print_only)

In [None]:
def merge_mag_aminer_authors(print_only=False):
    query = """
    CALL apoc.periodic.iterate(
        "CALL apoc.load.json('magtwo/author_linking_pairs.txt') 
         YIELD value as q
         RETURN q",
        "MATCH (m:Author {id:q.mid})
         MATCH (a:Author {id:q.aid})
         CALL apoc.refactor.mergeNodes(collect([m, a]), 
             {{properties:'combine', mergeRels:true}}) YIELD node
         RETURN 'none'",
        {{batchSize:10000, iterateList:true, parallel:true}});
        """
    run_query(query, graph, print_only=print_only)

In [None]:
def merge_mag_aminer_papers(print_only=False):
    query = """
    CALL apoc.periodic.iterate(
        "CALL apoc.load.json('magtwo/paper_linking_pairs.txt') 
         YIELD value as q
         RETURN q",
        "MATCH (m:Quanta {id:q.mid})
         MATCH (a:Quanta {id:q.aid})
         CALL apoc.refactor.mergeNodes(collect([m, a]), 
             {{properties:'discard', mergeRels:true}}) YIELD node
         RETURN 'none'",
        {{batchSize:10000, iterateList:true, parallel:true}});
        """
    run_query(query, graph, print_only=print_only)

# Run Import

## Setup database

In [None]:
add_constraints(print_only=False)
add_indices(print_only=False)
add_years(1700, 2020)

## Import MAG

In [None]:
add_venues(mag_venues_file)

In [None]:
add_quanta(mag_papers_files, print_only=False)

In [None]:
merge_quanta_on_cleanname(print_only=False)
add_v1_ids(mag_v1_papers_files, print_only=False)

In [None]:
add_quanta_year_edges(mag_papers_files, print_only=True)

In [None]:
add_quanta_venue_edges(mag_papers_files, print_only=True)
# NEED TO RE ADD

In [None]:
add_authors(mag_authors_files, print_only=True)

In [None]:
add_author_quanta_edges(mag_authors_files, print_only=True)

In [None]:
add_organizations(mag_authors_files)

In [None]:
add_author_organization_edges(mag_authors_files)

In [None]:
add_tags(mag_authors_files)

In [None]:
add_author_tag_edges(mag_authors_files)

In [None]:
add_quanta_quanta_edges(mag_v1_papers_files)

## Import AMiner

In [None]:
add_venues(aminer_venues_file)

In [None]:
add_quanta(aminer_papers_files)

In [None]:
add_v1_ids(aminer_v1_papers_files)

In [None]:
add_quanta_year_edges(aminer_papers_files)

In [None]:
add_quanta_venue_edges(aminer_papers_files)
# NEED TO RE ADD

In [None]:
add_authors(aminer_authors_files)

In [None]:
add_author_quanta_edges(aminer_authors_files)

In [None]:
add_organizations(aminer_authors_files)

In [None]:
add_author_organization_edges(aminer_authors_files)

In [None]:
add_tags(aminer_authors_files)

In [None]:
add_author_tag_edges(aminer_authors_files)
# adding

In [None]:
add_quanta_quanta_edges(aminer_v1_papers_files)

## Further additions and modifications

In [None]:
add_author_author_edges()

In [None]:
merge_mag_aminer_venues(venue_linking_pairs)
#Not done

In [None]:
merge_mag_aminer_papers(paper_linking_pairs)
#Not done

In [None]:
merge_mag_aminer_authors(author_linking_pairs)
#Not done

In [None]:
# MErge duplicate venues, organizations, authors?

# MERGE DUPLICATE CITATIONS 