<h1 style="text-align:center;"><strong>Inserción de la data en Neo4j</strong></h1>

### Importar librerías

In [42]:
import os

from py2neo import Graph

### Definir las rutas de los archivos que contienen la data

In [43]:
# Base URL of the folder that holds the CSV exports. Every *_path variable is
# built by appending a file name to it, and those URLs are later interpolated
# into the FROM clause of the LOAD CSV statements.
# Set NEO4J_CSV_BASE_URL to load another export without editing the notebook;
# when the variable is unset the original location is used.
base_path = os.environ.get(
    "NEO4J_CSV_BASE_URL",
    "file:///D:/EPN/Tesis/API-Elsevier/src/data/completo/30-06-2021/",
)
# File names are appended with plain concatenation, so make sure the base
# always ends with a slash even if the override omits it.
if not base_path.endswith("/"):
    base_path += "/"

In [44]:
# URL of the CSV used to create the Affiliation nodes.
affiliations_path = "{}affiliations.csv".format(base_path)
# Echo the resolved URL so a wrong folder is noticed before anything is loaded.
print(affiliations_path)

file:///D:/EPN/Tesis/API-Elsevier/src/data/completo/30-06-2021/affiliations.csv


In [45]:
# URL of the CSV used to create the Article nodes.
articles_path = "{}articles.csv".format(base_path)
# Echo the resolved URL so a wrong folder is noticed before anything is loaded.
print(articles_path)

file:///D:/EPN/Tesis/API-Elsevier/src/data/completo/30-06-2021/articles.csv


In [46]:
# URL of the CSV used to create the Author nodes.
authors_path = "{}authors.csv".format(base_path)
# Echo the resolved URL so a wrong folder is noticed before anything is loaded.
print(authors_path)

file:///D:/EPN/Tesis/API-Elsevier/src/data/completo/30-06-2021/authors.csv


In [47]:
# URL of the CSV used to create the AuthorKeyword nodes.
author_keywords_path = "{}author_keywords.csv".format(base_path)
# Echo the resolved URL so a wrong folder is noticed before anything is loaded.
print(author_keywords_path)

file:///D:/EPN/Tesis/API-Elsevier/src/data/completo/30-06-2021/author_keywords.csv


In [48]:
# URL of the CSV that pairs article ids with affiliation ids.
articles_affiliations_path = "{}articles_affiliations.csv".format(base_path)
# Echo the resolved URL so a wrong folder is noticed before anything is loaded.
print(articles_affiliations_path)

file:///D:/EPN/Tesis/API-Elsevier/src/data/completo/30-06-2021/articles_affiliations.csv


In [49]:
# URL of the CSV that pairs article ids with author ids.
articles_authors_path = "{}articles_authors.csv".format(base_path)
# Echo the resolved URL so a wrong folder is noticed before anything is loaded.
print(articles_authors_path)

file:///D:/EPN/Tesis/API-Elsevier/src/data/completo/30-06-2021/articles_authors.csv


In [50]:
# URL of the CSV that pairs author ids with affiliation ids.
authors_affiliations_path = "{}authors_affiliations.csv".format(base_path)
# Echo the resolved URL so a wrong folder is noticed before anything is loaded.
print(authors_affiliations_path)

file:///D:/EPN/Tesis/API-Elsevier/src/data/completo/30-06-2021/authors_affiliations.csv


In [51]:
# URL of the CSV that pairs article ids with author-keyword ids.
articles_author_keywords_path = "{}articles_author_keywords.csv".format(base_path)
# Echo the resolved URL so a wrong folder is noticed before anything is loaded.
print(articles_author_keywords_path)

file:///D:/EPN/Tesis/API-Elsevier/src/data/completo/30-06-2021/articles_author_keywords.csv


### Configurar la conexión a la base de datos de Neo4j

In [52]:
# Connection settings are read from the environment so the notebook can target
# another server and the password does not have to be edited into the file.
# The fallbacks are the values that used to be hard-coded, so running the
# notebook without any variables set behaves exactly as before.
# NOTE(review): remove the password fallback once NEO4J_PASSWORD is provided
# wherever this notebook runs; a default secret in the file is still a leak.
neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD", "narias")

# Handle used by every following cell to send Cypher to the database.
graph = Graph(neo4j_uri, auth=(neo4j_user, neo4j_password))

### Eliminar toda la data de la base de datos de Neo4j

In [53]:
# Start from an empty database: DETACH DELETE removes every node together
# with the relationships attached to it.
delete_everything = "MATCH (n) DETACH DELETE n"
graph.run(delete_everything)

### Eliminar los constraints de la base de datos

In [54]:
# Drop each named constraint if it is present, so the CREATE CONSTRAINT
# cells can be re-run on a database that was loaded before.
for constraint_name in (
    "affiliationScopusIdConstraint",
    "articleScopusIdConstraint",
    "authorScopusIdConstraint",
    "authorKeywordsIdConstraint",
):
    graph.run("DROP CONSTRAINT " + constraint_name + " IF EXISTS")

### Crear los constraints

In [55]:
# Uniqueness constraint on the scopus_id property of Affiliation nodes.
affiliation_constraint = (
    "CREATE CONSTRAINT affiliationScopusIdConstraint "
    "ON (af:Affiliation) ASSERT af.scopus_id IS UNIQUE"
)
graph.run(affiliation_constraint)

In [56]:
# Uniqueness constraint on the scopus_id property of Article nodes.
article_constraint = (
    "CREATE CONSTRAINT articleScopusIdConstraint "
    "ON (ar:Article) ASSERT ar.scopus_id IS UNIQUE"
)
graph.run(article_constraint)

In [57]:
# Uniqueness constraint on the scopus_id property of Author nodes.
author_constraint = (
    "CREATE CONSTRAINT authorScopusIdConstraint "
    "ON (au:Author) ASSERT au.scopus_id IS UNIQUE"
)
graph.run(author_constraint)

In [58]:
# Uniqueness constraint on the id property of AuthorKeyword nodes.
author_keyword_constraint = (
    "CREATE CONSTRAINT authorKeywordsIdConstraint "
    "ON (ak:AuthorKeyword) ASSERT ak.id IS UNIQUE"
)
graph.run(author_keyword_constraint)

### Inserción de la data en Neo4j

In [59]:
# Affiliation nodes: one node is created per row of affiliations.csv.
# The statement is assembled line by line; the leading and trailing empty
# items reproduce the newlines that wrap the Cypher text.
# NOTE(review): document_count is copied straight from the CSV field, so it is
# presumably stored as text - confirm whether a numeric conversion is wanted.
query = "\n".join([
    "",
    "LOAD CSV WITH HEADERS ",
    'FROM "{}" AS csvLine'.format(affiliations_path),
    "CREATE (af:Affiliation {scopus_id: csvLine.identifier, ",
    "affiliation_name: csvLine.affiliation_name,",
    "document_count: csvLine.document_count,",
    "city: csvLine.city,",
    "country: csvLine.country",
    "})",
    "",
])
graph.run(query)

In [60]:
# Article nodes: one node is created per row of articles.csv.
# The statement is assembled line by line; the leading and trailing empty
# items reproduce the newlines that wrap the Cypher text.
# NOTE(review): affiliation_name is read here exactly as in the Affiliation
# load - confirm that articles.csv really carries that column.
# NOTE(review): author_count / affiliation_count are copied straight from the
# CSV fields, so they are presumably stored as text - confirm.
query = "\n".join([
    "",
    "LOAD CSV WITH HEADERS ",
    'FROM "{}" AS csvLine'.format(articles_path),
    "CREATE (ar:Article {scopus_id: csvLine.identifier, ",
    "affiliation_name: csvLine.affiliation_name,",
    "title: csvLine.title,",
    "publication_date: csvLine.publication_date,",
    "doi: csvLine.doi,",
    "abstract: csvLine.abstract,",
    "author_count: csvLine.author_count,",
    "affiliation_count: csvLine.affiliation_count",
    "})",
    "",
])
graph.run(query)

In [61]:
# Author nodes: one node is created per row of authors.csv.
# The statement is assembled line by line; the leading and trailing empty
# items reproduce the newlines that wrap the Cypher text.
query = "\n".join([
    "",
    "LOAD CSV WITH HEADERS ",
    'FROM "{}" AS csvLine'.format(authors_path),
    "CREATE (au:Author {scopus_id: csvLine.identifier, ",
    "eid: csvLine.eid,",
    "orcid: csvLine.orcid,",
    "document_count: csvLine.document_count,",
    "first_name: csvLine.first_name,",
    "last_name: csvLine.last_name",
    "})",
    "",
])
graph.run(query)

In [62]:
# AuthorKeyword nodes: one node is created per row of author_keywords.csv.
# The statement is assembled line by line; the leading and trailing empty
# items reproduce the newlines that wrap the Cypher text.
query = "\n".join([
    "",
    "LOAD CSV WITH HEADERS ",
    'FROM "{}" AS csvLine'.format(author_keywords_path),
    "CREATE (ak:AuthorKeyword {id: csvLine.identifier,",
    "name: csvLine.name",
    "})",
    "",
])
graph.run(query)

In [63]:
# Article -> Affiliation relationships (BELONG_TO), one per CSV row whose
# article_id and affiliation_id both match an existing node.
# The statement is assembled line by line; the leading and trailing empty
# items reproduce the newlines that wrap the Cypher text.
query = "\n".join([
    "",
    "USING PERIODIC COMMIT 500",
    "LOAD CSV WITH HEADERS ",
    'FROM "{}" AS csvLine'.format(articles_affiliations_path),
    "MATCH (ar:Article {scopus_id: csvLine.article_id}), ",
    "(af:Affiliation {scopus_id: csvLine.affiliation_id})",
    "CREATE (ar)-[:BELONG_TO]->(af)",
    "",
])
graph.run(query)

In [64]:
# Article -> Author relationships (WRITTEN_BY), one per CSV row whose
# author_id and article_id both match an existing node.
# The statement is assembled line by line; the leading and trailing empty
# items reproduce the newlines that wrap the Cypher text.
query = "\n".join([
    "",
    "USING PERIODIC COMMIT 500",
    "LOAD CSV WITH HEADERS ",
    'FROM "{}" AS csvLine'.format(articles_authors_path),
    "MATCH (au:Author {scopus_id: csvLine.author_id}), ",
    "(ar:Article {scopus_id: csvLine.article_id})",
    "CREATE (ar)-[:WRITTEN_BY]->(au)",
    "",
])
graph.run(query)

In [65]:
# Author -> Affiliation relationships (WORKED_AT), one per CSV row whose
# author_id and affiliation_id both match an existing node.
# The statement is assembled line by line; the leading and trailing empty
# items reproduce the newlines that wrap the Cypher text.
query = "\n".join([
    "",
    "USING PERIODIC COMMIT 500",
    "LOAD CSV WITH HEADERS ",
    'FROM "{}" AS csvLine'.format(authors_affiliations_path),
    "MATCH (au:Author {scopus_id: csvLine.author_id}), ",
    "(af:Affiliation {scopus_id: csvLine.affiliation_id})",
    "CREATE (au)-[:WORKED_AT]->(af)",
    "",
])
graph.run(query)

In [66]:
# AuthorKeyword -> Article relationships (BELONG_TO), one per CSV row whose
# article_id and author_keyword_id both match an existing node.
# The statement is assembled line by line; the leading and trailing empty
# items reproduce the newlines that wrap the Cypher text.
query = "\n".join([
    "",
    "USING PERIODIC COMMIT 500",
    "LOAD CSV WITH HEADERS ",
    'FROM "{}" AS csvLine'.format(articles_author_keywords_path),
    "MATCH (ar:Article {scopus_id: csvLine.article_id}), ",
    "(ak:AuthorKeyword {id: csvLine.author_keyword_id})",
    "CREATE (ak)-[:BELONG_TO]->(ar)",
    "",
])
graph.run(query)