<h1 style="text-align:center;"><strong>Inserción de la data en Neo4j</strong></h1>

### Importar librerías

In [None]:
import os
import time
from getpass import getpass

from py2neo import Graph

### Definir las rutas de los archivos que contienen la data

In [None]:
# Common URL prefix for every CSV file that is handed to LOAD CSV in this notebook.
# NOTE(review): file:/// URLs are read by the Neo4j server, not by this kernel —
# presumably the CSVs sit in the server's import directory; confirm on the host.
base_path: str = "file:///"

In [None]:
# URL of the CSV that holds the Affiliation nodes; printed as a quick visual check.
affiliations_path = f"{base_path}affiliations.csv"
print(affiliations_path)

In [None]:
# URL of the CSV that holds the Article nodes; printed as a quick visual check.
articles_path = f"{base_path}articles.csv"
print(articles_path)

In [None]:
# URL of the CSV that holds the Author nodes; printed as a quick visual check.
authors_path = f"{base_path}authors.csv"
print(authors_path)

In [None]:
# URL of the CSV that holds the author keywords (loaded as Topic nodes);
# printed as a quick visual check.
author_keywords_path = f"{base_path}author_keywords.csv"
print(author_keywords_path)

In [None]:
# URL of the CSV that links articles to affiliations; printed as a quick visual check.
articles_affiliations_path = f"{base_path}articles_affiliations.csv"
print(articles_affiliations_path)

In [None]:
# URL of the CSV that links authors to the articles they wrote; printed as a
# quick visual check.
articles_authors_path = f"{base_path}articles_authors.csv"
print(articles_authors_path)

In [None]:
# URL of the CSV that links authors to affiliations; printed as a quick visual check.
authors_affiliations_path = f"{base_path}authors_affiliations.csv"
print(authors_affiliations_path)

In [None]:
# URL of the CSV that links articles to author keywords; printed as a quick
# visual check.
articles_author_keywords_path = f"{base_path}articles_author_keywords.csv"
print(articles_author_keywords_path)

In [None]:
# URL of the CSV with the collaboration strength between pairs of authors;
# printed as a quick visual check. The file name is singular ("collab_strength")
# while the variable name is plural.
collab_strengths_path = f"{base_path}collab_strength.csv"
print(collab_strengths_path)

### Configurar la conexión a la base de datos Neo4j

In [None]:
# Connection settings are read from the environment so that no secret is stored
# in the notebook. NEO4J_URI and NEO4J_USER fall back to the values this
# notebook has always used; the password has no fallback and is prompted for
# interactively when NEO4J_PASSWORD is unset or empty.
# NOTE(review): the password that used to be hard-coded in this cell should be
# treated as compromised and rotated on the server.
neo4j_uri = os.environ.get("NEO4J_URI", "bolt://20.127.223.251")
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

# Handle used by every later cell to run Cypher against the database.
graph = Graph(neo4j_uri, auth=(neo4j_user, neo4j_password))

### Eliminar toda la data de la base de datos de Neo4j

In [None]:
# Wipe the database: every node is detach-deleted (its relationships go with it)
# inside a CALL { ... } IN TRANSACTIONS subquery, so the deletion is committed
# in batches rather than in one single transaction.
query = (
    "\n"
    "MATCH (n)\n"
    "CALL {\n"
    "  WITH n\n"
    "  DETACH DELETE n\n"
    "} IN TRANSACTIONS\n"
)
graph.run(query)

### Eliminar los constraints de la base de datos

In [None]:
# Drop the four uniqueness constraints this notebook manages, if they exist,
# in the same order as before.
constraint_names = (
    "affiliationScopusIdConstraint",
    "articleScopusIdConstraint",
    "authorScopusIdConstraint",
    "topicsNameConstraint",
)
for constraint_name in constraint_names:
    drop_result = graph.run(f"DROP CONSTRAINT {constraint_name} IF EXISTS")

# Keep the result of the last DROP as the cell output.
drop_result

### Crear los constraints

In [None]:
# Start of the timed section: wall-clock time in seconds since the epoch.
# The report cell at the end of the notebook reads this value.
timerStart: float = time.time()

In [None]:
# Uniqueness constraint on Affiliation.scopus_id.
# IF NOT EXISTS makes this cell safe to re-run on its own: without it Neo4j
# raises an error when the constraint is already present.
graph.run(
    "CREATE CONSTRAINT affiliationScopusIdConstraint IF NOT EXISTS "
    "FOR (af:Affiliation) REQUIRE af.scopus_id IS UNIQUE"
)

In [None]:
# Uniqueness constraint on Article.scopus_id.
# IF NOT EXISTS makes this cell safe to re-run on its own: without it Neo4j
# raises an error when the constraint is already present.
graph.run(
    "CREATE CONSTRAINT articleScopusIdConstraint IF NOT EXISTS "
    "FOR (ar:Article) REQUIRE ar.scopus_id IS UNIQUE"
)

In [None]:
# Uniqueness constraint on Author.scopus_id.
# IF NOT EXISTS makes this cell safe to re-run on its own: without it Neo4j
# raises an error when the constraint is already present.
graph.run(
    "CREATE CONSTRAINT authorScopusIdConstraint IF NOT EXISTS "
    "FOR (au:Author) REQUIRE au.scopus_id IS UNIQUE"
)

In [None]:
# Uniqueness constraint on Topic.name (Topic nodes are the author keywords).
# IF NOT EXISTS makes this cell safe to re-run on its own: without it Neo4j
# raises an error when the constraint is already present.
graph.run(
    "CREATE CONSTRAINT topicsNameConstraint IF NOT EXISTS "
    "FOR (t:Topic) REQUIRE t.name IS UNIQUE"
)

### Inserción de la data en Neo4j

In [None]:
# Load affiliations.csv: one Affiliation node is created per CSV row and the
# number of created nodes is returned.
# The file URL is passed as the Cypher parameter $csv_url instead of being
# spliced into the query text, so the query needs no manual quoting and stays
# a constant string.
query = """
LOAD CSV WITH HEADERS FROM $csv_url AS csvLine
CREATE (af:Affiliation {
  scopus_id: csvLine.identifier,
  name: csvLine.affiliation_name,
  city: csvLine.city,
  country: csvLine.country
})
RETURN count(af)
"""
graph.run(query, csv_url=affiliations_path)

---------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------

In [None]:
# Load articles.csv: one Article node is created per CSV row and the number of
# created nodes is returned.
# The file URL is passed as the Cypher parameter $csv_url instead of being
# spliced into the query text, so the query needs no manual quoting and stays
# a constant string.
# NOTE(review): LOAD CSV yields every field as a string, so author_count,
# affiliation_count and publication_date are stored as strings — confirm
# whether readers of the graph expect numbers/dates before casting them.
query = """
LOAD CSV WITH HEADERS FROM $csv_url AS csvLine
CREATE (ar:Article {
  scopus_id: csvLine.identifier,
  title: csvLine.title,
  publication_date: csvLine.publication_date,
  doi: csvLine.doi,
  abstract: csvLine.abstract,
  author_count: csvLine.author_count,
  affiliation_count: csvLine.affiliation_count,
  corpus: csvLine.corpus
})
RETURN count(ar)
"""
graph.run(query, csv_url=articles_path)

---------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------

In [None]:
# Load authors.csv: one Author node is created per CSV row and the number of
# created nodes is returned.
# The file URL is passed as the Cypher parameter $csv_url instead of being
# spliced into the query text, so the query needs no manual quoting and stays
# a constant string.
query = """
LOAD CSV WITH HEADERS FROM $csv_url AS csvLine
CREATE (au:Author {
  scopus_id: csvLine.identifier,
  first_name: csvLine.first_name,
  last_name: csvLine.last_name,
  auth_name: csvLine.auth_name,
  initials: csvLine.initials
})
RETURN count(au)
"""
graph.run(query, csv_url=authors_path)

---------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------

In [None]:
# Load author_keywords.csv: one Topic node is created per CSV row (its name is
# the keyword) and the number of created nodes is returned.
# The file URL is passed as the Cypher parameter $csv_url instead of being
# spliced into the query text, so the query needs no manual quoting and stays
# a constant string.
query = """
LOAD CSV WITH HEADERS FROM $csv_url AS csvLine
CREATE (t:Topic {name: csvLine.name})
RETURN count(t)
"""
graph.run(query, csv_url=author_keywords_path)

---------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------

In [None]:
# Load articles_affiliations.csv: for each row, look up the Article and the
# Affiliation by scopus_id and create (Article)-[:BELONGS_TO]->(Affiliation).
# Rows whose article or affiliation is not found produce no relationship.
# The file URL is passed as the Cypher parameter $csv_url instead of being
# spliced into the query text, so the query needs no manual quoting and stays
# a constant string.
query = """
LOAD CSV WITH HEADERS FROM $csv_url AS csvLine
MATCH (ar:Article {scopus_id: csvLine.article_id}),
      (af:Affiliation {scopus_id: csvLine.affiliation_id})
CREATE (ar)-[r:BELONGS_TO]->(af)
RETURN count(r)
"""
graph.run(query, csv_url=articles_affiliations_path)

---------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------

In [None]:
# Load articles_authors.csv: for each row, look up the Author and the Article
# by scopus_id and create (Author)-[:WROTE]->(Article).
# Rows whose author or article is not found produce no relationship.
# The file URL is passed as the Cypher parameter $csv_url instead of being
# spliced into the query text, so the query needs no manual quoting and stays
# a constant string.
query = """
LOAD CSV WITH HEADERS FROM $csv_url AS csvLine
MATCH (au:Author {scopus_id: csvLine.author_id}),
      (ar:Article {scopus_id: csvLine.article_id})
CREATE (au)-[r:WROTE]->(ar)
RETURN count(r)
"""
graph.run(query, csv_url=articles_authors_path)

---------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------

In [None]:
# Load authors_affiliations.csv: for each row, look up the Author and the
# Affiliation by scopus_id and create (Author)-[:AFFILIATED_WITH]->(Affiliation).
# Rows whose author or affiliation is not found produce no relationship.
# The file URL is passed as the Cypher parameter $csv_url instead of being
# spliced into the query text, so the query needs no manual quoting and stays
# a constant string.
query = """
LOAD CSV WITH HEADERS FROM $csv_url AS csvLine
MATCH (au:Author {scopus_id: csvLine.author_id}),
      (af:Affiliation {scopus_id: csvLine.affiliation_id})
CREATE (au)-[r:AFFILIATED_WITH]->(af)
RETURN count(r)
"""
graph.run(query, csv_url=authors_affiliations_path)

---------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------

In [None]:
# Load articles_author_keywords.csv: for each row, look up the Article by
# scopus_id and the Topic by name and create (Article)-[:USES]->(Topic).
# Rows whose article or topic is not found produce no relationship.
# Unlike the other load cells, the work runs in a CALL { ... } IN TRANSACTIONS
# subquery (committed in batches) and the query returns no count.
# The file URL is passed as the Cypher parameter $csv_url instead of being
# spliced into the query text, so the query needs no manual quoting and stays
# a constant string.
query = """
LOAD CSV WITH HEADERS FROM $csv_url AS csvLine
CALL {
  WITH csvLine
  MATCH (ar:Article {scopus_id: csvLine.article_id}),
        (t:Topic {name: csvLine.author_keyword})
  CREATE (ar)-[r:USES]->(t)
} IN TRANSACTIONS
"""
graph.run(query, csv_url=articles_author_keywords_path)

---------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------

In [None]:
# Load collab_strength.csv: for each row, look up both Author nodes by
# scopus_id (columns au1 and au2) and create
# (au1)-[:CO_AUTHORED {collab_strength}]->(au2).
# Rows for which either author is not found produce no relationship.
# The file URL is passed as the Cypher parameter $csv_url instead of being
# spliced into the query text, so the query needs no manual quoting and stays
# a constant string.
# NOTE(review): LOAD CSV yields every field as a string, so collab_strength is
# stored as a string — confirm whether readers of the graph need a number
# (toInteger / toFloat) before casting it.
query = """
LOAD CSV WITH HEADERS FROM $csv_url AS csvLine
MATCH (au1:Author {scopus_id: csvLine.au1}),
      (au2:Author {scopus_id: csvLine.au2})
CREATE (au1)-[r:CO_AUTHORED {collab_strength: csvLine.collab_strength}]->(au2)
RETURN count(r)
"""
graph.run(query, csv_url=collab_strengths_path)

---------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------

In [None]:
# End of the timed section: wall-clock time in seconds since the epoch.
timerEnd: float = time.time()

### Tiempo de ejecución

In [None]:
# Report when the timed section started and ended (human-readable timestamps)
# and how long it took in seconds.
started_at = time.ctime(timerStart)
finished_at = time.ctime(timerEnd)
elapsed_seconds = timerEnd - timerStart

print('Fecha de inicio:', started_at)
print('Fecha de fin:', finished_at)
print('Tiempo total de ejecución:', elapsed_seconds, 'segundos.')