<h1 style="text-align:center;"><strong>Inserción de la data en Neo4j<strong></h1>

### Importar librerías

In [1]:
from py2neo import Graph
import time

### Definir las rutas de los archivos que contienen la data

In [2]:
base_path = "file:///T:/Tesis/API-Elsevier/src/data/completo/13-07-2022/"

In [3]:
affiliations_path = base_path+"affiliations.csv"
print(affiliations_path)

file:///T:/Tesis/API-Elsevier/src/data/completo/13-07-2022/affiliations.csv


In [4]:
articles_path = base_path+"articles.csv"
print(articles_path)

file:///T:/Tesis/API-Elsevier/src/data/completo/13-07-2022/articles.csv


In [5]:
authors_path = base_path+"authors.csv"
print(authors_path)

file:///T:/Tesis/API-Elsevier/src/data/completo/13-07-2022/authors.csv


In [6]:
author_keywords_path = base_path+"author_keywords.csv"
print(author_keywords_path)

file:///T:/Tesis/API-Elsevier/src/data/completo/13-07-2022/author_keywords.csv


In [7]:
articles_affiliations_path = base_path+"articles_affiliations.csv"
print(articles_affiliations_path)

file:///T:/Tesis/API-Elsevier/src/data/completo/13-07-2022/articles_affiliations.csv


In [8]:
articles_authors_path = base_path+"articles_authors.csv"
print(articles_authors_path)

file:///T:/Tesis/API-Elsevier/src/data/completo/13-07-2022/articles_authors.csv


In [9]:
authors_affiliations_path = base_path+"authors_affiliations.csv"
print(authors_affiliations_path)

file:///T:/Tesis/API-Elsevier/src/data/completo/13-07-2022/authors_affiliations.csv


In [10]:
articles_author_keywords_path = base_path+"articles_author_keywords.csv"
print(articles_author_keywords_path)

file:///T:/Tesis/API-Elsevier/src/data/completo/13-07-2022/articles_author_keywords.csv


### Configurar la conexión a la base de datos de Neo4j

In [11]:
graph = Graph("bolt://3.138.124.86:7687", auth=("neo4j", "narias98"))

### Eliminar toda la data de la base de datos de Neo4j

In [12]:
graph.run("MATCH (n) DETACH DELETE n")

### Eliminar los constraints a la base de datos

In [13]:
#Elimina los constraints si existen
graph.run("DROP CONSTRAINT affiliationScopusIdConstraint IF EXISTS")
graph.run("DROP CONSTRAINT articleScopusIdConstraint IF EXISTS")
graph.run("DROP CONSTRAINT authorScopusIdConstraint IF EXISTS")
graph.run("DROP CONSTRAINT topicsNameConstraint IF EXISTS")

### Crear los constraints

In [14]:
#Timer Start
timerStart = time.time()

In [15]:
#Constraint del id de las afiliaciones
graph.run("CREATE CONSTRAINT affiliationScopusIdConstraint ON (af:Affiliation) ASSERT af.scopus_id IS UNIQUE")

ClientError: [Statement.SyntaxError] Invalid constraint syntax, ON and ASSERT should not be used. Replace ON with FOR and ASSERT with REQUIRE. (line 1, column 1 (offset: 0))
"CREATE CONSTRAINT affiliationScopusIdConstraint ON (af:Affiliation) ASSERT af.scopus_id IS UNIQUE"
 ^

In [None]:
#Constraint del id de los articulos
graph.run("CREATE CONSTRAINT articleScopusIdConstraint ON (ar:Article) ASSERT ar.scopus_id IS UNIQUE")

In [None]:
#Constraint del id de los autores
graph.run("CREATE CONSTRAINT authorScopusIdConstraint ON (au:Author) ASSERT au.scopus_id IS UNIQUE")

In [16]:
#Constraint del name de las author_keywords
graph.run("CREATE CONSTRAINT topicsNameConstraint ON (t:Topic) ASSERT t.name IS UNIQUE")

ClientError: [Statement.SyntaxError] Invalid constraint syntax, ON and ASSERT should not be used. Replace ON with FOR and ASSERT with REQUIRE. (line 1, column 1 (offset: 0))
"CREATE CONSTRAINT topicsNameConstraint ON (t:Topic) ASSERT t.name IS UNIQUE"
 ^

### Insersión de la data en Neo4j

In [19]:
#affiliations
query = """
LOAD CSV WITH HEADERS 
FROM """+'"'+affiliations_path+'"'+""" AS csvLine
CREATE (af:Affiliation {scopus_id: csvLine.identifier, 
name: csvLine.affiliation_name,
city: csvLine.city,
country: csvLine.country
})
RETURN count(af)
"""
graph.run(query)

count(af)
5372


In [20]:
#articles
query = """
LOAD CSV WITH HEADERS 
FROM """+'"'+articles_path+'"'+""" AS csvLine
CREATE (ar:Article {scopus_id: csvLine.identifier, 
title: csvLine.title,
publication_date: csvLine.publication_date,
doi: csvLine.doi,
abstract: csvLine.abstract,
author_count: csvLine.author_count,
affiliation_count: csvLine.affiliation_count,
corpus: csvLine.corpus
})
RETURN count(ar)
"""
graph.run(query)

count(ar)
39952


In [21]:
#authors
query = """
LOAD CSV WITH HEADERS 
FROM """+'"'+authors_path+'"'+""" AS csvLine
CREATE (au:Author {scopus_id: csvLine.identifier, 
first_name: csvLine.first_name,
last_name: csvLine.last_name,
auth_name: csvLine.auth_name,
initials: csvLine.initials
})
RETURN count(au)
"""
graph.run(query)

count(au)
39225


In [22]:
#author_keywords
query = """
LOAD CSV WITH HEADERS 
FROM """+'"'+author_keywords_path+'"'+""" AS csvLine
CREATE (t:Topic {name: csvLine.name
})
RETURN count(t)
"""
graph.run(query)

count(t)
86314


In [23]:
#articles_affiliations
query = """
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS 
FROM """+'"'+articles_affiliations_path+'"'+""" AS csvLine
MATCH (ar:Article {scopus_id: csvLine.article_id}), 
(af:Affiliation {scopus_id: csvLine.affiliation_id})
CREATE (ar)-[r:BELONGS_TO]->(af)
RETURN count(r)
"""
graph.run(query)

count(r)
55302


In [24]:
#articles_authors
query = """
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS 
FROM """+'"'+articles_authors_path+'"'+""" AS csvLine
MATCH (au:Author {scopus_id: csvLine.author_id}), 
(ar:Article {scopus_id: csvLine.article_id})
CREATE (au)-[r:WROTE]->(ar)
RETURN count(r)
"""
graph.run(query)

count(r)
102592


In [25]:
#authors_affiliations
query = """
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS 
FROM """+'"'+authors_affiliations_path+'"'+""" AS csvLine
MATCH (au:Author {scopus_id: csvLine.author_id}), 
(af:Affiliation {scopus_id: csvLine.affiliation_id})
CREATE (au)-[r:AFFILIATED_WITH]->(af)
RETURN count(r)
"""
graph.run(query)

count(r)
48362


In [26]:
#articles_author_keywords
query = """
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS 
FROM """+'"'+articles_author_keywords_path+'"'+""" AS csvLine
MATCH (ar:Article {scopus_id: csvLine.article_id}),
(t:Topic {name:csvLine.author_keyword})
CREATE (ar)-[r:USES]->(t)
RETURN count(r)
"""
graph.run(query)

count(r)
162829


In [27]:
#Timer End
timerEnd = time.time()

### Tiempo de ejecución

In [28]:
print('Fecha de inicio:', time.ctime(timerStart)) 
print('Fecha de fin:', time.ctime(timerEnd)) 
print('Tiempo total de ejecución:', timerEnd-timerStart, 'segundos.') 

Fecha de inicio: Fri Dec 30 11:35:06 2022
Fecha de fin: Fri Dec 30 11:35:31 2022
Tiempo total de ejecución: 25.309191942214966 segundos.
