In [1]:
import pandas as pd
from os import getcwd, listdir
from os.path import join, isfile, exists, abspath, pardir
from sys import path
import re

##### Configs

In [2]:
# Name of the Neo4j database handed to the connection further down.
database = "sdm"

# Two directory levels above the current working directory.
parent_dir = abspath(join(getcwd(), pardir, pardir))

# Sub-directories derived from parent_dir.
data_dir = join(parent_dir, "data")
scripts_dir = join(parent_dir, "src", "scripts")

In [3]:
# Extend the import search path with scripts_dir so that the `connect`
# module (presumably located there) can be imported. The append has to
# run before the import statement.
path.append(scripts_dir)
from connect import Neo4jConnection

In [4]:
# Shared connection object used by run_query; no user name or password is supplied.
driver = Neo4jConnection(
    uri="bolt://localhost:7687",
    user=None,
    pwd=None,
    database=database,
)

##### Helper methods

In [6]:
def run_query(query):
    """
    Send 'query' to the module-level 'driver' and return whatever
    'driver.query' returns.
    """
    result = driver.query(query)
    return result

#### Delete everything

In [7]:
def delete_all_nodes():
    """
    Delete every node, together with the relationships attached to it
    (DETACH DELETE), via 'run_query'.
    """
    cypher = """
        MATCH(n) DETACH DELETE(n)
    """
    run_query(query=cypher)

In [8]:
# Destructive: removes every node and its relationships from the connected database.
delete_all_nodes()

### Create Constraints (Optional)

__Document__'s `document_id` should be unique

In [9]:
def create_document_unqiue_constraint():
    """
    Create the 'document_unqiue' constraint, which requires 'document_id'
    to be unique across 'Document' nodes. Nothing is created when the
    constraint already exists (IF NOT EXISTS).
    """
    cypher = """
        CREATE CONSTRAINT document_unqiue IF NOT EXISTS FOR (n: Document) REQUIRE (n.document_id) IS UNIQUE
    """
    run_query(query=cypher)

In [10]:
# Register the Document uniqueness constraint (skipped by Neo4j if it already exists).
create_document_unqiue_constraint()

__Author__'s `author_id` should be unique

In [11]:
def create_author_id_unqiue_constraint():
    """
    Create the 'author_id_unqiue' constraint, which requires 'author_id'
    to be unique across 'Author' nodes. Nothing is created when the
    constraint already exists (IF NOT EXISTS).
    """
    cypher = """
        CREATE CONSTRAINT author_id_unqiue IF NOT EXISTS FOR (n: Author) REQUIRE (n.author_id) IS UNIQUE
    """
    run_query(query=cypher)

In [12]:
# Register the Author uniqueness constraint (skipped by Neo4j if it already exists).
create_author_id_unqiue_constraint()

__Keyword__'s `name` should be unique

In [13]:
def create_keyword_unqiue_constraint():
    """
    Create the 'keyword_unqiue' constraint, which requires 'name' to be
    unique across 'Keyword' nodes. Nothing is created when the
    constraint already exists (IF NOT EXISTS).
    """
    cypher = """
        CREATE CONSTRAINT keyword_unqiue IF NOT EXISTS FOR (n: Keyword) REQUIRE (n.name) IS UNIQUE
    """
    run_query(query=cypher)

In [14]:
# Register the Keyword uniqueness constraint (skipped by Neo4j if it already exists).
create_keyword_unqiue_constraint()

### Create Nodes

Create `Journal` nodes

In [15]:
def create_journal_nodes(
    csv_url="file:///Users/mohammadzainabbas/Downloads/sdm/journals.csv",
):
    """
    Create one 'Journal' node per row of a CSV file (with header row).

    Every CSV column is copied onto the node as a property; 'year' is
    then replaced by the result of toInteger() on the CSV value.

    csv_url: URL given to Cypher's LOAD CSV. The default is the location
        that used to be hard-coded, so a call without arguments behaves
        as before. The value is inserted into the query text as-is and
        therefore must not contain a double quote.
    """
    # %-formatting rather than str.format/f-strings keeps the Cypher text
    # free of brace escaping.
    query = """
        LOAD CSV WITH HEADERS FROM "%s" AS x
        CREATE(n: Journal)
        SET n=x, n.year=toInteger(x.year)
    """ % (csv_url,)
    run_query(query=query)

In [16]:
# Load the Journal nodes; the query uses CREATE, so running it again adds the rows a second time.
create_journal_nodes()

Create `Affiliation` nodes

In [17]:
def create_affiliation_nodes(
    csv_url="file:///Users/mohammadzainabbas/Downloads/sdm/affiliations.csv",
):
    """
    Create one 'Affiliation' node per row of a CSV file (with header
    row), copying every CSV column onto the node as a property.

    csv_url: URL given to Cypher's LOAD CSV. The default is the location
        that used to be hard-coded, so a call without arguments behaves
        as before. The value is inserted into the query text as-is and
        therefore must not contain a double quote.
    """
    # %-formatting rather than str.format/f-strings keeps the Cypher text
    # free of brace escaping.
    query = """
        LOAD CSV WITH HEADERS FROM "%s" AS x
        CREATE(n: Affiliation)
        SET n=x
    """ % (csv_url,)
    run_query(query=query)

In [18]:
# Load the Affiliation nodes; the query uses CREATE, so running it again adds the rows a second time.
create_affiliation_nodes()

Create `Keyword` nodes

In [19]:
def create_keyword_nodes(
    csv_url="file:///Users/mohammadzainabbas/Downloads/sdm/keywords.csv",
):
    """
    Create one 'Keyword' node per row of a CSV file (with header row),
    copying every CSV column onto the node as a property.

    csv_url: URL given to Cypher's LOAD CSV. The default is the location
        that used to be hard-coded, so a call without arguments behaves
        as before. The value is inserted into the query text as-is and
        therefore must not contain a double quote.
    """
    # %-formatting rather than str.format/f-strings keeps the Cypher text
    # free of brace escaping.
    query = """
        LOAD CSV WITH HEADERS FROM "%s" AS x
        CREATE(n: Keyword)
        SET n=x
    """ % (csv_url,)
    run_query(query=query)

In [20]:
# Load the Keyword nodes from the keywords CSV.
create_keyword_nodes()

Create `Author` nodes

In [21]:
def create_author_nodes(
    csv_url="file:///Users/mohammadzainabbas/Downloads/sdm/authors.csv",
):
    """
    Create one 'Author' node per row of a CSV file (with header row).

    Every CSV column is copied onto the node as a property; 'author_id'
    is then replaced by the result of toInteger() on the CSV value.

    csv_url: URL given to Cypher's LOAD CSV. The default is the location
        that used to be hard-coded, so a call without arguments behaves
        as before. The value is inserted into the query text as-is and
        therefore must not contain a double quote.
    """
    # %-formatting rather than str.format/f-strings keeps the Cypher text
    # free of brace escaping.
    query = """
        LOAD CSV WITH HEADERS FROM "%s" AS x
        WITH x, toInteger(x.author_id) AS author_id
        CREATE (n: Author)
        SET n=x, n.author_id=author_id
    """ % (csv_url,)
    run_query(query=query)

In [22]:
# Load the Author nodes from the authors CSV.
create_author_nodes()

Create `Document` nodes

In [23]:
def create_document_nodes(
    csv_url="file:///Users/mohammadzainabbas/Downloads/sdm/documents.csv",
):
    """
    Create one 'Document' node per row of a CSV file (with header row).

    Every CSV column is copied onto the node as a property;
    'document_id' is then replaced by the result of toInteger() on the
    CSV value.

    csv_url: URL given to Cypher's LOAD CSV. The default is the location
        that used to be hard-coded, so a call without arguments behaves
        as before. The value is inserted into the query text as-is and
        therefore must not contain a double quote.
    """
    # %-formatting rather than str.format/f-strings keeps the Cypher text
    # free of brace escaping.
    query = """
        LOAD CSV WITH HEADERS FROM "%s" AS x
        WITH x, toInteger(x.document_id) AS document_id
        CREATE (n: Document)
        SET n=x, n.document_id=document_id
    """ % (csv_url,)
    run_query(query=query)

In [24]:
# Load the Document nodes from the documents CSV.
create_document_nodes()

### Create Relationships

Create relationship between `Document` and `Author` nodes

In [27]:
def create_document_author_relation(
    csv_url="file:///Users/mohammadzainabbas/Downloads/sdm/document_author.csv",
):
    """
    Link documents to their authors from a CSV file (with header row).

    For each row, 'author_id' and 'document_id' are converted with
    toInteger(), the matching 'Author' and 'Document' nodes are looked
    up, and a (Document)-[:written_by]->(Author) relationship is
    created. Rows for which either node is not found create nothing.

    csv_url: URL given to Cypher's LOAD CSV. The default is the location
        that used to be hard-coded, so a call without arguments behaves
        as before. The value is inserted into the query text as-is and
        therefore must not contain a double quote.
    """
    # %-formatting rather than str.format/f-strings: the Cypher below
    # contains literal braces that would otherwise need escaping.
    query = """
        LOAD CSV WITH HEADERS FROM "%s" AS x
        WITH toInteger(x.author_id) AS auth_id, toInteger(x.document_id) AS doc_id
        MATCH (a:Author {author_id: auth_id}), (b:Document {document_id: doc_id})
        CREATE (b)-[r:written_by]->(a)
    """ % (csv_url,)
    run_query(query=query)

In [29]:
# Create the written_by relationships; the query uses CREATE, so running it again duplicates them.
create_document_author_relation()

Create relationship between `Document` and `Keyword` nodes

In [30]:
def create_document_keyword_relation(
    csv_url="file:///Users/mohammadzainabbas/Downloads/sdm/document_keyword.csv",
):
    """
    Link documents to keywords from a CSV file (with header row).

    For each row, 'document_id' is converted with toInteger(), the
    'Document' with that id and the 'Keyword' whose 'name' equals the
    row's 'keyword' column are looked up, and a
    (Document)-[:has]->(Keyword) relationship is created. Rows for which
    either node is not found create nothing.

    csv_url: URL given to Cypher's LOAD CSV. The default is the location
        that used to be hard-coded, so a call without arguments behaves
        as before. The value is inserted into the query text as-is and
        therefore must not contain a double quote.
    """
    # %-formatting rather than str.format/f-strings: the Cypher below
    # contains literal braces that would otherwise need escaping.
    query = """
        LOAD CSV WITH HEADERS FROM "%s" AS x
        WITH x, toInteger(x.document_id) AS doc_id
        MATCH (a:Document {document_id: doc_id}), (b:Keyword {name: x.keyword})
        CREATE (a)-[r:has]->(b)
    """ % (csv_url,)
    run_query(query=query)

In [31]:
# Create the Document-has-Keyword relationships; the query uses CREATE, so running it again duplicates them.
create_document_keyword_relation()

Create relationship between `Author` and `Keyword` nodes

In [32]:
def create_author_keyword_relation(
    csv_url="file:///Users/mohammadzainabbas/Downloads/sdm/author_keyword.csv",
):
    """
    Link authors to keywords from a CSV file (with header row).

    For each row, 'author_id' is converted with toInteger(), the
    'Author' with that id and the 'Keyword' whose 'name' equals the
    row's 'keyword' column are looked up, and an
    (Author)-[:has]->(Keyword) relationship is created. Rows for which
    either node is not found create nothing.

    csv_url: URL given to Cypher's LOAD CSV. The default is the location
        that used to be hard-coded, so a call without arguments behaves
        as before. The value is inserted into the query text as-is and
        therefore must not contain a double quote.
    """
    # %-formatting rather than str.format/f-strings: the Cypher below
    # contains literal braces that would otherwise need escaping.
    query = """
        LOAD CSV WITH HEADERS FROM "%s" AS x
        WITH x, toInteger(x.author_id) AS auth_id
        MATCH (a:Author {author_id: auth_id}), (b:Keyword {name: x.keyword})
        CREATE (a)-[r:has]->(b)
    """ % (csv_url,)
    run_query(query=query)

In [33]:
# Create the Author-has-Keyword relationships; the query uses CREATE, so running it again duplicates them.
create_author_keyword_relation()

Create relationship between `Author` and `Affiliation` nodes

In [38]:
def create_author_affiliation_relation(
    csv_url="file:///Users/mohammadzainabbas/Downloads/sdm/author_affiliation.csv",
):
    """
    Link authors to affiliations from a CSV file (with header row).

    For each row, 'author_id' is converted with toInteger(), the
    'Author' with that id and the 'Affiliation' whose 'name' equals the
    row's 'affiliation' column are looked up, and an
    (Author)-[:affiliated_with]->(Affiliation) relationship is created.
    Rows for which either node is not found create nothing.

    csv_url: URL given to Cypher's LOAD CSV. The default is the location
        that used to be hard-coded, so a call without arguments behaves
        as before. The value is inserted into the query text as-is and
        therefore must not contain a double quote.
    """
    # %-formatting rather than str.format/f-strings: the Cypher below
    # contains literal braces that would otherwise need escaping.
    query = """
        LOAD CSV WITH HEADERS FROM "%s" AS x
        WITH x, toInteger(x.author_id) AS auth_id
        MATCH (a:Author {author_id: auth_id}), (b:Affiliation {name: x.affiliation})
        CREATE (a)-[r:affiliated_with]->(b)
    """ % (csv_url,)
    run_query(query=query)

In [39]:
# Create the affiliated_with relationships; the query uses CREATE, so running it again duplicates them.
create_author_affiliation_relation()

Create relationship between `Document` and `Journal` nodes

In [40]:
def create_document_journal_relation(
    csv_url="file:///Users/mohammadzainabbas/Downloads/sdm/document_journal.csv",
):
    """
    Link documents to journals from a CSV file (with header row).

    For each row, 'document_id' is converted with toInteger(), the
    'Document' with that id and every 'Journal' whose 'name' equals the
    row's 'source_title' column are looked up, and a
    (Document)-[:published_in]->(Journal) relationship is created for
    each match. Rows for which either node is not found create nothing.

    csv_url: URL given to Cypher's LOAD CSV. The default is the location
        that used to be hard-coded, so a call without arguments behaves
        as before. The value is inserted into the query text as-is and
        therefore must not contain a double quote.
    """
    # %-formatting rather than str.format/f-strings: the Cypher below
    # contains literal braces that would otherwise need escaping.
    query = """
        LOAD CSV WITH HEADERS FROM "%s" AS x
        WITH x, toInteger(x.document_id) AS doc_id
        MATCH (d:Document {document_id: doc_id}), (j:Journal {name: x.source_title})
        CREATE (d)-[r:published_in]->(j)
    """ % (csv_url,)
    run_query(query=query)

In [41]:
# Create the published_in relationships; the query uses CREATE, so running it again duplicates them.
create_document_journal_relation()