In [10]:
import os

import pandas as pd
from neo4j import GraphDatabase, basic_auth
from neo4j.exceptions import ClientError, DatabaseError

# Connection settings can be overridden through the environment (NEO4J_URI,
# NEO4J_USER, NEO4J_PASSWORD); the fallbacks are the original hard-coded
# values, so the notebook still runs without any configuration.
# NOTE(review): a password stored in a notebook should be treated as exposed -
# rotate it and drop the fallback once NEO4J_PASSWORD is set everywhere.
driver = GraphDatabase.driver(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    auth=basic_auth(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "Kaffe1234"),
    ),
)

In [11]:
def get_count_of_type(label, session):
    """Print how many nodes with the given label the database contains.

    Args:
        label: Node label; it is interpolated directly into the Cypher text.
        session: Object whose ``run(query)`` returns a result exposing
            ``value()``; the first value is reported as the count.

    Returns:
        None. The count is only printed. It is the total number of matching
        nodes, even though the message says "Added".
    """
    cypher = "MATCH (n:%s) RETURN count(n)" % label
    counts = session.run(cypher).value()
    print("Added", counts[0], "nodes of type", label)
    
def get_count_of_relationship(rel_label, fromLabel, toLabel, session):
    """Print how many relationships of one type connect two node labels.

    Args:
        rel_label: Relationship type to count.
        fromLabel: Label of the node on the left-hand side of the pattern.
        toLabel: Label of the node on the right-hand side of the pattern.
        session: Object whose ``run(query)`` returns a result exposing
            ``value()``; the first value is reported as the count.

    Returns:
        None. The count is only printed.
    """
    # The pattern carries no arrow, so it matches the relationship in either
    # direction between the two labels.
    # NOTE(review): with the same label on both ends an undirected pattern
    # presumably matches every relationship twice - confirm before using the
    # helper that way.
    labels = (fromLabel, rel_label, toLabel)
    cypher = "MATCH (:%s)-[r:%s]-(:%s) RETURN count(*)" % labels
    counts = session.run(cypher).value()
    print("Added", counts[0], "relationships of type", fromLabel, rel_label, toLabel)

def get_csv_path(csv_file):
    """Return the absolute, forward-slash path of a CSV in the sources folder.

    Args:
        csv_file: File name relative to ``Data/csvs/basal_ganglia/sources``,
            which is itself resolved against the current working directory.

    Returns:
        The joined path as a string with every backslash replaced by "/".
    """
    sources_dir = os.path.realpath("Data/csvs/basal_ganglia/sources")
    native_path = os.path.join(sources_dir, csv_file)
    # Callers embed the result in a file:/// URL, so backslashes are turned
    # into forward slashes.
    return "/".join(native_path.split("\\"))

In [12]:
# Delete all source-related nodes (with their relationships) and drop the
# schema objects listed below, so the loading cells can be re-run.
with driver.session() as session:
    # consume() waits for the statement to finish, so a failure surfaces here
    # rather than at some later point.
    session.run('''
        MATCH (n)
        WHERE  n:Source
            OR n:SourceOrigin
            OR n:ConsideredPaper
            OR n:ExclusionReason
        DETACH DELETE n
    ''').consume()

    # One run() call carries one Cypher statement, so every schema command is
    # sent on its own instead of being packed into a single query text.
    schema_cleanup = (
        "DROP CONSTRAINT ON (n:Source) ASSERT n.id IS UNIQUE",
        # NOTE(review): no cell visible here creates an index on :Cell(Source);
        # the loading cells create :Source(name) and :ConsideredPaper(id).
        # Kept as written - confirm which index this was meant to drop.
        "DROP INDEX ON :Cell(Source)",
    )
    for statement in schema_cleanup:
        try:
            session.run(statement).consume()
        except (ClientError, DatabaseError) as error:
            # Typically the constraint/index simply does not exist (e.g. on a
            # fresh database); report it and keep going so the wipe completes.
            print("Skipped:", statement, "-", error)
    print("wipeddatabase")

wipeddatabase


In [13]:
# Create one Source node per row of sources.csv.
# NOTE(review): the `insertedData` property is filled from the Inserted_date
# column - possibly meant to be `insertedDate`. Left unchanged because other
# queries may already read it.

csv_file_path = "file:///%s" % get_csv_path("sources.csv")
query="""
        LOAD CSV WITH HEADERS FROM "%s" AS row
        CREATE (:Source {id: row.ID, title: row.Source_title, type: row.Source_type, insertedData: row.Inserted_date, publicationYear: toInt(row.Source_publication_year), sourceName: row.Source_name})
    """ % csv_file_path

with driver.session() as session:
    # Source nodes keep the CSV's Source_name column in `sourceName` (see the
    # CREATE above) and have no `name` property, so the index is declared on
    # `sourceName`; an index on `name` would never hold an entry.
    session.run("CREATE INDEX ON :Source(sourceName)")
    # Declared before the load so uniqueness of `id` is enforced while the
    # rows are being inserted.
    session.run("CREATE CONSTRAINT ON (n:Source) ASSERT n.id IS UNIQUE")
    session.run(query)
    get_count_of_type("Source", session)

Added 246 nodes of type Source


In [14]:
# Create one SourceOrigin node per row of source_origins_lookup.csv
# (columns used: ID, Source_name, Identifier).

csv_file_path = "file:///" + get_csv_path("source_origins_lookup.csv")
query = """
        LOAD CSV WITH HEADERS FROM "%s" AS row
        CREATE (:SourceOrigin {id: row.ID, name: row.Source_name, identifier: row.Identifier})
    """ % csv_file_path

with driver.session() as session:
    session.run(query)
    # Report the total number of SourceOrigin nodes now in the graph.
    get_count_of_type("SourceOrigin", session)

Added 363 nodes of type SourceOrigin


In [15]:
# Create one ConsideredPaper node per row of considered_papers.csv, then flag
# the papers whose decision in considered_papers_desicions.csv says "Included".
## TODO try to connect considered_paper and publication
## based on first auth + year (published) with Source_name

csv_file_path = "file:///" + get_csv_path("considered_papers.csv")
query = """
        LOAD CSV WITH HEADERS FROM "%s" AS row
        CREATE (:ConsideredPaper {id: row.ID, title: row.Title, publishedYear: toInt(row.Published), firstAuthor: row.First_author, isIncluded: false})
    """ % csv_file_path

with driver.session() as session:
    # Every paper starts with isIncluded = false; the second query flips it.
    session.run(query)
    # The index on id is in place before the MATCH-by-id lookups below run.
    session.run("create index on :ConsideredPaper(id)")
    get_count_of_type("ConsideredPaper", session)

csv_file_path = "file:///" + get_csv_path("considered_papers_desicions.csv")

# The (?i) flag makes the "Included" match case-insensitive.
included_query = """
        LOAD CSV WITH HEADERS FROM "%s" AS row
        WITH row WHERE row.Decision =~ ".*(?i)Included.*"
        MATCH (n:ConsideredPaper {id: row.Paper})
        SET n.isIncluded = true
        RETURN COUNT(n)
    """ % csv_file_path

with driver.session() as session:
    res = session.run(included_query)
    print("Number of considered papers that are included:", res.value()[0])

Added 2204 nodes of type ConsideredPaper
Number of considered papers that are included: 244


In [16]:
### Turn every non-"included" decision in considered_papers_desicions.csv into
### an ExclusionReason node and link each ConsideredPaper with that decision
### to it.
## A paper without an EXCLUSION_REASON relationship is an included one.

csv_file_path = "file:///%s" % get_csv_path("considered_papers_desicions.csv")
df = pd.read_csv(csv_file_path)

exclusion_reasons = df["Decision"].unique()

# Decision texts come straight from the CSV, so they are passed as query
# parameters ($decision / $reason) instead of being spliced into quoted Cypher
# literals: a quote or backslash in a decision can no longer break or alter
# the statement. Both query texts are therefore the same for every reason.

# MERGE rather than CREATE: two raw decisions that normalise to the same
# reason share one node instead of producing duplicates.
q = "MERGE (:ExclusionReason {reason: $reason})"

# Relationship EXCLUSION_REASON between ConsideredPaper and ExclusionReason
query = """
        LOAD CSV WITH HEADERS FROM "%s" AS row
        WITH row WHERE row.Decision = $decision
        MATCH (a:ConsideredPaper {id: row.Paper})
        MATCH (c:ExclusionReason {reason: $reason})
        MERGE (a)-[:EXCLUSION_REASON]->(c)
    """ % csv_file_path

with driver.session() as session:
    for reason in exclusion_reasons:
        # Skip non-string values (empty cells are read as NaN) and every
        # decision that marks the paper as included.
        if not isinstance(reason, str) or "included" in reason.lower():
            continue

        parameters = {
            # Raw CSV value, used to select the matching rows.
            "decision": reason,
            # Same text without the "Excluded: " prefix, stored on the node.
            "reason": reason.replace("Excluded: ", ""),
        }
        session.run(q, parameters)
        session.run(query, parameters)

    get_count_of_type("ExclusionReason", session)
    get_count_of_relationship("EXCLUSION_REASON", "ConsideredPaper", "ExclusionReason", session)



Added 13 nodes of type ExclusionReason
Added 2113 relationships of type ConsideredPaper EXCLUSION_REASON ExclusionReason


In [19]:
# COLLECTED_FROM: link every Source to the SourceOrigin whose id equals the
# row's Source_origin value in sources.csv.
csv_file_path = "file:///" + get_csv_path("sources.csv")
query = """
        LOAD CSV WITH HEADERS FROM "%s" AS row
        MATCH (a:Source { id: row.ID})
        MATCH (c:SourceOrigin { id: row.Source_origin })
        MERGE (a)-[:COLLECTED_FROM]->(c)
    """ % csv_file_path

with driver.session() as session:
    # MERGE keeps the relationship unique per (Source, SourceOrigin) pair.
    session.run(query)
    get_count_of_relationship(
        "COLLECTED_FROM", "Source", "SourceOrigin", session
    )

Added 245 relationships of type Source COLLECTED_FROM SourceOrigin


In [20]:
# COLLECTED_FROM: link every ConsideredPaper to the SourceOrigin whose id
# equals the row's Journal value in considered_papers.csv.
csv_file_path = "file:///" + get_csv_path("considered_papers.csv")
query = """
        LOAD CSV WITH HEADERS FROM "%s" AS row
        MATCH (a:ConsideredPaper { id: row.ID})
        MATCH (c:SourceOrigin { id: row.Journal })
        MERGE (a)-[:COLLECTED_FROM]->(c)
    """ % csv_file_path

with driver.session() as session:
    # MERGE keeps the relationship unique per (ConsideredPaper, SourceOrigin) pair.
    session.run(query)
    get_count_of_relationship(
        "COLLECTED_FROM", "ConsideredPaper", "SourceOrigin", session
    )

Added 2134 relationships of type ConsideredPaper COLLECTED_FROM SourceOrigin
