In [46]:
from neo4j import GraphDatabase, basic_auth
import os

driver = GraphDatabase.driver("bolt://localhost:7687",auth=basic_auth("neo4j", "Kaffe1234"))

In [47]:
def get_count_of_type(label, session):
    q = "MATCH (n:%s) RETURN count(n)" % label
    res = session.run(q)
    print("Added", res.value()[0], "nodes of type", label)
    
def get_count_of_relationship(label, session):
    q = "MATCH ()-[r:%s]-() RETURN count(*)" %label
    res = session.run(q)
    print("Added", res.value()[0], "relationships of type", label)

def get_csv_path(csv_file):
    path_all_csv = os.path.realpath("Data/csvs/basal_ganglia/regions")
    return os.path.join(path_all_csv, csv_file).replace("\\","/")

In [98]:
## If newly imported csvs of brain region, remove prefix in names and merge to one csv
import pandas as pd

root = "Data/csvs/basal_ganglia/regions"

# Load region csvs, add preferred value and a prefix to the ID to keep them unique
df_region = pd.read_csv(root + "/regions.csv", dtype="object")
df_region["preferred"] = 1

df_region_other = pd.read_csv(root + "/regions_other.csv", dtype="object")
df_region_other["preferred"] = 0
# Couldnt do this bc foregin keys: df_region_other.ID = [10*x for x in df_region_other["ID"]]

df_both = df_region.append(df_region_other)

# Remove the prefix of the name as it only adds a relation to the nomenclature which we again add later
df_both.Region_name = [re.sub(r'\w*\_','', str(x)) for x in df_both['Region_name']]

# Store in common csv
df_both.to_csv(root + "/all_regions.csv", encoding='utf-8')
print("Csv with all regions fixed in all_regions.csv")

Csv with all regions fixed in all_regions.csv


In [99]:
# Deleting all region data in database
with driver.session() as session:
        session.run('''
            MATCH (n)
            WHERE  n:BrainRegion 
                OR n:RegionZone 
                OR n:RegionRecord 
                OR n:Nomenclature
            DETACH DELETE n
        ''')
        print("wipeddatabase")

wipeddatabase


In [100]:
# Adding all regions to graph with BrainRegion label
csv_file_path = "file:///%s" % get_csv_path("all_regions.csv")
query="""
        LOAD CSV WITH HEADERS FROM "%s" AS row
        CREATE (d:BrainRegion {id: row.ID, name: row.Region_name, abbreviation: row.Abbreviation, comments: row.Comments, preferred: row.preferred})
    """ % csv_file_path

with driver.session() as session:    
    session.run(query)
    get_count_of_type("BrainRegion", session)

Added 302 nodes of type BrainRegion


In [101]:
# Adding region_records to graph with RegionRecord label
csv_file_path = "file:///%s" % get_csv_path("region_records.csv")
query="""
        LOAD CSV WITH HEADERS FROM "%s" AS row
        CREATE (:RegionRecord {id: row.ID, name: row.Region_record_name, coverage: row.Coverage, specificity: row.Specificity, no_original_regions: row.No_original_regions, original_region_retained: row.Original_region_retained, parcellation_scheme: row.Parcellation_scheme, atlas_coordinates: row.Atlas_coordinates, illustration: row.Illustration,semantic_description: row.Semantic_description, annotated_images: row.Annotated_images,regional_characteristics: row.Regional_characteristics,atlas_reg: row.Atlas_reg,serial_sections: row.Serial_sections,collectors_comment: row.Collectors_comment,original_framework: row.Original_framework,documentation_score: row.Documentation_score})
    """ % csv_file_path

with driver.session() as session:    
    session.run(query)
    get_count_of_type("RegionRecord", session)

Added 313 nodes of type RegionRecord


In [102]:
# Adding nomenclatures to graph with Nomenclature label
csv_file_path = "file:///%s" % get_csv_path("nomenclatures.csv")
query="""
        LOAD CSV WITH HEADERS FROM "%s" AS row
        CREATE (:Nomenclature {id: row.ID, name: row.Nomenclature_name, version: row.Version, authors: row.Authors, published: row.Published, publication_type: row.Publication_type, doi: row.DOI})
    """ % csv_file_path

## TODO remember to add relationship to strain and species

with driver.session() as session:    
    session.run(query)
    get_count_of_type("Nomenclature", session)

Added 3 nodes of type Nomenclature


In [103]:
# Adding nomebclatures_other to graph with Nomenclature label
csv_file_path = "file:///%s" % get_csv_path("nomenclatures_other.csv")
query="""
        LOAD CSV WITH HEADERS FROM "%s" AS row
        CREATE (:Nomenclature {id: row.ID, name: row.Nomenclature_name, version: row.Version, authors: row.Authors, published: row.Published, publication_type: row.Publication_type })
    """ % csv_file_path

## TODO remember to add relationship to strain and species

with driver.session() as session:    
    session.run(query)
    get_count_of_type("Nomenclature", session)

Added 19 nodes of type Nomenclature


In [104]:
# Relationship PRIMARY_REGION between BrainRegion and RegionRecord
csv_file_path = "file:///%s" % get_csv_path("region_records.csv")
query="""
        LOAD CSV WITH HEADERS FROM "%s" AS row
        MATCH (a:RegionRecord {id: row.ID})
        MATCH (c:BrainRegion {id: row.Region, preferred: '1' })
        MERGE (a)-[:PRIMARY_REGION]->(c)
    """ % csv_file_path

with driver.session() as session:
    session.run(query)
    get_count_of_relationship("PRIMARY_REGION", session)

Added 624 relationships of type PRIMARY_REGION


In [105]:
# Relationship SECONDARY_REGION between BrainRegion and RegionRecord
csv_file_path = "file:///%s" % get_csv_path("region_records.csv")
query="""
        LOAD CSV WITH HEADERS FROM "%s" AS row
        MATCH (a:RegionRecord {id: row.ID})
        MATCH (c:BrainRegion {id: row.Secondary_region, preferred: '0' })
        MERGE (a)-[:SECONDARY_REGION]->(c)
    """ % csv_file_path

with driver.session() as session:
    session.run(query)
    get_count_of_relationship("SECONDARY_REGION", session)

Added 36 relationships of type SECONDARY_REGION


In [106]:
# Relationship NAMING between All Regions and Nomenclature
csv_file_path = "file:///%s" % get_csv_path("all_regions.csv")
query="""
        LOAD CSV WITH HEADERS FROM "%s" AS row
        MATCH (a:BrainRegion {id: row.ID})
        MATCH (c:Nomenclature {id: row.Nomenclature })
        MERGE (a)-[:NAMING]->(c)
    """ % csv_file_path

with driver.session() as session:
    session.run(query)
    get_count_of_relationship("NAMING", session)

Added 690 relationships of type NAMING


In [107]:
# Relationship ORIGINAL_FRAMEWORK between Regions_records and Nomenclatures_other
csv_file_path = "file:///%s" % get_csv_path("region_records.csv")
query="""
        LOAD CSV WITH HEADERS FROM "%s" AS row
        MATCH (a:RegionRecord {id: row.ID})
        MATCH (c:Nomenclature {id: row.Original_framework })
        MERGE (a)-[:ORIGINAL_FRAMEWORK]->(c)
    """ % csv_file_path

with driver.session() as session:
    session.run(query)
    get_count_of_relationship("ORIGINAL_FRAMEWORK", session)

Added 574 relationships of type ORIGINAL_FRAMEWORK


In [60]:
# queries to view

q = """
MATCH (n)
WHERE n:BrainRegion OR n:Nomenclature OR n:RegionRecord
RETURN n
"""