# Load cell definitions from Interlex

This notebook loads cell definitions from InterLex, accessed through the SciCrunch API.

In [45]:
import os 

def get_csv_path(csv_file):
    """Return the path of `csv_file` inside the basal-ganglia cells CSV folder.

    The folder is resolved with os.path.realpath relative to the current
    working directory, and every backslash in the joined path is turned into
    a forward slash.
    """
    cells_dir = os.path.realpath("Data/csvs/basal_ganglia/cells")
    joined = os.path.join(cells_dir, csv_file)
    # Splitting on backslashes and re-joining with "/" normalises Windows separators.
    return "/".join(joined.split("\\"))

In [46]:
import urllib, json
from dotenv import load_dotenv

# Load settings from a local .env file (python-dotenv) so the API token
# does not have to be hard-coded in the notebook.
load_dotenv()

# os.getenv returns None when SCICRUNCH_API_TOKEN is not set.
apiToken = os.getenv("SCICRUNCH_API_TOKEN")

# Request headers sent with every SciCrunch request made by getSearchRes.
headers = {
    "x-api-key": apiToken
}

# NOTE(review): only referenced by a commented-out call in this notebook;
# looks like a leftover from manual testing - confirm before removing.
constData = {}

def getSearchRes(curie):
    """Look up an ontology term in InterLex by CURIE via the SciCrunch API.

    API docs (Swagger):
    https://scicrunch.org/browse/api-docs/index.html?url=https://scicrunch.org/swagger-docs/swagger.json
    Example request:
    https://scicrunch.org/api/1/ilx/search/curie/SAO%3A844118448

    Parameters
    ----------
    curie : str
        Compact identifier such as "SAO:844118448" or "ILX:0109349".

    Returns
    -------
    tuple
        (scId, definition, definitionRef): the "id" and "definition" fields
        of the response's "data" object and the "iri" of its first
        "existing_ids" entry. Fields that are not available are returned as
        empty strings; all three are "" when "data" is null.

    Raises
    ------
    urllib.error.URLError / urllib.error.HTTPError
        If the request fails.
    KeyError
        If the response does not have the expected fields.
    """
    # `import urllib` alone does not load the `request` and `parse`
    # submodules, so import them explicitly where they are used.
    import urllib.parse
    import urllib.request

    # Percent-encode the CURIE (":" -> "%3A") so it is a safe URL path segment.
    encoded_curie = urllib.parse.quote(str(curie), safe="")
    url = "https://scicrunch.org/api/1/ilx/search/curie/%s" % encoded_curie
    req = urllib.request.Request(url, headers=headers)

    # The context manager closes the HTTP response even if parsing fails.
    with urllib.request.urlopen(req) as response:
        data = json.loads(response.read())
    resData = data["data"]

    definition = ""
    definitionRef = ""
    scId = ""
    if resData is not None:
        scId = resData["id"]
        definition = resData["definition"]
        existing_ids = resData["existing_ids"]
        if existing_ids:
            # The first existing id is used as the reference IRI.
            definitionRef = existing_ids[0]["iri"]

    return (scId, definition, definitionRef)


#constData = getSearchRes("Microglia","SAO:789292116")

In [56]:
## Store in csv

import pandas as pd

# Source tables: one CSV per level of the cell hierarchy.
cell_classes_csv = pd.read_csv(get_csv_path("cell_classes.csv"))
cell_types_csv = pd.read_csv(get_csv_path("cell_types.csv"))
cell_groups_csv = pd.read_csv(get_csv_path("cell_groups.csv"))

# Parallel column lists: position i across all six lists describes one
# description row. retrieveDesc fills them through df_obj.
descId_columns = []
desc_columns = []
iri_columns = []
cell_type_ids = []
cell_class_ids = []
cell_group_ids = []

# Column name -> list view over the lists above (same list objects, not copies).
df_obj = dict(
    id=descId_columns,
    description=desc_columns,
    iri=iri_columns,
    cell_type_id=cell_type_ids,
    cell_class_id=cell_class_ids,
    cell_group_id=cell_group_ids,
)

def retrieveDesc(row, cell_type):
    """Fetch the InterLex description for one CSV row and record it in df_obj.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series from iterrows)
        Must provide "Ontological_identifier" and "ID".
    cell_type : str
        "type", "class" or "group"; selects which df_obj id column receives
        row["ID"]. Any other value records the description without a cell id.

    Rows without an ontological identifier are ignored. When the lookup
    returns no InterLex id the row is skipped as well: storing it would
    create a blank-id row that every later failed lookup would then match
    and overwrite, conflating unrelated cells.
    """
    curie = row["Ontological_identifier"]
    if pd.isna(curie):
        return
    print(curie)
    scId, desc, descRef = getSearchRes(curie)
    print(scId, descRef)

    if scId is None or scId == "":
        # No InterLex entry: nothing meaningful to store for this row.
        print("No InterLex entry found for", curie)
        return

    id_columns = {
        "type": "cell_type_id",
        "class": "cell_class_id",
        "group": "cell_group_id",
    }
    target_column = id_columns.get(cell_type)

    if scId in df_obj["id"]:
        # Description already recorded: only link this cell to the existing row.
        index = df_obj["id"].index(scId)
        if target_column is not None:
            df_obj[target_column][index] = row["ID"]
    else:
        df_obj["id"].append(scId)
        df_obj["description"].append(desc)
        df_obj["iri"].append(descRef)
        # Every id column gets exactly one new entry so the lists stay aligned.
        for column in id_columns.values():
            df_obj[column].append(row["ID"] if column == target_column else "")
    
    
# Resolve descriptions level by level: classes first, then types, then groups.
for cells_csv, level in (
    (cell_classes_csv, "class"),
    (cell_types_csv, "type"),
    (cell_groups_csv, "group"),
):
    for index, row in cells_csv.iterrows():
        retrieveDesc(row, level)

# Assemble the filled column lists into a table and write it next to the
# input CSVs.
df_descriptions_obj = dict(
    id=descId_columns,
    description=desc_columns,
    iri=iri_columns,
    cell_type_id=cell_type_ids,
    cell_class_id=cell_class_ids,
    cell_group_id=cell_group_ids,
)

df_descriptions = pd.DataFrame(df_descriptions_obj)
df_descriptions.to_csv(get_csv_path("cell_description.csv"), index=False)


ILX:0109349
9350 http://uri.interlex.org/base/ilx_0109349
NLXCELL:1003113
5665 http://uri.neuinfo.org/nif/nifstd/nlx_cell_1003113
SAO:789292116
6920 http://uri.interlex.org/base/ilx_0106919
SAO:1394521419
948 http://uri.interlex.org/base/ilx_0100947
SAO:844118448
7992 http://uri.interlex.org/base/ilx_0107991
NLX:148005
2132 http://uri.neuinfo.org/nif/nifstd/nlx_148005
ILX:0107497
7498 http://uri.interlex.org/base/ilx_0107497
SAO:313023570
4635 http://uri.interlex.org/base/ilx_0104634
NLX:148043
4506 http://uri.neuinfo.org/nif/nifstd/nlx_148043
SAO:1394521419
948 http://uri.interlex.org/base/ilx_0100947
SAO:185843373
6443 http://uri.interlex.org/base/ilx_0106442
SAO:789292116
6920 http://uri.interlex.org/base/ilx_0106919
NLX:147837
10555 http://uri.neuinfo.org/nif/nifstd/nlx_147837
SAO:6363641087
7671 http://uri.interlex.org/base/ilx_0107670
SAO:1417703748
7498 http://uri.interlex.org/base/ilx_0107497
SAO:313023570
4635 http://uri.interlex.org/base/ilx_0104634


In [54]:
# Scratch check of the pattern used by retrieveDesc: find a row by its id,
# then patch the value at the same position in a sibling column.
test = dict(
    id=["a", "b", "c"],
    links=["", "", ""],
    links2=["", "3", ""],
)

if "b" in test["id"]:
    index = test["id"].index("b")
    test["links"][index] = "2"

print(test)


{'id': ['a', 'b', 'c'], 'links': ['', '2', ''], 'links2': ['', '3', '']}


In [8]:
from neo4j import GraphDatabase, basic_auth
from dotenv import load_dotenv
import os

# Load settings from a local .env file (python-dotenv) so the Neo4j
# credentials are not hard-coded in the notebook.
load_dotenv()

# os.getenv returns None for a variable that is not set.
neo4jUser = os.getenv("NEO4J_USER")
neo4jPwd = os.getenv("NEO4J_PASSWORD")

# Driver for the local Neo4j instance over Bolt; used by addDescription.
driver = GraphDatabase.driver("bolt://localhost:7687",auth=basic_auth(neo4jUser, neo4jPwd))


def addDescription(nodeType, cellId, descIs, desc, descRef):
    """Store a cell description in Neo4j and link it to its cell node.

    Parameters
    ----------
    nodeType : str
        Label of the cell node to attach the description to. Cypher cannot
        take labels as query parameters, so the label is validated as a plain
        identifier and interpolated into the query text.
    cellId
        Value of the `id` property of the cell node.
    descIs
        Id of the description; stored as `id` on the CellDescription node.
    desc : str
        Description text.
    descRef : str
        IRI the description was taken from.

    Raises
    ------
    ValueError
        If `nodeType` is not a plain identifier.

    Notes
    -----
    MERGE is used for both the description node and the relationship so that
    re-running the notebook does not create duplicates.
    """
    import re

    if not isinstance(nodeType, str) or re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", nodeType) is None:
        raise ValueError("Invalid node label: %r" % (nodeType,))

    # NOTE(review): the relationship type HAS_DESCRIPTION replaces an
    # EXCLUSION_REASON left over from a copied query - confirm against the
    # graph schema.
    query = """
    MATCH (a:%s { id: $cellId })
    MERGE (c:CellDescription { id: $descId })
    SET c.description = $description, c.iri = $iri
    MERGE (a)-[:HAS_DESCRIPTION]->(c)
    RETURN c.id
    """ % nodeType

    # All values travel as query parameters, never as query text.
    parameters = {
        "cellId": cellId,
        "descId": descIs,
        "description": desc,
        "iri": descRef,
    }

    with driver.session() as session:
        res = session.run(query, parameters)
        # Consume the result while the session is still open.
        updated_ids = res.value()

    if updated_ids:
        print("Updated description:", updated_ids[0])
    else:
        # MATCH found no cell node, so nothing was created or linked.
        print("No %s node found with id:" % nodeType, cellId)

