In [None]:
%load_ext autoreload
%autoreload 2

In [1]:
from mapknowledge import KnowledgeStore, NERVE_TYPE
from dotenv import load_dotenv
load_dotenv()

sckan_version = 'sckan-2024-09-21'
store = KnowledgeStore(sckan_version=sckan_version, clean_connectivity=True)


[2m2025-06-25 17:27:09[0m [[32m[1minfo     [0m] [1mMap Knowledge version 1.2.2 with no cache[0m [36mtype[0m=[35mknowledge[0m
[2m2025-06-25 17:29:08[0m [[32m[1minfo     [0m] [1mUsing knowledge source: sckan-2024-09-21[0m [36mtype[0m=[35mknowledge[0m


In [2]:
from pprint import pprint

class SCKANNerves:
    def __init__(self, store):
        self.__store = store

        self.__node_nerve_by_path = {
            path: self.__get_path_node_nerves(path)
            for path in self.__store.connectivity_paths()
        }

        self.__path_by_node_nerve = {
            node_nerve: [path for path, nodes in self.__node_nerve_by_path.items() if node_nerve in nodes]
            for node_nerve in {n for nodes in self.__node_nerve_by_path.values() for n in nodes}
        }

        self.__nerve_by_path = {
            path: [
                nerve
                for node in nodes
                for nerve in (node[0], *node[1])
                if self.is_nerve(nerve)
            ]
            for path, nodes in self.__node_nerve_by_path.items()
        }

        self.__path_by_nerve = {
            nerve: [path for path, nerves in self.__nerve_by_path.items() if nerve in nerves]
            for nerve in {n for nerves in self.__nerve_by_path.values() for n in nerves}
        }

    def __get_path_node_nerves(self, path):
        return self.__store.entity_knowledge(path).get('nerves', [])

    def is_nerve(self, term):
        return self.__store.entity_knowledge(term).get('type') == NERVE_TYPE

    @property
    def nerves(self):
        return {
            nerve: self.__store.entity_knowledge(nerve).get('label')
            for nerve in self.__path_by_nerve
        }

    @property
    def node_nerves(self):
        return list(self.__path_by_node_nerve)

    @property
    def path_nerves(self):
        return self.__nerve_by_path

    def get_nerves(self, path):
        return self.__nerve_by_path.get(path, [])

    def get_node_nerves(self, path):
        return self.__node_nerve_by_path.get(path, [])

    def get_path(self, nerve: str | tuple):
        if isinstance(nerve, str):
            return self.__path_by_nerve.get(nerve)
        elif isinstance(nerve, tuple):
            return self.__path_by_node_nerve.get(nerve)
        return None


sckan_nerves = SCKANNerves(store)

[2m2025-06-25 17:29:08[0m [[32m[1minfo     [0m] [1mConsulting NPO for knowledge about ilxtr:neuron-type-keast-4[0m [36mtype[0m=[35mknowledge[0m
[2m2025-06-25 17:29:08[0m [[32m[1minfo     [0m] [1mConsulting NPO for knowledge about ilxtr:neuron-type-keast-19[0m [36mtype[0m=[35mknowledge[0m
[2m2025-06-25 17:29:08[0m [[32m[1minfo     [0m] [1mConsulting NPO for knowledge about ilxtr:neuron-type-keast-13[0m [36mtype[0m=[35mknowledge[0m
[2m2025-06-25 17:29:08[0m [[32m[1minfo     [0m] [1mConsulting NPO for knowledge about ilxtr:neuron-type-keast-17[0m [36mtype[0m=[35mknowledge[0m
[2m2025-06-25 17:29:08[0m [[32m[1minfo     [0m] [1mConsulting NPO for knowledge about ilxtr:neuron-type-keast-18[0m [36mtype[0m=[35mknowledge[0m
[2m2025-06-25 17:29:08[0m [[32m[1minfo     [0m] [1mConsulting NPO for knowledge about ilxtr:neuron-type-keast-16[0m [36mtype[0m=[35mknowledge[0m
[2m2025-06-25 17:29:08[0m [[32m[1minfo     [0m] [1mConsulti

## Investigate the coverage of nerves in SCKAN/Flatmap neuron populations within the human scaffold

This code utilizes the **FMA-to-ILX/UBERON** mapping file (`data/generated/mapped_fma_nerves.csv`), which is generated using `fma_nerve.ipynb`.

In [3]:
## Loading and preprocess mapped_fma_nerves.csv
import pandas as pd
import ast
from collections import defaultdict
from pprint import pprint

from utility import GENERATED_DIR

human_nerve_df = pd.read_csv(GENERATED_DIR / 'mapped_fma_nerves.csv')


In [None]:
## Load set of human nerve terms (ILX/UBERON)

human_nerves = {
    **(human_nerve_df.loc[
        human_nerve_df['Term ID'].notna(), ['Term ID', 'Group name']
    ].set_index('Term ID')['Group name'].to_dict()),
    **{
        x: row['Group name']
        for _, row in human_nerve_df.iterrows()
        if isinstance(row['available'], str)
        for x in ast.literal_eval(row['available'])
    }
}
len(human_nerves)

1239

In [None]:
# Now investigate how many sckan_nerves are covered by human nerves
print('# number of nerves in SCKAN is:', len(sckan_nerves.nerves))
print('# number of nerves in SCKAN covered by human map nerves', len(set(sckan_nerves.nerves.keys())-set(human_nerves.keys())))

# number of nerves in SCKAN is: 116
# number of nerves in SCKAN covered by human map nerves 116


## It is obvious that no nerve term used in human map nerve are used by SCKAN nerves. It seems that, SCKAN nerves use more general nerve terms while human nerves are more specific.

What next?
- check the superclass of each term in human nerve and give the level
  - ILX check on SCICRUNCH
  - UBERON check on uberon.owl
  - FMA check on fma.owl
- for all terms, find existing_ids

In [6]:
import rdflib
from utility import SOURCE_DIR

# load fma.owl and uberon.owl

g_fma = rdflib.Graph()
g_fma.parse(SOURCE_DIR /'fma.owl', format='xml')

g_uberon = rdflib.Graph()
g_uberon.parse(SOURCE_DIR /'uberon.owl', format='xml')

<Graph identifier=Nc54089261ff5427aae6f48f23667b144 (<class 'rdflib.graph.Graph'>)>

In [204]:
g = g_fma + g_uberon

In [264]:
# functions to get superclasses

FMA = rdflib.Namespace("http://purl.org/sig/ont/fma/fma")
RDFS = rdflib.Namespace("http://www.w3.org/2000/01/rdf-schema#")

def curie(uri):
    if isinstance(uri, rdflib.URIRef):
        uri = str(uri)
        if uri.startswith("http://purl.org/sig/ont/fma/fma"):
            return "FMA:" + uri.rsplit("fma", 1)[-1]
        elif uri.startswith("http://purl.obolibrary.org/obo/UBERON_"):
            return uri.replace('http://purl.obolibrary.org/obo/UBERON_', 'UBERON:')
        elif uri.startswith("http://purl.obolibrary.org/obo/CL_"):
            return uri.replace("http://purl.obolibrary.org/obo/CL_", 'CL:')
        return uri
    return uri

def clean_literal(val):
    if isinstance(val, rdflib.Literal):
        if val.datatype and val.datatype.endswith("integer"):
            return int(val)
        elif val.datatype and val.datatype.endswith("float"):
            return float(val)
        return str(val)
    return val

def get_superclasses(g, term):
    query = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX FMA: <http://purl.org/sig/ont/fma/fma>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>

    SELECT ?superclass ?label (COUNT(?mid)-1 AS ?level)
    WHERE {{
    {{
        {term} rdfs:subClassOf* ?mid .
        ?mid rdfs:subClassOf* ?superclass .
    }}

    OPTIONAL {{ ?superclass rdfs:label ?label }}
    ?superclass a owl:Class .
    }}
    GROUP BY ?superclass ?label
    ORDER BY ?level
    """

    rows = []
    for row in g.query(query):
        superclass = curie(row.superclass)
        if superclass == 'FMA:65132' or superclass == 'UBERON:0001021':
            continue
        label = clean_literal(row.label)
        level = clean_literal(row.level)
        rows.append((superclass, label, level))

    return rows

def get_subclasses(g, term):
    query = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX FMA: <http://purl.org/sig/ont/fma/fma>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>

    SELECT ?subclass ?label (COUNT(?mid)-1 AS ?level)
    WHERE {{
    {{
        ?subclass rdfs:subClassOf* ?mid .
        ?mid rdfs:subClassOf* {term} .
    }}

    OPTIONAL {{ ?subclass rdfs:label ?label }}
    ?subclass a owl:Class .
    }}
    GROUP BY ?subclass ?label
    ORDER BY ?level
    """

    rows = []
    for row in g.query(query):
        subclass = curie(row.subclass)
        # if subclass == 'FMA:65132' or subclass == 'UBERON:0001021':
        #     continue
        label = clean_literal(row.label)
        level = clean_literal(row.level)
        rows.append((subclass, label, level))

    return rows

def get_term_by_label(g, label):
    label_clean = label.strip().lower()
    query = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX FMA: <http://purl.org/sig/ont/fma/fma>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>

    SELECT ?term
    WHERE {{
        ?term rdfs:label ?label .
        FILTER(LCASE(STR(?label)) = "{label_clean}")
    }}
    """

    rows = []
    for row in g.query(query):
        term = curie(row.term)
        rows += [term]
    return rows

In [None]:
# get existing_ids of the sckan nerves

from utility import get_existing_term
from tqdm import tqdm

sckan_nerve_existing_ids = {}
for nerve_id in tqdm(sckan_nerves.nerves):
    sckan_nerve_existing_ids[nerve_id] = get_existing_term(nerve_id)

  3%|▎         | 3/116 [00:10<06:21,  3.38s/it]


KeyboardInterrupt: 

In [10]:
# get subclasses of all FMA and UBERON terms in sckan_nerve_existing_ids

sckan_nerve_subclasses = {}
for nerve_id in tqdm(set([x for k, val in sckan_nerve_existing_ids.items() for x in [k]+val])):
    if nerve_id.startswith('UBERON') or nerve_id.startswith('FMA'):
        sckan_nerve_subclasses[nerve_id] = get_subclasses(g, nerve_id)

  0%|          | 0/244 [00:00<?, ?it/s]

100%|██████████| 244/244 [00:03<00:00, 67.93it/s] 


In [74]:
candidate_nerves = {}
for nerve_id, existing_ids in sckan_nerve_existing_ids.items():
    subclasses = set()
    for existing_id in existing_ids:
        subclasses.update([row[0]for row in sckan_nerve_subclasses.get(existing_id, [])])
    if len(subclasses) > 0 and (avail:=subclasses & set(human_nerves.keys())):

        candidate_nerves[nerve_id] = avail

candidate_nerves

{'ILX:0793723': {'FMA:53001', 'FMA:53002'},
 'ILX:0792409': {'FMA:6894', 'FMA:6895'},
 'UBERON:0011326': {'FMA:53530', 'FMA:53536'},
 'ILX:0787082': {'FMA:6082', 'FMA:6084'},
 'UBERON:0011096': {'FMA:52629', 'FMA:52630'},
 'UBERON:0001649': {'FMA:50892', 'FMA:50894'},
 'ILX:0785932': {'FMA:65919', 'FMA:65920'},
 'UBERON:0001492': {'FMA:37070', 'FMA:37071'},
 'UBERON:0018681': {'FMA:6957', 'FMA:6958'},
 'ILX:0791105': {'FMA:6122', 'FMA:6757'},
 'ILX:0789339': {'FMA:53494', 'FMA:53495'},
 'ILX:0793712': {'FMA:52968', 'FMA:52969'},
 'ILX:0788536': {'FMA:65917', 'FMA:65918'},
 'UBERON:0018680': {'FMA:6955', 'FMA:6956'},
 'UBERON:0001322': {'FMA:22111', 'FMA:22112'},
 'UBERON:0011391': {'FMA:21873', 'FMA:21874'},
 'ILX:0788945': {'FMA:6758', 'FMA:6759'},
 'UBERON:0001323': {'FMA:22109', 'FMA:22110'},
 'ILX:0793228': {'FMA:65991', 'FMA:65992'},
 'UBERON:0035111': {'FMA:45434', 'FMA:45435'},
 'UBERON:0001780': {'FMA:6038',
  'FMA:6039',
  'FMA:6088',
  'FMA:6089',
  'FMA:6116',
  'FMA:6117',


In [84]:
sckan_nerve_data = []
for nerve_id, label in sckan_nerves.nerves.items():
    subclasses = list(candidate_nerves.get(nerve_id, []))
    sckan_nerve_data += [{
        'nerve_id': nerve_id,
        'label': label,
        'FMA subclasses (in human nerves)': subclasses,
        'subclass labels':  [human_nerves[s] for s in subclasses]
    }]

In [85]:
df = pd.DataFrame(sckan_nerve_data)
df.to_csv(GENERATED_DIR / 'sckan_nerve_mapping.csv')

In [86]:
df[df['FMA subclasses (in human nerves)'].apply(lambda x: isinstance(x, list) and len(x) > 0)].shape

(61, 4)

In [92]:
df

Unnamed: 0,nerve_id,label,FMA subclasses (in human nerves),subclass labels
0,ILX:0793723,Auriculotemporal nerve,"[FMA:53002, FMA:53001]","[Left auriculotemporal nerve, Right auriculote..."
1,ILX:0792409,gray communicating ramus of tenth thoracic nerve,"[FMA:6895, FMA:6894]","[Left T10 grey ramus communicans, Right T10 gr..."
2,UBERON:0011326,superior laryngeal nerve,"[FMA:53530, FMA:53536]","[Right superior laryngeal nerve, Left superior..."
3,ILX:0793220,white communicating ramus of first lumbar spin...,[],[]
4,ILX:0787082,gray communicating ramus of the first thoracic...,"[FMA:6082, FMA:6084]","[Right T1 grey ramus communicans, Left T1 grey..."
...,...,...,...,...
111,UBERON:0001494,ulnar nerve,"[FMA:37320, FMA:37321]","[Right ulnar nerve, Left ulnar nerve]"
112,ILX:0794959,posterior rami lower cervical nerves,[],[]
113,UBERON:0001493,axillary nerve,"[FMA:37074, FMA:37073]","[Left axillary nerve, Right axillary nerve]"
114,ILX:0787946,gray communicating ramus of sixth thoracic nerve,"[FMA:6762, FMA:6763]","[Right T6 grey ramus communicans, Left T6 grey..."


### Cells to get the nerve subclass structure

In [None]:
query_0 = """
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
# PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
# PREFIX FMA: <http://purl.org/sig/ont/fma/fma>
# PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>

SELECT ?subclass ?superclass WHERE {
  {
    ?subclass rdfs:subClassOf ?superclass
  }
}
"""
results = set()
for row in g.query(query_0):
    subclass = curie(row.subclass)
    superclass = curie(row.superclass)
    results.add((subclass, superclass))

query_1 = """
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
# PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
# PREFIX FMA: <http://purl.org/sig/ont/fma/fma>
# PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>

SELECT ?subclass ?superclass WHERE {
  {
    ?subclass rdfs:subClassOf ?restriction .
    ?restriction owl:onProperty obo:BFO_0000050 ;
                owl:someValuesFrom ?superclass .
  }
}
"""

for row in g.query(query_1):
    subclass = curie(row.subclass)
    superclass = curie(row.superclass)
    results.add((subclass, superclass))


In [None]:
COMBINED = rdflib.URIRef("http://example.org/combined")

new_g = rdflib.Graph()
new_g.bind('FMA', 'http://purl.org/sig/ont/fma/fma')
new_g.bind('UBERON', 'http://purl.obolibrary.org/obo/UBERON_')
new_g.bind('CHEBI', 'http://purl.obolibrary.org/obo/CHEBI_')
new_g.bind('CL', 'http://purl.obolibrary.org/obo/')

for sub, super in results:
    if isinstance(sub, str) and not isinstance(sub, rdflib.BNode):
        if sub.startswith('http'):
            sub = rdflib.URIRef(sub)
        else:
            sub = new_g.namespace_manager.expand_curie(sub)
    if isinstance(super, str) and not isinstance(super, rdflib.BNode):
        if super.startswith('http'):
            super = rdflib.URIRef(super)
        else:
            super = new_g.namespace_manager.expand_curie(super)
    new_g.add((sub, COMBINED, super))

In [300]:
def get_subclasses3(g, term):
    query = f"""
        PREFIX ex: <http://example.org/>
        PREFIX FMA: <http://purl.org/sig/ont/fma/fma>
        PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        PREFIX CL: <http://purl.obolibrary.org/obo/CL_>

        SELECT ?subclass ?mid (COUNT(?intermediate) AS ?level)
        WHERE {{
        ?subclass ex:combined+ ?intermediate .
        ?intermediate ex:combined* ?mid .
        ?mid ex:combined* {term} .

        }}
        GROUP BY ?subclass ?label ?mid
        ORDER BY ?subclass ?level
    """

    rows = []
    for row in g.query(query):
        subclass = curie(row.subclass)
        mid = curie(row.mid)
        level = clean_literal(row.level)
        rows.append((subclass, mid, level))

    return rows

def get_labels(g, term):
    query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX FMA: <http://purl.org/sig/ont/fma/fma>
        PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        PREFIX CL: <http://purl.obolibrary.org/obo/CL_>
        PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>

        SELECT DISTINCT ?label
        WHERE {{
            {{
                {term} rdfs:label ?label .
            }}
            UNION
            {{
                {term} <http://purl.org/sig/ont/fma/synonym> ?label .
            }}
            UNION
            {{
                {term} oboInOwl:hasExactSynonym ?label .
            }}

        }}
    """
    labels = []
    for row in g.query(query):
        labels +=  [str(row.label).lower().strip()]
    return list(set(labels)) if label else [term]


# get_subclasses3(new_g, 'UBERON:0001021')


In [286]:
## Need to store FMA and UBERON nerve structure as a simple json file

subclass_to_superclasses = defaultdict(dict)
superclass_to_subclasses = defaultdict(dict)
for nerve_id in ['UBERON:0001021', 'FMA:65132', 'FMA:61284', 'FMA:65239']:
    for subclass, superclass, distance in get_subclasses3(new_g, nerve_id):
        superclass_to_subclasses[superclass][subclass] = {
            'subclass': subclass,
            'distance': distance
        }
        subclass_to_superclasses[subclass][superclass] = {
            'superclass': superclass,
            'distance': distance
        }

In [301]:
(terms:=set(subclass_to_superclasses.keys())).update(set(superclass_to_subclasses.keys()))
labels = {
    term: get_labels(g, term)
    for term in tqdm(terms)
}

100%|██████████| 5856/5856 [00:22<00:00, 265.03it/s]


In [303]:
import json
with open(GENERATED_DIR / 'UBERON_FMA_Nerve_Hierarchy.json', 'w') as f:
    json.dump(
        {
            'subclass_to_superclasses': subclass_to_superclasses,
            'superclass_to_subclasses': superclass_to_subclasses,
            'labels': labels
        },
        f
    )

In [304]:
labels['FMA:44948']

['posterior cutaneous nerve of arm', 'posterior brachial cutaneous nerve']

## REMOVE DUPLICATES

In [96]:
import pandas as pd
from utility import SOURCE_DIR

In [111]:
df = pd.read_csv(SOURCE_DIR/'nervesWithVagus_annotations.csv')
df.shape

(960, 3)

In [112]:
df[df['Term ID'].notna()]['Term ID'].unique().shape

(826,)

### Check duplicate Term ID

In [103]:
dupes = df[df['Term ID'].notna() & df.duplicated('Term ID', keep=False)]

In [104]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

dupes.sort_values('Term ID')


Unnamed: 0,Term ID,Group name,Unnamed: 2


### Check duplicate Group name

In [105]:
df[df.duplicated('Group name', keep=False)]

Unnamed: 0,Term ID,Group name,Unnamed: 2


### Check empty Term ID

In [124]:
### Group name with no Term ID
df_no_ids = df[df['Term ID'].isna()]
df_no_ids.shape

(134, 3)

In [125]:
from utility import GENERATED_DIR
df_mapped = pd.read_csv(GENERATED_DIR/'mapped_fma_nerves.csv')

In [108]:
merged = pd.merge(df_no_ids, df_mapped, on='Group name', how='inner')

In [123]:
merged.head()

Unnamed: 0.1,Term ID_x,Group name,Unnamed: 2,Unnamed: 0,Term ID_y,available,ILX superclass,FMA superclass,superclass label,relation,level
0,,Brachial plexus left T1 root,,67,,,,,,,
1,,Brachial plexus right T1 root,,84,,,,,,,
2,,Branch of right vagus nerve to oesophageal nerve plexus,previously annotated to FMA:53675 (Branch of greater anterior gastric nerve to coeliac plexus),102,FMA:53675,,,,,,
3,,Branch of right vagus nerve to oesophageal nerve plexus,previously annotated to FMA:53675 (Branch of greater anterior gastric nerve to coeliac plexus),103,FMA:53675,,,,,,
4,,Chorda tympani branch of left facial nerve,,112,,['ILX:0795106'],,,,,
