## Investigate the Coverage of Nerves in SCKAN/Flatmap Neuron Populations within the Human Scaffold

This notebook uses the **FMA-to-ILX/UBERON** mapping file (`data/generated/mapped_fma_nerves.csv`), which is generated by `fma_nerve.ipynb`, along with several other pre-generated files to speed up execution.

Required Files
- `data/generated/mapped_fma_nerves.csv`: Contains mappings of FMA terms from the full-body human nerves to ILX or UBERON terms. Only about 420 out of 992 terms can be programmatically mapped.
- `data/generated/UBERON_FMA_Nerve_Hierarchy.json`: Describes the hierarchical structure of nerve-related terms, including their superclasses and subclasses.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import ast
from collections import defaultdict
import requests
import json
import rdflib
import urllib
from tqdm import tqdm

from utility import GENERATED_DIR, get_existing_term


In [3]:
## Load the FMA -> ILX/UBERON mapping produced by fma_nerve.ipynb
human_nerve_df = pd.read_csv(GENERATED_DIR / 'mapped_fma_nerves.csv')

## Build the lookup of human nerve terms (ILX/UBERON/FMA) -> group name.
## Step 1: every row that has a mapped 'Term ID' contributes that id.
mapped_rows = human_nerve_df.loc[
    human_nerve_df['Term ID'].notna(), ['Term ID', 'Group name']
]
human_nerves = mapped_rows.set_index('Term ID')['Group name'].to_dict()

## Step 2: every id listed in the stringified 'available' list is added as well
## (non-string cells, e.g. NaN, are skipped). These entries take precedence
## over step 1 when the same id appears in both.
for available, group_name in zip(human_nerve_df['available'], human_nerve_df['Group name']):
    if not isinstance(available, str):
        continue
    for term in ast.literal_eval(available):
        human_nerves[term] = group_name

In [4]:
# GitHub owner and repository from which SCKANTerms downloads its turtle files
NPO_OWNER = 'SciCrunch'
NPO_REPO = 'NIF-Ontology'
# Base URL for raw file access; a git ref (the SCKAN version) and a file path are appended to it
NPO_RAW = f'https://raw.githubusercontent.com/{NPO_OWNER}/{NPO_REPO}'
# Directory inside the repository that the relative file names are joined to
GEN_NEURONS_PATH = 'ttl/generated/neurons/'
# File extension appended to each turtle file name
TURTLE_SUFFIX = '.ttl'

# A class to explore the relation between terms. The default now is for sckan-2024-09-21
class SCKANTerms():
    """Explore relations between terms using the NPO and SPARC community term graphs.

    On construction the `npo` and `sparc-community-terms` turtle files of the
    requested SCKAN version are downloaded from GitHub and parsed into a single
    rdflib graph, so creating an instance requires network access.

    Args:
        sckan_version: git ref of the NIF-Ontology repository to load
            (defaults to 'sckan-2024-09-21').
    """

    def __init__(self, sckan_version='sckan-2024-09-21'):
        # Imported explicitly: a bare `import urllib` does not guarantee that
        # the `urllib.parse` submodule has been loaded.
        from urllib.parse import quote

        self.__g = rdflib.Graph()
        for f in ('../../npo', '../../sparc-community-terms'):
            p = quote(GEN_NEURONS_PATH + f)
            self.__g.parse(f'{NPO_RAW}/{sckan_version}/{p}{TURTLE_SUFFIX}', format='turtle')

        # Bind the prefixes so that qname() returns compact ids such as 'UBERON:0001021'
        for k, n in {
            'FMA': rdflib.Namespace('http://purl.org/sig/ont/fma/fma'),
            'UBERON': rdflib.Namespace('http://purl.obolibrary.org/obo/UBERON_'),
            'ILX': rdflib.Namespace('http://uri.interlex.org/base/ilx_'),
            'ilxtr': rdflib.Namespace('http://uri.interlex.org/tgbugs/uris/readable/'),
            'BIRNLEX': rdflib.Namespace('http://uri.neuinfo.org/nif/nifstd/birnlex_')
        }.items():
            self.__g.bind(k, n)

    def get_existing_ids(self, term_id):
        """Return the ids linked to `term_id` through `ilxtr:hasExistingId`.

        Both directions are considered: ids that `term_id` itself declares, and
        all ids declared by any term that lists `term_id` as an existing id.
        When the graph yields nothing, `get_existing_term(term_id)` is used as a
        fallback.

        Args:
            term_id: prefixed term id (e.g. 'ILX:0792409'); it is inserted
                verbatim into the SPARQL query.

        Returns:
            A list of prefixed ids (unordered), or an empty list when neither
            the graph nor the fallback provides a list.
        """
        q = f"""
            PREFIX FMA: <http://purl.org/sig/ont/fma/fma>
            PREFIX ilxtr: <http://uri.interlex.org/tgbugs/uris/readable/>
            PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
            PREFIX ILX: <http://uri.interlex.org/base/ilx_>
            PREFIX BIRNLEX: <http://uri.neuinfo.org/nif/nifstd/birnlex_>

            SELECT ?existing WHERE {{
                {{
                    {term_id} ilxtr:hasExistingId ?existing .
                }}
                UNION
                {{
                    ?based ilxtr:hasExistingId {term_id} .
                    ?based ilxtr:hasExistingId ?existing .
                }}
            }}

        """
        # De-duplicate the compact ids found in the graph
        existings = list({self.__g.qname(row.existing) for row in self.__g.query(q)})
        if not existings:
            existings = get_existing_term(term_id)
        return existings if isinstance(existings, list) else []

    def get_label(self, term_id):
        """Return the `rdfs:label` of `term_id`, or `term_id` itself when no label is found.

        Args:
            term_id: prefixed term id (e.g. 'ILX:0792409'); it is inserted
                verbatim into the SPARQL query.

        Returns:
            The first label returned by the query as a string, or `term_id`.
        """
        q = f"""
            PREFIX FMA: <http://purl.org/sig/ont/fma/fma>
            PREFIX ilxtr: <http://uri.interlex.org/tgbugs/uris/readable/>
            PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
            PREFIX ILX: <http://uri.interlex.org/base/ilx_>
            PREFIX BIRNLEX: <http://uri.neuinfo.org/nif/nifstd/birnlex_>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

            SELECT ?label WHERE {{
                {term_id} rdfs:label ?label .
            }}

        """
        rows = list(self.__g.query(q))
        if rows:
            return str(rows[0].label)
        return term_id

# Build the term graph for the default SCKAN version (downloads and parses the turtle files)
sckan_terms = SCKANTerms()
# Smoke test on one term; only the value of the last expression is displayed by the notebook
sckan_terms.get_existing_ids('ILX:0792409')
sckan_terms.get_label('ILX:0792409')

'gray communicating ramus of tenth thoracic nerve'

In [None]:
# A function to execute a query and return results from PostgreSQL
def execute_cq(query):
    """Run a competency query against the flatmap server and return its results.

    Args:
        query: JSON-serialisable query description; it is POSTed as the request
            body (callers in this notebook pass a dict with `query_id` and
            `parameters`).

    Returns:
        The `results` entry of the JSON response (an empty list when the entry
        is missing) for HTTP 200/201, otherwise `None` after printing the error.
    """
    pg_endpoint = "https://mapcore-demo.org/devel/flatmap/v4/competency/"
    url = pg_endpoint + 'query'
    headers = {"Content-Type": "application/json"}
    response = requests.post(url, json=query, headers=headers)

    if response.status_code in (200, 201):
        return response.json().get('results', [])

    # Error bodies are not guaranteed to be JSON (e.g. an HTML gateway page);
    # fall back to the raw text so the HTTP status is still reported.
    try:
        detail = response.json()
    except ValueError:
        detail = response.text
    print(f"Error {response.status_code}: {detail}")
    return None

# A class to manage human nerves and SCKAN nerves analysis
class HumanSCKANNerves():
    """Analyse how well the human nerve terms cover the nerves used by SCKAN/flatmap paths.

    Construction queries the competency service for all nerves of `source_id`
    and their neuron populations (paths), resolves the existing ids of every
    nerve through `SCKANTerms`, and loads the UBERON/FMA nerve hierarchy file.

    Args:
        source_id: knowledge source to analyse (a SCKAN version or a flatmap uuid).
        human_nerves: dict whose keys are the human nerve term ids.
    """

    def __init__(self, source_id, human_nerves: dict):
        self.__source_id = source_id
        self.__human_nerves = human_nerves
        self.__sckan_terms = SCKANTerms()

        self.__load_sckan_nerves(source_id)
        self.__load_uberon_fma_nerve_hierarchy()

    def __load_sckan_nerves(self, source_id):
        """Fetch nerves and their paths, filling the nerve<->path indexes and existing ids.

        Raises:
            RuntimeError: if the competency query returns no usable result.
        """
        # get all sckan nerves and their corresponding neuron population (Query 12)
        query = {
            'query_id': '12',
            'parameters': [{'column': 'source_id', 'value': source_id}]
        }
        query_results = execute_cq(query)
        # execute_cq returns None on HTTP errors and [] when 'results' is missing
        if not isinstance(query_results, dict) or 'values' not in query_results:
            raise RuntimeError(
                f"Competency query 12 returned no usable results for source '{source_id}'"
            )

        # nerve id -> list of path ids (row[1] is the nerve, row[3] the path)
        self.__nerve_paths = defaultdict(list)
        for row in query_results['values']:
            self.__nerve_paths[row[1]].append(row[3])

        # path id -> list of nerve ids (inverse index)
        self.__path_nerves = defaultdict(list)
        for nerve, paths_ids in self.__nerve_paths.items():
            for path_id in paths_ids:
                self.__path_nerves[path_id].append(nerve)

        # nerve id -> set of equivalent ids, always including the nerve id itself
        self.__sckan_nerve_existing_id = {
            nerve_id: set(self.__sckan_terms.get_existing_ids(nerve_id) + [nerve_id])
            for nerve_id in tqdm(self.__nerve_paths)
        }

    def __load_uberon_fma_nerve_hierarchy(self):
        """Load the pre-generated UBERON/FMA nerve hierarchy (both directions)."""
        with open(GENERATED_DIR / 'UBERON_FMA_Nerve_Hierarchy.json', 'r', encoding='utf-8') as f:
            uberon_fma_nerve_hierarchy = json.load(f)
        self.__subclass_to_superclasses = uberon_fma_nerve_hierarchy['subclass_to_superclasses']
        self.__superclass_to_subclasses = uberon_fma_nerve_hierarchy['superclass_to_subclasses']

    def get_human_subclass_nerves(self, nerve_id) -> dict:
        """Return `{subclass id: label}` for human nerves that are subclasses of `nerve_id`.

        Subclasses are looked up for the nerve id and for each of its existing ids;
        only subclasses present in the human nerve terms are kept.

        Raises:
            KeyError: if `nerve_id` is not one of the loaded SCKAN nerves.
        """
        subclass_nerves = {}
        for existing_id in self.__sckan_nerve_existing_id[nerve_id]:
            for subclass_id, data in self.__superclass_to_subclasses.get(existing_id, {}).items():
                if subclass_id in self.__human_nerves:
                    subclass_nerves[subclass_id] = data['label']
        return subclass_nerves

    def covered_nerves(self, use_subclass=False):
        """Return the set of SCKAN nerve ids that are covered by the human nerves.

        Without `use_subclass` a nerve is covered when its id is itself a human
        nerve term; with `use_subclass` it is covered when at least one of its
        subclasses is a human nerve term.
        """
        if not use_subclass:
            return set(self.__nerve_paths) & set(self.__human_nerves)

        return {
            nerve_id
            for nerve_id in self.__sckan_nerve_existing_id
            if self.get_human_subclass_nerves(nerve_id)
        }

    def uncovered_nerves(self, use_subclass=False):
        """Return the SCKAN nerve ids that are not in `covered_nerves(use_subclass)`."""
        return set(self.__nerve_paths) - self.covered_nerves(use_subclass)

    def uncovered_paths(self, use_subclass=False):
        """Return the path ids that traverse at least one uncovered nerve."""
        return {
            path_id
            for nerve_id in self.uncovered_nerves(use_subclass)
            for path_id in self.__nerve_paths[nerve_id]
        }

    def covered_paths(self, use_subclass=False):
        """Return the path ids whose nerves are all covered."""
        return set(self.__path_nerves.keys()) - self.uncovered_paths(use_subclass)

    def print_stats(self):
        """Print nerve and path coverage counts, with and without subclass matching."""
        print(f"SCKAN/Flatmap source: {self.__source_id}")
        print(f"# number of covered nerves: {len(self.covered_nerves())} out of {len(self.__nerve_paths)}")
        print(f"# number of uncovered nerves: {len(self.uncovered_nerves())} out of {len(self.__nerve_paths)}")
        print(f"# number of covered nerve (consider subclasses): {len(self.covered_nerves(True))} out of {len(self.__nerve_paths)}")
        print(f"# number of uncovered nerves (consider subclasses): {len(self.uncovered_nerves(True))} out of {len(self.__nerve_paths)}")
        print(f"# number of covered paths: {len(self.covered_paths())} out of {len(self.__path_nerves)}")
        print(f"# number of uncovered paths: {len(self.uncovered_paths())} out of {len(self.__path_nerves)}")
        print(f"# number of covered paths (consider subclasses): {len(self.covered_paths(True))} out of {len(self.__path_nerves)}")
        print(f"# number of uncovered paths (consider subclasses): {len(self.uncovered_paths(True))} out of {len(self.__path_nerves)}")

    def save_nerve_coverage(self):
        """Write one CSV row per SCKAN nerve to `sckan_human_nerve_coverage_{source_id}.csv`."""
        # Loop-invariant: compute the directly covered nerves once
        directly_covered = self.covered_nerves()
        nerves = defaultdict(list)
        for nerve_id in tqdm(self.__nerve_paths):
            subclass_nerves = self.get_human_subclass_nerves(nerve_id)
            nerves['nerve id'].append(nerve_id)
            # Use the instance's own term graph rather than a notebook-level global
            nerves['label'].append(self.__sckan_terms.get_label(nerve_id))
            nerves['covered'].append(nerve_id in directly_covered)
            nerves['subclasses'].append(list(subclass_nerves.keys()))
            nerves['subclass labels'].append(list(subclass_nerves.values()))

        df = pd.DataFrame(nerves)
        df.to_csv(GENERATED_DIR / f'sckan_human_nerve_coverage_{self.__source_id}.csv')

    def save_path_coverage(self):
        """Write one CSV row per path to `sckan_human_path_coverage_{source_id}.csv`."""
        # Loop-invariant: compute the directly covered nerves once
        directly_covered = self.covered_nerves()
        paths = defaultdict(list)
        for path_id, nerve_ids in self.__path_nerves.items():
            nerve_set = set(nerve_ids)
            # Human subclass matches of each nerve on the path, in path order
            subclass_matches = [self.get_human_subclass_nerves(nerve_id) for nerve_id in nerve_ids]
            paths['path id'].append(path_id)
            paths['covered nerve'].append(list(directly_covered & nerve_set))
            paths['uncovered nerve'].append(list(nerve_set - directly_covered))
            paths['covered nerve (subclasses)'].append([x for matches in subclass_matches for x in matches])
            # Complete only when every nerve on the path has at least one subclass match
            paths['complete (subclasses)'].append(all(subclass_matches))
        df = pd.DataFrame(paths)
        df.to_csv(GENERATED_DIR / f'sckan_human_path_coverage_{self.__source_id}.csv')


#### Check the coverage of human nerves on sckan-2024-09-21

In [None]:
# Build the analysis object for the SCKAN release and print its coverage statistics
# (construction queries the competency service and resolves the existing ids of every nerve)
hsn = HumanSCKANNerves('sckan-2024-09-21', human_nerves)
hsn.print_stats()

100%|██████████| 105/105 [01:42<00:00,  1.02it/s]

SCKAN/Flatmap source: sckan-2024-09-21
# number of covered nerves: 0 out of 105
# number of uncovered nerves: 105 out of 105
# number of covered nerve (consider subclasses): 54 out of 105
# number of uncovered nerves (consider subclasses): 51 out of 105
# number of covered paths: 0 out of 274
# number of uncovered paths: 274 out of 274
# number of covered paths (consider subclasses): 145 out of 274
# number of uncovered paths (consider subclasses): 129 out of 274





It is clear that when comparing nerve terms in SCKAN directly to those in the full-body human nerve map, there are no exact matches—resulting in zero neuron populations being covered. This is because the nerve terms in SCKAN typically represent higher-level structures, such as the splanchnic nerve (UBERON:0003715), or generic terms like “nerve” (UBERON:0001021). Some terms also lack specified laterality, such as the auriculotemporal nerve (ILX:0793723). In contrast, the full-body human nerve map includes more specific, anatomically detailed terms.

However, after taking into account the subclasses of the SCKAN nerve terms, the coverage improves significantly: 54 out of 105 nerves are now matched, resulting in 145 out of 274 neuron populations being covered.

In [7]:
# Get the neuron populations that have at least one uncovered nerve (considering subclasses)
hsn.uncovered_paths(use_subclass=True)

{'ilxtr:neuron-type-aacar-11',
 'ilxtr:neuron-type-aacar-12',
 'ilxtr:neuron-type-aacar-4',
 'ilxtr:neuron-type-aacar-5',
 'ilxtr:neuron-type-bolew-unbranched-10',
 'ilxtr:neuron-type-bolew-unbranched-14',
 'ilxtr:neuron-type-bolew-unbranched-15',
 'ilxtr:neuron-type-bolew-unbranched-16',
 'ilxtr:neuron-type-bolew-unbranched-17',
 'ilxtr:neuron-type-bolew-unbranched-18',
 'ilxtr:neuron-type-bolew-unbranched-19',
 'ilxtr:neuron-type-bolew-unbranched-20',
 'ilxtr:neuron-type-bolew-unbranched-21',
 'ilxtr:neuron-type-bolew-unbranched-23',
 'ilxtr:neuron-type-bolew-unbranched-8',
 'ilxtr:neuron-type-bolew-unbranched-9',
 'ilxtr:neuron-type-bromo-1',
 'ilxtr:neuron-type-bromo-2',
 'ilxtr:neuron-type-keast-1',
 'ilxtr:neuron-type-keast-10',
 'ilxtr:neuron-type-keast-11',
 'ilxtr:neuron-type-keast-2',
 'ilxtr:neuron-type-keast-3',
 'ilxtr:neuron-type-keast-4',
 'ilxtr:neuron-type-keast-5',
 'ilxtr:neuron-type-keast-6',
 'ilxtr:neuron-type-keast-7',
 'ilxtr:neuron-type-keast-8',
 'ilxtr:neuron

In [8]:
# Get the SCKAN nerves that have no human nerve among their subclasses
hsn.uncovered_nerves(use_subclass=True)

{'ILX:0738293',
 'ILX:0738308',
 'ILX:0738309',
 'ILX:0738312',
 'ILX:0739299',
 'ILX:0739303',
 'ILX:0739304',
 'ILX:0790497',
 'ILX:0793208',
 'ILX:0793209',
 'ILX:0793210',
 'ILX:0793211',
 'ILX:0793212',
 'ILX:0793213',
 'ILX:0793214',
 'ILX:0793215',
 'ILX:0793216',
 'ILX:0793217',
 'ILX:0793218',
 'ILX:0793219',
 'ILX:0793220',
 'ILX:0793221',
 'ILX:0793361',
 'ILX:0793362',
 'ILX:0793559',
 'ILX:0793563',
 'ILX:0793632',
 'ILX:0793711',
 'ILX:0793714',
 'ILX:0793807',
 'ILX:0793809',
 'ILX:0793822',
 'ILX:0793826',
 'ILX:0793827',
 'ILX:0794476',
 'ILX:0794853',
 'ILX:0794916',
 'ILX:0794949',
 'ILX:0794959',
 'ILX:0794967',
 'ILX:0794969',
 'ILX:0794977',
 'ILX:0795005',
 'ILX:0795006',
 'UBERON:0001650',
 'UBERON:0001759',
 'UBERON:0009675',
 'UBERON:0018412',
 'UBERON:0018675',
 'UBERON:0022302',
 'UBERON:0034984'}

In [None]:
# The coverage of nerves and paths can also be saved as CSV reports

# Report of the covered SCKAN nerves.
# File: data/generated/sckan_human_nerve_coverage_{source_id}.csv
# Columns:  - `nerve id`: the id of the SCKAN nerve
#           - `label`: the nerve label
#           - `covered`: whether the nerve id itself is one of the human nerve terms (True/False)
#           - `subclasses`: the ids of the human nerve terms that are subclasses of the nerve
#           - `subclass labels`: the labels of those subclasses
#
hsn.save_nerve_coverage()

# Report of the covered SCKAN neuron populations.
# File: data/generated/sckan_human_path_coverage_{source_id}.csv
# Columns:  - `path id`: the neuron population id
#           - `covered nerve`: the population's nerves that are directly covered
#           - `uncovered nerve`: the population's nerves that are not directly covered
#           - `covered nerve (subclasses)`: the human subclass ids found for the population's nerves
#           - `complete (subclasses)`: True when every nerve of the population has at least one human subclass
hsn.save_path_coverage()

#### Check the coverage of human nerves on male-flatmap 2b76d336-5c56-55e3-ab1e-795d6c63f9c1

In [9]:
# Build the analysis object for the male flatmap (identified by its uuid) and print its coverage statistics
hsn_male = HumanSCKANNerves('2b76d336-5c56-55e3-ab1e-795d6c63f9c1', human_nerves)
hsn_male.print_stats()

100%|██████████| 41/41 [00:20<00:00,  1.99it/s]

SCKAN/Flatmap source: 2b76d336-5c56-55e3-ab1e-795d6c63f9c1
# number of covered nerves: 0 out of 41
# number of uncovered nerves: 41 out of 41
# number of covered nerve (consider subclasses): 24 out of 41
# number of uncovered nerves (consider subclasses): 17 out of 41
# number of covered paths: 0 out of 104
# number of uncovered paths: 104 out of 104
# number of covered paths (consider subclasses): 37 out of 104
# number of uncovered paths (consider subclasses): 67 out of 104





In [10]:
# Neuron populations of the male flatmap whose nerves all have a human subclass match
hsn_male.covered_paths(use_subclass=True)

{'ilxtr:neuron-type-bolew-unbranched-12',
 'ilxtr:neuron-type-bolew-unbranched-4',
 'ilxtr:neuron-type-bolew-unbranched-5',
 'ilxtr:neuron-type-bromo-1',
 'ilxtr:neuron-type-keast-12',
 'ilxtr:neuron-type-keast-7',
 'ilxtr:neuron-type-keast-9',
 'ilxtr:neuron-type-pancr-1',
 'ilxtr:neuron-type-sdcol-b',
 'ilxtr:neuron-type-splen-1',
 'ilxtr:sparc-nlp/kidney/131',
 'ilxtr:sparc-nlp/kidney/133',
 'ilxtr:sparc-nlp/kidney/139',
 'ilxtr:sparc-nlp/kidney/140',
 'ilxtr:sparc-nlp/kidney/144',
 'ilxtr:sparc-nlp/kidney/147',
 'ilxtr:sparc-nlp/kidney/148',
 'ilxtr:sparc-nlp/kidney/153',
 'ilxtr:sparc-nlp/kidney/154',
 'ilxtr:sparc-nlp/liver/105',
 'ilxtr:sparc-nlp/liver/107',
 'ilxtr:sparc-nlp/liver/113',
 'ilxtr:sparc-nlp/liver/116',
 'ilxtr:sparc-nlp/liver/118',
 'ilxtr:sparc-nlp/liver/120',
 'ilxtr:sparc-nlp/liver/126',
 'ilxtr:sparc-nlp/liver/130',
 'ilxtr:sparc-nlp/mmset1/1',
 'ilxtr:sparc-nlp/mmset1/3',
 'ilxtr:sparc-nlp/mmset1/5',
 'ilxtr:sparc-nlp/mmset2cn/1',
 'ilxtr:sparc-nlp/mmset2cn/3

In [11]:
# Neuron populations of the male flatmap with at least one nerve lacking a human subclass match
hsn_male.uncovered_paths(use_subclass=True)

{'ilxtr:neuron-type-aacar-11',
 'ilxtr:neuron-type-aacar-12',
 'ilxtr:neuron-type-aacar-4',
 'ilxtr:neuron-type-aacar-6',
 'ilxtr:neuron-type-bolew-unbranched-10',
 'ilxtr:neuron-type-bolew-unbranched-11',
 'ilxtr:neuron-type-bolew-unbranched-13',
 'ilxtr:neuron-type-bolew-unbranched-14',
 'ilxtr:neuron-type-bolew-unbranched-15',
 'ilxtr:neuron-type-bolew-unbranched-16',
 'ilxtr:neuron-type-bolew-unbranched-17',
 'ilxtr:neuron-type-bolew-unbranched-18',
 'ilxtr:neuron-type-bolew-unbranched-19',
 'ilxtr:neuron-type-bolew-unbranched-2',
 'ilxtr:neuron-type-bolew-unbranched-20',
 'ilxtr:neuron-type-bolew-unbranched-21',
 'ilxtr:neuron-type-bolew-unbranched-23',
 'ilxtr:neuron-type-bolew-unbranched-8',
 'ilxtr:neuron-type-bolew-unbranched-9',
 'ilxtr:neuron-type-bromo-2',
 'ilxtr:neuron-type-keast-1',
 'ilxtr:neuron-type-keast-10',
 'ilxtr:neuron-type-keast-11',
 'ilxtr:neuron-type-keast-2',
 'ilxtr:neuron-type-keast-3',
 'ilxtr:neuron-type-keast-4',
 'ilxtr:neuron-type-keast-5',
 'ilxtr:n