# Get Atlas Release Dataset

## Imports

In [1]:
import json
import os

from kgforge.core import KnowledgeGraphForge
from kgforge.core.commons.strategies import ResolvingStrategy
from kgforge.specializations.resources import Dataset

## Setup
Get an authentication token

The [Nexus web application](https://bbp.epfl.ch/nexus/web) can be used to get a token.

- Step 1: From the opened web page, click on the login button on the right corner and follow the instructions.

![login-ui](./login-ui.png)

- Step 2: At the end you’ll see a token button on the right corner. Click on it to copy the token.

![copy-token](./copy-token.png)


In [2]:
# Read the Nexus token from the NEXUS_TOKEN environment variable so the secret is
# never saved in the notebook source. When the variable is unset this falls back to
# the previous empty placeholder; never commit a notebook with a pasted token.
TOKEN = os.environ.get("NEXUS_TOKEN", "")

In [3]:
# Nexus deployments this notebook knows about.
endpoint_prod = "https://bbp.epfl.ch/nexus/v1"
endpoint_staging = "https://staging.nise.bbp.epfl.ch/nexus/v1"

# Deployment to query: production by default.
# To target staging instead, assign `endpoint_staging` here.
endpoint = endpoint_prod

# Forge session bound to the "bbp/atlas" bucket of the selected deployment.
forge = KnowledgeGraphForge(
    "../forge-config.yml",
    token=TOKEN,
    endpoint=endpoint,
    bucket="bbp/atlas",
)

## Set the atlas release id
These atlas releases can be explored through the atlas web app:

* dev: https://bluebrainatlas.kcpdev.bbp.epfl.ch/atlas
* prod: https://bbp.epfl.ch/atlas


In [5]:
# Resource identifiers and release tags for each Nexus environment.
# NOTE(review): the staging and prod CellComposition ids are identical — confirm this is intended.
Staging_BBP_Mouse_Brain_Atlas_Release = "https://bbp.epfl.ch/neurosciencegraph/data/brainatlasrelease/c96c71a8-4c0d-4bc1-8a1a-141d9ed6693d"
Staging_Cell_Composition = "https://bbp.epfl.ch/neurosciencegraph/data/cellcompositions/54818e46-cf8c-4bd6-9b68-34dffbc8a68c"
staging_tag = "v1.0.1"

Prod_BBP_Mouse_Brain_Atlas_Release = "https://bbp.epfl.ch/neurosciencegraph/data/4906ab85-694f-469d-962f-c0174e901885"
Prod_Cell_Composition = "https://bbp.epfl.ch/neurosciencegraph/data/cellcompositions/54818e46-cf8c-4bd6-9b68-34dffbc8a68c"
prod_tag = "v0.6.0"

# Pick the identifiers and tag matching the endpoint chosen during setup.
if endpoint == endpoint_staging:
    atlas_release_id = Staging_BBP_Mouse_Brain_Atlas_Release
    cell_composition_id = Staging_Cell_Composition
    resources_tag = staging_tag
elif endpoint == endpoint_prod:
    atlas_release_id = Prod_BBP_Mouse_Brain_Atlas_Release
    cell_composition_id = Prod_Cell_Composition
    resources_tag = prod_tag
else:
    # Fail fast: without this branch the three variables would stay undefined (or keep
    # stale values from an earlier run) and the problem would only surface in a later cell.
    raise ValueError(
        f"Unsupported endpoint {endpoint!r}: expected {endpoint_prod!r} or {endpoint_staging!r}."
    )

## Get the atlas release high level metadata

In [6]:
# Fetch the atlas release resource at the tag selected for the chosen endpoint.
atlas_release = forge.retrieve(atlas_release_id, version=resources_tag)

In [7]:
# Show the retrieved atlas release metadata as text.
print(atlas_release)

{
    context: https://bbp.neuroshapes.org
    id: https://bbp.epfl.ch/neurosciencegraph/data/4906ab85-694f-469d-962f-c0174e901885
    type:
    [
        BrainAtlasRelease
        AtlasRelease
        Entity
    ]
    atlasReleaseSpatialReferenceSystem:
    {
        id: https://bbp.epfl.ch/neurosciencegraph/data/allen_ccfv3_spatial_reference_system
        type:
        [
            AtlasSpatialReferenceSystem
            BrainAtlasSpatialReferenceSystem
        ]
    }
    brainLocation:
    {
        atlasSpatialReferenceSystem:
        {
            id: https://bbp.epfl.ch/neurosciencegraph/data/allen_ccfv3_spatial_reference_system
            type:
            [
                AtlasSpatialReferenceSystem
                BrainAtlasSpatialReferenceSystem
            ]
        }
        brainRegion:
        {
            id: http://api.brain-map.org/api/v2/data/Structure/997
            label: root
        }
    }
    brainTemplateDataLayer:
    {
        id: https://bbp.epfl.ch/n

In [8]:
# Get the revision number ("_rev") recorded in the store metadata of the retrieved
# atlas release. NOTE(review): `_store_metadata` is a private attribute — confirm
# there is no public accessor before relying on it.
atlas_release._store_metadata["_rev"]

8

## Get the atlas hierarchy

In [9]:
# Follow the atlas release's `parcellationOntology` link and fetch that resource at the same tag.
parcellation_ontology = forge.retrieve(atlas_release.parcellationOntology.id, version=resources_tag, cross_bucket=True)

In [10]:
# Show the retrieved parcellation ontology metadata as text.
print(parcellation_ontology)

{
    context: https://bbp.neuroshapes.org
    id: https://bbp.epfl.ch/neurosciencegraph/data/0518dd0b-cbc7-43ef-a75f-45631059c8c5
    type:
    [
        ParcellationOntology
        Ontology
        Entity
    ]
    label: BBP Mouse Brain region ontology
    atlasRelease:
    {
        id: https://bbp.epfl.ch/neurosciencegraph/data/4906ab85-694f-469d-962f-c0174e901885
        type:
        [
            BrainAtlasRelease
            AtlasRelease
            Entity
        ]
        _rev: 8
    }
    atlasReleaseSpatialReferenceSystem:
    {
        id: https://bbp.epfl.ch/neurosciencegraph/data/allen_ccfv3_spatial_reference_system
        type:
        [
            AtlasSpatialReferenceSystem
            BrainAtlasSpatialReferenceSystem
        ]
    }
    brainLocation:
    {
        atlasSpatialReferenceSystem:
        {
            id: https://bbp.epfl.ch/neurosciencegraph/data/allen_ccfv3_spatial_reference_system
            type:
            [
                AtlasSpatialRefere

In [11]:
# Work on a Dataset copy so the retrieved ontology resource itself is left untouched.
parcellation_ontology_copy = Dataset.from_resource(forge, parcellation_ontology, store_metadata=True)

# Restrict the copy to its JSON distribution(s) so only those files are downloaded.
json_distributions = [
    dist
    for dist in parcellation_ontology.distribution
    if dist.encodingFormat == "application/json"
]
parcellation_ontology_copy.distribution = json_distributions

# Download the remaining distribution files into the current directory.
forge.download(
    parcellation_ontology_copy,
    "distribution.contentUrl",
    ".",
    overwrite=True,
    cross_bucket=True,
)

## Get a brain region metadata from its name or acronym

In [13]:
region_label = "Medial septal nucleus"

# Look the label up among the ontology terms using a case-insensitive exact match.
brain_region = forge.resolve(
    region_label,
    scope="ontology",
    target="terms",
    strategy=ResolvingStrategy.EXACT_CASEINSENSITIVE_MATCH,
)
print(brain_region)

{
    id: http://api.brain-map.org/api/v2/data/Structure/564
    type: Class
    label: Medial septal nucleus
    altLabel: MS
    isDefinedBy: http://bbp.epfl.ch/neurosciencegraph/ontologies/core/brainregion
    notation: MS
    prefLabel: Medial septal nucleus
    subClassOf: nsg:BrainRegion
}


In [14]:
# The region acronym is exposed as the `notation` field of the resolved term.
brain_region.notation

'MS'

## Get hierarchy relations directly from the ontology

Each brain region in the ontology lists its direct children in the `hasPart` property and all of its leaf regions in the `hasLeafRegionPart` property.

### Get all leaf nodes from a region

In [15]:
target = 'FRP'

# SPARQL query returning, for the brain region whose notation equals `target`,
# its id, its acronym and every region listed under `hasLeafRegionPart`.
# The query is assembled line by line; the leading empty entry reproduces the
# initial newline of the query text.
query_leaf = "\n".join([
    "",
    "SELECT DISTINCT ?id ?acronym ?leaf",
    "WHERE{",
    "    ?id subClassOf* BrainRegion ;",
    "    notation ?acronym ;",
    "    hasLeafRegionPart ?leaf.",
    f'FILTER (?acronym = "{target}") ',
    "}",
])

In [16]:
# Run the leaf query; each result exposes the selected ?id, ?acronym and ?leaf values.
brs_leaves = forge.sparql(query_leaf, cross_bucket=True)

In [17]:
# Tabulate the query results; as the cell's last expression the dataframe is displayed.
forge.as_dataframe(brs_leaves)

Unnamed: 0,id,acronym,leaf
0,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...
1,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure/68
2,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...
3,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...
4,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...
5,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...


### Return only leaves that are represented in the atlas annotation

In [18]:
target = 'FRP'

# Same leaf query as before, with one extra triple pattern keeping only the leaves
# whose `representedInAnnotation` value is true.
# The leading empty entry reproduces the initial newline of the query text.
query_leaf2 = "\n".join([
    "",
    "SELECT DISTINCT ?id ?acronym ?leaf",
    "WHERE{",
    "    ?id subClassOf* BrainRegion ;",
    "    notation ?acronym ;",
    "    hasLeafRegionPart ?leaf.",
    "    ?leaf representedInAnnotation true .",
    f'FILTER (?acronym = "{target}") ',
    "}",
])

In [19]:
# Run the filtered leaf query (only leaves flagged as represented in the annotation).
brs_leaves2 = forge.sparql(query_leaf2, cross_bucket=True)

In [20]:
# Tabulate the filtered results; as the cell's last expression the dataframe is displayed.
forge.as_dataframe(brs_leaves2)

Unnamed: 0,id,acronym,leaf
0,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...
1,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure/68
2,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...
3,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...
4,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...
5,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...


We can inspect the first leaf region returned for "FRP":

In [21]:
# Retrieve the leaf region from the first row of the unfiltered leaf query (`brs_leaves`).
# Despite its name, `FRP` therefore holds a leaf of FRP, not the FRP region itself.
FRP = forge.retrieve(brs_leaves[0].leaf, cross_bucket=True)

In [22]:
# Show the retrieved leaf region metadata as text.
print(FRP)

{
    context: https://neuroshapes.org
    id: http://api.brain-map.org/api/v2/data/Structure/614454278
    type: Class
    label: Frontal pole, layer 2
    altLabel: FRP2
    atlasRelease:
    {
        id: https://bbp.epfl.ch/neurosciencegraph/data/4906ab85-694f-469d-962f-c0174e901885
        type: BrainAtlasRelease
        _rev: 8
    }
    color_hex_triplet: 268F45
    hasHierarchyView:
    [
        https://neuroshapes.org/BrainRegion
    ]
    hasLayerLocationPhenotype:
    [
        http://purl.obolibrary.org/obo/UBERON_0005391
    ]
    hemisphere_id: 3
    identifier: "614454278"
    isPartOf:
    [
        http://api.brain-map.org/api/v2/data/Structure/667
    ]
    notation: FRP2
    prefLabel: Frontal pole, layer 2
    regionVolume:
    {
        unitCode: cubic micrometer
        value: 93343750.0
    }
    regionVolumeRatioToWholeBrain:
    {
        unitCode: cubic micrometer
        value: 0.00018445453376368608
    }
    representedInAnnotation: true
    subClassOf:
  

Similarly to the leaf nodes, we can get the direct children of a brain region that are present in the annotation by querying the `hasPart` property.

In [23]:
target = 'MOp'

# SPARQL query returning, for the brain region whose notation equals `target`,
# every region listed under `hasPart` whose `representedInAnnotation` value is true.
# The leading empty entry reproduces the initial newline of the query text.
query_child = "\n".join([
    "",
    "SELECT DISTINCT ?id ?acronym ?child",
    "WHERE{",
    "    ?id subClassOf* BrainRegion ;",
    "    notation ?acronym ;",
    "    hasPart ?child.",
    "    ?child representedInAnnotation true.",
    f'FILTER (?acronym = "{target}") ',
    "}",
])

In [24]:
# Run the children query.
# NOTE(review): unlike the leaf queries earlier in this notebook, this call does not
# pass cross_bucket=True — confirm whether the difference is intentional.
brs_children = forge.sparql(query_child)

In [25]:
# Tabulate the children results; as the cell's last expression the dataframe is displayed.
forge.as_dataframe(brs_children)

Unnamed: 0,id,acronym,child
0,http://api.brain-map.org/api/v2/data/Structure...,MOp,http://api.brain-map.org/api/v2/data/Structure...
1,http://api.brain-map.org/api/v2/data/Structure...,MOp,http://api.brain-map.org/api/v2/data/Structure...
2,http://api.brain-map.org/api/v2/data/Structure...,MOp,http://api.brain-map.org/api/v2/data/Structure...
3,http://api.brain-map.org/api/v2/data/Structure...,MOp,http://api.brain-map.org/api/v2/data/Structure...
4,http://api.brain-map.org/api/v2/data/Structure...,MOp,http://api.brain-map.org/api/v2/data/Structure...


If we know the exact brain region, instead of using SPARQL one can simply resolve the brain region and get the children directly from the `hasPart` attribute.

In [26]:
# Resolve the acronym 'MOp' against the ontology with an exact match.
# NOTE(review): the strategy is given here by its string name, whereas the earlier
# resolve call passes a ResolvingStrategy member — confirm both forms are accepted.
mop = forge.resolve('MOp', scope='ontology', strategy='EXACT_MATCH')

In [27]:
# Show the resolved term as text.
print(mop)

{
    id: http://api.brain-map.org/api/v2/data/Structure/985
    type: Class
    label: Primary motor area
    altLabel: MOp
    isDefinedBy: http://bbp.epfl.ch/neurosciencegraph/ontologies/core/brainregion
    notation: MOp
    prefLabel: Primary motor area
    subClassOf: nsg:BrainRegion
}


In [28]:
# Retrieve the complete resource behind the resolved term's id.
# NOTE(review): the narrative says children are read from `hasPart`; presumably that is
# `full_mop.hasPart`, but this cell does not access it — confirm and display if intended.
full_mop = forge.retrieve(mop.id, cross_bucket=True)

## Get parcellation (annotation) volume

In [29]:
# Follow the atlas release's `parcellationVolume` link and fetch that resource at the same tag.
parcellation_volume = forge.retrieve(atlas_release.parcellationVolume.id, version=resources_tag)

In [30]:
# Show the retrieved parcellation volume metadata as text.
print(parcellation_volume)

{
    context: https://bbp.neuroshapes.org
    id: https://bbp.epfl.ch/neurosciencegraph/data/231f6e2e-6366-4ddc-94b6-35ab50c076c0
    type:
    [
        BrainParcellationDataLayer
        VolumetricDataLayer
        Dataset
    ]
    atlasRelease:
    {
        id: https://bbp.epfl.ch/neurosciencegraph/data/4906ab85-694f-469d-962f-c0174e901885
        type:
        [
            BrainAtlasRelease
            AtlasRelease
            Entity
        ]
        _rev: 8
    }
    brainLocation:
    {
        atlasSpatialReferenceSystem:
        {
            id: https://bbp.epfl.ch/neurosciencegraph/data/allen_ccfv3_spatial_reference_system
            type:
            [
                AtlasSpatialReferenceSystem
                BrainAtlasSpatialReferenceSystem
            ]
        }
        brainRegion:
        {
            id: http://api.brain-map.org/api/v2/data/Structure/997
            label: root
        }
    }
    bufferEncoding: gzip
    componentEncoding: uint32
    contribu

In [31]:
# Download the parcellation volume's distribution file(s) into the current directory,
# replacing any copy already present.
forge.download(
    parcellation_volume,
    follow="distribution.contentUrl",
    path=".",
    overwrite=True,
)

## Get orientation field and Direction vectors volumes

In [30]:
# Follow the atlas release's `directionVector` and `cellOrientationField` links and
# fetch both resources at the same tag.
direction_vectors = forge.retrieve(atlas_release.directionVector.id, version=resources_tag)
cell_orientation_field = forge.retrieve(atlas_release.cellOrientationField.id, version=resources_tag)

In [31]:
# Download the distribution file(s) of both volumes into the current directory,
# replacing any copies already present.
forge.download(
    direction_vectors,
    follow="distribution.contentUrl",
    path=".",
    overwrite=True,
)
forge.download(
    cell_orientation_field,
    follow="distribution.contentUrl",
    path=".",
    overwrite=True,
)

## Get the CellCompositionSummary
A summary of the M-E type density values by brain region is provided in the `cellCompositionSummary` property of the CellComposition:

In [33]:
# Fetch the CellComposition at the selected tag.
cell_composition = forge.retrieve(
    id=cell_composition_id,
    version=resources_tag,
    cross_bucket=True,
)

# Follow its `cellCompositionSummary` link and fetch the summary at the same tag.
summary_id = cell_composition.cellCompositionSummary.id
cell_composition_summary = forge.retrieve(
    id=summary_id,
    version=resources_tag,
    cross_bucket=True,
)

# Download the summary's distribution file into the current directory.
forge.download(
    cell_composition_summary,
    path=".",
    follow="distribution.contentUrl",
    cross_bucket=True,
    overwrite=True,
)

In [41]:
# Load the downloaded CellCompositionSummary file (saved under its distribution name).
with open(f"./{cell_composition_summary.distribution.name}", "r") as f:
    cell_composition_summary_json = json.load(f)

# The top-level `hasPart` mapping is keyed by brain region id; the M-types of a region
# sit one level below, under that region's own `hasPart`. The message therefore counts
# brain regions (it previously mislabelled them as "M types").
brain_regions = cell_composition_summary_json['hasPart']
print(f"The CellCompositionSummary contains {len(brain_regions)} brain regions:\n{brain_regions.keys()}")

The CellCompositionSummary contains 824 M types:
dict_keys(['http://api.brain-map.org/api/v2/data/Structure/23', 'http://api.brain-map.org/api/v2/data/Structure/935', 'http://api.brain-map.org/api/v2/data/Structure/614454342', 'http://api.brain-map.org/api/v2/data/Structure/614454343', 'http://api.brain-map.org/api/v2/data/Structure/1015', 'http://api.brain-map.org/api/v2/data/Structure/919', 'http://api.brain-map.org/api/v2/data/Structure/927', 'http://api.brain-map.org/api/v2/data/Structure/588', 'http://api.brain-map.org/api/v2/data/Structure/614454344', 'http://api.brain-map.org/api/v2/data/Structure/614454345', 'http://api.brain-map.org/api/v2/data/Structure/772', 'http://api.brain-map.org/api/v2/data/Structure/810', 'http://api.brain-map.org/api/v2/data/Structure/819', 'http://api.brain-map.org/api/v2/data/Structure/56', 'http://api.brain-map.org/api/v2/data/Structure/576', 'http://api.brain-map.org/api/v2/data/Structure/64', 'http://api.brain-map.org/api/v2/data/Structure/72', '

In [47]:
# Take the first brain region key of the summary and look at its entry.
first_brain_region = list(brain_regions.keys())[0]
first_region_entry = brain_regions[first_brain_region]

# A region's `hasPart` holds its M-types; each M-type nests its E-types in turn.
m_types = first_region_entry['hasPart']
region_name = first_region_entry['label']
print(f"The brain region '{region_name}' has {len(m_types)} M-type densities.")
print(f"Their composition in E-types is:\n{m_types}")

The brain region 'Anterior amygdalar area' has 2 M-type densities.
Their composition in E-types is:
{'https://bbp.epfl.ch/ontologies/core/bmo/GenericExcitatoryNeuronMType': {'label': 'GEN_mtype', 'about': 'MType', 'hasPart': {'https://bbp.epfl.ch/ontologies/core/bmo/GenericExcitatoryNeuronEType': {'label': 'GEN_etype', 'about': 'EType', 'composition': {'neuron': {'density': 11167.27060111258, 'count': 5523}}}}}, 'https://bbp.epfl.ch/ontologies/core/bmo/GenericInhibitoryNeuronMType': {'label': 'GIN_mtype', 'about': 'MType', 'hasPart': {'https://bbp.epfl.ch/ontologies/core/bmo/GenericInhibitoryNeuronEType': {'label': 'GIN_etype', 'about': 'EType', 'composition': {'neuron': {'density': 22588.589061799536, 'count': 11171}}}}}}


## Get the released M-E type densities
Each M-E type is registered in Nexus with its corresponding metadata. The list of the M-E type densities annotated in the parcellation volume is provided in the `cellCompositionVolume` property of the CellComposition:

In [35]:
# Follow the CellComposition's `cellCompositionVolume` link and fetch it at the selected tag.
volume_id = cell_composition.cellCompositionVolume.id
cell_composition_volume = forge.retrieve(
    id=volume_id,
    version=resources_tag,
    cross_bucket=True,
)

# Download its distribution file into the current directory.
forge.download(
    cell_composition_volume,
    path=".",
    follow="distribution.contentUrl",
    cross_bucket=True,
    overwrite=True,
)

In [36]:
# Parse the downloaded CellCompositionVolume file (saved under its distribution name).
volume_file_path = f"./{cell_composition_volume.distribution.name}"
with open(volume_file_path, "r") as volume_file:
    metype_density_release_json = json.load(volume_file)

In [37]:
# Install dependencies
!pip install jsonpath_ng

In [38]:
from jsonpath_ng import parse

# Each match is the "@id" of an entry three `hasPart` levels deep; the object holding
# that "@id" also carries the "_rev" to retrieve.
volume_id_path = parse('hasPart[*].hasPart[*].hasPart[*].@id')
metype_density_release_volume_ids = []  # [(metype volume id, revision)]
for found in volume_id_path.find(metype_density_release_json):
    metype_density_release_volume_ids.append((found.value, found.context.value["_rev"]))

# Retrieve every M-E type density volume at its recorded revision.
metype_density_release_volumes = [
    forge.retrieve(id=volume_id, version=volume_rev, cross_bucket=True)
    for volume_id, volume_rev in metype_density_release_volume_ids
]

len(metype_density_release_volumes)

245

In [39]:
# Collect metadata as pandas dataframe 
reshaped_resources = forge.reshape(metype_density_release_volumes, keep=["id","type", "annotation.hasBody.id", "annotation.hasBody.label",
                     "brainLocation.brainRegion.id", "brainLocation.brainRegion.label", "distribution.atLocation.location"])
df = forge.as_dataframe(reshaped_resources, nesting=".")

df["mtype"] = df.apply(lambda row: (row.annotation[0]["hasBody"]["label"],row.annotation[0]["hasBody"]["id"]) , axis=1) 
df["etype"] = df.apply(lambda row: (row.annotation[1]["hasBody"]["label"],row.annotation[1]["hasBody"]["id"]) , axis=1) 
type_column = df.pop('type')
mtype_column = df.pop('mtype')
etype_column = df.pop('etype')

df.insert(0, 'type', type_column)
df.insert(1, 'mtype', mtype_column)
df.insert(2, 'etype', etype_column)
df.drop(columns="annotation")

df.head(100)

Unnamed: 0,type,mtype,etype,id,annotation,brainLocation.brainRegion.id,brainLocation.brainRegion.label,distribution.atLocation.location
0,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L23_BP, http://uri.interlex.org/base/ilx_0383...","(dSTUT, http://uri.interlex.org/base/ilx_0738202)",https://bbp.epfl.ch/neurosciencegraph/data/88e...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
1,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L23_BP, http://uri.interlex.org/base/ilx_0383...","(cNAC, http://uri.interlex.org/base/ilx_0738201)",https://bbp.epfl.ch/neurosciencegraph/data/9ab...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
2,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L23_BP, http://uri.interlex.org/base/ilx_0383...","(bAC, http://uri.interlex.org/base/ilx_0738199)",https://bbp.epfl.ch/neurosciencegraph/data/2df...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
3,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L23_BP, http://uri.interlex.org/base/ilx_0383...","(bIR, http://uri.interlex.org/base/ilx_0738206)",https://bbp.epfl.ch/neurosciencegraph/data/6e2...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
4,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L23_BP, http://uri.interlex.org/base/ilx_0383...","(bNAC, http://uri.interlex.org/base/ilx_0738203)",https://bbp.epfl.ch/neurosciencegraph/data/a40...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
...,...,...,...,...,...,...,...,...
95,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L4_LBC, http://uri.interlex.org/base/ilx_0383...","(bAC, http://uri.interlex.org/base/ilx_0738199)",https://bbp.epfl.ch/neurosciencegraph/data/104...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
96,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L4_LBC, http://uri.interlex.org/base/ilx_0383...","(cNAC, http://uri.interlex.org/base/ilx_0738201)",https://bbp.epfl.ch/neurosciencegraph/data/699...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
97,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L4_LBC, http://uri.interlex.org/base/ilx_0383...","(cAC, http://uri.interlex.org/base/ilx_0738197)",https://bbp.epfl.ch/neurosciencegraph/data/045...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
98,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L4_LBC, http://uri.interlex.org/base/ilx_0383...","(bSTUT, http://uri.interlex.org/base/ilx_0738200)",https://bbp.epfl.ch/neurosciencegraph/data/0cb...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...


In [40]:
# Download the distribution files of all retrieved M-E type density volumes into ./test,
# replacing any copies already present.
forge.download(metype_density_release_volumes, path="./test", follow="distribution.contentUrl", cross_bucket=True, overwrite=True)