# Download Atlas Datasets

## Imports

In [None]:
import json
import os
import shutil
import urllib.parse

from kgforge.core import KnowledgeGraphForge

## Setup
Set `download_path` and get a Nexus authentication token

In [None]:
# Root directory of the downloads; a resolution and a version sub-directory are appended to it further down
download_path = "/gpfs/bbp.cscs.ch/project/proj162/Model_Data/Brain_atlas/Mouse"

The [Nexus web application](https://bbp.epfl.ch/nexus/web) can be used to get a token.

- Step 1: On the page that opens, click the login button in the right corner and follow the instructions.

![login-ui](./login-ui.png)

- Step 2: Once logged in, you’ll see a token button in the right corner. Click it to copy the token.

![copy-token](./copy-token.png)


In [None]:
# Paste the Nexus authentication token between the quotes (it is passed to KnowledgeGraphForge below)
TOKEN = ""

In [None]:
# Base URLs of the two Nexus deployments this notebook can target
endpoint_staging = "https://staging.nise.bbp.epfl.ch/nexus/v1"
endpoint_prod = "https://bbp.epfl.ch/nexus/v1"

# Choose the Nexus environment to target (prod or staging)
#endpoint = endpoint_staging
endpoint = endpoint_prod

# Forge session bound to the "bbp/atlas" bucket of the selected deployment
forge = KnowledgeGraphForge(
    "../forge-config.yml",
    bucket="bbp/atlas",
    endpoint=endpoint,
    token=TOKEN,
)

## Set the Atlas release id
These atlas releases can be explored through the atlas web app:

* dev: https://bluebrainatlas.kcpdev.bbp.epfl.ch/atlas
* prod: https://bbp.epfl.ch/atlas


In [None]:
# Identifiers and release tag of the resources to download, for each Nexus environment
Staging_BBP_Mouse_Brain_Atlas_Release = "https://bbp.epfl.ch/neurosciencegraph/data/brainatlasrelease/c96c71a8-4c0d-4bc1-8a1a-141d9ed6693d"
Staging_Cell_Composition = "https://bbp.epfl.ch/neurosciencegraph/data/cellcompositions/54818e46-cf8c-4bd6-9b68-34dffbc8a68c"
staging_tag = "v1.0.1"

Prod_BBP_Mouse_Brain_Atlas_Release = "https://bbp.epfl.ch/neurosciencegraph/data/4906ab85-694f-469d-962f-c0174e901885"
Prod_Cell_Composition = "https://bbp.epfl.ch/neurosciencegraph/data/cellcompositions/54818e46-cf8c-4bd6-9b68-34dffbc8a68c"
prod_tag = "v1.1.0"

# Pick the set of identifiers matching the endpoint the forge session targets
if endpoint == endpoint_staging:
    atlas_release_id = Staging_BBP_Mouse_Brain_Atlas_Release
    cell_composition_id = Staging_Cell_Composition
    resources_tag = staging_tag
elif endpoint == endpoint_prod:
    atlas_release_id = Prod_BBP_Mouse_Brain_Atlas_Release
    cell_composition_id = Prod_Cell_Composition
    resources_tag = prod_tag
else:
    # Fail here with a clear message rather than with a NameError on `resources_tag` below
    raise ValueError(
        f"Unknown Nexus endpoint '{endpoint}': expected '{endpoint_prod}' or '{endpoint_staging}'"
    )

# Directory name derived from the tag, e.g. "v1.1.0" -> "version_1.1.0";
# only the first "v" is replaced so a "v" elsewhere in the tag is left alone
version_dir = resources_tag.replace('v', 'version_', 1)

## Get the Atlas release high level metadata

In [None]:
# Retrieve the atlas release at the selected tag and display it
atlas_release = forge.retrieve(atlas_release_id, version=resources_tag)
print(atlas_release)

In [None]:
# Get the resolution of the Atlas release
# (it is read from the brain template data layer referenced by the release)
brain_template = forge.retrieve(atlas_release.brainTemplateDataLayer.id, version=resources_tag)
resolution = brain_template.resolution.value
# Name of the per-resolution sub-directory of the download path
resolution_dir = f"resolution_{resolution}_um"

In [None]:
# Destination directory of all the downloads: <download_path>/<resolution_dir>/<version_dir>
atlas_path = os.path.join(download_path, resolution_dir, version_dir)
os.makedirs(atlas_path, exist_ok = True)

## Get the distribution files corresponding to the AtlasRelease properties

In [None]:
def get_gpfs_path(nexus_location):
    """Turn a Nexus file location into a filesystem path.

    Percent-escapes are decoded first, then every "file:///" occurrence is
    replaced by "/".
    """
    return urllib.parse.unquote(nexus_location).replace("file:///", "/")

In [None]:
def get_res_distributions(res_id, res_tag, dir_label):
    """Copy the distribution files of a Nexus resource under `atlas_path`.

    Args:
        res_id: identifier of the resource to retrieve.
        res_tag: value passed as `version` to `forge.retrieve`.
        dir_label: sub-directory of `atlas_path` that receives the files.

    Returns:
        0 if no resource was retrieved for `res_id` at `res_tag`, 1 otherwise
        (including when distributions were skipped or had no location).
    """
    prop_res = forge.retrieve(res_id, version=res_tag, cross_bucket=True)
    if not prop_res:
        print(f"No Resource found with id {res_id} at version {res_tag}")
        return 0

    # A resource carries either a single distribution or a list of them
    distribution = prop_res.distribution
    prop_distr_list = distribution if isinstance(distribution, list) else [distribution]

    prop_path = os.path.join(atlas_path, dir_label)
    # shutil.copy2 does not create missing parent directories
    os.makedirs(prop_path, exist_ok=True)

    for prop_distr in prop_distr_list:
        prop_distr_path = os.path.join(prop_path, prop_distr.name)
        if os.path.isfile(prop_distr_path):
            print(f"Distribution file '{prop_distr_path}' already exists, skipping it!")
            # ToDo: don't skip if checksum is different
            continue

        # Either `atLocation` or its `location` may be missing; both cases end up as None
        location = getattr(getattr(prop_distr, 'atLocation', None), 'location', None)
        if location:
            shutil.copy2(get_gpfs_path(location), prop_distr_path)
        else:
            print(f"No location for Resource '{res_id}'")

    return 1

In [None]:
def get_res_properties(res, props_list, res_tag):
    """Fetch the distributions of several properties of a resource.

    `props_list` maps a property name of `res` to the directory label handed
    to `get_res_distributions`; every property is requested at `res_tag`.
    """
    total = len(props_list)
    for position, (name, label) in enumerate(props_list.items(), start=1):
        print(f"\nGetting prop {position} of {total}: {name}")
        get_res_distributions(getattr(res, name).id, res_tag, label)

In [None]:
ph_dir = 'Placement_hints'

# AtlasRelease property name -> sub-directory of `atlas_path` receiving its files
atlas_props = {
    'brainTemplateDataLayer': 'Brain_template',
    'cellOrientationField': 'Cell_orientation_field',
    'directionVector': 'Direction_vector',
    'hemisphereVolume': 'Hemisphere_volume',
    'parcellationOntology': 'Parcellation_ontology',
    'parcellationVolume': 'Annotation_volume',
    'placementHintsDataCatalog': ph_dir,
}

get_res_properties(atlas_release, atlas_props, resources_tag)

### Get the Placement Hints

In [None]:
# Make sure the placement hints directory exists
os.makedirs(os.path.join(atlas_path, ph_dir), exist_ok=True)

# Load the catalog file attached to the placementHintsDataCatalog resource
ph_catalog_res = forge.retrieve(
    id=atlas_release.placementHintsDataCatalog.id,
    version=resources_tag,
)
with open(get_gpfs_path(ph_catalog_res.distribution.atLocation.location), "r") as f:
    ph_catalog = json.load(f)

# Collect one (id, revision) pair per catalog entry; a catalog value is
# either a single entry or a list of entries
ph_id_rev = []
for ph_type in ph_catalog.values():
    ph_list = ph_type if isinstance(ph_type, list) else [ph_type]
    ph_id_rev.extend((ph['@id'], ph['_rev']) for ph in ph_list)

In [None]:
# Fetch every placement hint at the revision paired with its id
n_phs = len(ph_id_rev)
for i_ph, ph in enumerate(ph_id_rev):
    ph_id, ph_version = ph
    print(f"\nGetting PH {i_ph +1} of {n_phs}: {ph_id}")
    get_res_distributions(ph_id, ph_version, ph_dir)

### Get the Meshes

In [None]:
# Keep `mesh_dir` relative to `atlas_path`, like the other directory labels:
# it is joined with `atlas_path` wherever it is used, so it must not contain it already
mesh_dir = 'Brain_meshes'
os.makedirs(os.path.join(atlas_path, mesh_dir), exist_ok=True)

In [None]:
# Search the BrainParcellationMesh resources linked to this atlas release and its species
filters = {
    "type": 'BrainParcellationMesh',
    "atlasRelease": {"id": atlas_release_id},
    "subject": {
        "species": {"id": atlas_release.subject.species.get_identifier()},
    },
}
atlas_meshes = forge.search(filters, limit=10000, debug=False)

In [None]:
# Fetch each mesh at the release tag and count how many were retrieved
n_meshes = len(atlas_meshes)
downloaded_meshes = 0
for i_mesh, mesh in enumerate(atlas_meshes):
    mesh_id = mesh.id
    print(f"\nGetting mesh {i_mesh +1} of {n_meshes}: {mesh_id}")
    # get_res_distributions returns 0 when the mesh is not found at this version, 1 otherwise
    downloaded_meshes += get_res_distributions(mesh_id, resources_tag, mesh_dir)
    
print(f"\nDownloaded {downloaded_meshes} versioned meshes out of {n_meshes} total meshes")

## Get the CellComposition distributions


In [None]:
cell_comp_dir = 'Cell_composition'

# Both CellComposition properties are stored in the same directory
cell_comp_props_list = dict.fromkeys(
    ('cellCompositionSummary', 'cellCompositionVolume'),
    cell_comp_dir,
)
cell_composition = forge.retrieve(
    id=cell_composition_id,
    version=resources_tag,
    cross_bucket=True,
)

get_res_properties(cell_composition, cell_comp_props_list, resources_tag)

### Get the released M-E type densities
Each M-E type is registered in Nexus with its corresponding metadata. The list of the M-E type densities annotated in the parcellation volume is provided in the `CellCompositionVolume` property of the CellComposition:

In [None]:
# The M-E type densities go in a sub-directory of the cell composition directory
me_dir = os.path.join(cell_comp_dir, 'Morphological-electrical_neuron_type_Densities')
os.makedirs(os.path.join(atlas_path, me_dir), exist_ok=True)

# Load the JSON file attached to the cellCompositionVolume resource
cell_composition_volume = forge.retrieve(
    id=cell_composition.cellCompositionVolume.id,
    version=resources_tag,
    cross_bucket=True,
)
with open(get_gpfs_path(cell_composition_volume.distribution.atLocation.location), "r") as f:
    metype_density_release_json = json.load(f)

In [None]:
# Install extra dependencies
!pip install jsonpath_ng

In [None]:
from jsonpath_ng import parse

# Match the `@id` found three `hasPart` levels deep in the release JSON
jsonpath_expr = parse('hasPart[*].hasPart[*].hasPart[*].@id')

# yields [(metype volume id, revision)]; the revision is read from the context of each matched id
me_ids = [
    (match.value, match.context.value["_rev"])
    for match in jsonpath_expr.find(metype_density_release_json)
]

In [None]:
# Fetch every M-E density at the revision paired with its id
n_mes = len(me_ids)
for i_me, me_rev in enumerate(me_ids):
    me_id, me_version = me_rev
    print(f"\nGetting M-E density {i_me +1} of {n_mes}: {me_id}")
    get_res_distributions(me_id, me_version, me_dir)