# Divide internal ROIs that have voxel counts
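This notebook divides ROIs in an anatomical ontology (AO) JSON file when they are internal nodes with voxel counts (VC) > 0 in an annotation volume (AV). For each such ROI, a new childless node carrying the ROI's own voxel count is inserted immediately after it, and the voxel count of the original ROI is cleared. The name and acronym of the new node are the originals suffixed with "_peripheral" and "_peri", i.e. "original name_peripheral" and "original acronym_peri". The new node receives a newly assigned ID (>= 10^9).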

- input
    - 1_VC_pruned.json
- outputs
    - 1_VC_pruned_divided.json
    - dividedIDs.csv

# Set variables

In [4]:
# Directory that holds both the input and the output files.
dir_data = "data"
# Input: anatomical ontology (AO) JSON file read by this notebook.
fn_input_AO = "1_VC_pruned.json"
# Output: AO JSON file after dividing internal ROIs that have voxel counts.
fn_output_AO = "1_VC_pruned_divided.json"
# Output: CSV table listing each divided ROI and its newly assigned ID.
fn_output_ID = "dividedIDs.csv"

In [2]:
import os
import nrrd
import numpy as np
import pandas as pd
import json
import copy
from collections import OrderedDict
from jsonpath_rw import jsonpath, parse

# Load data

In [5]:
# Load the input ontology; OrderedDict keeps the key order of every JSON
# object so that the file written later has the same field layout.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's locale encoding (which would garble non-ASCII names on systems
# whose default is not UTF-8).
with open(os.path.join(dir_data, fn_input_AO), encoding='utf-8') as f:
    df_AO_VC_pruned = json.load(f, object_pairs_hook=OrderedDict)

# Collect the ID, acronym and name of every ROI to determine candidate IDs for divided ROIs

In [6]:
# Match every 'id' field at any depth below the ontology root node
# (df_AO_VC_pruned['msg'][0]).
jsonpath_expr = parse('$..id')

# The context of each match is the node (dict) that owns the matched 'id',
# so its acronym and name are read from that node directly. This replaces
# the former eval() of a subscript expression rebuilt from the textual path,
# which depended on str.replace() never hitting an unintended substring.
IDacronym_list = [[match.value,
                   match.context.value['acronym'],
                   match.context.value['name']]
                  for match in jsonpath_expr.find(df_AO_VC_pruned['msg'][0])]

# One row per ROI: its ID, acronym and name.
IDacronym = pd.DataFrame(IDacronym_list, columns=['ID', 'acronym', 'name'])

In [7]:
# Largest ID used in the input ontology
# (614454277 in the original annotation ontology file).
maxID = IDacronym['ID'].max()
print(maxID)

614454277


In [8]:
# Divide_internal_ROI_with_VC_in_AO increments CandidateID *before* using it,
# so the counter is seeded with one less than the first ID to hand out.
# New IDs start at 10**9, or directly after the current maximum when that is
# already >= 10**9 (seeding with maxID + 1 in that case would skip one ID).
# int() keeps the counter a plain Python int even if maxID is a NumPy scalar.
CandidateID = max(int(maxID), 10**9 - 1)
CandidateID

999999999

# Divide internal nodes with voxel counts > 0

In [9]:
def Divide_internal_ROI_with_VC_in_AO(match_id, match_fullpath):
    """Divide one internal ROI that has its own voxel count.

    The node addressed by ``match_fullpath`` inside the global
    ``df_AO_VC_pruned_leafed`` is divided when it is a non-leaf node
    (``children != []``) whose ``voxel_count`` is not ``None``:

    * a copy of the node is inserted right after it in the list that holds
      it, with ``acronym`` suffixed by ``"_peri"``, ``name`` suffixed by
      ``"_peripheral"``, ``children`` set to ``None``, ``id`` set to the next
      value of the global ``CandidateID`` and the original ``voxel_count``;
    * the ``voxel_count`` of the original node is set to ``None``.

    The tree is accessed directly rather than through ``eval``/``exec`` on
    generated source text, so names or acronyms containing quotes or
    backslashes (e.g. "Ammon's horn") are handled correctly.

    Args:
        match_id: Value of the matched ``id`` field. ``0`` is skipped.
        match_fullpath: Path of the matched ``id`` field; only its ``str()``
            form is used, e.g. ``"msg.[0].children.[3].id"``.

    Returns:
        ``[match_id, acronym, name, voxel_count, new_id]`` when the node was
        divided, otherwise ``None``.

    Raises:
        ValueError: If a node that qualifies for division is not an element
            of a list, so there is no position to insert the new node at.
    """
    global df_AO_VC_pruned_leafed
    global CandidateID

    # Children is not defined for a root node.
    if match_id == 0:
        return None

    # Turn "msg.[0].children.[3].id" into ['msg', 0, 'children', 3, 'id'].
    # Brackets are padded with dots first so that the split also works if the
    # path is rendered without a separator in front of an index.
    normalised = str(match_fullpath).replace('[', '.[').replace(']', '].')
    steps = [int(token[1:-1]) if token.startswith('[') else token
             for token in normalised.split('.') if token]
    node_steps = steps[:-1]  # drop the trailing 'id' field itself

    # Walk down to the node, remembering the container that holds it.
    node = df_AO_VC_pruned_leafed
    siblings = None
    for step in node_steps:
        siblings = node
        node = node[step]

    is_internal = node['children'] != []            # true if non-leaf node
    has_own_voxels = node['voxel_count'] is not None
    if not (is_internal and has_own_voxels):
        return None

    if not node_steps or not isinstance(node_steps[-1], int):
        raise ValueError(
            "cannot divide node at %r: it is not an element of a list"
            % str(match_fullpath))
    source_child_index = node_steps[-1]

    CandidateID += 1
    source_acronym = node['acronym']
    source_name = node['name']
    source_original_voxelcount = node['voxel_count']

    # Build the new node from an independent copy of the original one.
    peripheral = copy.deepcopy(node)
    peripheral['acronym'] = source_acronym + '_peri'
    peripheral['name'] = source_name + '_peripheral'
    # NOTE(review): leaves are detected above via children == [], yet the new
    # leaf gets None here (kept as-is) - confirm downstream code expects this.
    peripheral['children'] = None
    # Store a plain int so the tree stays JSON-serialisable even if the
    # counter is a NumPy integer.
    peripheral['id'] = int(CandidateID)
    peripheral['voxel_count'] = source_original_voxelcount

    # The voxels now belong to the new node only.
    node['voxel_count'] = None
    siblings.insert(source_child_index + 1, peripheral)

    # Divided ID, its acronym/name/voxel count and the newly assigned ID.
    return [match_id, source_acronym, source_name,
            source_original_voxelcount, CandidateID]

In [10]:
# Work on a deep copy so that the loaded ontology itself stays untouched.
jsonpath_expr = parse('$..id')
df_AO_VC_pruned_leafed = copy.deepcopy(df_AO_VC_pruned)

# Visit the matches in reverse order: dividing a node inserts a new element
# right after it, which only shifts the positions of later siblings, and
# those have already been visited by then.
ID_divided = []
for match in reversed(jsonpath_expr.find(df_AO_VC_pruned_leafed)):
    ID_divided.append(
        Divide_internal_ROI_with_VC_in_AO(match.value, match.full_path))

# Get internal ROIs with voxel counts > 0

In [11]:
# Nodes that were not divided produced None; keep only the real records.
ID_divided_wo_None = [record for record in ID_divided if record is not None]

# One row per divided ROI: original ID, acronym, name, voxel count, new ID.
df_dividedIDs = pd.DataFrame(
    ID_divided_wo_None,
    columns=['divided_ID', 'acronym', 'name', 'voxel_count', 'new_ID'],
)

# Save AO and csv files

## AO json-file

In [12]:
# Write the divided ontology as JSON, pretty-printed with a 4-space indent.
with open(os.path.join(dir_data, fn_output_AO), 'w') as fw:
    json.dump(obj=df_AO_VC_pruned_leafed, fp=fw, indent=4)

## csv file with divided IDs

In [13]:
# Save the table of divided ROIs; index=False leaves out the row index.
df_dividedIDs.to_csv(os.path.join(dir_data, fn_output_ID), index=False)

# Check data

In [14]:
# Preview the first rows of the five-column table. Per the original note,
# there were 29 inner nodes with voxel counts > 0 (i.e. 29 rows).
df_dividedIDs.head()

Unnamed: 0,divided_ID,acronym,name,voxel_count,new_ID
0,145,V4,fourth ventricle,523,1000000000
1,81,VL,lateral ventricle,2138,1000000001
2,301,st,stria terminalis,292,1000000002
3,863,rust,rubrospinal tract,564,1000000003
4,784,cst,corticospinal tract,92,1000000004


In [15]:
# Largest ID in the input ontology. IDacronym was built from the loaded
# (undivided) AO, so the newly assigned IDs are not part of it.
IDacronym['ID'].max()

614454277

In [16]:
# import math
# print(IDacronym['ID'].max())
# print(math.log10(IDacronym['ID'].max()))
# print(math.log10(2**32))
# print(10**0.6329)

614454277
8.788489571542424
9.632959861247398
4.294375337334885
