# Replace each ROI ID with its parent's ID to reflect the combined annotation ontology
This notebook updates the ROI IDs in the annotation volume (AV) with the IDs of their parent ROIs so that the volume reflects the combined anatomical ontology (AO).
- inputs
    - annotation_100_divided.nrrd
    - AObase_c.json
    - ID_parentID_AObase.csv
- output
    - AVbase_c.nrrd

# Set variables

In [None]:
# Directory that holds the input files and receives the output file.
dir_data = "data"

# Input file names: annotation volume, combined ontology, ID/parentID table.
fn_input_AV = "annotation_100_divided.nrrd"
fn_input_AO = "AObase_c.json"
fn_input_csv = "ID_parentID_AObase.csv"

# Output file name: annotation volume with the replaced IDs.
fn_output_AV = "AVbase_c.nrrd"

In [40]:
import os
import pandas as pd
import nrrd
import numpy as np
import json
import copy
from collections import OrderedDict
from jsonpath_rw import jsonpath, parse

# Load data

In [42]:
# Table of ROI IDs and their parent IDs.
path_input_csv = os.path.join(dir_data, fn_input_csv)
df_IDpairs = pd.read_csv(path_input_csv)

# Annotation volume and its NRRD header (the header is reused when saving).
path_input_AV = os.path.join(dir_data, fn_input_AV)
AV, header = nrrd.read(path_input_AV)

# Combined ontology tree; OrderedDict keeps the key order found in the JSON file.
# NOTE: despite the `df_` prefix this is the parsed JSON object, not a DataFrame.
path_input_AO = os.path.join(dir_data, fn_input_AO)
with open(path_input_AO) as fh:
    df_combinedAO = json.load(fh, object_pairs_hook=OrderedDict)

In [43]:
# Collect [id, acronym] for every node of the combined ontology tree.
# '$..id' matches the 'id' field of the root node and of all its descendants.
jsonpath_expr = parse('$..id')

# Each match keeps a reference to the object that holds the matched field
# (match.context), so the acronym is read straight from that node. This
# replaces the previous approach of rewriting the match path into a Python
# expression with str.replace() and running it through eval(), which was
# fragile (plain text substitution on 'id' / 'children') and unsafe.
IDacronym_list = [[match.value, match.context.value['acronym']]
                  for match in jsonpath_expr.find(df_combinedAO['msg'][0])]
df_IDs_in_combinedAO = pd.DataFrame(IDacronym_list, columns=['ID', 'acronym'])

# Get pairs of each deleted ID and its nearest ancestor ID that exists in the combined AO

In [44]:
# All IDs listed in the ID/parentID table.
ID_pairs = df_IDpairs.ID.values
# IDs that are present in the combined ontology.
IDs_in_combined_AO = df_IDs_in_combinedAO.ID.values
# Deleted IDs are those listed in the table but absent from the combined
# ontology. This is a plain set difference; the symmetric difference (^)
# would also pick up IDs that exist only in the combined ontology, which
# are not "deleted" and need no replacement.
DeletedIDs = set(ID_pairs) - set(IDs_in_combined_AO)

In [45]:
def getParentID(childID):
    """Return the parent ID recorded for `childID` in `df_IDpairs`.

    A missing parent (NaN in the `parentID` column) is returned as -1.
    When several rows match, the first one is used; when no row matches,
    the positional lookup raises IndexError.
    """
    is_child = df_IDpairs['ID'] == childID
    parent_ids = df_IDpairs.loc[is_child, 'parentID']
    return parent_ids.fillna(-1).astype(int).values[0]

In [46]:
# For every deleted ID, climb the parent chain until an ID that exists in the
# combined ontology is reached; that ancestor is the replacement ID.
# OriginalID[k] and ParentID[k] form one (deleted ID, replacement ID) pair.
OriginalID = []
ParentID = []

# Hashed membership test instead of scanning the ID array on every step.
surviving_IDs = set(IDs_in_combined_AO)

for deleted_ID in DeletedIDs:
    ancestor_ID = deleted_ID
    while ancestor_ID not in surviving_IDs:
        # getParentID returns -1 when there is no parent; stop with a clear
        # message instead of looking up -1 and failing with an IndexError.
        if ancestor_ID == -1:
            raise ValueError(
                'No ancestor of ID {} exists in the combined ontology'.format(deleted_ID))
        ancestor_ID = getParentID(ancestor_ID)
    OriginalID.append(deleted_ID)
    ParentID.append(ancestor_ID)

# Update IDs in AV to reflect combined IDs in AO

In [47]:
# Work on a copy so the loaded volume `AV` stays available for comparison.
AV_corrected = copy.deepcopy(AV)
# Overwrite every voxel that carries a deleted ID with its replacement ID.
for oriID, newID in zip(OriginalID, ParentID):
    voxels_to_fix = AV_corrected == oriID
    AV_corrected[voxels_to_fix] = newID

# Save annotation volume

In [48]:
# Write the corrected volume into the data directory, reusing the header
# that was read together with the input volume.
path_output_AV = os.path.join(dir_data, fn_output_AV)
nrrd.write(path_output_AV, AV_corrected, header)

# Check data

In [49]:
# Shape and first rows of the ID/parentID table read from the CSV.
print(df_IDpairs.shape)
df_IDpairs.head() # ID / parentID pairs

(865, 2)


Unnamed: 0,ID,parentID
0,997,
1,8,997.0
2,567,8.0
3,688,567.0
4,695,688.0


In [50]:
# Shape and first rows of the ID/acronym table built from the combined ontology.
print(df_IDs_in_combinedAO.shape)
df_IDs_in_combinedAO.head() # IDs (with acronyms) found in the combined AO

(688, 2)


Unnamed: 0,ID,acronym
0,997,root
1,8,grey
2,567,CH
3,688,CTX
4,695,CTXpl


In [51]:
# Number of IDs collected in DeletedIDs, i.e. IDs to be replaced in the volume.
len(DeletedIDs)

177

In [52]:
# Count the distinct IDs in the volume before and after the replacement.
# np.unique works on the array directly, so no Python set over every voxel
# has to be built.
print(np.unique(AV).size)  # distinct IDs in the input annotation volume
print(np.unique(AV_corrected).size)  # distinct IDs after replacing deleted IDs

670
551
