A notebook exploring an initial attempt at visually merging the two information modalities, i.e., average cortical thickness and gray matter volume, across the ROIs. This exploration is equivalent to the 'data fusion' strategies explored in the visual analytics literature, in which a composite metric (e.g., a ratio, min, or max) is derived to obtain a uni-dimensional representation.

# Import Libraries

In [None]:
import plotly.express as px
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
import pickle
import os

from owlready2 import set_render_func
from owlready2 import get_ontology
from owlready2 import default_world

from anytree import AnyNode, Node, RenderTree
from anytree import search
from anytree.search import find
from anytree import PostOrderIter

from operator import add


In [None]:
# Connect to Google Drive so the data files stored under /content/drive can be read.
# NOTE(review): this import sits outside the notebook's main import cell and is only
# available on Google Colab — confirm before running the notebook elsewhere.
from google.colab import drive
drive.mount('/content/drive')
# Alternative mount point with a forced remount (disabled):
#drive.mount('/gdrive', force_remount=True)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Import Volumetric File and Thickness File

In [None]:
# Load the data files that contain the w-scores of the data subjects, for all the ROIs.
# The Drive folder is named once here so that relocating the data needs a single edit.
ONTOLOGY_DIR = '/content/drive/My Drive/ontology'

# File 1: csv file with the volumetric w-scores
df_w_Vol = pd.read_csv(os.path.join(ONTOLOGY_DIR, 'volumetric_w-scores.csv'), index_col=False)
# File 2: csv file with the average cortical thickness w-scores
df_w_ThickAvg = pd.read_csv(os.path.join(ONTOLOGY_DIR, 'ThickAvg_w-scores.csv'), index_col=False)

In [None]:
# A function to set the cortical thickness w-scores of all the subcortical regions to NaN.
# This is just a step to guarantee logical coherence.
def rule_setROItoNAs(df_sample_w):
  """Set the scores of every region listed in ``ROI_zero`` to NaN.

  The listed regions (white matter, corpus callosum, ventricles, choroid
  plexus, CSF) are blanked for all rows. A listed column that does not exist
  yet is created and filled with NaN.

  Args:
    df_sample_w: DataFrame of w-scores with one column per ROI. It is
      modified in place.

  Returns:
    The same DataFrame object that was passed in.
  """
  ROI_zero = ['White_Matter', 'Right_Cerebral_White_Matter', 'Left_Cerebral_White_Matter', 'WM_Hypointensities', 'Corpus_Callosum', 'Central', 'Anterior','Mid_Anterior','Mid_Posterior', 'Posterior' ,
              '3rd-Ventricle', '4th-Ventricle',  'Right_Lateral_Ventricle', 'Right_Inf-Lat-Vent', 'Left_Lateral_Ventricle', 'Left_Inf-Lat-Vent',  'Left_Choroid_Plexus', 'Right_Choroid_Plexus', 'CSF', 'Cerebrospinal_Fluid']

  for ROI in ROI_zero:
    # Plain column assignment overwrites an existing column and creates a
    # missing one, so no try/except fallback (which would also hide unrelated
    # errors) is needed.
    df_sample_w[ROI] = np.nan

  return df_sample_w

In [None]:
# Blank the thickness w-scores of the regions listed inside rule_setROItoNAs.
# The function edits the frame in place and returns it, so the name is simply re-bound.
df_w_ThickAvg = rule_setROItoNAs(df_w_ThickAvg)

In [None]:
# Pre-process: collapse the ADNI sub-cohort labels ('ADNI2/GO', 'ADNI3') into 'ADNI',
# then build the subject key 'fullsid' as '<sid>_<sample>'.
# The replaced column is assigned back instead of calling replace(..., inplace=True)
# on the column selection: that chained in-place form may modify a temporary copy
# and leave df_w_Vol unchanged (pandas chained-assignment / Copy-on-Write behaviour).
df_w_Vol['sample'] = df_w_Vol['sample'].replace({'ADNI2/GO': 'ADNI', 'ADNI3': 'ADNI'})
df_w_Vol['fullsid'] = df_w_Vol['sid'] + '_' + df_w_Vol['sample']

In [None]:
# List of all the ROI names. These are required for plotting the sunburst chart.
# Positional slice: skips the first 9 columns and the last column of df_w_Vol.
# NOTE(review): presumably the leading columns are subject metadata and the last one is
# the 'fullsid' key appended by the pre-processing cell — confirm against the CSV layout;
# this cell therefore assumes the pre-processing cell has already run.
ROIs = df_w_Vol.columns[9:-1]

In [None]:
# The core logic for merging the two information modalities, i.e., the w-scores of cortical thickness and gray matter volume.
# For each ROI, we choose the most pathologic signal to be visualized in the summary chart.

def max_path(a,b):
  """Return whichever of two w-scores is further from zero, with its original sign.

  Args:
    a: First w-score (scalar).
    b: Second w-score (scalar).

  Returns:
    ``a`` when ``abs(a) >= abs(b)`` (ties favour ``a``), otherwise ``b``.
    A missing (NaN) score never wins over an available one; NaN is returned
    only when both scores are missing.
  """
  # Any comparison with NaN is False, so without these guards a NaN in `b`
  # would be returned even when `a` holds a valid score.
  if pd.isna(b):
    return a
  if pd.isna(a):
    return b
  return a if abs(a) >= abs(b) else b


In [None]:
# A new dataframe to hold the mixed data modality, i.e., maximum pathology.
# It starts as a copy of the thickness w-scores; for every subject that also has
# volumetric w-scores, each ROI value is replaced by the one max_path() selects.
#
# The earlier version looped over every (subject, ROI) pair inside a blanket
# `try/except: pass`, so the first problem (a subject missing from one table, a
# duplicated id, an ROI missing from one table) silently stopped the merge and left
# a partially merged table. Here the tables are aligned once on 'fullsid' and real
# errors are allowed to surface.
df_w_max = df_w_ThickAvg.copy(deep=True)

# Only ROIs present in both tables can be compared; report the others instead of hiding them.
roi_cols = [roi for roi in ROIs if roi in df_w_max.columns]
skipped_rois = [roi for roi in ROIs if roi not in df_w_max.columns]
if skipped_rois:
  print('ROIs without a thickness column (left unmerged): {}'.format(skipped_rois))

# One volumetric row per subject, addressable by the subject key.
vol_by_sid = df_w_Vol.dropna(subset=['fullsid']).drop_duplicates(subset='fullsid').set_index('fullsid')

# Rows of the merged table whose subject also appears in the volumetric table;
# subjects without volumetric data keep their thickness w-scores untouched.
has_vol = df_w_max['fullsid'].isin(vol_by_sid.index)
thick_vals = df_w_max.loc[has_vol, roi_cols].to_numpy(dtype=float)
vol_vals = vol_by_sid.loc[df_w_max.loc[has_vol, 'fullsid'], roi_cols].to_numpy(dtype=float)

if thick_vals.size:
  # max_path works on scalars, so it is applied element by element over the aligned arrays.
  df_w_max.loc[has_vol, roi_cols] = np.vectorize(max_path, otypes=[float])(thick_vals, vol_vals)

# Plot and Save

In [None]:
# Template table for the chart, read from vis_test.csv. Later cells rely on its 'ROI'
# and 'parent' columns (the parent-child relations) and overwrite its 'w-score' values.
# Per the original author's note, the file holds the scores of one AD sample and its
# ROIs are manually ordered with an associated 'level'.
df_ = (
    pd.read_csv('/content/drive/My Drive/ontology/vis_test.csv', index_col=False)
    .fillna('')
    # drop the excess columns: 'cn_vol' and whatever the first column of the file is
    .pipe(lambda frame: frame.drop(columns=['cn_vol', frame.columns[0]]))
    .rename(columns={"z": "w-score"})
)

In [None]:
def sort_code(df):
  """Attach the hand-made symmetry sorting code to each region.

  To draw symmetric left and right cerebrum regions, the regions are ordered
  by hand-made codes read from roi_SortSymmetryCode.csv (columns ``ROI`` and
  ``sort_try``). Each row of ``df`` is matched to its code through its
  ``ROI`` value.

  Args:
    df: DataFrame with an ``ROI`` column. It is modified in place: a ``sort``
      column is added (or overwritten).

  Returns:
    The same DataFrame object that was passed in.

  Raises:
    KeyError: if any ROI in ``df`` has no entry in the sorting-code file.
  """
  df_temp = pd.read_csv('/content/drive/My Drive/ontology/roi_SortSymmetryCode.csv')
  sort_dict = dict(zip(df_temp.ROI, df_temp.sort_try))

  roi_names = df['ROI'].astype(str)

  # Fail loudly, naming every unmatched region, rather than on the first one only.
  unknown = sorted(set(roi_names) - set(sort_dict))
  if unknown:
    raise KeyError('No sorting code found for ROI(s): {}'.format(unknown))

  # One vectorised lookup; the column takes the dtype of the codes themselves
  # instead of being coerced row by row from an integer placeholder.
  df['sort'] = roi_names.map(sort_dict)

  return df

In [None]:
# A custom diverging colour scale with 13 stops, running from dark red through pale
# yellow (the middle, 7th stop) to dark blue-green. It is passed to the sunburst as
# `color_continuous_scale` together with `color_continuous_midpoint=0`.
# The two commented-out entries are unused alternative shades for the middle of the scale.
custom_RYG = [

    "rgb(81,0,17)",  #~rosewood dark red

    "rgb(165,0,38)",  #shades of red
    "rgb(215,48,39)",
    "rgb(244,109,67)",
    "rgb(253,174,97)",

    "rgb(251,211,165)",  #Yellow-red

 #   "rgb(250,230,199)",
    "rgb(250,248,233)", #lighter yellow
  #  "rgb(236,239,209)",

    "rgb(222,230,184)",  #Yellow-Green

    "rgb(183, 225, 186)",
    "rgb(110, 189, 145)",
    "rgb(61, 144, 125)",
    "rgb(42, 115, 113)",  #shades of green

    "rgb(29, 80, 97)",  #~dark blue-green
      ]

In [None]:
# Pick the sample(s) to plot. Several ids may be listed; their w-scores are averaged per column.
sample_id = ['6849_ADNI']     #4770, ADNI2
stage = 'AD'
cohort='ADNI'
measure = 'Max Pathology (volume or cortical thickness)'

df_sample_w = df_w_max[df_w_max['fullsid'].isin(sample_id)]
if df_sample_w.empty:
  # Without this check an unknown id would silently produce a chart of NaNs.
  raise ValueError('No rows in df_w_max match sample_id {}'.format(sample_id))

# Mean w-score per column, from the 9th column onwards. numeric_only=True skips any
# text column in that slice (e.g. an id column), which would otherwise make mean()
# raise a TypeError on pandas >= 2.0.
w_dict = df_sample_w[df_sample_w.columns[8:]].mean(numeric_only=True).to_dict()

# Copy the template frame with the parent-child relations and replace its w-scores.
# A region missing from w_dict raises KeyError, exactly as a per-row lookup would.
df_plot = df_.copy()
df_plot['w-score'] = [w_dict[roi] for roi in df_plot['ROI']]
df_plot['w-score'] = df_plot['w-score'].round(2)   # round to 2 decimals to get rid of the trailing digits

# Clean up the ROI names: hyphens and underscores become spaces
df_plot['ROI'] = df_plot['ROI'].map(lambda x: x.replace('-',' ').replace('_',' '))
df_plot['parent'] = df_plot['parent'].map(lambda x: x.replace('-',' ').replace('_',' '))

# Add the sorting code to the dataframe and order the rows by it
df_plot = sort_code(df_plot)
df_plot = df_plot.sort_values('sort')

In [None]:
# Sunburst of the ROI hierarchy, coloured by w-score on the custom diverging scale
# (centred on 0, clipped to [-5, 5]).
fig2 = px.sunburst(
    df_plot,
    names='ROI',
    parents='parent',
    color='w-score',
    color_continuous_scale=custom_RYG,
    color_continuous_midpoint=0,
    range_color=[-5, 5],
    maxdepth=5,   # controls the number of levels one sees at a time, choose between [4,5]
)
fig2.update_layout(
    title_text="Sunburst {} W-score | Sample ID: {} | Data Cohort: {}".format(measure, sample_id[0], cohort),
    font_size=10,
    autosize=False,
    width=800,
    height=800,
)

# Keep the row order of df_plot (plotly's internal sorting is switched off) and rotate
# the chart to get the vertical left-right alignment.
fig2.update_traces(selector={'type': 'sunburst'}, sort=False, rotation=-101)    #100,53

fig2.show()
# Save the interactive chart as a standalone HTML file in the working directory.
fig2.write_html("{}_{}_mean_w_Sunburst.html".format(sample_id[0], measure))