In [None]:
# import packages
import pickle
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import time
from datetime import datetime
import plotly.io as pio
import os
import copy

In [None]:
# Global settings shared by the dataset sections below.
# NOTE(review): 1e-3 looks like a millimetre-to-metre factor — confirm; it is not used in the cells visible here.
registration_scale_factor = 1e-3
# Number of leading rows of the 'Skin coordinates' sheet that take part in the landmark-distance filter.
num_of_light_landmarks = 21

# IXI

In [None]:
# Workbook location is built as <datasets_folder><current_dataset_name>/<dataset_filename>.
current_dataset_name = "IXI"
datasets_folder = "./MRI_datasets/"
dataset_filename = "Dataset_Chamfer.xlsx"  # alternative workbook: 'Dataset.xlsx'

In [None]:
# Open the dataset workbook to discover which sheets it contains.
# The handle stays bound to `current_subject_dataframe` and is not closed in this cell.
current_subject_dataframe = pd.ExcelFile(f"{datasets_folder}{current_dataset_name}/{dataset_filename}")
current_sheet_names = current_subject_dataframe.sheet_names
current_num_of_sheets = len(current_sheet_names)

# Position of each sheet that is read later; `next` raises StopIteration if a sheet name is absent.
skin_coordinates_index = next(position for position, sheet in enumerate(current_sheet_names) if sheet == 'Skin coordinates')
skin_normals_index = next(position for position, sheet in enumerate(current_sheet_names) if sheet == 'Skin normals')
skin_geodesic_distances_index = next(position for position, sheet in enumerate(current_sheet_names) if sheet == 'Skin distances')
inverse_matrices_index = next(position for position, sheet in enumerate(current_sheet_names) if sheet == 'Inverse transformations')
stats_index = next(position for position, sheet in enumerate(current_sheet_names) if sheet == 'Stats')

In [None]:
# Load the five sheets located above; the first column of every sheet becomes the row index.
# The workbook handle opened earlier (`current_subject_dataframe`) is reused, so the .xlsx file is
# opened once instead of being re-opened and re-parsed for each of the five sheets.
skin_coordinates_df = pd.read_excel(current_subject_dataframe, sheet_name=skin_coordinates_index, index_col=0)
skin_normals_df = pd.read_excel(current_subject_dataframe, sheet_name=skin_normals_index, index_col=0)
skin_geodesic_distances_df = pd.read_excel(current_subject_dataframe, sheet_name=skin_geodesic_distances_index, index_col=0)
inverse_transformations_df = pd.read_excel(current_subject_dataframe, sheet_name=inverse_matrices_index, index_col=0)
stats_df = pd.read_excel(current_subject_dataframe, sheet_name=stats_index, index_col=0)

In [None]:
# Positions of the coordinate columns, i.e. every column whose name does not contain 'indices'.
skin_coordinates_columns_names = list(skin_coordinates_df.columns)
only_coordinates_columns_indices = [
    column_position
    for column_position, column_name in enumerate(skin_coordinates_columns_names)
    if 'indices' not in column_name
]

In [None]:
# Choose the subjects (row positions in the per-landmark coordinate table) kept for the analysis.
# Manual toggle: `if 1` -> landmark-distance filter, `else` -> ranking by final correspondence loss.
if 1:
    max_euclidean_distance = 75e-3 # that's a lot

    relevant_indices = []
    for desired_landmark_index, desired_landmark_name in enumerate(skin_coordinates_df.index[:num_of_light_landmarks]):
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        # One row per subject, three values per row. Forcing float keeps np.isnan usable even when
        # the row Series comes back with object dtype.
        desired_landmark_subjects_coordinates = np.asarray(desired_landmark_subjects_coordinates, dtype=float).reshape(-1, 3)

        # A subject is usable only when ALL three coordinates are present; a partly missing row would
        # otherwise turn the landmark mean into NaN and reject every subject.
        valid_coordinates_rows = np.where(~np.isnan(desired_landmark_subjects_coordinates).any(axis=1))[0]
        valid_rows = valid_coordinates_rows

        desired_landmark_coordinates_mean = np.mean(desired_landmark_subjects_coordinates[valid_rows, :], axis=0)
        euclidean_distances = np.linalg.norm(desired_landmark_subjects_coordinates[valid_rows, :]-desired_landmark_coordinates_mean, axis=1)
        # Map back to absolute subject rows: the distance array is indexed relative to `valid_rows`,
        # so its positions differ from subject rows as soon as any subject was dropped for NaNs.
        desired_landmark_relevant_indices = valid_rows[euclidean_distances<max_euclidean_distance]
        relevant_indices.append(desired_landmark_relevant_indices)

    # A subject is kept only if it is close to the mean position for every landmark.
    only_valid_score_subjects_rows = relevant_indices[0]
    for desired_landmark_relevant_indices in relevant_indices:
        only_valid_score_subjects_rows = np.intersect1d(desired_landmark_relevant_indices, only_valid_score_subjects_rows)
else:
    # Keep the `score_ratio_threshold` fraction of subjects with the lowest final loss, in column order.
    score_ratio_threshold = 1
    only_valid_score_subjects_rows = np.sort(np.argsort(stats_df.loc['unique_correspondence_final_loss', :].values)[:int(score_ratio_threshold*stats_df.shape[1])])

In [None]:
# Every 4th column label (starting at column 0) is taken as one subject's label, and only the kept rows are selected.
first_column_per_subject = skin_coordinates_df.columns[0::4]
all_subject_names = np.array(first_column_per_subject[only_valid_score_subjects_rows])
# Drop the trailing 8 characters of each label.
# NOTE(review): presumably a fixed-length column suffix — confirm against the sheet.
only_valid_score_subject_names = np.array([column_label[:-8] for column_label in all_subject_names])

In [None]:
# Optional persistence of the kept-subject names; nothing runs while the outer toggle is 0.
if 0:
    array_folder = f"{datasets_folder}{current_dataset_name}/"
    array_filename = 'only_valid_score_subjects_names'
    array_filetype = '.npy'
    array_path = f"{array_folder}{array_filename}{array_filetype}"

    # Inner toggle: 1 writes the current array to disk, 0 reads a previously saved array back.
    if 1:
        with open(array_path, 'wb') as array_handle:
            np.save(array_handle, only_valid_score_subject_names)
    else:
        with open(array_path, 'rb') as array_handle:
            only_valid_score_subject_names = np.load(array_handle)

In [None]:
# Restrict the 'Stats' sheet to the kept subjects; columns are selected by subject name.
IXI_valid_df = stats_df.loc[:, only_valid_score_subject_names]

In [None]:
# First two rows of the kept-subject stats table.
# NOTE(review): presumably the two head-based Chamfer rows — the selection is positional, confirm the sheet's row order.
IXI_Chamfer_df = IXI_valid_df.iloc[0:2]

In [None]:
# Per-subject value arrays: row 0 feeds the "face" variable, row 1 the "head" variable.
IXI_head_based_face_chamfer_distance = IXI_Chamfer_df.iloc[0].values
IXI_head_based_head_chamfer_distance = IXI_Chamfer_df.iloc[1].values

In [None]:
# Face-based results are read from one shared workbook, using the sheet named after the current dataset.
datasets_folder = './MRI_datasets/'
IXI_face_based_Chamfer_df = pd.read_excel(
    f"{datasets_folder}Face_based_Chamfer_distances.xlsx",
    sheet_name=current_dataset_name,
    index_col=0,
)
# Same subject selection as for the head-based table; row 0 -> "face" values, row 1 -> "head" values.
IXI_valid_face_based_Chamfer_df = IXI_face_based_Chamfer_df.loc[:, only_valid_score_subject_names]
IXI_face_based_face_chamfer_distance = IXI_valid_face_based_Chamfer_df.iloc[0].values
IXI_face_based_head_chamfer_distance = IXI_valid_face_based_Chamfer_df.iloc[1].values

In [None]:
# Outlier removal: a subject is dropped when it is among the largest 2.5% of values in MORE than
# `IXI_outlier_appearnce_threshold` of the four Chamfer-distance arrays (i.e. in at least 3 of 4).
IXI_potential_outlier_number = int(np.round(IXI_head_based_head_chamfer_distance.size*0.025))
IXI_outlier_appearnce_threshold = 2
# Slice from an explicit start position instead of `[-n:]`: with n == 0 (small cohorts) `[-0:]`
# would return EVERY subject and the whole cohort would be flagged as outliers.
IXI_head_based_head_outliers = np.argsort(IXI_head_based_head_chamfer_distance)[IXI_head_based_head_chamfer_distance.size-IXI_potential_outlier_number:]
IXI_head_based_face_outliers = np.argsort(IXI_head_based_face_chamfer_distance)[IXI_head_based_face_chamfer_distance.size-IXI_potential_outlier_number:]
IXI_face_based_head_outliers = np.argsort(IXI_face_based_head_chamfer_distance)[IXI_face_based_head_chamfer_distance.size-IXI_potential_outlier_number:]
IXI_face_based_face_outliers = np.argsort(IXI_face_based_face_chamfer_distance)[IXI_face_based_face_chamfer_distance.size-IXI_potential_outlier_number:]

IXI_potential_outliers_concatenated = np.sort(np.concatenate((IXI_head_based_head_outliers,
                                                              IXI_head_based_face_outliers,
                                                              IXI_face_based_head_outliers,
                                                              IXI_face_based_face_outliers
                                                             ))
                                             )

# Count in how many of the four candidate lists each subject position appears.
IXI_unique_indices, IXI_indices_counts = np.unique(IXI_potential_outliers_concatenated, return_counts=True)
IXI_selected_outliers = np.where(IXI_indices_counts>IXI_outlier_appearnce_threshold)[0]
IXI_selected_outliers_indices = IXI_unique_indices[IXI_selected_outliers]
IXI_non_outlier_indices = np.delete(np.arange(IXI_head_based_head_chamfer_distance.size), IXI_selected_outliers_indices)
# NOTE: despite its name, this array holds the names of the subjects that are KEPT (the non-outliers).
IXI_selected_outliers_names = np.array(IXI_valid_face_based_Chamfer_df.iloc[:, IXI_non_outlier_indices].columns)

In [None]:
# Keep only the non-outlier subjects in each of the four Chamfer-distance arrays.
IXI_head_based_face_chamfer_distance_filtered = np.take(IXI_head_based_face_chamfer_distance, IXI_non_outlier_indices, axis=0)
IXI_head_based_head_chamfer_distance_filtered = np.take(IXI_head_based_head_chamfer_distance, IXI_non_outlier_indices, axis=0)
IXI_face_based_face_chamfer_distance_filtered = np.take(IXI_face_based_face_chamfer_distance, IXI_non_outlier_indices, axis=0)
IXI_face_based_head_chamfer_distance_filtered = np.take(IXI_face_based_head_chamfer_distance, IXI_non_outlier_indices, axis=0)

In [None]:
# Optional persistence of the subject names left after outlier removal; nothing runs while the outer toggle is 0.
if 0:
    array_folder = f"{datasets_folder}{current_dataset_name}/"
    array_filename = 'chamfer_distance_subjects_names'
    array_filetype = '.npy'
    array_path = f"{array_folder}{array_filename}{array_filetype}"

    # Inner toggle: 1 writes the current array to disk, 0 reads a previously saved array back.
    if 1:
        with open(array_path, 'wb') as array_handle:
            np.save(array_handle, IXI_selected_outliers_names)
    else:
        with open(array_path, 'rb') as array_handle:
            IXI_selected_outliers_names = np.load(array_handle)

In [None]:
# Display the shape of the filtered array, i.e. how many IXI subjects remain after outlier removal.
IXI_face_based_head_chamfer_distance_filtered.shape

# ADNI

In [None]:
# Workbook location is built as <datasets_folder><current_dataset_name>/<dataset_filename>.
current_dataset_name = "ADNI"
datasets_folder = "./MRI_datasets/"
dataset_filename = "Dataset_Chamfer.xlsx"  # alternative workbook: 'Dataset.xlsx'

In [None]:
# Open the dataset workbook to discover which sheets it contains.
# The handle stays bound to `current_subject_dataframe` and is not closed in this cell.
current_subject_dataframe = pd.ExcelFile(f"{datasets_folder}{current_dataset_name}/{dataset_filename}")
current_sheet_names = current_subject_dataframe.sheet_names
current_num_of_sheets = len(current_sheet_names)

# Position of each sheet that is read later; `next` raises StopIteration if a sheet name is absent.
skin_coordinates_index = next(position for position, sheet in enumerate(current_sheet_names) if sheet == 'Skin coordinates')
skin_normals_index = next(position for position, sheet in enumerate(current_sheet_names) if sheet == 'Skin normals')
skin_geodesic_distances_index = next(position for position, sheet in enumerate(current_sheet_names) if sheet == 'Skin distances')
inverse_matrices_index = next(position for position, sheet in enumerate(current_sheet_names) if sheet == 'Inverse transformations')
stats_index = next(position for position, sheet in enumerate(current_sheet_names) if sheet == 'Stats')

In [None]:
# Load the five sheets located above; the first column of every sheet becomes the row index.
# The workbook handle opened earlier (`current_subject_dataframe`) is reused, so the .xlsx file is
# opened once instead of being re-opened and re-parsed for each of the five sheets.
skin_coordinates_df = pd.read_excel(current_subject_dataframe, sheet_name=skin_coordinates_index, index_col=0)
skin_normals_df = pd.read_excel(current_subject_dataframe, sheet_name=skin_normals_index, index_col=0)
skin_geodesic_distances_df = pd.read_excel(current_subject_dataframe, sheet_name=skin_geodesic_distances_index, index_col=0)
inverse_transformations_df = pd.read_excel(current_subject_dataframe, sheet_name=inverse_matrices_index, index_col=0)
stats_df = pd.read_excel(current_subject_dataframe, sheet_name=stats_index, index_col=0)

In [None]:
# Positions of the coordinate columns, i.e. every column whose name does not contain 'indices'.
skin_coordinates_columns_names = list(skin_coordinates_df.columns)
only_coordinates_columns_indices = [
    column_position
    for column_position, column_name in enumerate(skin_coordinates_columns_names)
    if 'indices' not in column_name
]

In [None]:
# Choose the subjects (row positions in the per-landmark coordinate table) kept for the analysis.
# Manual toggle: `if 1` -> landmark-distance filter, `else` -> ranking by final correspondence loss.
if 1:
    max_euclidean_distance = 75e-3 # that's a lot

    relevant_indices = []
    for desired_landmark_index, desired_landmark_name in enumerate(skin_coordinates_df.index[:num_of_light_landmarks]):
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        # One row per subject, three values per row. Forcing float keeps np.isnan usable even when
        # the row Series comes back with object dtype.
        desired_landmark_subjects_coordinates = np.asarray(desired_landmark_subjects_coordinates, dtype=float).reshape(-1, 3)

        # A subject is usable only when ALL three coordinates are present; a partly missing row would
        # otherwise turn the landmark mean into NaN and reject every subject.
        valid_coordinates_rows = np.where(~np.isnan(desired_landmark_subjects_coordinates).any(axis=1))[0]
        valid_rows = valid_coordinates_rows

        desired_landmark_coordinates_mean = np.mean(desired_landmark_subjects_coordinates[valid_rows, :], axis=0)
        euclidean_distances = np.linalg.norm(desired_landmark_subjects_coordinates[valid_rows, :]-desired_landmark_coordinates_mean, axis=1)
        # Map back to absolute subject rows: the distance array is indexed relative to `valid_rows`,
        # so its positions differ from subject rows as soon as any subject was dropped for NaNs.
        desired_landmark_relevant_indices = valid_rows[euclidean_distances<max_euclidean_distance]
        relevant_indices.append(desired_landmark_relevant_indices)

    # A subject is kept only if it is close to the mean position for every landmark.
    only_valid_score_subjects_rows = relevant_indices[0]
    for desired_landmark_relevant_indices in relevant_indices:
        only_valid_score_subjects_rows = np.intersect1d(desired_landmark_relevant_indices, only_valid_score_subjects_rows)
else:
    # Keep the `score_ratio_threshold` fraction of subjects with the lowest final loss, in column order.
    score_ratio_threshold = 1
    only_valid_score_subjects_rows = np.sort(np.argsort(stats_df.loc['unique_correspondence_final_loss', :].values)[:int(score_ratio_threshold*stats_df.shape[1])])

In [None]:
# Every 4th column label (starting at column 0) is taken as one subject's label, and only the kept rows are selected.
first_column_per_subject = skin_coordinates_df.columns[0::4]
all_subject_names = np.array(first_column_per_subject[only_valid_score_subjects_rows])
# Drop the trailing 8 characters of each label.
# NOTE(review): presumably a fixed-length column suffix — confirm against the sheet.
only_valid_score_subject_names = np.array([column_label[:-8] for column_label in all_subject_names])

In [None]:
# Optional persistence of the kept-subject names; nothing runs while the outer toggle is 0.
if 0:
    array_folder = f"{datasets_folder}{current_dataset_name}/"
    array_filename = 'only_valid_score_subjects_names'
    array_filetype = '.npy'
    array_path = f"{array_folder}{array_filename}{array_filetype}"

    # Inner toggle: 1 writes the current array to disk, 0 reads a previously saved array back.
    if 1:
        with open(array_path, 'wb') as array_handle:
            np.save(array_handle, only_valid_score_subject_names)
    else:
        with open(array_path, 'rb') as array_handle:
            only_valid_score_subject_names = np.load(array_handle)

In [None]:
# Restrict the 'Stats' sheet to the kept subjects; columns are selected by subject name.
ADNI_valid_df = stats_df.loc[:, only_valid_score_subject_names]

In [None]:
# First two rows of the kept-subject stats table.
# NOTE(review): presumably the two head-based Chamfer rows — the selection is positional, confirm the sheet's row order.
ADNI_Chamfer_df = ADNI_valid_df.iloc[0:2]

In [None]:
# Per-subject value arrays: row 0 feeds the "face" variable, row 1 the "head" variable.
ADNI_head_based_face_chamfer_distance = ADNI_Chamfer_df.iloc[0].values
ADNI_head_based_head_chamfer_distance = ADNI_Chamfer_df.iloc[1].values

In [None]:
# Face-based results are read from one shared workbook, using the sheet named after the current dataset.
datasets_folder = './MRI_datasets/'
ADNI_face_based_Chamfer_df = pd.read_excel(
    f"{datasets_folder}Face_based_Chamfer_distances.xlsx",
    sheet_name=current_dataset_name,
    index_col=0,
)
# Same subject selection as for the head-based table; row 0 -> "face" values, row 1 -> "head" values.
ADNI_valid_face_based_Chamfer_df = ADNI_face_based_Chamfer_df.loc[:, only_valid_score_subject_names]
ADNI_face_based_face_chamfer_distance = ADNI_valid_face_based_Chamfer_df.iloc[0].values
ADNI_face_based_head_chamfer_distance = ADNI_valid_face_based_Chamfer_df.iloc[1].values

In [None]:
# Outlier removal: a subject is dropped when it is among the largest 2.5% of values in MORE than
# `ADNI_outlier_appearnce_threshold` of the four Chamfer-distance arrays (i.e. in at least 3 of 4).
ADNI_potential_outlier_number = int(np.round(ADNI_head_based_head_chamfer_distance.size*0.025))
ADNI_outlier_appearnce_threshold = 2
# Slice from an explicit start position instead of `[-n:]`: with n == 0 (small cohorts) `[-0:]`
# would return EVERY subject and the whole cohort would be flagged as outliers.
ADNI_head_based_head_outliers = np.argsort(ADNI_head_based_head_chamfer_distance)[ADNI_head_based_head_chamfer_distance.size-ADNI_potential_outlier_number:]
ADNI_head_based_face_outliers = np.argsort(ADNI_head_based_face_chamfer_distance)[ADNI_head_based_face_chamfer_distance.size-ADNI_potential_outlier_number:]
ADNI_face_based_head_outliers = np.argsort(ADNI_face_based_head_chamfer_distance)[ADNI_face_based_head_chamfer_distance.size-ADNI_potential_outlier_number:]
ADNI_face_based_face_outliers = np.argsort(ADNI_face_based_face_chamfer_distance)[ADNI_face_based_face_chamfer_distance.size-ADNI_potential_outlier_number:]

ADNI_potential_outliers_concatenated = np.sort(np.concatenate((ADNI_head_based_head_outliers,
                                                              ADNI_head_based_face_outliers,
                                                              ADNI_face_based_head_outliers,
                                                              ADNI_face_based_face_outliers
                                                             ))
                                             )

# Count in how many of the four candidate lists each subject position appears.
ADNI_unique_indices, ADNI_indices_counts = np.unique(ADNI_potential_outliers_concatenated, return_counts=True)
ADNI_selected_outliers = np.where(ADNI_indices_counts>ADNI_outlier_appearnce_threshold)[0]
ADNI_selected_outliers_indices = ADNI_unique_indices[ADNI_selected_outliers]
ADNI_non_outlier_indices = np.delete(np.arange(ADNI_head_based_head_chamfer_distance.size), ADNI_selected_outliers_indices)
# NOTE: despite its name, this array holds the names of the subjects that are KEPT (the non-outliers).
ADNI_selected_outliers_names = np.array(ADNI_valid_face_based_Chamfer_df.iloc[:, ADNI_non_outlier_indices].columns)

In [None]:
# Keep only the non-outlier subjects in each of the four Chamfer-distance arrays.
ADNI_head_based_face_chamfer_distance_filtered = np.take(ADNI_head_based_face_chamfer_distance, ADNI_non_outlier_indices, axis=0)
ADNI_head_based_head_chamfer_distance_filtered = np.take(ADNI_head_based_head_chamfer_distance, ADNI_non_outlier_indices, axis=0)
ADNI_face_based_face_chamfer_distance_filtered = np.take(ADNI_face_based_face_chamfer_distance, ADNI_non_outlier_indices, axis=0)
ADNI_face_based_head_chamfer_distance_filtered = np.take(ADNI_face_based_head_chamfer_distance, ADNI_non_outlier_indices, axis=0)

In [None]:
# Optional persistence of the subject names left after outlier removal; nothing runs while the outer toggle is 0.
if 0:
    array_folder = f"{datasets_folder}{current_dataset_name}/"
    array_filename = 'chamfer_distance_subjects_names'
    array_filetype = '.npy'
    array_path = f"{array_folder}{array_filename}{array_filetype}"

    # Inner toggle: 1 writes the current array to disk, 0 reads a previously saved array back.
    if 1:
        with open(array_path, 'wb') as array_handle:
            np.save(array_handle, ADNI_selected_outliers_names)
    else:
        with open(array_path, 'rb') as array_handle:
            ADNI_selected_outliers_names = np.load(array_handle)

## Composition

In [None]:
# Approximate age at scan time for every subject listed in `ADNI_selected_outliers_names`
# (despite its name, that array holds the subjects kept after outlier removal).
# Relies on `current_dataset_name` still being 'ADNI' from the section above.
metadata_filename = 'PTDEMOG.csv'
current_subject_metadata = pd.read_csv(datasets_folder+current_dataset_name+'/'+metadata_filename)
ADNI_subject_metadata = current_subject_metadata.loc[current_subject_metadata['Phase'] == 'ADNI1']

filtered_subjects_ages = []
for subject_name in ADNI_selected_outliers_names:
    # The third '_'-separated token of the subject name is matched against the RID column.
    subject_RID = int(subject_name.split('_')[2])
    current_subject = ADNI_subject_metadata.loc[ADNI_subject_metadata['RID'] == subject_RID]
    if current_subject.empty:
        # Same exception type as the bare `.values[0]` lookup would raise, but with the subject identified.
        raise IndexError('No ADNI1 row in ' + metadata_filename + ' for subject ' + str(subject_name) + ' (RID ' + str(subject_RID) + ')')

    # When several rows share the RID, the first one is used.
    current_subject_birth_year = int(current_subject.loc[:, 'PTDOBYY'].values[0])
    current_subject_birth_month = int(current_subject.loc[:, 'PTDOBMM'].values[0])
    current_subject_birth_day = 14  # only birth year and month are read from the table; a fixed day is assumed

    # NOTE(review): the date is parsed as DD/MM/YYYY — confirm that this matches the CSV's USERDATE format.
    current_subject_scan_date = current_subject.loc[:, 'USERDATE'].values[0]
    scan_date_parts = current_subject_scan_date.split('/')
    current_subject_scan_year = int(scan_date_parts[2])
    current_subject_scan_month = int(scan_date_parts[1])
    current_subject_scan_day = int(scan_date_parts[0])

    delta_years = current_subject_scan_year-current_subject_birth_year
    delta_months = current_subject_scan_month-current_subject_birth_month
    delta_days = current_subject_scan_day-current_subject_birth_day

    # Fractional years, treating a month as 1/12 year and a day as 1/365 year.
    scan_age = delta_years+delta_months/12+delta_days/365
    filtered_subjects_ages.append(scan_age)

filtered_subjects_ages = np.array(filtered_subjects_ages)