# Loading data

In [None]:
# import packages
import pickle
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import time
from datetime import datetime
import plotly.io as pio
from plotly.subplots import make_subplots
import os
from sklearn.impute import KNNImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
import copy
import joblib

In [None]:
# Root folder; every model, dataset and figure path below is built by appending to it.
media_folder = "./datasets/TMS"

## 3DMM

### Original model including everything

In [None]:
# load model
models_folder = media_folder+"/3DMM/UHM_models/"
currnet_model = 'UHM'

# UHM model with all the components fused together (i.e. ears, inner mouth, and teeth)
model_name = 'head_model_global_align'

# NOTE: pickle can execute arbitrary code while loading; only open model files from a trusted source.
# The context manager closes the handle even when unpickling raises.
with open(models_folder + model_name + '.pkl', 'rb') as model_file:
    model_dict = pickle.load(model_file)

In [None]:
# Scale applied to the mean shape (the original note says this turns the
# coordinate system into millimeter units).
scale_factor = 100

# Unpack the model dictionary into module-level names.
eigen_vec = model_dict['Eigenvectors']
eigen_val = model_dict['EigenValues']
trilist = model_dict['Trilist']
vertices_num = model_dict['Number_of_vertices']
eigen_vec_num = eigen_vec.shape[1]  # number of columns of the eigenvector matrix

# Scaled mean shape, plus a view with one (x, y, z) row per vertex.
mean_shape = scale_factor * model_dict['Mean']
mean_shape_CCS = mean_shape.reshape(-1, 3)

In [None]:
# load modules (landmarks and masks)
modules_folder = models_folder + '/Landmarks and masks/'

modules_to_load = ['68_land_idxs'] # EEG_10_20_full_model / '49_plus_ears_land_idxs' / '68_land_idxs'

landmarks = []
landmarks_names = []
landmarks_groups = []

for currnet_module_name in modules_to_load:
    # NOTE: pickle can execute arbitrary code while loading; only open trusted module files.
    # The context manager closes the handle even when unpickling raises.
    with open(modules_folder + currnet_module_name + '.pkl', 'rb') as module_file:
        currnet_module = pickle.load(module_file)
    if currnet_module_name=='EEG_10_20':
        # This module is a mapping: its keys become the landmark names, its values the landmark entries.
        currnet_module_names = list(currnet_module.keys())
        currnet_module = np.asarray(list(currnet_module.values()))
    else:
        # Other modules are plain sequences; landmarks are named '1', '2', ... in order.
        currnet_module_names = list(map(str, 1+np.arange(len(currnet_module))))

    landmarks.append(currnet_module)
    landmarks_names.append(currnet_module_names)
    landmarks_groups.append(np.arange(len(currnet_module)))

# turn list of lists into one list
landmarks = [item for items in landmarks for item in items]
landmarks_names = [item for items in landmarks_names for item in items]

num_of_landmarks = len(landmarks)

### Lighter model including everything but eyes, teeth and inner mouth cavity

In [None]:
# load model
light_models_folder = media_folder+"/3DMM/UHM_models/"
light_currnet_model = 'UHM'

# Lighter UHM variant: per its file name, the model without the mouth and eyes
light_model_name = 'head_model_global_align_no_mouth_and_eyes'

# NOTE: pickle can execute arbitrary code while loading; only open model files from a trusted source.
# The context manager closes the handle even when unpickling raises.
with open(light_models_folder + light_model_name + '.pkl', 'rb') as light_model_file:
    light_model_dict = pickle.load(light_model_file)

In [None]:
# Scale applied to the light model's mean shape (the original note says this
# turns the cartesian coordinate system into millimeter units).
light_scale_factor = 100

# Unpack the light model dictionary into module-level names.
light_eigen_vec = light_model_dict['Eigenvectors']
light_eigen_val = light_model_dict['EigenValues']
light_trilist = light_model_dict['Trilist']
light_vertices_num = light_model_dict['Number_of_vertices']
light_eigen_vec_num = light_eigen_vec.shape[1]  # number of columns of the eigenvector matrix

# Scaled mean shape, plus a view with one (x, y, z) row per vertex.
light_mean_shape = light_scale_factor * light_model_dict['Mean']
light_mean_shape_CCS = light_mean_shape.reshape(-1, 3)

In [None]:
# load modules (landmarks and masks)
modules_folder = models_folder + '/Landmarks and masks/'

modules_to_load = ['EEG_10_20'] # EEG_10_20 / '49_plus_ears_land_idxs' / '68_land_idxs'

light_landmarks = []
light_landmarks_names = []
light_landmarks_groups = []

for currnet_module_name in modules_to_load:
    # NOTE: pickle can execute arbitrary code while loading; only open trusted module files.
    # The context manager closes the handle even when unpickling raises.
    with open(modules_folder + currnet_module_name + '.pkl', 'rb') as module_file:
        currnet_module = pickle.load(module_file)
    if currnet_module_name=='EEG_10_20':
        # This module is a mapping: its keys become the landmark names, its values the landmark entries.
        currnet_module_names = list(currnet_module.keys())
        currnet_module = np.asarray(list(currnet_module.values()))
    else:
        # Other modules are plain sequences; landmarks are named '1', '2', ... in order.
        currnet_module_names = list(map(str, 1+np.arange(len(currnet_module))))

    light_landmarks.append(currnet_module)
    light_landmarks_names.append(currnet_module_names)
    light_landmarks_groups.append(np.arange(len(currnet_module)))

# turn list of lists into one list
light_landmarks = [item for items in light_landmarks for item in items]
light_landmarks_names = [item for items in light_landmarks_names for item in items]

num_of_light_landmarks = len(light_landmarks)

### Matching landmark indices between models

In [None]:
# For every landmark vertex of the original model, find the closest vertex of the light model.
# Work on a copy: binding the same list would overwrite `landmarks` (the original
# model's vertex indices) in place while it is being iterated.
light_facial_landmarks = list(landmarks)

for current_landmark_index, current_landmark_vertex in enumerate(landmarks):
    original_model_coordinates = mean_shape_CCS[current_landmark_vertex]
    # Euclidean distance from this landmark to every vertex of the light model's mean shape.
    new_model_vertex_diffs = np.linalg.norm(light_mean_shape_CCS-original_model_coordinates, axis=1)
    #sorted_diffs = np.sort(new_model_vertex_diffs)
    #print((sorted_diffs[1]-sorted_diffs[0])/sorted_diffs[0])
    light_facial_landmarks[current_landmark_index] = np.argmin(new_model_vertex_diffs)

### Model Choosing

In [None]:
# When True, the next cell rebinds the generic model names (mean_shape, eigen_vec, ...) to the light model.
choose_light_model = True

In [None]:
# Switch the generic model names over to the light model.
# NOTE(review): this cell is not idempotent - running it twice prepends the light
# landmark names to `landmarks_names` a second time.
if choose_light_model:
    # Combined landmark list: the light model's own landmarks first, then the matched facial landmarks.
    landmarks = np.concatenate((light_landmarks, light_facial_landmarks))
    # Names follow the same order: light landmark names, then the original landmark names.
    landmarks_names = list(np.concatenate((light_landmarks_names, landmarks_names)))
    mean_shape = light_mean_shape
    mean_shape_CCS = light_mean_shape_CCS
    eigen_vec = light_eigen_vec
    eigen_vec_num =  light_eigen_vec_num
    eigen_val = light_eigen_val
    trilist = light_trilist
    vertices_num = light_vertices_num

# Definitions

## Parameters

In [None]:
# Epoch count; presumably passed to model_choose_and_predict (whose own default is also 500) - TODO confirm at the call site.
num_of_epochs = 500

In [None]:
# Landmark subsets given as landmark numbers (they are turned into 0-based indices below by subtracting 1).
distinct_landmarks_names = np.array([37, 40, 43, 46, 49, 55, 31, 9])
rigid_facial_landmarks_names = np.array([37, 40, 43, 46, 28, 1, 17])

center_of_the_eyebrows = np.array([20, 25])
corners_of_the_eyebrows = np.array([18, 22, 23, 27])
corners_of_the_eyes = np.array([37, 40, 43, 46])
sides_of_the_face = np.array([1, 17])
nose_bone = np.array([28, 31])
lower_nose = np.array([32, 34, 36])
corners_of_the_mouth = np.array([49, 55])
chin = np.array([9])

# Facial landmarks in use; the mouth corners and chin are currently left out (see the commented tail).
facial_landmarks = np.concatenate((center_of_the_eyebrows, corners_of_the_eyebrows, corners_of_the_eyes, sides_of_the_face, nose_bone, lower_nose))
                                   #,corners_of_the_mouth, chin))
# Positions inside the combined landmark list: when the light model is chosen the facial
# landmarks sit after the `num_of_light_landmarks` light landmarks, and "-1" converts
# the 1-based landmark number to a 0-based offset.
selected_facial_indices = np.sort(facial_landmarks+num_of_light_landmarks-1)

# Positions of the light model's landmark names inside the combined name list.
# list.index returns the first match and raises ValueError when a name is missing.
selected_EEG_10_20_landmark_names = light_landmarks_names
selected_EEG_10_20_indices = []
for current_index, current_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    selected_EEG_10_20_indices.append(landmarks_names.index(current_landmark_name))
selected_EEG_10_20_indices = np.asarray(selected_EEG_10_20_indices)

In [None]:
# Facial landmark numbers used as the 'MRI_facial_landmarks' feature set (numbers 4-14 are absent).
MRI_facial_landmarks = ['1', '2', '3', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30',
                        '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48']
# Convert the string labels to integers so they can be offset arithmetically later.
MRI_facial_landmarks = [int(current_landmark) for current_landmark in MRI_facial_landmarks]

In [None]:
# All selected landmark positions (facial first, then EEG 10-20) and their names from the combined name list.
selected_indices = np.concatenate((selected_facial_indices, selected_EEG_10_20_indices))
selected_indices_names = np.take(landmarks_names, selected_indices)

In [None]:
# Named groups of facial landmarks, first expressed as landmark numbers.
feature_sets = {}
feature_sets['eye corners & eyebrow corners'] = np.concatenate((corners_of_the_eyebrows, corners_of_the_eyes))
feature_sets['eye corners & eyebrow centers'] = np.concatenate((center_of_the_eyebrows, corners_of_the_eyes))
feature_sets['eye corners & eyebrow corners and center'] = np.concatenate((center_of_the_eyebrows, corners_of_the_eyebrows, corners_of_the_eyes))
feature_sets['eye corners & nose bone'] = np.concatenate((corners_of_the_eyes, nose_bone))
feature_sets['nose bone & lower nose'] = np.concatenate((nose_bone, lower_nose))
feature_sets['MRI_facial_landmarks'] = np.array(MRI_facial_landmarks)

# Apply the same offset used for selected_facial_indices (number + num_of_light_landmarks - 1),
# then store every set as a list of strings.
for current_key in feature_sets:
    feature_sets[current_key] = feature_sets[current_key]+num_of_light_landmarks-1
    feature_sets[current_key] = list(map(str, feature_sets[current_key]))

## PyTorch model definitions

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset, random_split
import torch.nn.functional as F

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 

In [None]:
class MLP_3(nn.Module):
    """Fully connected network with three hidden layers.

    Every hidden stage applies Linear -> leaky ReLU -> BatchNorm1d -> Dropout
    (dropout probabilities 0.3, 0.2 and 0.2). A last Linear layer maps the
    third hidden layer to ``output_size`` values without an activation.
    """

    def __init__(self, inputs_size, output_size, first_hidden_layer_size=2**7, second_hidden_layer_size=2**5, third_hidden_layer_size=2**3):
        super().__init__()
        self.inputs_size = inputs_size

        # Attribute names double as state_dict keys, so they stay fixed.
        # Stage 1
        self.fc1 = nn.Linear(inputs_size, first_hidden_layer_size)
        self.bn1 = nn.BatchNorm1d(first_hidden_layer_size)
        self.d1 = nn.Dropout(p=0.3, inplace=False)

        # Stage 2
        self.fc2 = nn.Linear(first_hidden_layer_size, second_hidden_layer_size)
        self.bn2 = nn.BatchNorm1d(second_hidden_layer_size)
        self.d2 = nn.Dropout(p=0.2, inplace=False)

        # Stage 3
        self.fc3 = nn.Linear(second_hidden_layer_size, third_hidden_layer_size)
        self.bn3 = nn.BatchNorm1d(third_hidden_layer_size)
        self.d3 = nn.Dropout(p=0.2, inplace=False)

        # Output head
        self.fc4 = nn.Linear(third_hidden_layer_size, output_size)

    def forward(self, x):
        """Run ``x`` through the three hidden stages and the output layer."""
        hidden_stages = (
            (self.fc1, self.bn1, self.d1),
            (self.fc2, self.bn2, self.d2),
            (self.fc3, self.bn3, self.d3),
        )
        for linear, batch_norm, dropout in hidden_stages:
            # The activation comes before the batch normalisation, then dropout.
            x = dropout(batch_norm(F.leaky_relu(linear(x))))
        return self.fc4(x)

## Functions

In [None]:
def find_most_recent_trained_files(trained_folder_path, desired_feature_name, fold_index=-1):
    """Pick the newest trained-model file for a feature set and derive its companion file names.

    A file in ``trained_folder_path`` is a candidate when its name contains both
    ``desired_feature_name`` and ``'model'``. When ``fold_index`` is not -1, the
    character at position -7 of the name must also parse to ``fold_index``.
    Candidates are ranked by the first 17 characters of their name (presumably a
    creation timestamp - TODO confirm the naming scheme) and the last one in
    sorted order is chosen. The chosen name is printed.

    Returns:
        ``[model_filename, scaler_filename, documentation_filename]``; the last
        two are the model name without its final 6 characters followed by
        ``'_scaler.pkl'`` and ``'_documentation.txt'``.

    Raises:
        IndexError: no file matches (the empty ranking cannot be indexed).
        ValueError: ``fold_index`` is given and a candidate has a non-digit at position -7.
    """
    matching_filenames = []
    for candidate in os.listdir(trained_folder_path):
        if desired_feature_name not in candidate or 'model' not in candidate:
            continue
        # The fold digit is only parsed when a specific fold was requested.
        if fold_index != -1 and fold_index != int(candidate[-7]):
            continue
        matching_filenames.append(candidate)

    name_prefixes = [candidate[:17] for candidate in matching_filenames]
    newest_position = np.argsort(name_prefixes)[-1]
    most_recent_trained_model_filename = matching_filenames[newest_position]
    print(most_recent_trained_model_filename)

    # Companion files share the model file name minus its last 6 characters.
    shared_stem = most_recent_trained_model_filename[:-6]
    return [
        most_recent_trained_model_filename,
        shared_stem+'_scaler.pkl',
        shared_stem+'_documentation.txt',
    ]

In [None]:
def run_model(model, dataloader, loss_function, optimizer, output_size, mode):
    """Run one pass of ``model`` over ``dataloader`` in the given mode.

    Args:
        model: torch module; set to train mode for ``mode == 'train'``, eval mode otherwise.
        dataloader: iterable yielding ``(inputs, targets)`` batches.
        loss_function: callable ``loss_function(outputs, targets)`` returning a scalar tensor.
        optimizer: optimizer stepped once per batch; only used when ``mode == 'train'``.
        output_size: targets are reshaped to ``(batch, output_size)``.
        mode: ``'train'`` (backward pass and optimizer step), ``'test'`` (also
            collects the outputs) or any other string, e.g. ``'validation'``
            (forward pass only).

    Returns:
        ``(summed_loss, model, outputs)`` in ``'test'`` mode, where ``outputs``
        stacks the batch outputs along dim 0 (an empty ``(0, output_size)``
        tensor when the loader yields nothing); ``(summed_loss, model)``
        otherwise. ``summed_loss`` is the sum of the per-batch loss values.

    Gradients are not disabled here; callers wrap the call in
    ``torch.no_grad()`` when they want that. Tensors are moved to the
    module-level ``device``.
    """
    is_training = mode=='train'
    collect_outputs = mode=='test'

    if is_training:
        model.train()
    else:
        model.eval()

    current_loss = 0.0
    batch_outputs = []
    for inputs, targets in dataloader:
        # Get and prepare inputs
        inputs, targets = inputs.float().to(device), targets.float().to(device)
        targets = targets.reshape((targets.shape[0], output_size))

        if is_training:
            # Zero the gradients
            optimizer.zero_grad()

        # Perform forward pass and compute loss
        current_outputs = model(inputs)
        loss = loss_function(current_outputs, targets)

        if collect_outputs:
            batch_outputs.append(current_outputs)

        if is_training:
            # Perform backward pass and optimization
            loss.backward()
            optimizer.step()

        current_loss += loss.item()

    if collect_outputs:
        if batch_outputs:
            # Concatenate once at the end instead of re-concatenating on every batch.
            outputs = torch.cat(batch_outputs, dim=0)
        else:
            # An empty loader used to fail with UnboundLocalError; return an empty result instead.
            outputs = torch.empty((0, output_size), device=device)
        return current_loss, model, outputs
    return current_loss, model

In [None]:
def model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='train', n_jobs_num=1, num_of_epochs=500):
    """Train, fine-tune or only evaluate ``mlp`` and collect its test predictions.

    Args:
        mlp: torch module to optimise and/or evaluate; moved to the module-level ``device``.
        X_train, y_train: training arrays (converted with ``astype(np.float32)``);
            not used when ``mode == 'test_only'``.
        X_test, y_test: test arrays. ``y_test`` equal to ``np.array([[-1, -1, -1]])``
            is the sentinel for "no test set: train on the entire dataset".
        mode: ``'train'`` (learning rate 5e-3), ``'fine_tune'`` (2.5e-3); any other
            value gets learning rate 0. Only the exact value ``'test_only'`` skips training.
        n_jobs_num: unused; kept so existing calls keep working.
        num_of_epochs: number of iterations of the main loop.

    Returns:
        * sentinel ``y_test``: the trained ``mlp``;
        * training with a test set: ``(predictions, validation_losses, test_losses,
          copy_of_final_model, num_of_epochs - 1)``;
        * ``mode == 'test_only'`` with a test set: ``(predictions, validation_losses,
          test_losses, 'a', 'b')`` (the last two are placeholders).

    Side effects: reseeds NumPy's global RNG with 1 and prints progress messages.
    """
    np.random.seed(1)

    validation_losses = []
    test_losses = []
    validation = False
    batch_size = 2**3

    if mode == 'train':
        lr = 5e-3
    elif mode == 'fine_tune':
        lr = 2.5e-3
    else:
        lr = 0

    loss_function = nn.MSELoss() #nn.L1Loss
    optimizer = torch.optim.Adam(mlp.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)

    if y_test.shape[1]>1:
        output_size = 3
    else:
        output_size = 1

    # A y_test of [[-1, -1, -1]] is the sentinel for "there is no test set".
    has_test_set = not np.array_equal(y_test, np.array([-1, -1, -1]).reshape(1, 3))

    if mode != 'test_only':
        # Validation-based early stopping is switched off here; flip to True to enable it.
        validation = False # False / True
        if not has_test_set: #only training on the entire dataset
            validation = False
        triggered = False

        train_input = torch.tensor(X_train.astype(np.float32))
        train_target = torch.tensor(y_train.astype(np.float32)) 

        train_tensor = TensorDataset(train_input, train_target)

        if validation==True:
            train_set_percentage = 0.8
            last_validation_loss = 1e6
            lowest_validation_loss = 1e6
            patience = 4
            #tolerance = 0.001*1e-3
            trigger_times = 0

            train_set_size = int(np.round(train_set_percentage*train_input.shape[0]))
            valid_set_size = train_input.shape[0]-train_set_size

            # Shift or drop one sample so that neither split ends on a batch of size 1
            # (presumably because BatchNorm1d cannot train on a single sample - TODO confirm).
            if train_set_size%batch_size==1 and valid_set_size%batch_size!=0:
                train_set_size -= 1
                valid_set_size += 1
            elif valid_set_size%batch_size==1 and train_set_size%batch_size!=0:
                train_set_size += 1
                valid_set_size -= 1
            elif train_set_size%batch_size==1:
                train_set_size -= 1
                train_tensor = TensorDataset(train_input[:-1, :], train_target[:-1, :])
            elif valid_set_size%batch_size==1:
                valid_set_size -= 1
                train_tensor = TensorDataset(train_input[:-1, :], train_target[:-1, :])

            train_tensor, valid_tensor = random_split(train_tensor, [train_set_size, valid_set_size])

            validloader = DataLoader(dataset=valid_tensor, batch_size=batch_size, shuffle=False)

        trainloader = DataLoader(dataset=train_tensor, batch_size=batch_size, shuffle=False)

    if has_test_set:
        test_input = torch.tensor(X_test.astype(np.float32))
        test_target = torch.tensor(y_test.astype(np.float32))

        test_tensor = TensorDataset(test_input, test_target) 
        testloader = DataLoader(dataset=test_tensor, batch_size=batch_size, shuffle=False)

    # Run the training loop
    # NOTE(review): in 'test_only' mode the same evaluation is repeated num_of_epochs
    # times, so test_losses holds num_of_epochs entries.
    for epoch in range(num_of_epochs):
        if mode != 'test_only':
            if triggered==True:
                continue

            train_loss, mlp = run_model(mlp.to(device), trainloader, loss_function, optimizer, output_size, mode='train')

            if validation==True:
                with torch.no_grad():
                    validation_loss, _ = run_model(mlp.to(device), validloader, loss_function, optimizer, output_size, mode='validation')
                    validation_losses.append(validation_loss)

                    if validation_loss<lowest_validation_loss:
                        lowest_validation_loss=validation_loss
                        lowest_validation_loss_model=copy.deepcopy(mlp)
                        lowest_validation_loss_epoch=epoch

                    # Compare with the PREVIOUS epoch's loss; updating last_validation_loss
                    # before this test made the condition impossible to satisfy.
                    if last_validation_loss < validation_loss and epoch>50:
                        trigger_times += 1
                        if trigger_times > patience:
                            triggered = True
                            print(('Early stopping at epoch '+str(epoch)))
                    else:
                        trigger_times = 0
                    last_validation_loss = validation_loss

            # Guarded: without a test set `testloader` does not exist, and this call
            # used to raise NameError when training on the entire dataset.
            # NOTE(review): the result is discarded (the no-grad evaluation below
            # recomputes it). Kept so the existing path runs exactly as before;
            # removing it may shift the torch RNG stream - confirm before deleting.
            if has_test_set:
                test_loss , _, _ = run_model(mlp.to(device), testloader, loss_function, optimizer, output_size, mode='test')
            scheduler.step()

        # Disable grad
        if has_test_set:
            with torch.no_grad():
                test_loss , _, predictions = run_model(mlp.to(device), testloader, loss_function, optimizer, output_size, mode='test')
                test_losses.append(test_loss)
                #print(epoch, test_loss)

    if mode != 'test_only' and has_test_set:
        # NOTE(review): this replaces any best-validation model found above with the
        # final model, even when validation is enabled - confirm that is intended.
        lowest_validation_loss_epoch=num_of_epochs-1
        lowest_validation_loss_model=copy.deepcopy(mlp)
        print(f"lowest_validation_loss_epoch is {num_of_epochs-1}")

        if validation==True:
            with torch.no_grad():
                _ , _, predictions = run_model(lowest_validation_loss_model.to(device), testloader, loss_function, optimizer, output_size, mode='test')

    if mode != 'test_only' and has_test_set:#validation==True:
        return predictions, validation_losses, test_losses, lowest_validation_loss_model, lowest_validation_loss_epoch
    elif not has_test_set:
        return mlp
    else:
        return predictions, validation_losses, test_losses, 'a', 'b'

In [None]:
def transform_to_original_space(prediction, subject_id):
    """Apply a subject's five stacked 4x4 matrices to a point, last block first.

    The matrices are read from the module-level ``inverse_transformations_df``:
    block ``k`` occupies rows ``4*k .. 4*k+3`` and the subject's columns are
    ``4*subject_id .. 4*subject_id+3``. Blocks are applied in the order
    4, 3, 2, 1, 0 to the point in homogeneous coordinates.

    Args:
        prediction: array whose first dimension has length 3.
        subject_id: 0-based position of the subject's 4-column group.

    Returns:
        The transformed point as an array of shape ``(3, 1)``.
    """
    number_of_transformations = 5
    subject_columns = np.arange(4*subject_id, 4*(subject_id+1))

    point = prediction
    for block in reversed(range(number_of_transformations)):
        transformation_matrix = inverse_transformations_df.iloc[block*4:(block+1)*4, subject_columns].values
        # Append 1 and transpose: homogeneous column vector of shape (4, 1).
        row_vector = np.array(point).reshape(1, point.shape[0])
        homogeneous_point = np.concatenate((row_vector, np.ones((1, 1))), axis=1).T
        # Keep only the x, y, z rows of the product.
        point = (transformation_matrix@homogeneous_point)[:3]
    return point

In [None]:
def project_distances(plane_normal, plane_point, other_point):
    """Split the offset between a point and a plane point into normal and in-plane parts.

    Args:
        plane_normal: normal of the plane (any non-zero length; it is normalised here).
        plane_point: a point on the plane.
        other_point: the point to measure.

    Returns:
        ``(perpendicular_distance, tangent_distance)``: the signed distance of
        ``other_point`` from the plane along the unit normal, and the distance
        between ``plane_point`` and the projection of ``other_point`` onto the plane.
    """
    unit_normal = plane_normal/np.linalg.norm(plane_normal)
    offset = other_point-plane_point
    # Signed component of the offset along the normal.
    perpendicular_distance = np.dot(offset, unit_normal)
    # Drop that component to land on the plane, then measure within the plane.
    point_on_plane = other_point - perpendicular_distance*unit_normal
    tangent_distance = np.linalg.norm(point_on_plane-plane_point)
    return perpendicular_distance, tangent_distance

In [None]:
def change_keys(OrderedDict):
    """Return a copy of an ordered mapping with ``'module.'`` removed from its keys.

    The input is left untouched. Entries are rotated through the copy one by
    one (popped from the front, re-inserted at the back), so the original order
    is kept. The argument must support ``popitem(False)``, i.e. behave like
    ``collections.OrderedDict``.
    """
    renamed = OrderedDict.copy()
    for _ in range(len(OrderedDict)):
        key, value = renamed.popitem(False)
        stripped_key = key.replace('module.', '') if 'module.' in key else key
        renamed[stripped_key] = value
    return renamed

In [None]:
def figure_saver(fig, filename):
    """Write ``fig`` as an EPS image and a JSON file with a timestamped name.

    Both files go to ``media_folder + "/Cranium_estimation_paper/Figures/Predictions/"``
    and are named ``{filename}_{MM_DD_YY_HH_MM_SS}``.

    Args:
        fig: Plotly figure. It is modified in place: the title is cleared and the
            margins are set to top/bottom/left 0 and right 10.
        filename: prefix of the output file names.
    """
    # Turn MathJax off for Kaleido exports (presumably to keep MathJax artefacts
    # out of the exported image - TODO confirm).
    pio.kaleido.scope.mathjax = None

    # %y yields the two-digit year directly; the previous '_2022_' -> '_22_'
    # replacement shortened the year only in 2022.
    timestamp_string = datetime.now().strftime("%m_%d_%y_%H_%M_%S")

    figure_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/"
    figure_filename = f"{filename}_{timestamp_string}" #???"experiment_"+str(experiment_number)+"_"+timestamp_string+"_"+filename
    figure_filetype = ".eps"

    figure_path = figure_folder + figure_filename + figure_filetype

    fig.layout.title = ""

    fig.layout.margin.t = 0#0.075*fig.layout.height
    fig.layout.margin.b = 0
    fig.layout.margin.l = 0
    fig.layout.margin.r = 10#0.05*fig.layout.width

    pio.write_image(fig, figure_path)#, scale=5)
    pio.write_json(fig, file=figure_folder+figure_filename+".json" ,validate=True, pretty=True, remove_uids=False, engine='json')

# IXI

## Definitions

In [None]:
# Factor that multiplies the landmark perturbations built further down
# (presumably converts millimetres to the dataset's coordinate units - TODO confirm).
registration_scale_factor = 0.001

In [None]:
# Location of the dataset workbook: <datasets_folder><current_dataset_name>/<dataset_filename>.
datasets_folder = media_folder+"/MRI_datasets/"
current_dataset_name = 'IXI'
dataset_filename = 'Dataset_Chamfer.xlsx' #'Dataset.xlsx'

In [None]:
# Open the workbook once to list its sheets.
current_subject_dataframe = pd.ExcelFile(datasets_folder+current_dataset_name+'/'+dataset_filename)
current_sheet_names = current_subject_dataframe.sheet_names
current_num_of_sheets = len(current_sheet_names)

# Position of each required sheet; next() raises StopIteration when a sheet is missing.
skin_coordinates_index = next(position for position, sheet_name in enumerate(current_sheet_names) if sheet_name=='Skin coordinates')
skin_normals_index = next(position for position, sheet_name in enumerate(current_sheet_names) if sheet_name=='Skin normals')
skin_geodesic_distances_index = next(position for position, sheet_name in enumerate(current_sheet_names) if sheet_name=='Skin distances')
inverse_matrices_index = next(position for position, sheet_name in enumerate(current_sheet_names) if sheet_name=='Inverse transformations')
stats_index = next(position for position, sheet_name in enumerate(current_sheet_names) if sheet_name=='Stats')

In [None]:
# Load each sheet into its own DataFrame, using the first column as the index.
# NOTE(review): every call re-opens and re-parses the workbook from disk.
skin_coordinates_df = pd.read_excel(datasets_folder+current_dataset_name+'/'+dataset_filename, sheet_name=skin_coordinates_index, index_col=0)
skin_normals_df = pd.read_excel(datasets_folder+current_dataset_name+'/'+dataset_filename, sheet_name=skin_normals_index, index_col=0)
skin_geodesic_distances_df = pd.read_excel(datasets_folder+current_dataset_name+'/'+dataset_filename, sheet_name=skin_geodesic_distances_index, index_col=0)
inverse_transformations_df = pd.read_excel(datasets_folder+current_dataset_name+'/'+dataset_filename, sheet_name=inverse_matrices_index, index_col=0)
stats_df = pd.read_excel(datasets_folder+current_dataset_name+'/'+dataset_filename, sheet_name=stats_index, index_col=0)

In [None]:
# Positions of the columns that hold coordinates, i.e. every column whose name does not contain 'indices'.
skin_coordinates_columns_names = list(skin_coordinates_df.columns)
only_coordinates_columns_indices = [
    column_position
    for column_position, column_name in enumerate(skin_coordinates_columns_names)
    if 'indices' not in column_name
]

In [None]:
# Keep the subjects whose first `num_of_light_landmarks` landmarks all lie within
# `max_euclidean_distance` of the per-landmark mean over subjects.
# The `else` branch (ranking by a stats row) is disabled by the constant condition.
# NOTE(review): `only_valid_score_subjects_rows` is rebuilt from stored subject
# names a few cells further down, which overrides the result of this cell.
if 1:
    # presumably 75 mm if the coordinates are in metres - TODO confirm units
    max_euclidean_distance = 75e-3 # that's a lot

    relevant_indices = []
    for desired_landmark_index, desired_landmark_name in enumerate(skin_coordinates_df.index[:num_of_light_landmarks]):
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        # One (x, y, z) row per subject.
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        if np.where(np.isnan(desired_landmark_subjects_coordinates)==True)[0].size>0:
            # NOTE(review): this keeps every row with at least one non-NaN value, so a
            # partially-NaN row would still be used and make the mean NaN.
            valid_coordinates_rows = np.unique(np.where(np.isnan(desired_landmark_subjects_coordinates)==False)[0])
        else:
            valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

        valid_rows = valid_coordinates_rows#np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)

        desired_landmark_coordinates_mean = np.mean(desired_landmark_subjects_coordinates[valid_rows, :], axis=0)
        euclidean_distances = np.linalg.norm(desired_landmark_subjects_coordinates[valid_rows, :]-desired_landmark_coordinates_mean, axis=1)
        # NOTE(review): these are positions within `valid_rows`, not subject rows; the two
        # differ whenever rows were dropped for NaNs.
        desired_landmark_relevant_indices = np.where(euclidean_distances<max_euclidean_distance)[0]
        relevant_indices.append(desired_landmark_relevant_indices)

    # Subjects that pass the distance test for every landmark.
    only_valid_score_subjects_rows = relevant_indices[0]
    for desired_landmark_index, desired_landmark_name in enumerate(skin_coordinates_df.index[:num_of_light_landmarks]):
        only_valid_score_subjects_rows = np.intersect1d(relevant_indices[desired_landmark_index], only_valid_score_subjects_rows)
else:
    # Alternative selection: the fraction `score_ratio_threshold` of subjects with the
    # smallest 'unique_correspondence_final_loss' value, returned in ascending row order.
    score_ratio_threshold = 1
    only_valid_score_subjects_rows = np.sort(np.argsort(stats_df.loc['unique_correspondence_final_loss', :].values)[:int(score_ratio_threshold*stats_df.shape[1])])

In [None]:
# Load the stored list of valid subject names from disk.
# The inner `if 0:` branch would save the list instead and is currently disabled.
if 1:
    array_folder = datasets_folder+current_dataset_name+'/'
    array_filename = 'chamfer_distance_subjects_names'
    array_filetype = '.npy'

    array_path = array_folder + array_filename + array_filetype
    
    if 0:
        with open(array_path, 'wb') as file:
            np.save(file, only_valid_score_subject_names)
    else:
        with open(array_path, 'rb') as file:
            # NOTE: allow_pickle=True unpickles object arrays; only load files from a trusted source.
            only_valid_score_subject_names = np.load(file, allow_pickle=True)

In [None]:
# Every 4th column name, starting at the first; the lookup below expects these to be '<subject>_indices'.
all_subject_names = np.array(skin_coordinates_df.columns[0::4])
# Position of each stored subject name; a missing name raises IndexError on the [0][0] lookup.
# This replaces the rows computed by the distance filter above.
only_valid_score_subjects_rows = []
for current_name in only_valid_score_subject_names:
    only_valid_score_subjects_rows.append(np.where(all_subject_names==f'{current_name}_indices')[0][0])
    
only_valid_score_subjects_rows = np.array(only_valid_score_subjects_rows)
# Rebuild the name list from the columns, dropping the 8-character '_indices' suffix.
only_valid_score_subject_names = [current_subject_name[:-8] for current_subject_name in all_subject_names[only_valid_score_subjects_rows]]
only_valid_score_subject_names = np.array(only_valid_score_subject_names)

## Predictions

In [None]:
n_splits = 5            # number of cross-validation folds
ablation_study = False  # True adds random noise to the facial landmark inputs (next cell)
MLP_nn = MLP_3          # network class to use
MLP_folder = 'MLP_3/'   # sub-folder appended to the trained-models path

In [None]:
# Build the additive perturbation matrix: one row per valid subject, three columns per MRI facial landmark.
if ablation_study==False:
    # No ablation: the perturbation is all zeros.
    X_perturbations = np.zeros((only_valid_score_subjects_rows.size, 3*len(MRI_facial_landmarks)))
else:
    added_facial_landmarks_noise_norm = 1 #mm
    
    # Random directions: Gaussian samples scaled to unit length.
    unnormalized_perturbations = np.random.normal(loc=0, scale=1, size=(int(only_valid_score_subjects_rows.size*len(MRI_facial_landmarks)), 3))
    unnormalized_perturbations_magnitudes = np.linalg.norm(unnormalized_perturbations, axis=1)
    normalized_perturbations = unnormalized_perturbations/unnormalized_perturbations_magnitudes.reshape(-1, 1)

    # Random magnitudes drawn around the requested noise norm.
    # NOTE(review): a normal distribution with scale 0.5 can produce negative magnitudes.
    perturbation_magnitudes = np.random.normal(loc=added_facial_landmarks_noise_norm, scale=0.5, size=int(only_valid_score_subjects_rows.size*len(MRI_facial_landmarks))).reshape(-1, 1)    
    
    perturbations = normalized_perturbations*1*registration_scale_factor*perturbation_magnitudes
    
    # One row per subject.
    X_perturbations = perturbations.reshape(-1, len(MRI_facial_landmarks)*3)

# Append three zero columns, so the last three values of every row are never perturbed.
X_perturbations = np.concatenate((X_perturbations, np.zeros((only_valid_score_subjects_rows.size, 3))), axis=1)

In [None]:
# Split the positions 0..(number of valid subjects - 1) into n_splits shuffled folds
# and store the train/test index arrays of every fold.
# NOTE(review): no random_state is passed, so the folds presumably change between
# runs unless NumPy's global RNG is seeded beforehand - confirm.
kfold = KFold(n_splits=n_splits, shuffle=True)
kfold_train_indices = []
kfold_test_indices = []

for i, (train_indices, test_indices) in enumerate(kfold.split(np.arange(only_valid_score_subjects_rows.size))):
    kfold_train_indices.append(train_indices)
    kfold_test_indices.append(test_indices)

### Euclidean coordinates

In [None]:
# Folder for the trained coordinate-prediction models of the selected network (MLP_folder).
trained_folder_path = media_folder+"/3DMM/Trained_models/pytorch_MLP/Coordinates/"+MLP_folder

In [None]:
# Flag presumably read by the training cells to decide whether to write models to disk - TODO confirm.
save_model = True

In [None]:
# Key of the `feature_sets` entry to use.
chosen_features_set_name = 'MRI_facial_landmarks'
#chosen_features_set_index = [idx for idx, key in enumerate(list(feature_sets.items()) ) if key[0] == chosen_features_set_name][0]
# Decimation percentage, stored as a string.
desired_decimation_percentage = str(100)

In [None]:
# Result tensors indexed as [experiment, landmark, fold].
# Four experiment slots for the model metrics; a single slot for the "*_mean" baseline metrics.
_experiment_metrics_shape = (4, len(selected_EEG_10_20_landmark_names), n_splits)
_baseline_metrics_shape = (1, len(selected_EEG_10_20_landmark_names), n_splits)

# Mean absolute error.
IXI_Euclidean_MAE = np.zeros(_experiment_metrics_shape)
IXI_Euclidean_MAE_mean = np.zeros(_baseline_metrics_shape)

# Mean squared error.
IXI_Euclidean_MSE = np.zeros(_experiment_metrics_shape)
IXI_Euclidean_MSE_mean = np.zeros(_baseline_metrics_shape)

# Standard deviation of the errors.
IXI_Euclidean_std = np.zeros(_experiment_metrics_shape)
IXI_Euclidean_std_mean = np.zeros(_baseline_metrics_shape)

# Error components along / across the skin normal, and statistics of their ratio.
IXI_Euclidean_perpendicular_distances = np.zeros(_experiment_metrics_shape)
IXI_Euclidean_tangent_distances = np.zeros(_experiment_metrics_shape)
IXI_Euclidean_distances_ratios = np.zeros(_experiment_metrics_shape)
IXI_Euclidean_distances_ratios_std = np.zeros(_experiment_metrics_shape)

#### Synthetic model - test

In [None]:
# Evaluate the networks saved under "3DMM/" on every cross-validation test fold (mode='test_only').
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Metric accumulators: one row per predicted landmark, one column per fold.
metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
# The "*_mean" arrays hold the same metrics for a baseline that always predicts the mean training position.
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
validation_losses = []
test_losses = []

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        # The helper's result is used as (model file, scaler file, documentation file).
        most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+"3DMM/", desired_landmark_name)
        model_filename = most_recent_trained_model_filenames[0]
        scaler_filename = most_recent_trained_model_filenames[1]
        documentation_filename = most_recent_trained_model_filenames[2]

        with open(trained_folder_path+"3DMM/"+documentation_filename) as documentation_file:
            documentation = [line.rstrip() for line in documentation_file]

        # Target landmark coordinates, reshaped to one xyz row per subject.
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        # Rows holding at least one non-NaN coordinate (every row when nothing is NaN).
        valid_coordinates_rows = np.unique(np.where(~np.isnan(desired_landmark_subjects_coordinates))[0])

        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)

        # NOTE(review): the fold indices were generated for only_valid_score_subjects_rows.size subjects;
        # if NaN rows make valid_rows shorter, the indexing below is misaligned or out of range - confirm.
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        features_to_use = np.array(MRI_facial_landmarks).astype(str)

        features_data = skin_coordinates_df.loc[features_to_use, :]
        features_data_subjects_coordinates = features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        # One feature row per subject: the xyz triplets of all facial landmarks laid out landmark after landmark.
        X_array = np.zeros((int(features_data_subjects_coordinates.shape[0]/output_size), output_size*len(features_to_use)))
        for i in range(X_array.shape[0]):
            subject_block = features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :]
            X_array[i, :] = np.array(subject_block.T).reshape(-1)
        X_array = X_array[valid_rows, :]

        # NOTE(review): in ablation mode the noise is also added to entries that are exactly 0, so the
        # imputer below (missing_values=0) no longer recognises them as missing - confirm this is intended.
        X_train = X_array[train_indices, :] + X_perturbations[train_indices, :-3]
        X_test = X_array[test_indices, :] + X_perturbations[test_indices, :-3]

        # Exact zeros are treated as missing values. n_neighbors must be at least 1, which a plain
        # tenth of the training size would violate for fewer than 10 training rows.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, X_train.shape[0]//10), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        # Reuse the scaler stored with the model; it is not refitted and X_train is left unscaled.
        X_standard_scaler = joblib.load(trained_folder_path+"3DMM/"+scaler_filename)
        X_test = X_standard_scaler.transform(X_test)

        mlp = MLP_nn(X_test.shape[1], output_size)
        mlp.load_state_dict(change_keys(torch.load(trained_folder_path+"3DMM/"+model_filename)))

        predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)
        test_losses.append(test_loss)

        # Map predictions and targets back with the per-subject transform before measuring errors.
        predictions_original_space = np.zeros(predictions.shape)
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros(y_train.shape)
        for i in range(y_train_original_space.shape[0]):
            y_train_original_space[i, :] = transform_to_original_space(y_train[i, :], train_indices[i]).squeeze()

        y_test_original_space = np.zeros(y_test.shape)
        for i in range(y_test_original_space.shape[0]):
            y_test_original_space[i, :] = transform_to_original_space(y_test[i, :], test_indices[i]).squeeze()

        # Euclidean distance between prediction and target, per test subject.
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # Baseline: predict the mean training position for every test subject.
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        # Split every error vector into components along and across the skin normal.
        perpendicular_distances = []
        tangent_distances = []

        # The landmark's row in the normals table does not depend on the subject, so look it up once.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal's columns are selected with the loop position i, not with
            # current_subject; this looks like it reads the normals of the wrong subjects - confirm
            # against the column layout of skin_normals_df before relying on the projected distances.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Subjects with an all-zero normal are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)

    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Grand mean of the per-landmark, per-fold RMSE values (multiplied by 1000 for display).
print(f"Mean RMSE: {np.mean(1000*np.sqrt(mean_squared_errors))}")

In [None]:
# Per-landmark RMSE averaged over the folds (axis 1 of mean_squared_errors is the fold axis).
print(f"Mean RMSE over folds: {np.mean(1000*np.sqrt(mean_squared_errors), axis=1)}")

In [None]:
# Experiment slot 0 ("Synthetic model - test"): mean absolute errors of the model and of the mean-guess baseline.
IXI_Euclidean_MAE[0, :] = mean_errors
IXI_Euclidean_MAE_mean[0, :] = mean_errors_mean

In [None]:
# Experiment slot 0: mean squared errors of the model and of the mean-guess baseline.
IXI_Euclidean_MSE[0, :] = mean_squared_errors
IXI_Euclidean_MSE_mean[0, :] = mean_squared_errors_mean

In [None]:
# Experiment slot 0: standard deviations of the model errors and of the mean-guess baseline errors.
IXI_Euclidean_std[0, :] = std_errors
IXI_Euclidean_std_mean[0, :] = std_errors_mean

In [None]:
# Experiment slot 0: mean perpendicular / tangent error components and the mean and std of their absolute ratio.
IXI_Euclidean_perpendicular_distances[0, :] = perpendicular_distances_mean
IXI_Euclidean_tangent_distances[0, :] = tangent_distances_mean
IXI_Euclidean_distances_ratios[0, :] = distances_ratios_mean
IXI_Euclidean_distances_ratios_std[0, :] = distances_ratios_std

In [None]:
# Write the slot-0 MSE array and the slot-0 distance-ratio array to timestamped .npy files.
# NOTE: only the literal year 2022 is shortened to two digits; any other year stays four-digit.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'synthetic'
array_filetype = '.npy'

# Ablation runs carry an extra "_ablation_study" tag in the filename.
ablation_suffix = "_ablation_study" if ablation_study else ""
array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}{ablation_suffix}_{timestamp_string}"

predictions_root = media_folder+"/Cranium_estimation_paper/Figures/Predictions/"

# Both arrays share one filename and differ only in the destination folder.
for array_folder, array_to_save in (
    (predictions_root+"Prediction_arrays/"+MLP_folder, IXI_Euclidean_MSE[0, :]),
    (predictions_root+"Projected_prediction_ratio_arrays/", IXI_Euclidean_distances_ratios[0, :]),
):
    array_path = array_folder + array_filename + array_filetype
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)

#### ADNI model - test

In [None]:
# Evaluate the networks saved under "ADNI_ALL/" on every cross-validation test fold (mode='test_only').
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Metric accumulators: one row per predicted landmark, one column per fold.
metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
# The "*_mean" arrays hold the same metrics for a baseline that always predicts the mean training position.
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
validation_losses = []
test_losses = []

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        # The helper's result is used as (model file, scaler file, documentation file).
        most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+"ADNI_ALL/", desired_landmark_name)
        model_filename = most_recent_trained_model_filenames[0]
        scaler_filename = most_recent_trained_model_filenames[1]
        documentation_filename = most_recent_trained_model_filenames[2]

        with open(trained_folder_path+"ADNI_ALL/"+documentation_filename) as documentation_file:
            documentation = [line.rstrip() for line in documentation_file]

        # Target landmark coordinates, reshaped to one xyz row per subject.
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        # Rows holding at least one non-NaN coordinate (every row when nothing is NaN).
        valid_coordinates_rows = np.unique(np.where(~np.isnan(desired_landmark_subjects_coordinates))[0])

        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)

        # NOTE(review): the fold indices were generated for only_valid_score_subjects_rows.size subjects;
        # if NaN rows make valid_rows shorter, the indexing below is misaligned or out of range - confirm.
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        features_to_use = np.array(MRI_facial_landmarks).astype(str)

        features_data = skin_coordinates_df.loc[features_to_use, :]
        features_data_subjects_coordinates = features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        # One feature row per subject: the xyz triplets of all facial landmarks laid out landmark after landmark.
        X_array = np.zeros((int(features_data_subjects_coordinates.shape[0]/output_size), output_size*len(features_to_use)))
        for i in range(X_array.shape[0]):
            subject_block = features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :]
            X_array[i, :] = np.array(subject_block.T).reshape(-1)
        X_array = X_array[valid_rows, :]

        # NOTE(review): in ablation mode the noise is also added to entries that are exactly 0, so the
        # imputer below (missing_values=0) no longer recognises them as missing - confirm this is intended.
        X_train = X_array[train_indices, :] + X_perturbations[train_indices, :-3]
        X_test = X_array[test_indices, :] + X_perturbations[test_indices, :-3]

        # Exact zeros are treated as missing values. n_neighbors must be at least 1, which a plain
        # tenth of the training size would violate for fewer than 10 training rows.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, X_train.shape[0]//10), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        # Reuse the scaler stored with the model; it is not refitted and X_train is left unscaled.
        X_standard_scaler = joblib.load(trained_folder_path+"ADNI_ALL/"+scaler_filename)
        X_test = X_standard_scaler.transform(X_test)

        mlp = MLP_nn(X_train.shape[1], output_size)
        mlp.load_state_dict(change_keys(torch.load(trained_folder_path+"ADNI_ALL/"+model_filename)))

        # Every parameter is marked trainable, although this cell only runs in 'test_only' mode.
        for param in mlp.parameters():
            param.requires_grad=True

        predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        test_losses.append(test_loss)

        # Map predictions and targets back with the per-subject transform before measuring errors.
        predictions_original_space = np.zeros(predictions.shape)
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros(y_train.shape)
        for i in range(y_train_original_space.shape[0]):
            y_train_original_space[i, :] = transform_to_original_space(y_train[i, :], train_indices[i]).squeeze()

        y_test_original_space = np.zeros(y_test.shape)
        for i in range(y_test_original_space.shape[0]):
            y_test_original_space[i, :] = transform_to_original_space(y_test[i, :], test_indices[i]).squeeze()

        # Euclidean distance between prediction and target, per test subject.
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # Baseline: predict the mean training position for every test subject.
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        # Split every error vector into components along and across the skin normal.
        perpendicular_distances = []
        tangent_distances = []

        # The landmark's row in the normals table does not depend on the subject, so look it up once.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal's columns are selected with the loop position i, not with
            # current_subject; this looks like it reads the normals of the wrong subjects - confirm
            # against the column layout of skin_normals_df before relying on the projected distances.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Subjects with an all-zero normal are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)

    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Grand mean of the per-landmark, per-fold RMSE values (multiplied by 1000 for display).
print(f"Mean RMSE: {np.mean(1000*np.sqrt(mean_squared_errors))}")

In [None]:
# Per-landmark RMSE averaged over the folds (axis 1 of mean_squared_errors is the fold axis).
print(f"Mean RMSE over folds: {np.mean(1000*np.sqrt(mean_squared_errors), axis=1)}")

In [None]:
# Experiment slot 1 ("ADNI model - test").
# The MAE is stored too: IXI_Euclidean_MAE reserves a slot per experiment, and leaving
# slot 1 untouched would silently report zeros for this experiment.
IXI_Euclidean_MAE[1, :] = mean_errors
IXI_Euclidean_MSE[1, :] = mean_squared_errors

In [None]:
# Experiment slot 1 ("ADNI model - test"): standard deviation of the model errors.
IXI_Euclidean_std[1, :] = std_errors

In [None]:
# Experiment slot 1: mean perpendicular / tangent error components and the mean and std of their absolute ratio.
IXI_Euclidean_perpendicular_distances[1, :] = perpendicular_distances_mean
IXI_Euclidean_tangent_distances[1, :] = tangent_distances_mean
IXI_Euclidean_distances_ratios[1, :] = distances_ratios_mean
IXI_Euclidean_distances_ratios_std[1, :] = distances_ratios_std

In [None]:
# Write the slot-1 MSE array and the slot-1 distance-ratio array to timestamped .npy files.
# NOTE: only the literal year 2022 is shortened to two digits; any other year stays four-digit.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'other_dataset'
array_filetype = '.npy'

# Ablation runs carry an extra "_ablation_study" tag in the filename.
ablation_suffix = "_ablation_study" if ablation_study else ""
array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}{ablation_suffix}_{timestamp_string}"

predictions_root = media_folder+"/Cranium_estimation_paper/Figures/Predictions/"

# Both arrays share one filename and differ only in the destination folder.
for array_folder, array_to_save in (
    (predictions_root+"Prediction_arrays/"+MLP_folder, IXI_Euclidean_MSE[1, :]),
    (predictions_root+"Projected_prediction_ratio_arrays/", IXI_Euclidean_distances_ratios[1, :]),
):
    array_path = array_folder + array_filename + array_filetype
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)

#### Fine-tune Synthetic model - cv

In [None]:
# Destination folder for the per-fold fine-tuned models, scalers and documentation files.
experiment_model_path = trained_folder_path+"3DMM_IXI/"
# True: fine-tune starting from the "3DMM/" weights; False: load from "3DMM_IXI/" and only evaluate.
train_now = True

In [None]:
# Fine-tune (train_now=True) or only re-evaluate (train_now=False) one network per landmark and fold.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Metric accumulators: one row per predicted landmark, one column per fold.
metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
# The "*_mean" arrays hold the same metrics for a baseline that always predicts the mean training position.
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
validation_losses = []
test_losses = []

# Fine-tuning starts from the "3DMM/" weights; evaluation-only runs load from "3DMM_IXI/".
model_to_load_folder = "3DMM/" if train_now else "3DMM_IXI/"

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        # The helper's result is used as (model file, scaler file, documentation file).
        most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

        model_filename = most_recent_trained_model_filenames[0]
        scaler_filename = most_recent_trained_model_filenames[1]
        documentation_filename = most_recent_trained_model_filenames[2]

        with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
            documentation = [line.rstrip() for line in documentation_file]

        # Target landmark coordinates, reshaped to one xyz row per subject.
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        # Rows holding at least one non-NaN coordinate (every row when nothing is NaN).
        valid_coordinates_rows = np.unique(np.where(~np.isnan(desired_landmark_subjects_coordinates))[0])

        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)

        # NOTE(review): the fold indices were generated for only_valid_score_subjects_rows.size subjects;
        # if NaN rows make valid_rows shorter, the indexing below is misaligned or out of range - confirm.
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        features_to_use = np.array(MRI_facial_landmarks).astype(str)

        features_data = skin_coordinates_df.loc[features_to_use, :]
        features_data_subjects_coordinates = features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        # One feature row per subject: the xyz triplets of all facial landmarks laid out landmark after landmark.
        X_array = np.zeros((int(features_data_subjects_coordinates.shape[0]/output_size), output_size*len(features_to_use)))
        for i in range(X_array.shape[0]):
            subject_block = features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :]
            X_array[i, :] = np.array(subject_block.T).reshape(-1)
        X_array = X_array[valid_rows, :]

        # NOTE(review): in ablation mode the noise is also added to entries that are exactly 0, so the
        # imputer below (missing_values=0) no longer recognises them as missing - confirm this is intended.
        X_train = X_array[train_indices, :] + X_perturbations[train_indices, :-3]
        X_test = X_array[test_indices, :] + X_perturbations[test_indices, :-3]

        # Exact zeros are treated as missing values. n_neighbors must be at least 1, which a plain
        # tenth of the training size would violate for fewer than 10 training rows.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, X_train.shape[0]//10), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        # A fresh scaler is fitted on this fold's training data; the stored scaler file is not loaded here.
        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        mlp = MLP_nn(X_train.shape[1], output_size)
        mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

        # Mark every parameter as trainable before handing the network over.
        for param in mlp.parameters():
            param.requires_grad=True

        if train_now:
            predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='fine_tune', n_jobs_num=1)
            validation_losses.append(validation_loss)
            test_losses.append(test_loss)
        else:
            # Evaluation only: the loss lists are not extended in this mode.
            predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        if save_model and train_now:
            timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
            timestamp_string = timestamp_string.replace('_2022_', '_22_')
            # Common prefix of the three files written for this landmark and fold.
            saved_files_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)

            torch.save(lowest_validation_loss_model.state_dict(), saved_files_prefix+'_model')
            # Context manager so that the scaler file is flushed and closed instead of leaking the handle.
            with open(saved_files_prefix+'_scaler.pkl', 'wb') as scaler_file:
                pickle.dump(X_standard_scaler, scaler_file)
            documentation = [
                f"predicted_landmark_name: {desired_landmark_name}",
                f"number_of_training_samples: {train_indices.size}",
                f"model: {mlp}",
            ]
            with open(saved_files_prefix+'_documentation.txt', "w") as txt_file:
                txt_file.write("\n".join(documentation))

        # Map predictions and targets back with the per-subject transform before measuring errors.
        predictions_original_space = np.zeros(predictions.shape)
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros(y_train.shape)
        for i in range(y_train_original_space.shape[0]):
            y_train_original_space[i, :] = transform_to_original_space(y_train[i, :], train_indices[i]).squeeze()

        y_test_original_space = np.zeros(y_test.shape)
        for i in range(y_test_original_space.shape[0]):
            y_test_original_space[i, :] = transform_to_original_space(y_test[i, :], test_indices[i]).squeeze()

        # Euclidean distance between prediction and target, per test subject.
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # Baseline: predict the mean training position for every test subject.
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        # Split every error vector into components along and across the skin normal.
        perpendicular_distances = []
        tangent_distances = []

        # The landmark's row in the normals table does not depend on the subject, so look it up once.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal's columns are selected with the loop position i, not with
            # current_subject; this looks like it reads the normals of the wrong subjects - confirm
            # against the column layout of skin_normals_df before relying on the projected distances.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Subjects with an all-zero normal are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)

    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Average the recorded test-loss entries over the folds, separately for each landmark.
# test_losses was filled fold by fold, landmark by landmark, so the entry for landmark i
# in fold j sits at position i + j*<number of landmarks>.
landmarks_total = len(selected_EEG_10_20_landmark_names)
folds_recorded = int(len(test_losses)/landmarks_total)

landmarks_mean_test_losses = np.vstack([
    np.mean(
        np.vstack([
            np.array(test_losses[landmark_position + fold_position*landmarks_total]).reshape(1, -1)
            for fold_position in range(folds_recorded)
        ]),
        axis=0,
    )
    for landmark_position in range(landmarks_total)
])

In [None]:
# Line plot of the fold-averaged test losses: one curve per landmark plus their overall mean.
fig = go.Figure()

# The x axis is simply the column position inside landmarks_mean_test_losses.
loss_positions_count = landmarks_mean_test_losses.shape[1]

mean_trace = go.Scatter(
    x=np.arange(loss_positions_count),
    y=np.mean(landmarks_mean_test_losses, axis=0),
    mode='lines',
    name='Mean'
)
fig.add_trace(mean_trace)

for i, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    landmark_trace = go.Scatter(
        x=np.arange(loss_positions_count),
        y=landmarks_mean_test_losses[i, :],
        mode='lines',
        name=desired_landmark_name
    )
    fig.add_trace(landmark_trace)

fig.show()

In [None]:
# Write the fold-averaged test losses of the fine-tuned networks to a timestamped .npy file.
# NOTE: only the literal year 2022 is shortened to two digits; any other year stays four-digit.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'synthetic_fine_tuned'
array_filetype = '.npy'

array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Test_losses_arrays/"+MLP_folder

# Ablation runs carry an extra "_ablation_study" tag in the filename.
ablation_suffix = "_ablation_study" if ablation_study else ""
array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}{ablation_suffix}_{timestamp_string}"

array_path = array_folder + array_filename + array_filetype
array_to_save = landmarks_mean_test_losses

with open(array_path, 'wb') as file:
    np.save(file, array_to_save)

In [None]:
# Experiment slot 2 ("Fine-tune Synthetic model - cv").
# The MAE is stored too: IXI_Euclidean_MAE reserves a slot per experiment, and leaving
# slot 2 untouched would silently report zeros for this experiment.
IXI_Euclidean_MAE[2, :] = mean_errors
IXI_Euclidean_MSE[2, :] = mean_squared_errors

In [None]:
# Experiment slot 2 ("Fine-tune Synthetic model - cv"): standard deviation of the model errors.
IXI_Euclidean_std[2, :] = std_errors

In [None]:
# Experiment slot 2: mean perpendicular / tangent error components and the mean and std of their absolute ratio.
IXI_Euclidean_perpendicular_distances[2, :] = perpendicular_distances_mean
IXI_Euclidean_tangent_distances[2, :] = tangent_distances_mean
IXI_Euclidean_distances_ratios[2, :] = distances_ratios_mean
IXI_Euclidean_distances_ratios_std[2, :] = distances_ratios_std

In [None]:
# Write the slot-2 MSE array and the slot-2 distance-ratio array to timestamped .npy files.
# NOTE: only the literal year 2022 is shortened to two digits; any other year stays four-digit.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'synthetic_fine_tuned'
array_filetype = '.npy'

# Ablation runs carry an extra "_ablation_study" tag in the filename.
ablation_suffix = "_ablation_study" if ablation_study else ""
array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}{ablation_suffix}_{timestamp_string}"

predictions_root = media_folder+"/Cranium_estimation_paper/Figures/Predictions/"

# Both arrays share one filename and differ only in the destination folder.
for array_folder, array_to_save in (
    (predictions_root+"Prediction_arrays/"+MLP_folder, IXI_Euclidean_MSE[2, :]),
    (predictions_root+"Projected_prediction_ratio_arrays/", IXI_Euclidean_distances_ratios[2, :]),
):
    array_path = array_folder + array_filename + array_filetype
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)

#### Fine-tune Synthetic model - all

In [None]:
# Folder associated with the "fine-tune on all subjects" experiment.
experiment_model_path = trained_folder_path+"3DMM_IXI_ALL/"
# True: start from the "3DMM/" weights; False: load from "3DMM_IXI_ALL/" instead.
train_now = True

In [None]:
# Re-seed torch and reset the metric accumulators for the "Fine-tune Synthetic model - all" experiment.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Ten independent zero arrays, each with one row per landmark and one column per fold.
metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
(
    mean_errors,
    mean_squared_errors,
    std_errors,
    mean_errors_mean,
    mean_squared_errors_mean,
    std_errors_mean,
    perpendicular_distances_mean,
    tangent_distances_mean,
    distances_ratios_mean,
    distances_ratios_std,
) = (np.zeros(metrics_shape) for _ in range(10))
validation_losses = []
# test_losses is not reset here, so it keeps whatever an earlier cell left in it.
#test_losses = []

for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

    if train_now:
        model_to_load_folder = "3DMM/"
    else:
        model_to_load_folder = "3DMM_IXI_ALL/"

    most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

    model_filename = most_recent_trained_model_filenames[0]
    scaler_filename = most_recent_trained_model_filenames[1]
    documentation_filename = most_recent_trained_model_filenames[2]

    with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
        documentation = [line.rstrip() for line in documentation_file]

    desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
    desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
    desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
    if np.where(np.isnan(desired_landmark_subjects_coordinates)==True)[0].size>0:
        valid_coordinates_rows = np.unique(np.where(np.isnan(desired_landmark_subjects_coordinates)==False)[0])
    else:
        valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

    valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
    #__________________________________________________________________________________________________________
    y_array = desired_landmark_subjects_coordinates[valid_rows, :]
    train_indices = np.arange(y_array.shape[0])
    y_train = y_array[train_indices, :]
    y_test = np.array([-1, -1, -1]).reshape(1, 3)
    output_size = y_test.shape[1]
    
    #features_to_use = documentation[2].split(': ')[1][1:-1].split(', ')
    features_to_use = np.array(MRI_facial_landmarks).astype(str)

    features_data = skin_coordinates_df.loc[features_to_use, :]
    features_data_subjects_coordinates = features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

    X_array = np.zeros((int(features_data_subjects_coordinates.shape[0]/output_size), output_size*len(features_to_use)))

    for i in range(X_array.shape[0]):
        X_array[i, :] = np.array(features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
    X_array = X_array[valid_rows, :]

    X_train = X_array[train_indices, :] + X_perturbations[train_indices, :-3]
    X_test = []
    
    X_imputer = KNNImputer(missing_values=0, n_neighbors=int(X_train.shape[0]/10), weights="uniform")
    X_train = X_imputer.fit_transform(X_train)
    #X_test = X_imputer.transform(X_test)
    
    X_standard_scaler = StandardScaler()
    X_train = X_standard_scaler.fit_transform(X_train)
    #X_test = X_standard_scaler.transform(X_test)
    
    #print(repr(np.std(X_train, axis=0).reshape(-1, 3)))
    #print(repr(np.std(X_test, axis=0).reshape(-1, 3)))
    
    mlp = MLP_nn(X_train.shape[1], output_size)
    mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

    if 1:
        for i, param in enumerate(mlp.parameters()):
            param.requires_grad=True

    if train_now:
        predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='fine_tune', n_jobs_num=1)
        validation_losses.append(validation_loss)
        #test_losses.append(test_loss)
    else:
        predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)


    if save_model and train_now:
        timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
        timestamp_string = timestamp_string.replace('_2022_', '_22_')

        torch.save(lowest_validation_loss_model.state_dict(), experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_model')
        pickle.dump(X_standard_scaler, open(experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_scaler.pkl', 'wb'))
        documentation = [
            f"predicted_landmark_name: {desired_landmark_name}",
            #f"landmark_names_being_used: {features_to_use}",
            f"number_of_training_samples: {train_indices.size}",
            f"model: {mlp}",
        ]
        with open(experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_documentation.txt' , "w") as txt_file:
            txt_file.write("\n".join(documentation))

#### Learn from scratch - cv

In [None]:
# Folder that receives the models saved by the cross-validation cells below,
# and the switch that makes them train (True) or reload saved models (False).
experiment_model_path = f"{trained_folder_path}IXI/"
train_now = True

In [None]:
# K-fold cross-validation of per-landmark MLPs trained from scratch: for each
# fold and landmark, fit (or reload) a model, predict the held-out rows and
# record error statistics.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Metric buffers, one row per landmark and one column per fold.
metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
validation_losses = []
test_losses = []

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        # Target landmark values, reshaped to rows of three coordinates.
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        # Rows holding at least one non-NaN coordinate (all rows when nothing is NaN).
        valid_coordinates_rows = np.flatnonzero((~np.isnan(desired_landmark_subjects_coordinates)).any(axis=1))

        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
        #__________________________________________________________________________________________________________
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        #features_to_use = documentation[2].split(': ')[1][1:-1].split(', ')
        features_to_use = np.array(MRI_facial_landmarks).astype(str)

        features_data = skin_coordinates_df.loc[features_to_use, :]
        features_data_subjects_coordinates = features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        # Flatten each group of output_size consecutive rows (one group per X
        # row) into a single feature vector, landmark by landmark.
        X_array = np.zeros((int(features_data_subjects_coordinates.shape[0]/output_size), output_size*len(features_to_use)))

        for i in range(X_array.shape[0]):
            X_array[i, :] = np.array(features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
        X_array = X_array[valid_rows, :]

        # The last three columns of X_perturbations are not applied to the features.
        X_train = X_array[train_indices, :] + X_perturbations[train_indices, :-3]
        X_test = X_array[test_indices, :] + X_perturbations[test_indices, :-3]

        # Imputer and scaler are fitted on the training rows only and then
        # applied to the test rows. Entries equal to 0 count as missing.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=int(X_train.shape[0]/10), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        #print(repr(np.std(X_train, axis=0).reshape(-1, 3)))
        #print(repr(np.std(X_test, axis=0).reshape(-1, 3)))

        if train_now:
            mlp = MLP_nn(X_train.shape[1], output_size)
            mlp = nn.DataParallel(mlp, device_ids=[0])

            # Make every parameter trainable.
            for param in mlp.parameters():
                param.requires_grad = True

            predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='train', n_jobs_num=1, num_of_epochs=num_of_epochs)
            validation_losses.append(validation_loss)
            test_losses.append(test_loss)
        else:
            # Reload the most recent model saved for this landmark and fold.
            model_to_load_folder = "IXI/"
            most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

            model_filename = most_recent_trained_model_filenames[0]
            scaler_filename = most_recent_trained_model_filenames[1]
            documentation_filename = most_recent_trained_model_filenames[2]

            with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
                documentation = [line.rstrip() for line in documentation_file]

            mlp = MLP_nn(X_train.shape[1], output_size)
            mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

            predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        if save_model and train_now:
            timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
            timestamp_string = timestamp_string.replace('_2022_', '_22_')

            # Common prefix of the three files written for this landmark and fold.
            output_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)

            torch.save(lowest_validation_loss_model.state_dict(), output_prefix+'_model')
            # Write the scaler through a context manager so the file handle is closed.
            with open(output_prefix+'_scaler.pkl', 'wb') as scaler_file:
                pickle.dump(X_standard_scaler, scaler_file)
            documentation = [
                f"predicted_landmark_name: {desired_landmark_name}",
                #f"landmark_names_being_used: {features_to_use}",
                f"number_of_training_samples: {train_indices.size}",
                f"model: {mlp}",
            ]
            with open(output_prefix+'_documentation.txt' , "w") as txt_file:
                txt_file.write("\n".join(documentation))

        # Map predictions and targets back with transform_to_original_space,
        # one row at a time, using the row's train/test index.
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # Euclidean error of the model on the held-out rows.
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # Baseline: always predict the mean of the training targets.
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        perpendicular_distances = []
        tangent_distances = []

        # The landmark's row in skin_normals_df is the same for every subject,
        # so it is looked up once instead of once per test row.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # Select the normal by subject index rather than by position in the
            # fold, matching how transform_to_original_space is indexed above.
            # NOTE(review): assumes skin_normals_df holds three consecutive
            # columns per subject in the order test_indices refers to - confirm
            # (a mapping through valid_rows may be needed if rows were dropped).
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*current_subject:3*(current_subject+1)].values

            # Rows with an all-zero normal are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)

    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Ablation-study run of the K-fold cross-validation: for each fold and
# landmark, fit (or reload) a model, predict the held-out rows and record
# error statistics.
print('Ablation Study')
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Metric buffers, one row per landmark and one column per fold.
metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
validation_losses = []
test_losses = []

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        # Target landmark values, reshaped to rows of three coordinates.
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        # Rows holding at least one non-NaN coordinate (all rows when nothing is NaN).
        valid_coordinates_rows = np.flatnonzero((~np.isnan(desired_landmark_subjects_coordinates)).any(axis=1))

        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
        #__________________________________________________________________________________________________________
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        #features_to_use = documentation[2].split(': ')[1][1:-1].split(', ')
        features_to_use = np.array(MRI_facial_landmarks).astype(str)

        features_data = skin_coordinates_df.loc[features_to_use, :]
        features_data_subjects_coordinates = features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        # Flatten each group of output_size consecutive rows (one group per X
        # row) into a single feature vector, landmark by landmark.
        X_array = np.zeros((int(features_data_subjects_coordinates.shape[0]/output_size), output_size*len(features_to_use)))

        for i in range(X_array.shape[0]):
            X_array[i, :] = np.array(features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
        X_array = X_array[valid_rows, :]

        # The last three columns of X_perturbations are not applied to the features.
        X_train = X_array[train_indices, :] + X_perturbations[train_indices, :-3]
        X_test = X_array[test_indices, :] + X_perturbations[test_indices, :-3]

        # Imputer and scaler are fitted on the training rows only and then
        # applied to the test rows. Entries equal to 0 count as missing.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=int(X_train.shape[0]/10), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        #print(repr(np.std(X_train, axis=0).reshape(-1, 3)))
        #print(repr(np.std(X_test, axis=0).reshape(-1, 3)))

        if train_now:
            mlp = MLP_nn(X_train.shape[1], output_size)
            mlp = nn.DataParallel(mlp, device_ids=[0])

            # Make every parameter trainable.
            for param in mlp.parameters():
                param.requires_grad = True

            predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='train', n_jobs_num=1, num_of_epochs=num_of_epochs)
            validation_losses.append(validation_loss)
            test_losses.append(test_loss)
        else:
            # Reload the most recent model saved for this landmark and fold.
            model_to_load_folder = "IXI/"
            most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

            model_filename = most_recent_trained_model_filenames[0]
            scaler_filename = most_recent_trained_model_filenames[1]
            documentation_filename = most_recent_trained_model_filenames[2]

            with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
                documentation = [line.rstrip() for line in documentation_file]

            mlp = MLP_nn(X_train.shape[1], output_size)
            mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

            predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        if save_model and train_now:
            timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
            timestamp_string = timestamp_string.replace('_2022_', '_22_')

            # Common prefix of the three files written for this landmark and fold.
            output_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)

            torch.save(lowest_validation_loss_model.state_dict(), output_prefix+'_model')
            # Write the scaler through a context manager so the file handle is closed.
            with open(output_prefix+'_scaler.pkl', 'wb') as scaler_file:
                pickle.dump(X_standard_scaler, scaler_file)
            documentation = [
                f"predicted_landmark_name: {desired_landmark_name}",
                #f"landmark_names_being_used: {features_to_use}",
                f"number_of_training_samples: {train_indices.size}",
                f"model: {mlp}",
            ]
            with open(output_prefix+'_documentation.txt' , "w") as txt_file:
                txt_file.write("\n".join(documentation))

        # Map predictions and targets back with transform_to_original_space,
        # one row at a time, using the row's train/test index.
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # Euclidean error of the model on the held-out rows.
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # Baseline: always predict the mean of the training targets.
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        perpendicular_distances = []
        tangent_distances = []

        # The landmark's row in skin_normals_df is the same for every subject,
        # so it is looked up once instead of once per test row.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # Select the normal by subject index rather than by position in the
            # fold, matching how transform_to_original_space is indexed above.
            # NOTE(review): assumes skin_normals_df holds three consecutive
            # columns per subject in the order test_indices refers to - confirm
            # (a mapping through valid_rows may be needed if rows were dropped).
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*current_subject:3*(current_subject+1)].values

            # Rows with an all-zero normal are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)

    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Average the recorded test-loss curves over folds, separately per landmark.
# test_losses was appended fold by fold with the landmarks in the inner loop, so
# the entry for landmark p in fold f sits at position p + f*number_of_landmarks.
landmarks_mean_test_losses = []
for landmark_position in range(len(selected_EEG_10_20_landmark_names)):
    curves_per_fold = [
        np.array(test_losses[landmark_position+fold_position*len(selected_EEG_10_20_landmark_names)]).reshape(1, -1)
        for fold_position in range(int(len(test_losses)/len(selected_EEG_10_20_landmark_names)))
    ]
    # Stack to (folds, curve length) and average over the fold axis.
    landmarks_mean_test_losses.append(np.mean(np.vstack(curves_per_fold), axis=0))
landmarks_mean_test_losses = np.vstack(landmarks_mean_test_losses)

In [None]:
# Line plot of the fold-averaged test losses: first the average over all
# landmarks, then one line per landmark. The x axis is the column index of
# landmarks_mean_test_losses.
fig = go.Figure()

x_positions = np.arange(landmarks_mean_test_losses.shape[1])

mean_trace = go.Scatter(
    x=x_positions,
    y=np.mean(landmarks_mean_test_losses, axis=0),
    mode='lines',
    name='Mean'
)
fig.add_trace(mean_trace)

for i, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    landmark_trace = go.Scatter(
        x=np.arange(landmarks_mean_test_losses.shape[1]),
        y=landmarks_mean_test_losses[i, :],
        mode='lines',
        name=desired_landmark_name
    )
    fig.add_trace(landmark_trace)

fig.show()

In [None]:
# Write the per-landmark mean test-loss curves to a timestamped .npy file.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'same_dataset'

array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Test_losses_arrays/"+MLP_folder
# File name: dataset, second-to-last component of trained_folder_path,
# experiment type, optional ablation tag, timestamp.
ablation_tag = "_ablation_study" if ablation_study else ""
array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}{ablation_tag}_{timestamp_string}"
array_filetype = '.npy'

array_path = array_folder + array_filename + array_filetype

array_to_save = landmarks_mean_test_losses

# Manual toggle: True writes the array, False would read it back from array_path.
write_array = True
if write_array:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

In [None]:
# Mean over all landmarks and folds of the per-cell RMSE, scaled by 1000.
overall_rmse = np.mean(1000*np.sqrt(mean_squared_errors))
print(f"Mean RMSE: {overall_rmse}")

In [None]:
# Per-landmark RMSE averaged across the fold axis (axis 1), scaled by 1000.
rmse_per_landmark = np.mean(1000*np.sqrt(mean_squared_errors), axis=1)
print(f"Mean RMSE over folds: {rmse_per_landmark}")

In [None]:
# Same summary as the plain "Mean RMSE" cell, labelled for the ablation run.
ablation_overall_rmse = np.mean(1000*np.sqrt(mean_squared_errors))
print(f"Ablation Study Mean RMSE: {ablation_overall_rmse}")

In [None]:
# Per-landmark RMSE averaged across the fold axis, labelled for the ablation run.
ablation_rmse_per_landmark = np.mean(1000*np.sqrt(mean_squared_errors), axis=1)
print(f"Ablation Study Mean RMSE over folds: {ablation_rmse_per_landmark}")

In [None]:
# Store this experiment's squared-error table in slice 3 of the results array.
IXI_Euclidean_MSE[3, ...] = mean_squared_errors

In [None]:
# Store this experiment's error standard deviations in slice 3 of the results array.
IXI_Euclidean_std[3, ...] = std_errors

In [None]:
# Store the perpendicular/tangent distance statistics in slice 3 of their
# respective results arrays.
IXI_Euclidean_perpendicular_distances[3, ...] = perpendicular_distances_mean
IXI_Euclidean_tangent_distances[3, ...] = tangent_distances_mean
IXI_Euclidean_distances_ratios[3, ...] = distances_ratios_mean
IXI_Euclidean_distances_ratios_std[3, ...] = distances_ratios_std

In [None]:
# Write slice 3 of the MSE results and of the distance-ratio results to two
# .npy files that share one timestamped file name but live in different folders.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'same_dataset'

array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Prediction_arrays/"+MLP_folder
# File name: dataset, second-to-last component of trained_folder_path,
# experiment type, optional ablation tag, timestamp.
ablation_tag = "_ablation_study" if ablation_study else ""
array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}{ablation_tag}_{timestamp_string}"
array_filetype = '.npy'

# Manual toggle: True writes the arrays, False would read them back instead.
write_array = True

array_path = array_folder + array_filename + array_filetype
array_to_save = IXI_Euclidean_MSE[3, :]

if write_array:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

# Second output: the distance ratios (this folder is not suffixed with MLP_folder).
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Projected_prediction_ratio_arrays/"
array_path = array_folder + array_filename + array_filetype
array_to_save = IXI_Euclidean_distances_ratios[3, :]

if write_array:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

#### Learn from scratch - all

In [None]:
# Folder that receives the models saved by the next cell, and the switch that
# enables training (and saving) in that cell.
experiment_model_path = f"{trained_folder_path}IXI_ALL/"
train_now = True

In [None]:
# Train one MLP per landmark from scratch using every valid subject as training
# data. Nothing is held out in this cell, so no test metrics are computed here.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Metric buffers, one row per landmark and one column per fold. This cell only
# resets them to zero; it does not fill them.
metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
validation_losses = []
#test_losses = []

for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

    # Target landmark values, reshaped to rows of three coordinates.
    desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
    desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
    desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
    # Rows holding at least one non-NaN coordinate (all rows when nothing is NaN).
    valid_coordinates_rows = np.flatnonzero((~np.isnan(desired_landmark_subjects_coordinates)).any(axis=1))

    valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
    #__________________________________________________________________________________________________________
    y_array = desired_landmark_subjects_coordinates[valid_rows, :]
    # Every valid row is used for training.
    train_indices = np.arange(y_array.shape[0])
    y_train = y_array[train_indices, :]
    # Dummy single-row target: there is no held-out data in this cell.
    y_test = np.array([-1, -1, -1]).reshape(1, 3)
    output_size = y_test.shape[1]

    #features_to_use = documentation[2].split(': ')[1][1:-1].split(', ')
    features_to_use = np.array(MRI_facial_landmarks).astype(str)

    features_data = skin_coordinates_df.loc[features_to_use, :]
    features_data_subjects_coordinates = features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

    # Flatten each group of output_size consecutive rows (one group per X row)
    # into a single feature vector, landmark by landmark.
    X_array = np.zeros((int(features_data_subjects_coordinates.shape[0]/output_size), output_size*len(features_to_use)))

    for i in range(X_array.shape[0]):
        X_array[i, :] = np.array(features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
    X_array = X_array[valid_rows, :]

    # The last three columns of X_perturbations are not applied to the features.
    X_train = X_array[train_indices, :] + X_perturbations[train_indices, :-3]
    X_test = []

    # Entries equal to 0 are treated as missing and imputed from the neighbours.
    X_imputer = KNNImputer(missing_values=0, n_neighbors=int(X_train.shape[0]/10), weights="uniform")
    X_train = X_imputer.fit_transform(X_train)
    #X_test = X_imputer.transform(X_test)

    X_standard_scaler = StandardScaler()
    X_train = X_standard_scaler.fit_transform(X_train)
    #X_test = X_standard_scaler.transform(X_test)

    #print(repr(np.std(X_train, axis=0).reshape(-1, 3)))
    #print(repr(np.std(X_test, axis=0).reshape(-1, 3)))

    if train_now:
        mlp = MLP_nn(X_train.shape[1], output_size)
        mlp = nn.DataParallel(mlp, device_ids=[0])

        # Make every parameter trainable.
        for param in mlp.parameters():
            param.requires_grad = True

        # NOTE(review): other cells unpack five values from
        # model_choose_and_predict, while here the single return value is used
        # as a model below - confirm what this call returns when X_test is empty.
        trained_model = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='train', n_jobs_num=1, num_of_epochs=num_of_epochs)
        #test_losses.append(test_loss)

    if save_model and train_now:
        timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
        timestamp_string = timestamp_string.replace('_2022_', '_22_')

        # Common prefix of the three files written for this landmark.
        output_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name

        torch.save(trained_model.state_dict(), output_prefix+'_model')
        # Write the scaler through a context manager so the file handle is closed.
        with open(output_prefix+'_scaler.pkl', 'wb') as scaler_file:
            pickle.dump(X_standard_scaler, scaler_file)
        documentation = [
            f"predicted_landmark_name: {desired_landmark_name}",
            #f"landmark_names_being_used: {features_to_use}",
            f"number_of_training_samples: {train_indices.size}",
            f"model: {mlp}",
        ]
        with open(output_prefix+'_documentation.txt' , "w") as txt_file:
            txt_file.write("\n".join(documentation))

### Euclidean coordinates and Geodesic distances

In [None]:
# Point trained_folder_path at the Coordinates_Geodesic model folder for the
# cells that follow.
trained_folder_path = f"{media_folder}/3DMM/Trained_models/pytorch_MLP/Coordinates_Geodesic/{MLP_folder}"

In [None]:
# Global switch read by the training cells: models, scalers and documentation
# files are written only when this is True (and train_now is set).
save_model = False

In [None]:
# Name of the feature set to use and the decimation percentage, kept as a string.
chosen_features_set_name = 'MRI_facial_landmarks'
#chosen_features_set_index = [idx for idx, key in enumerate(list(feature_sets.items()) ) if key[0] == chosen_features_set_name][0]
desired_decimation_percentage = "100"

In [None]:
# Result containers for the "coordinates + geodesic distances" experiments.
# Layout: (experiment, landmark, fold). The cells below write the synthetic model
# into row 0 and the ADNI model into row 1; the "*_mean" arrays hold the
# mean-guess baseline and therefore have a single row.
per_experiment_shape = (4, len(selected_EEG_10_20_landmark_names), n_splits)
baseline_shape = (1, len(selected_EEG_10_20_landmark_names), n_splits)

IXI_Euclidean_Geodesic_MAE = np.zeros(per_experiment_shape)
IXI_Euclidean_Geodesic_MAE_mean = np.zeros(baseline_shape)

IXI_Euclidean_Geodesic_MSE = np.zeros(per_experiment_shape)
IXI_Euclidean_Geodesic_MSE_mean = np.zeros(baseline_shape)

IXI_Euclidean_Geodesic_std = np.zeros(per_experiment_shape)
IXI_Euclidean_Geodesic_std_mean = np.zeros(baseline_shape)

# Decomposition of the prediction error relative to the skin surface normal.
IXI_Euclidean_Geodesic_perpendicular_distances = np.zeros(per_experiment_shape)
IXI_Euclidean_Geodesic_tangent_distances = np.zeros(per_experiment_shape)
IXI_Euclidean_Geodesic_distances_ratios = np.zeros(per_experiment_shape)
IXI_Euclidean_Geodesic_distances_ratios_std = np.zeros(per_experiment_shape)

#### Synthetic model - test

In [None]:
# Evaluate the MLPs stored under trained_folder_path+"3DMM/" (one model per
# selected EEG 10-20 landmark) on every cross-validation fold, without any further
# training (mode='test_only').
#
# Per landmark and fold the cell records, in arrays of shape (n_landmarks, n_splits):
#   * mean_errors / mean_squared_errors / std_errors: Euclidean prediction error
#     after mapping predictions and targets back with transform_to_original_space;
#   * *_mean variants: the same statistics for a baseline that always predicts the
#     mean training target;
#   * perpendicular / tangent error components relative to the skin normal and the
#     statistics of their ratio.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
validation_losses = []
test_losses = []

pretrained_models_path = trained_folder_path+"3DMM/"

# --- Input features -------------------------------------------------------------
# The feature matrices do not depend on the fold or on the predicted landmark, so
# they are built once here; only the row selection (valid_rows) happens in the loop.
coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)
coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

# Rows of the transposed frame come in consecutive triplets (one triplet per
# subject); regroup them into one row per subject laid out as
# [feature_0 triplet, feature_1 triplet, ...].
stacked_feature_coordinates = np.array(coordinates_features_data_subjects_coordinates, dtype=float)
subjects_in_frame = stacked_feature_coordinates.shape[0] // 3
features_in_frame = stacked_feature_coordinates.shape[1]
all_subjects_coordinates_X_array = (
    stacked_feature_coordinates[:3*subjects_in_frame, :]
    .reshape(subjects_in_frame, 3, features_in_frame)
    .transpose(0, 2, 1)
    .reshape(subjects_in_frame, 3*features_in_frame)
)

# Geodesic features, one row per column of skin_geodesic_distances_df: the first
# two distances as they are and the sum of the remaining ones. A zero marks a
# missing value (KNNImputer below uses missing_values=0), so the sum is zeroed as
# soon as one of its terms is missing.
geodesic_features_data = skin_geodesic_distances_df
all_subjects_geodesic_X_array = np.zeros((geodesic_features_data.shape[1], 3))
for subject_column in range(geodesic_features_data.shape[1]):
    first_distance = geodesic_features_data.iloc[0, subject_column]
    second_distance = geodesic_features_data.iloc[1, subject_column]
    remaining_distances = np.array(geodesic_features_data.iloc[2:, subject_column])

    all_subjects_geodesic_X_array[subject_column, 0] = first_distance if first_distance else 0
    all_subjects_geodesic_X_array[subject_column, 1] = second_distance if second_distance else 0
    all_subjects_geodesic_X_array[subject_column, 2] = np.sum(remaining_distances) if np.all(remaining_distances) else 0

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        most_recent_trained_model_filenames = find_most_recent_trained_files(pretrained_models_path, desired_landmark_name)
        model_filename = most_recent_trained_model_filenames[0]
        # NOTE(review): the stored scaler is located but not used in this cell; the
        # features are standardised with a scaler fitted on this fold's training data
        # instead. Confirm this is intended.
        scaler_filename = most_recent_trained_model_filenames[1]
        documentation_filename = most_recent_trained_model_filenames[2]

        with open(pretrained_models_path+documentation_filename) as documentation_file:
            documentation = [line.rstrip() for line in documentation_file]

        # --- Targets: coordinates of the landmark to predict, one row per subject ---
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)

        # A subject is usable only when none of its three coordinates is NaN. (The
        # previous test kept every row with at least one non-NaN value, which lets
        # partially missing targets through.)
        valid_coordinates_rows = np.where(~np.isnan(desired_landmark_subjects_coordinates).any(axis=1))[0]
        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
        #__________________________________________________________________________________________________________
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        # --- Features of the valid subjects, split by fold ---
        coordinates_X_array = all_subjects_coordinates_X_array[valid_rows, :]
        geodesic_X_array = all_subjects_geodesic_X_array[valid_rows, :]

        X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
        X_test = np.concatenate((coordinates_X_array[test_indices, :], geodesic_X_array[test_indices, :]), axis=1) + X_perturbations[test_indices, :]

        # Impute zeros (missing values) from roughly 10% of the training samples;
        # at least one neighbour is required, hence the lower bound.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, X_train.shape[0] // 10), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        #print(repr(np.std(X_train, axis=0).reshape(-1, 3)))
        #print(repr(np.std(X_test, axis=0).reshape(-1, 3)))

        # --- Load the stored weights and predict the test fold ---
        mlp = MLP_nn(X_train.shape[1], output_size)
        mlp.load_state_dict(change_keys(torch.load(pretrained_models_path+model_filename)))

        predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)
        #validation_losses.append(validation_loss)
        test_losses.append(test_loss)

        # --- Map predictions and targets back with transform_to_original_space ---
        predictions_cpu = predictions.to("cpu")
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions_cpu[i, :]
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i, (current_y, subject_index) in enumerate(zip(y_train, train_indices)):
            y_train_original_space[i, :] = transform_to_original_space(current_y, subject_index).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i, (current_y, subject_index) in enumerate(zip(y_test, test_indices)):
            y_test_original_space[i, :] = transform_to_original_space(current_y, subject_index).squeeze()

        # --- Error statistics of the model (printed RMSE is multiplied by 1000) ---
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # --- Baseline: always predict the mean training target ---
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        # --- Split each error into components perpendicular / tangent to the skin ---
        perpendicular_distances = []
        tangent_distances = []

        # The row of the landmark in skin_normals_df is the same for every subject.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal is selected by the position `i` inside the fold,
            # while `current_subject` is unused. Confirm the column layout of
            # skin_normals_df; if it holds three columns per subject, the index should
            # probably be derived from the subject rather than from `i`.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Subjects whose normal has zero norm are left out.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)
        
    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# RMSE (multiplied by 1000) averaged over all landmarks and folds.
overall_rmse = np.mean(1000*np.sqrt(mean_squared_errors))
print(f"Mean RMSE: {overall_rmse}")

In [None]:
# One value per landmark: RMSE (multiplied by 1000) averaged over the folds (axis 1).
rmse_per_landmark = np.mean(1000*np.sqrt(mean_squared_errors), axis=1)
print(f"Mean RMSE over folds: {rmse_per_landmark}")

In [None]:
# Row 0 of the result containers belongs to the synthetic-model experiment.
IXI_Euclidean_Geodesic_MAE[0] = mean_errors
# Mean-guess baseline (single-row container).
IXI_Euclidean_Geodesic_MAE_mean[0] = mean_errors_mean

In [None]:
# Mean squared errors of the synthetic model (row 0) and of the mean-guess baseline.
IXI_Euclidean_Geodesic_MSE[0] = mean_squared_errors
IXI_Euclidean_Geodesic_MSE_mean[0] = mean_squared_errors_mean

In [None]:
# Standard deviations of the errors: synthetic model (row 0) and mean-guess baseline.
IXI_Euclidean_Geodesic_std[0] = std_errors
IXI_Euclidean_Geodesic_std_mean[0] = std_errors_mean

In [None]:
# Perpendicular / tangent error components of the synthetic model (row 0).
IXI_Euclidean_Geodesic_perpendicular_distances[0] = perpendicular_distances_mean
IXI_Euclidean_Geodesic_tangent_distances[0] = tangent_distances_mean
# Mean and standard deviation of the |perpendicular / tangent| ratio.
IXI_Euclidean_Geodesic_distances_ratios[0] = distances_ratios_mean
IXI_Euclidean_Geodesic_distances_ratios_std[0] = distances_ratios_std

In [None]:
# Write the synthetic-model results (row 0) to disk as .npy files: the MSE array
# goes to the "Prediction_arrays" folder and the distance-ratio array to the
# "Projected_prediction_ratio_arrays" folder, both under the same file name.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'synthetic_CG'
array_filetype = '.npy'

# File name: <dataset>_<last folder of trained_folder_path>_<experiment>[_ablation_study]_<timestamp>
ablation_tag = "" if not ablation_study else "ablation_study_"
array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_{ablation_tag}{timestamp_string}"

# Manual switch: True writes the arrays, False reads previously written ones back.
write_arrays_to_disk = True

arrays_by_folder = (
    (media_folder+"/Cranium_estimation_paper/Figures/Predictions/Prediction_arrays/"+MLP_folder, IXI_Euclidean_Geodesic_MSE[0, :]),
    (media_folder+"/Cranium_estimation_paper/Figures/Predictions/Projected_prediction_ratio_arrays/", IXI_Euclidean_Geodesic_distances_ratios[0, :]),
)

for array_folder, array_to_save in arrays_by_folder:
    array_path = array_folder + array_filename + array_filetype

    if write_arrays_to_disk:
        with open(array_path, 'wb') as array_file:
            np.save(array_file, array_to_save)
    else:
        with open(array_path, 'rb') as array_file:
            array_to_save = np.load(array_file)

#### ADNI model - test

In [None]:
# Evaluate the MLPs stored under trained_folder_path+"ADNI_ALL/" (one model per
# selected EEG 10-20 landmark) on every cross-validation fold, without any further
# training (mode='test_only'). The test features are standardised with the scaler
# that was saved next to each model.
#
# Per landmark and fold the cell records, in arrays of shape (n_landmarks, n_splits):
#   * mean_errors / mean_squared_errors / std_errors: Euclidean prediction error
#     after mapping predictions and targets back with transform_to_original_space;
#   * *_mean variants: the same statistics for a baseline that always predicts the
#     mean training target;
#   * perpendicular / tangent error components relative to the skin normal and the
#     statistics of their ratio.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
validation_losses = []
test_losses = []

pretrained_models_path = trained_folder_path+"ADNI_ALL/"

# --- Input features -------------------------------------------------------------
# The feature matrices do not depend on the fold or on the predicted landmark, so
# they are built once here; only the row selection (valid_rows) happens in the loop.
coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)
coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

# Rows of the transposed frame come in consecutive triplets (one triplet per
# subject); regroup them into one row per subject laid out as
# [feature_0 triplet, feature_1 triplet, ...].
stacked_feature_coordinates = np.array(coordinates_features_data_subjects_coordinates, dtype=float)
subjects_in_frame = stacked_feature_coordinates.shape[0] // 3
features_in_frame = stacked_feature_coordinates.shape[1]
all_subjects_coordinates_X_array = (
    stacked_feature_coordinates[:3*subjects_in_frame, :]
    .reshape(subjects_in_frame, 3, features_in_frame)
    .transpose(0, 2, 1)
    .reshape(subjects_in_frame, 3*features_in_frame)
)

# Geodesic features, one row per column of skin_geodesic_distances_df: the first
# two distances as they are and the sum of the remaining ones. A zero marks a
# missing value (KNNImputer below uses missing_values=0), so the sum is zeroed as
# soon as one of its terms is missing.
geodesic_features_data = skin_geodesic_distances_df
all_subjects_geodesic_X_array = np.zeros((geodesic_features_data.shape[1], 3))
for subject_column in range(geodesic_features_data.shape[1]):
    first_distance = geodesic_features_data.iloc[0, subject_column]
    second_distance = geodesic_features_data.iloc[1, subject_column]
    remaining_distances = np.array(geodesic_features_data.iloc[2:, subject_column])

    all_subjects_geodesic_X_array[subject_column, 0] = first_distance if first_distance else 0
    all_subjects_geodesic_X_array[subject_column, 1] = second_distance if second_distance else 0
    all_subjects_geodesic_X_array[subject_column, 2] = np.sum(remaining_distances) if np.all(remaining_distances) else 0

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        most_recent_trained_model_filenames = find_most_recent_trained_files(pretrained_models_path, desired_landmark_name)
        model_filename = most_recent_trained_model_filenames[0]
        scaler_filename = most_recent_trained_model_filenames[1]
        documentation_filename = most_recent_trained_model_filenames[2]

        with open(pretrained_models_path+documentation_filename) as documentation_file:
            documentation = [line.rstrip() for line in documentation_file]

        # --- Targets: coordinates of the landmark to predict, one row per subject ---
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)

        # A subject is usable only when none of its three coordinates is NaN. (The
        # previous test kept every row with at least one non-NaN value, which lets
        # partially missing targets through.)
        valid_coordinates_rows = np.where(~np.isnan(desired_landmark_subjects_coordinates).any(axis=1))[0]
        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
        #__________________________________________________________________________________________________________
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        # --- Features of the valid subjects, split by fold ---
        coordinates_X_array = all_subjects_coordinates_X_array[valid_rows, :]
        geodesic_X_array = all_subjects_geodesic_X_array[valid_rows, :]

        X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
        X_test = np.concatenate((coordinates_X_array[test_indices, :], geodesic_X_array[test_indices, :]), axis=1) + X_perturbations[test_indices, :]

        # Impute zeros (missing values) from roughly 10% of the training samples;
        # at least one neighbour is required, hence the lower bound.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, X_train.shape[0] // 10), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        # Standardise the test features with the scaler stored alongside the model.
        # NOTE(review): X_train is left unscaled in this cell (only X_test is
        # transformed); it is still passed to model_choose_and_predict — confirm the
        # 'test_only' mode does not depend on it.
        X_standard_scaler = joblib.load(pretrained_models_path+scaler_filename) 
        #X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        #print(repr(np.std(X_train, axis=0).reshape(-1, 3)))
        #print(repr(np.std(X_test, axis=0).reshape(-1, 3)))

        # --- Load the stored weights and predict the test fold ---
        mlp = MLP_nn(X_train.shape[1], output_size)
        mlp.load_state_dict(change_keys(torch.load(pretrained_models_path+model_filename)))

        for param in mlp.parameters():
            param.requires_grad=True

        predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        test_losses.append(test_loss)

        # --- Map predictions and targets back with transform_to_original_space ---
        predictions_cpu = predictions.to("cpu")
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions_cpu[i, :]
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i, (current_y, subject_index) in enumerate(zip(y_train, train_indices)):
            y_train_original_space[i, :] = transform_to_original_space(current_y, subject_index).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i, (current_y, subject_index) in enumerate(zip(y_test, test_indices)):
            y_test_original_space[i, :] = transform_to_original_space(current_y, subject_index).squeeze()

        # --- Error statistics of the model (printed RMSE is multiplied by 1000) ---
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # --- Baseline: always predict the mean training target ---
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        # --- Split each error into components perpendicular / tangent to the skin ---
        perpendicular_distances = []
        tangent_distances = []

        # The row of the landmark in skin_normals_df is the same for every subject.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal is selected by the position `i` inside the fold,
            # while `current_subject` is unused. Confirm the column layout of
            # skin_normals_df; if it holds three columns per subject, the index should
            # probably be derived from the subject rather than from `i`.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Subjects whose normal has zero norm are left out.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)
        
    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# RMSE (multiplied by 1000) averaged over all landmarks and folds.
overall_rmse = np.mean(1000*np.sqrt(mean_squared_errors))
print(f"Mean RMSE: {overall_rmse}")

In [None]:
# One value per landmark: RMSE (multiplied by 1000) averaged over the folds (axis 1).
rmse_per_landmark = np.mean(1000*np.sqrt(mean_squared_errors), axis=1)
print(f"Mean RMSE over folds: {rmse_per_landmark}")

In [None]:
# Row 1 of the result containers belongs to the ADNI-model experiment.
# The mean absolute errors are stored as well, mirroring the synthetic-model
# section; without this the MAE row of this experiment would stay all zeros.
IXI_Euclidean_Geodesic_MAE[1, :] = mean_errors
IXI_Euclidean_Geodesic_MSE[1, :] = mean_squared_errors

In [None]:
# Standard deviation of the prediction errors of the ADNI model (row 1).
IXI_Euclidean_Geodesic_std[1] = std_errors

In [None]:
# Perpendicular / tangent error components of the ADNI model (row 1).
IXI_Euclidean_Geodesic_perpendicular_distances[1] = perpendicular_distances_mean
IXI_Euclidean_Geodesic_tangent_distances[1] = tangent_distances_mean
# Mean and standard deviation of the |perpendicular / tangent| ratio.
IXI_Euclidean_Geodesic_distances_ratios[1] = distances_ratios_mean
IXI_Euclidean_Geodesic_distances_ratios_std[1] = distances_ratios_std

In [None]:
# Write the ADNI-model results (row 1) to disk as .npy files: the MSE array goes
# to the "Prediction_arrays" folder and the distance-ratio array to the
# "Projected_prediction_ratio_arrays" folder, both under the same file name.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'other_dataset_CG'
array_filetype = '.npy'

# File name: <dataset>_<last folder of trained_folder_path>_<experiment>[_ablation_study]_<timestamp>
ablation_tag = "" if not ablation_study else "ablation_study_"
array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_{ablation_tag}{timestamp_string}"

# Manual switch: True writes the arrays, False reads previously written ones back.
write_arrays_to_disk = True

arrays_by_folder = (
    (media_folder+"/Cranium_estimation_paper/Figures/Predictions/Prediction_arrays/"+MLP_folder, IXI_Euclidean_Geodesic_MSE[1, :]),
    (media_folder+"/Cranium_estimation_paper/Figures/Predictions/Projected_prediction_ratio_arrays/", IXI_Euclidean_Geodesic_distances_ratios[1, :]),
)

for array_folder, array_to_save in arrays_by_folder:
    array_path = array_folder + array_filename + array_filetype

    if write_arrays_to_disk:
        with open(array_path, 'wb') as array_file:
            np.save(array_file, array_to_save)
    else:
        with open(array_path, 'rb') as array_file:
            array_to_save = np.load(array_file)

#### Fine-tune Synthetic model - cv

In [None]:
# Folder that receives the fine-tuned models (written only when save_model is set).
experiment_model_path = f"{trained_folder_path}3DMM_IXI/"
# True: fine-tune the models stored under "3DMM/"; False: only evaluate the ones under "3DMM_IXI/".
train_now = True

In [None]:
torch.manual_seed(0)
torch.cuda.manual_seed(0)

mean_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
perpendicular_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
tangent_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_std = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
validation_losses = []
test_losses = []

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")
    
    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        if train_now:
            model_to_load_folder = "3DMM/"
        else:
            model_to_load_folder = "3DMM_IXI/"

        most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

        model_filename = most_recent_trained_model_filenames[0]
        scaler_filename = most_recent_trained_model_filenames[1]
        documentation_filename = most_recent_trained_model_filenames[2]

        with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
            documentation = [line.rstrip() for line in documentation_file]

        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        if np.where(np.isnan(desired_landmark_subjects_coordinates)==True)[0].size>0:
            valid_coordinates_rows = np.unique(np.where(np.isnan(desired_landmark_subjects_coordinates)==False)[0])
        else:
            valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
        #__________________________________________________________________________________________________________
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)

        coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
        coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        coordinates_X_array = np.zeros((int(coordinates_features_data_subjects_coordinates.shape[0]/output_size), output_size*len(coordinates_features_to_use)))

        for i in range(coordinates_X_array.shape[0]):
            coordinates_X_array[i, :] = np.array(coordinates_features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
        coordinates_X_array = coordinates_X_array[valid_rows, :]

        geodesic_features_data = skin_geodesic_distances_df
        geodesic_X_array = np.zeros((geodesic_features_data.shape[1], 3))

        for i in range(geodesic_features_data.shape[1]):
            if geodesic_features_data.iloc[0, i]:
                geodesic_X_array[i, 0] = geodesic_features_data.iloc[0, i]
            else:
                geodesic_X_array[i, 0] = 0

            if geodesic_features_data.iloc[1, i]:
                geodesic_X_array[i, 1] = geodesic_features_data.iloc[1, i]
            else:
                geodesic_X_array[i, 1] = 0

            if np.all(np.array(geodesic_features_data.iloc[2:, i])):
                geodesic_X_array[i, 2] = np.sum(np.array(geodesic_features_data.iloc[2:, i]))
            else:
                geodesic_X_array[i, 2] = 0

        geodesic_X_array = geodesic_X_array[valid_rows, :]

        X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
        X_test = np.concatenate((coordinates_X_array[test_indices, :], geodesic_X_array[test_indices, :]), axis=1) + X_perturbations[test_indices, :]

        X_imputer = KNNImputer(missing_values=0, n_neighbors=int(X_train.shape[0]/10), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        #print(repr(np.std(X_train, axis=0).reshape(-1, 3)))
        #print(repr(np.std(X_test, axis=0).reshape(-1, 3)))

        mlp = MLP_nn(X_train.shape[1], output_size)
        mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

        if 1:
            for i, param in enumerate(mlp.parameters()):
                param.requires_grad=True

        if train_now:
            predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='fine_tune', n_jobs_num=1)
            #validation_losses.append(validation_loss)
            test_losses.append(test_loss)
        else:
            predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        if save_model and train_now:
            timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
            timestamp_string = timestamp_string.replace('_2022_', '_22_')

            torch.save(lowest_validation_loss_model.state_dict(), experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)+'_model')
            pickle.dump(X_standard_scaler, open(experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)+'_scaler.pkl', 'wb'))
            documentation = [
                f"predicted_landmark_name: {desired_landmark_name}",
                f"number_of_training_samples: {train_indices.size}",
                f"model: {mlp}",
            ]
            with open(experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)+'_documentation.txt' , "w") as txt_file:
                txt_file.write("\n".join(documentation))

        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        perpendicular_distances = []
        tangent_distances = []

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)
        
    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Average the recorded test-loss curves over folds, producing one row per landmark.
# `test_losses` is read with a stride of len(selected_EEG_10_20_landmark_names):
# entry (landmark + fold * landmark_count) is taken as that landmark's curve for that fold.
landmark_count = len(selected_EEG_10_20_landmark_names)
fold_averaged_curves = []
for landmark_position in range(landmark_count):
    recorded_fold_count = int(len(test_losses) / landmark_count)
    stacked_fold_curves = np.vstack([
        np.array(test_losses[landmark_position + fold_position * landmark_count]).reshape(1, -1)
        for fold_position in range(recorded_fold_count)
    ])
    # Element-wise mean across folds keeps the curve length unchanged.
    fold_averaged_curves.append(np.mean(stacked_fold_curves, axis=0))
landmarks_mean_test_losses = np.vstack(fold_averaged_curves)

In [None]:
# Line plot of the fold-averaged test-loss curve of every landmark, preceded by
# the mean curve over all landmarks.
loss_curve_x = np.arange(landmarks_mean_test_losses.shape[1])

loss_traces = [
    go.Scatter(
        x=loss_curve_x,
        y=np.mean(landmarks_mean_test_losses, axis=0),
        mode='lines',
        name='Mean',
    )
]

for landmark_row, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    loss_traces.append(
        go.Scatter(
            x=np.arange(landmarks_mean_test_losses.shape[1]),
            y=landmarks_mean_test_losses[landmark_row, :],
            mode='lines',
            name=desired_landmark_name,
        )
    )

fig = go.Figure(data=loss_traces)
fig.show()

In [None]:
# Write the fold-averaged test-loss curves to a timestamped .npy file.
# The replace() only shortens the year for timestamps taken in 2022.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'synthetic_fine_tuned_CG'

array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Test_losses_arrays/"+MLP_folder
array_filetype = '.npy'
# Ablation runs get an extra "_ablation_study" tag in front of the timestamp.
array_filename = (
    f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_{timestamp_string}"
    if not ablation_study
    else f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_ablation_study_{timestamp_string}"
)
array_path = array_folder + array_filename + array_filetype

array_to_save = landmarks_mean_test_losses

# Manual toggle: True writes the array, False reads it back from array_path instead.
save_array_now = True
if save_array_now:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

In [None]:
# Copy the current run's mean squared errors into slot 2 of the shared results array.
# NOTE(review): slot 2 presumably identifies this experiment variant among the
# compared ones; the meaning of the slots is defined elsewhere in the notebook, confirm there.
IXI_Euclidean_Geodesic_MSE[2, :] = mean_squared_errors

In [None]:
# Copy the current run's error standard deviations into slot 2 of the shared results array.
# NOTE(review): slot 2 presumably identifies this experiment variant; confirm against
# the cell that allocates IXI_Euclidean_Geodesic_std.
IXI_Euclidean_Geodesic_std[2, :] = std_errors

In [None]:
# Copy the current run's projected-distance statistics into slot 2 of each
# shared results array (same slot as used for the MSE and std arrays).
for shared_results, run_statistics in (
    (IXI_Euclidean_Geodesic_perpendicular_distances, perpendicular_distances_mean),
    (IXI_Euclidean_Geodesic_tangent_distances, tangent_distances_mean),
    (IXI_Euclidean_Geodesic_distances_ratios, distances_ratios_mean),
    (IXI_Euclidean_Geodesic_distances_ratios_std, distances_ratios_std),
):
    shared_results[2, :] = run_statistics

In [None]:
# Write slot 2 of the MSE array and of the perpendicular/tangent ratio array to
# two .npy files that share one timestamped filename but live in different folders.
# The replace() only shortens the year for timestamps taken in 2022.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'synthetic_fine_tuned_CG'

array_filetype = '.npy'
# Ablation runs get an extra "_ablation_study" tag in front of the timestamp.
array_filename = (
    f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_{timestamp_string}"
    if not ablation_study
    else f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_ablation_study_{timestamp_string}"
)

# Manual toggle: True writes the arrays, False reads them back from disk instead.
save_arrays_now = True

# NOTE(review): only the first destination is nested under MLP_folder; confirm
# that the ratio arrays are meant to be stored without that sub-folder.
array_destinations = (
    (media_folder+"/Cranium_estimation_paper/Figures/Predictions/Prediction_arrays/"+MLP_folder, IXI_Euclidean_Geodesic_MSE[2, :]),
    (media_folder+"/Cranium_estimation_paper/Figures/Predictions/Projected_prediction_ratio_arrays/", IXI_Euclidean_Geodesic_distances_ratios[2, :]),
)

for array_folder, array_to_save in array_destinations:
    array_path = array_folder + array_filename + array_filetype
    if save_arrays_now:
        with open(array_path, 'wb') as file:
            np.save(file, array_to_save)
    else:
        with open(array_path, 'rb') as file:
            array_to_save = np.load(file)

#### Fine-tune Synthetic model - all

In [None]:
# Output folder for the models, scalers and documentation files written by the
# fine-tune-on-all-subjects cell that follows.
experiment_model_path = trained_folder_path+"3DMM_IXI_ALL/"
# True: fine-tune starting from the models in "3DMM/";
# False: only reload previously saved models from "3DMM_IXI_ALL/" and run them in test-only mode.
train_now = True

In [None]:
# Fine-tune one pre-trained MLP per EEG 10-20 landmark on all valid subjects at
# once (no train/test split). With train_now False the most recent models from
# "3DMM_IXI_ALL/" are reloaded and run in 'test_only' mode instead.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# One (landmark, fold) table per metric. This cell only resets them; it has no
# test split, so none of them is filled here.
metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
validation_losses = []
#test_losses = []

# The input features do not depend on the predicted landmark, so the
# all-subject feature matrices are built once instead of once per landmark.
coordinate_dimensions = 3  # matches the reshape(-1, 3) applied to the landmark targets below

coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)
coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

# One row per consecutive triple of coordinate columns (presumably one subject
# each): the coordinates of every facial landmark, concatenated landmark by landmark.
all_subjects_coordinates_X = np.zeros((int(coordinates_features_data_subjects_coordinates.shape[0]/coordinate_dimensions), coordinate_dimensions*len(coordinates_features_to_use)))
for i in range(all_subjects_coordinates_X.shape[0]):
    subject_block = coordinates_features_data_subjects_coordinates.iloc[coordinate_dimensions*i:coordinate_dimensions*(i+1), :]
    all_subjects_coordinates_X[i, :] = np.array(subject_block.T).reshape(-1)

# Three geodesic features per column of skin_geodesic_distances_df: rows 0 and 1
# as-is and the sum of rows 2 onward. Falsy (zero) entries leave the feature at
# 0, the value the KNNImputer below is configured to treat as missing.
geodesic_features_data = skin_geodesic_distances_df
all_subjects_geodesic_X = np.zeros((geodesic_features_data.shape[1], 3))
for i in range(geodesic_features_data.shape[1]):
    first_distance = geodesic_features_data.iloc[0, i]
    second_distance = geodesic_features_data.iloc[1, i]
    remaining_distances = np.array(geodesic_features_data.iloc[2:, i])

    all_subjects_geodesic_X[i, 0] = first_distance if first_distance else 0
    all_subjects_geodesic_X[i, 1] = second_distance if second_distance else 0
    all_subjects_geodesic_X[i, 2] = np.sum(remaining_distances) if np.all(remaining_distances) else 0

for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

    # Fine-tuning starts from the "3DMM/" models; otherwise reload from "3DMM_IXI_ALL/".
    model_to_load_folder = "3DMM/" if train_now else "3DMM_IXI_ALL/"

    # NOTE(review): fold_index is not assigned in this cell, so the value left
    # over from a previously executed cross-validation cell is used here, while
    # the files saved below carry no fold index in their names. Confirm that
    # find_most_recent_trained_files resolves the intended model.
    most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

    model_filename = most_recent_trained_model_filenames[0]
    scaler_filename = most_recent_trained_model_filenames[1]
    documentation_filename = most_recent_trained_model_filenames[2]

    with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
        documentation = [line.rstrip() for line in documentation_file]

    desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
    desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
    desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, coordinate_dimensions)

    # Rows holding at least one non-NaN coordinate.
    # NOTE(review): a row with only some NaN coordinates still counts as valid.
    valid_coordinates_rows = np.flatnonzero((~np.isnan(desired_landmark_subjects_coordinates)).any(axis=1))
    valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)

    # Every valid row is used for training; y_test is only a placeholder target.
    y_array = desired_landmark_subjects_coordinates[valid_rows, :]
    train_indices = np.arange(y_array.shape[0])
    y_train = y_array[train_indices, :]
    y_test = np.array([-1, -1, -1]).reshape(1, 3)
    output_size = y_test.shape[1]

    coordinates_X_array = all_subjects_coordinates_X[valid_rows, :]
    geodesic_X_array = all_subjects_geodesic_X[valid_rows, :]

    X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
    X_test = []

    # max(1, ...) keeps n_neighbors valid when fewer than 10 training rows exist.
    X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, int(X_train.shape[0]/10)), weights="uniform")
    X_train = X_imputer.fit_transform(X_train)

    X_standard_scaler = StandardScaler()
    X_train = X_standard_scaler.fit_transform(X_train)

    mlp = MLP_nn(X_train.shape[1], output_size)
    mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

    # Make every parameter trainable.
    for param in mlp.parameters():
        param.requires_grad = True

    if train_now:
        predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='fine_tune', n_jobs_num=1)
        validation_losses.append(validation_loss)
        #test_losses.append(test_loss)
    else:
        predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

    if save_model and train_now:
        # The replace() only shortens the year for timestamps taken in 2022.
        timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
        timestamp_string = timestamp_string.replace('_2022_', '_22_')

        saved_files_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name

        torch.save(lowest_validation_loss_model.state_dict(), saved_files_prefix+'_model')
        # Context manager so the scaler file is flushed and closed right away.
        with open(saved_files_prefix+'_scaler.pkl', 'wb') as scaler_file:
            pickle.dump(X_standard_scaler, scaler_file)
        documentation = [
            f"predicted_landmark_name: {desired_landmark_name}",
            f"number_of_training_samples: {train_indices.size}",
            f"model: {mlp}",
        ]
        with open(saved_files_prefix+'_documentation.txt' , "w") as txt_file:
            txt_file.write("\n".join(documentation))

#### Learn from scratch - cv

In [None]:
# Output folder for the models, scalers and documentation files written by the
# from-scratch cross-validation cells that follow.
experiment_model_path = trained_folder_path+"IXI/"
# True: train new models; False: reload the most recent saved models from "IXI/"
# and run them in test-only mode.
train_now = True

In [None]:
# Train one MLP per EEG 10-20 landmark from scratch with k-fold cross-validation
# (or, when train_now is False, reload the most recent models from "IXI/") and
# collect per-landmark, per-fold error statistics.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# One (landmark, fold) table per metric.
metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
validation_losses = []
test_losses = []

# The input features depend on neither the fold nor the predicted landmark, so
# the all-subject feature matrices are built once instead of once per iteration.
coordinate_dimensions = 3  # matches the reshape(-1, 3) applied to the landmark targets below

coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)
coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

# One row per consecutive triple of coordinate columns (presumably one subject
# each): the coordinates of every facial landmark, concatenated landmark by landmark.
all_subjects_coordinates_X = np.zeros((int(coordinates_features_data_subjects_coordinates.shape[0]/coordinate_dimensions), coordinate_dimensions*len(coordinates_features_to_use)))
for i in range(all_subjects_coordinates_X.shape[0]):
    subject_block = coordinates_features_data_subjects_coordinates.iloc[coordinate_dimensions*i:coordinate_dimensions*(i+1), :]
    all_subjects_coordinates_X[i, :] = np.array(subject_block.T).reshape(-1)

# Three geodesic features per column of skin_geodesic_distances_df: rows 0 and 1
# as-is and the sum of rows 2 onward. Falsy (zero) entries leave the feature at
# 0, the value the KNNImputer below is configured to treat as missing.
geodesic_features_data = skin_geodesic_distances_df
all_subjects_geodesic_X = np.zeros((geodesic_features_data.shape[1], 3))
for i in range(geodesic_features_data.shape[1]):
    first_distance = geodesic_features_data.iloc[0, i]
    second_distance = geodesic_features_data.iloc[1, i]
    remaining_distances = np.array(geodesic_features_data.iloc[2:, i])

    all_subjects_geodesic_X[i, 0] = first_distance if first_distance else 0
    all_subjects_geodesic_X[i, 1] = second_distance if second_distance else 0
    all_subjects_geodesic_X[i, 2] = np.sum(remaining_distances) if np.all(remaining_distances) else 0

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, coordinate_dimensions)

        # Rows holding at least one non-NaN coordinate.
        # NOTE(review): a row with only some NaN coordinates still counts as valid.
        valid_coordinates_rows = np.flatnonzero((~np.isnan(desired_landmark_subjects_coordinates)).any(axis=1))
        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)

        # Fold indices address rows of the valid-row subset, not of the full table.
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        coordinates_X_array = all_subjects_coordinates_X[valid_rows, :]
        geodesic_X_array = all_subjects_geodesic_X[valid_rows, :]

        X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
        X_test = np.concatenate((coordinates_X_array[test_indices, :], geodesic_X_array[test_indices, :]), axis=1) + X_perturbations[test_indices, :]

        # Imputer and scaler are fitted on the training fold only and then applied to the test fold.
        # max(1, ...) keeps n_neighbors valid when fewer than 10 training rows exist.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, int(X_train.shape[0]/10)), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        if train_now:
            mlp = MLP_nn(X_train.shape[1], output_size)

            # Make every parameter trainable.
            for param in mlp.parameters():
                param.requires_grad = True

            predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='train', n_jobs_num=1)
            validation_losses.append(validation_loss)
            test_losses.append(test_loss)
        else:
            model_to_load_folder = "IXI/"
            most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

            model_filename = most_recent_trained_model_filenames[0]
            scaler_filename = most_recent_trained_model_filenames[1]
            documentation_filename = most_recent_trained_model_filenames[2]

            with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
                documentation = [line.rstrip() for line in documentation_file]

            mlp = MLP_nn(X_train.shape[1], output_size)
            mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

            predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        if save_model and train_now:
            # The replace() only shortens the year for timestamps taken in 2022.
            timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
            timestamp_string = timestamp_string.replace('_2022_', '_22_')

            saved_files_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)

            torch.save(lowest_validation_loss_model.state_dict(), saved_files_prefix+'_model')
            # Context manager so the scaler file is flushed and closed right away.
            with open(saved_files_prefix+'_scaler.pkl', 'wb') as scaler_file:
                pickle.dump(X_standard_scaler, scaler_file)
            documentation = [
                f"predicted_landmark_name: {desired_landmark_name}",
                f"number_of_training_samples: {train_indices.size}",
                f"model: {mlp}",
            ]
            with open(saved_files_prefix+'_documentation.txt' , "w") as txt_file:
                txt_file.write("\n".join(documentation))

        # Map predictions and targets back through transform_to_original_space, one row at a time.
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # Euclidean error of the model on the test fold.
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # Baseline: always guess the mean training position.
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        perpendicular_distances = []
        tangent_distances = []

        # The landmark's row in skin_normals_df is the same for every test
        # subject, so it is looked up once per landmark.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal is selected with the position i inside the
            # test fold, not with current_subject, so every fold reads the same
            # leading column triples of skin_normals_df. Confirm whether the
            # subject index (possibly mapped through valid_rows) was intended.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Subjects with an all-zero normal are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)

    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Ablation-study run of the from-scratch k-fold cross-validation: one MLP per
# EEG 10-20 landmark is trained (or, when train_now is False, reloaded from
# "IXI/") and per-landmark, per-fold error statistics are collected.
# NOTE(review): the procedure is the same as the regular from-scratch run, so
# the ablated inputs are presumably configured beforehand (e.g. through
# MRI_facial_landmarks); models are saved under the current experiment_model_path.
print('Ablation Study')
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# One (landmark, fold) table per metric.
metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
validation_losses = []
test_losses = []

# The input features depend on neither the fold nor the predicted landmark, so
# the all-subject feature matrices are built once instead of once per iteration.
coordinate_dimensions = 3  # matches the reshape(-1, 3) applied to the landmark targets below

coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)
coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

# One row per consecutive triple of coordinate columns (presumably one subject
# each): the coordinates of every facial landmark, concatenated landmark by landmark.
all_subjects_coordinates_X = np.zeros((int(coordinates_features_data_subjects_coordinates.shape[0]/coordinate_dimensions), coordinate_dimensions*len(coordinates_features_to_use)))
for i in range(all_subjects_coordinates_X.shape[0]):
    subject_block = coordinates_features_data_subjects_coordinates.iloc[coordinate_dimensions*i:coordinate_dimensions*(i+1), :]
    all_subjects_coordinates_X[i, :] = np.array(subject_block.T).reshape(-1)

# Three geodesic features per column of skin_geodesic_distances_df: rows 0 and 1
# as-is and the sum of rows 2 onward. Falsy (zero) entries leave the feature at
# 0, the value the KNNImputer below is configured to treat as missing.
geodesic_features_data = skin_geodesic_distances_df
all_subjects_geodesic_X = np.zeros((geodesic_features_data.shape[1], 3))
for i in range(geodesic_features_data.shape[1]):
    first_distance = geodesic_features_data.iloc[0, i]
    second_distance = geodesic_features_data.iloc[1, i]
    remaining_distances = np.array(geodesic_features_data.iloc[2:, i])

    all_subjects_geodesic_X[i, 0] = first_distance if first_distance else 0
    all_subjects_geodesic_X[i, 1] = second_distance if second_distance else 0
    all_subjects_geodesic_X[i, 2] = np.sum(remaining_distances) if np.all(remaining_distances) else 0

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, coordinate_dimensions)

        # Rows holding at least one non-NaN coordinate.
        # NOTE(review): a row with only some NaN coordinates still counts as valid.
        valid_coordinates_rows = np.flatnonzero((~np.isnan(desired_landmark_subjects_coordinates)).any(axis=1))
        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)

        # Fold indices address rows of the valid-row subset, not of the full table.
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        coordinates_X_array = all_subjects_coordinates_X[valid_rows, :]
        geodesic_X_array = all_subjects_geodesic_X[valid_rows, :]

        X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
        X_test = np.concatenate((coordinates_X_array[test_indices, :], geodesic_X_array[test_indices, :]), axis=1) + X_perturbations[test_indices, :]

        # Imputer and scaler are fitted on the training fold only and then applied to the test fold.
        # max(1, ...) keeps n_neighbors valid when fewer than 10 training rows exist.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, int(X_train.shape[0]/10)), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        if train_now:
            mlp = MLP_nn(X_train.shape[1], output_size)

            # Make every parameter trainable.
            for param in mlp.parameters():
                param.requires_grad = True

            predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='train', n_jobs_num=1)
            validation_losses.append(validation_loss)
            test_losses.append(test_loss)
        else:
            model_to_load_folder = "IXI/"
            most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

            model_filename = most_recent_trained_model_filenames[0]
            scaler_filename = most_recent_trained_model_filenames[1]
            documentation_filename = most_recent_trained_model_filenames[2]

            with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
                documentation = [line.rstrip() for line in documentation_file]

            mlp = MLP_nn(X_train.shape[1], output_size)
            mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

            predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        if save_model and train_now:
            # The replace() only shortens the year for timestamps taken in 2022.
            timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
            timestamp_string = timestamp_string.replace('_2022_', '_22_')

            saved_files_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)

            torch.save(lowest_validation_loss_model.state_dict(), saved_files_prefix+'_model')
            # Context manager so the scaler file is flushed and closed right away.
            with open(saved_files_prefix+'_scaler.pkl', 'wb') as scaler_file:
                pickle.dump(X_standard_scaler, scaler_file)
            documentation = [
                f"predicted_landmark_name: {desired_landmark_name}",
                f"number_of_training_samples: {train_indices.size}",
                f"model: {mlp}",
            ]
            with open(saved_files_prefix+'_documentation.txt' , "w") as txt_file:
                txt_file.write("\n".join(documentation))

        # Map predictions and targets back through transform_to_original_space, one row at a time.
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # Euclidean error of the model on the test fold.
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # Baseline: always guess the mean training position.
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        perpendicular_distances = []
        tangent_distances = []

        # The landmark's row in skin_normals_df is the same for every test
        # subject, so it is looked up once per landmark.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal is selected with the position i inside the
            # test fold, not with current_subject, so every fold reads the same
            # leading column triples of skin_normals_df. Confirm whether the
            # subject index (possibly mapped through valid_rows) was intended.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Subjects with an all-zero normal are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)

    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Cross-validated experiment: for every fold and every selected EEG 10-20 landmark,
# build the feature matrix (facial-landmark coordinates + geodesic distances), build an
# `MLP_nn`, obtain predictions through `model_choose_and_predict` (training a new model
# or evaluating a previously saved one, depending on `train_now`), and record error
# statistics of the predictions.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Every metric table has one row per landmark and one column per fold.
metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
# Filled in fold-major order (outer loop over folds, inner loop over landmarks),
# and only when `train_now` is true.
validation_losses = []
test_losses = []

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        # ---- Targets: coordinates of the landmark to predict, one row of 3 values per subject ----
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        missing_coordinates_mask = np.isnan(desired_landmark_subjects_coordinates)
        if missing_coordinates_mask.any():
            # NOTE(review): a row counts as valid as soon as ANY of its three coordinates is
            # not NaN, so a partially missing row would be kept - confirm this is intended.
            valid_coordinates_rows = np.unique(np.where(~missing_coordinates_mask)[0])
        else:
            valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

        # Keep only subjects that are also listed in only_valid_score_subjects_rows.
        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
        #__________________________________________________________________________________________________________
        # The fold indices address rows of the valid_rows-filtered arrays.
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        #features_to_use = documentation[2].split(': ')[1][1:-1].split(', ')
        #for i in range (len(coordinates_features_to_use)):
        #    coordinates_features_to_use[i] = coordinates_features_to_use[i].replace("'", "")

        # ---- Coordinate features: flattened coordinates of the facial landmarks, one row per subject ----
        coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)

        coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
        coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        coordinates_X_array = np.zeros((int(coordinates_features_data_subjects_coordinates.shape[0]/output_size), output_size*len(coordinates_features_to_use)))

        # Each group of `output_size` consecutive rows is flattened landmark-by-landmark into one feature row
        # (presumably one group per subject - TODO confirm the column order of skin_coordinates_df).
        for i in range(coordinates_X_array.shape[0]):
            coordinates_X_array[i, :] = np.array(coordinates_features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
        coordinates_X_array = coordinates_X_array[valid_rows, :]

        # ---- Geodesic features: 3 values per column of skin_geodesic_distances_df ----
        # Columns 0 and 1 copy table rows 0 and 1; column 2 is the sum of all remaining rows,
        # or 0 when any of them is zero/falsy.
        geodesic_features_data = skin_geodesic_distances_df
        geodesic_X_array = np.zeros((geodesic_features_data.shape[1], 3))

        for i in range(geodesic_features_data.shape[1]):
            for geodesic_row in (0, 1):
                geodesic_value = geodesic_features_data.iloc[geodesic_row, i]
                geodesic_X_array[i, geodesic_row] = geodesic_value if geodesic_value else 0

            remaining_geodesic_values = np.array(geodesic_features_data.iloc[2:, i])
            geodesic_X_array[i, 2] = np.sum(remaining_geodesic_values) if np.all(remaining_geodesic_values) else 0

        geodesic_X_array = geodesic_X_array[valid_rows, :]

        # NOTE(review): the perturbations are added BEFORE the imputation below, which treats exact
        # zeros as missing; a non-zero perturbation on a missing (zero) entry would hide it from the
        # imputer - confirm X_perturbations is zero at those positions.
        X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
        X_test = np.concatenate((coordinates_X_array[test_indices, :], geodesic_X_array[test_indices, :]), axis=1) + X_perturbations[test_indices, :]

        # Zeros mark missing features. The neighbour count is a tenth of the training set,
        # clamped to at least 1 because KNNImputer rejects n_neighbors=0 (fewer than 10 samples).
        X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, int(X_train.shape[0]/10)), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        # Imputer and scaler are fitted on the training fold only and then applied to the test fold.
        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        #print(repr(np.std(X_train, axis=0).reshape(-1, 3)))
        #print(repr(np.std(X_test, axis=0).reshape(-1, 3)))

        if train_now:
            mlp = MLP_nn(X_train.shape[1], output_size)

            # Make sure every parameter is trainable.
            for param in mlp.parameters():
                param.requires_grad = True

            predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='train', n_jobs_num=1)
            validation_losses.append(validation_loss)
            test_losses.append(test_loss)
        else:
            # Evaluate the most recently saved model for this landmark and fold.
            model_to_load_folder = "IXI/"
            most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

            model_filename = most_recent_trained_model_filenames[0]
            # NOTE(review): the saved scaler file name is looked up but the scaler itself is not
            # loaded; the freshly fitted X_standard_scaler above is used instead.
            scaler_filename = most_recent_trained_model_filenames[1]
            documentation_filename = most_recent_trained_model_filenames[2]

            with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
                documentation = [line.rstrip() for line in documentation_file]

            mlp = MLP_nn(X_train.shape[1], output_size)
            mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

            predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        if save_model and train_now:
            timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
            timestamp_string = timestamp_string.replace('_2022_', '_22_')

            # Common prefix of the three files written for this landmark/fold.
            saved_files_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)

            torch.save(lowest_validation_loss_model.state_dict(), saved_files_prefix+'_model')
            # Context manager so the scaler file is flushed and closed right away
            # (the bare open() used previously leaked the handle).
            with open(saved_files_prefix+'_scaler.pkl', 'wb') as scaler_file:
                pickle.dump(X_standard_scaler, scaler_file)
            documentation = [
                f"predicted_landmark_name: {desired_landmark_name}",
                f"number_of_training_samples: {train_indices.size}",
                f"model: {mlp}",
            ]
            with open(saved_files_prefix+'_documentation.txt' , "w") as txt_file:
                txt_file.write("\n".join(documentation))

        # ---- Map predictions and targets back with transform_to_original_space (per subject index) ----
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # ---- Model errors: Euclidean distance between prediction and target per test subject ----
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # ---- Baseline errors: always guessing the mean training target ----
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        # ---- Split each error with project_distances, using the normal stored in skin_normals_df ----
        perpendicular_distances = []
        tangent_distances = []

        # Row of this landmark in the normals table; it does not depend on the subject,
        # so it is looked up once instead of once per test subject.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal columns are selected with the position `i` inside the test
            # fold, not with `current_subject` (which is otherwise unused). If skin_normals_df holds
            # one column triplet per subject this reads the wrong subject's normal - confirm.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Subjects with an all-zero normal are left out of the projected statistics.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        # A zero tangent distance yields inf/nan here (NumPy only emits a RuntimeWarning).
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)

    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Cross-validated experiment (repeat of the same procedure in its own notebook cell):
# for every fold and every selected EEG 10-20 landmark, build the feature matrix
# (facial-landmark coordinates + geodesic distances), build an `MLP_nn`, obtain
# predictions through `model_choose_and_predict` (training a new model or evaluating a
# previously saved one, depending on `train_now`), and record error statistics.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Every metric table has one row per landmark and one column per fold.
metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
# Filled in fold-major order (outer loop over folds, inner loop over landmarks),
# and only when `train_now` is true.
validation_losses = []
test_losses = []

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        # ---- Targets: coordinates of the landmark to predict, one row of 3 values per subject ----
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        missing_coordinates_mask = np.isnan(desired_landmark_subjects_coordinates)
        if missing_coordinates_mask.any():
            # NOTE(review): a row counts as valid as soon as ANY of its three coordinates is
            # not NaN, so a partially missing row would be kept - confirm this is intended.
            valid_coordinates_rows = np.unique(np.where(~missing_coordinates_mask)[0])
        else:
            valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

        # Keep only subjects that are also listed in only_valid_score_subjects_rows.
        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
        #__________________________________________________________________________________________________________
        # The fold indices address rows of the valid_rows-filtered arrays.
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        #features_to_use = documentation[2].split(': ')[1][1:-1].split(', ')
        #for i in range (len(coordinates_features_to_use)):
        #    coordinates_features_to_use[i] = coordinates_features_to_use[i].replace("'", "")

        # ---- Coordinate features: flattened coordinates of the facial landmarks, one row per subject ----
        coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)

        coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
        coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        coordinates_X_array = np.zeros((int(coordinates_features_data_subjects_coordinates.shape[0]/output_size), output_size*len(coordinates_features_to_use)))

        # Each group of `output_size` consecutive rows is flattened landmark-by-landmark into one feature row
        # (presumably one group per subject - TODO confirm the column order of skin_coordinates_df).
        for i in range(coordinates_X_array.shape[0]):
            coordinates_X_array[i, :] = np.array(coordinates_features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
        coordinates_X_array = coordinates_X_array[valid_rows, :]

        # ---- Geodesic features: 3 values per column of skin_geodesic_distances_df ----
        # Columns 0 and 1 copy table rows 0 and 1; column 2 is the sum of all remaining rows,
        # or 0 when any of them is zero/falsy.
        geodesic_features_data = skin_geodesic_distances_df
        geodesic_X_array = np.zeros((geodesic_features_data.shape[1], 3))

        for i in range(geodesic_features_data.shape[1]):
            for geodesic_row in (0, 1):
                geodesic_value = geodesic_features_data.iloc[geodesic_row, i]
                geodesic_X_array[i, geodesic_row] = geodesic_value if geodesic_value else 0

            remaining_geodesic_values = np.array(geodesic_features_data.iloc[2:, i])
            geodesic_X_array[i, 2] = np.sum(remaining_geodesic_values) if np.all(remaining_geodesic_values) else 0

        geodesic_X_array = geodesic_X_array[valid_rows, :]

        # NOTE(review): the perturbations are added BEFORE the imputation below, which treats exact
        # zeros as missing; a non-zero perturbation on a missing (zero) entry would hide it from the
        # imputer - confirm X_perturbations is zero at those positions.
        X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
        X_test = np.concatenate((coordinates_X_array[test_indices, :], geodesic_X_array[test_indices, :]), axis=1) + X_perturbations[test_indices, :]

        # Zeros mark missing features. The neighbour count is a tenth of the training set,
        # clamped to at least 1 because KNNImputer rejects n_neighbors=0 (fewer than 10 samples).
        X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, int(X_train.shape[0]/10)), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        # Imputer and scaler are fitted on the training fold only and then applied to the test fold.
        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        #print(repr(np.std(X_train, axis=0).reshape(-1, 3)))
        #print(repr(np.std(X_test, axis=0).reshape(-1, 3)))

        if train_now:
            mlp = MLP_nn(X_train.shape[1], output_size)

            # Make sure every parameter is trainable.
            for param in mlp.parameters():
                param.requires_grad = True

            predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='train', n_jobs_num=1)
            validation_losses.append(validation_loss)
            test_losses.append(test_loss)
        else:
            # Evaluate the most recently saved model for this landmark and fold.
            model_to_load_folder = "IXI/"
            most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

            model_filename = most_recent_trained_model_filenames[0]
            # NOTE(review): the saved scaler file name is looked up but the scaler itself is not
            # loaded; the freshly fitted X_standard_scaler above is used instead.
            scaler_filename = most_recent_trained_model_filenames[1]
            documentation_filename = most_recent_trained_model_filenames[2]

            with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
                documentation = [line.rstrip() for line in documentation_file]

            mlp = MLP_nn(X_train.shape[1], output_size)
            mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

            predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        if save_model and train_now:
            timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
            timestamp_string = timestamp_string.replace('_2022_', '_22_')

            # Common prefix of the three files written for this landmark/fold.
            saved_files_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)

            torch.save(lowest_validation_loss_model.state_dict(), saved_files_prefix+'_model')
            # Context manager so the scaler file is flushed and closed right away
            # (the bare open() used previously leaked the handle).
            with open(saved_files_prefix+'_scaler.pkl', 'wb') as scaler_file:
                pickle.dump(X_standard_scaler, scaler_file)
            documentation = [
                f"predicted_landmark_name: {desired_landmark_name}",
                f"number_of_training_samples: {train_indices.size}",
                f"model: {mlp}",
            ]
            with open(saved_files_prefix+'_documentation.txt' , "w") as txt_file:
                txt_file.write("\n".join(documentation))

        # ---- Map predictions and targets back with transform_to_original_space (per subject index) ----
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # ---- Model errors: Euclidean distance between prediction and target per test subject ----
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # ---- Baseline errors: always guessing the mean training target ----
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        # ---- Split each error with project_distances, using the normal stored in skin_normals_df ----
        perpendicular_distances = []
        tangent_distances = []

        # Row of this landmark in the normals table; it does not depend on the subject,
        # so it is looked up once instead of once per test subject.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal columns are selected with the position `i` inside the test
            # fold, not with `current_subject` (which is otherwise unused). If skin_normals_df holds
            # one column triplet per subject this reads the wrong subject's normal - confirm.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Subjects with an all-zero normal are left out of the projected statistics.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        # A zero tangent distance yields inf/nan here (NumPy only emits a RuntimeWarning).
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)

    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Average the recorded test-loss entries over folds, separately for each landmark.
# test_losses is appended to landmark-by-landmark inside each fold, so the entry of
# landmark i in fold j sits at position i + j*n_landmarks.
n_landmarks = len(selected_EEG_10_20_landmark_names)

landmarks_mean_test_losses = []
for i in range(n_landmarks):
    # Number of complete folds present in test_losses.
    n_recorded_folds = int(len(test_losses)/n_landmarks)
    # One row per fold for this landmark.
    i_landmark_test_losses = np.vstack(
        [np.array(test_losses[i+j*n_landmarks]).reshape(1, -1) for j in range(n_recorded_folds)]
    )
    # Mean over folds, position by position.
    landmarks_mean_test_losses.append(np.mean(i_landmark_test_losses, axis=0))

# Final table: one row per landmark.
landmarks_mean_test_losses = np.vstack(landmarks_mean_test_losses)

In [None]:
# Line plot of the fold-averaged test losses: the mean over landmarks first,
# then one line per landmark.
x_positions = np.arange(landmarks_mean_test_losses.shape[1])

fig = go.Figure()

mean_trace = go.Scatter(
    x=x_positions,
    y=np.mean(landmarks_mean_test_losses, axis=0),
    mode='lines',
    name='Mean',
)
fig.add_trace(mean_trace)

for i, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    landmark_trace = go.Scatter(
        x=x_positions,
        y=landmarks_mean_test_losses[i, :],
        mode='lines',
        name=desired_landmark_name,
    )
    fig.add_trace(landmark_trace)

fig.show()

In [None]:
# Store the fold-averaged test-loss table as a timestamped .npy file.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'same_dataset_CG'

array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Test_losses_arrays/"+MLP_folder

# Ablation runs carry an extra tag between the experiment type and the timestamp.
ablation_tag = "_ablation_study" if ablation_study else ""
array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}{ablation_tag}_{timestamp_string}"
array_filetype = '.npy'

array_path = array_folder + array_filename + array_filetype

array_to_save = landmarks_mean_test_losses

# Manual switch: 1 writes the array to disk, anything falsy reads it back instead.
if 1:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

In [None]:
# RMSE (scaled by 1000) averaged over every landmark and fold of the last run.
print(f"Mean RMSE: {(1000*np.sqrt(mean_squared_errors)).mean()}")

In [None]:
# Per-landmark RMSE (scaled by 1000): axis=1 averages across the fold columns.
print(f"Mean RMSE over folds: {(1000*np.sqrt(mean_squared_errors)).mean(axis=1)}")

In [None]:
# Same statistic as the plain "Mean RMSE" cell; only the label differs.
# The value comes from whichever run last filled mean_squared_errors.
print(f"Mean RMSE seed=4: {(1000*np.sqrt(mean_squared_errors)).mean()}")

In [None]:
# Per-landmark RMSE (scaled by 1000, averaged across folds) under the "seed=4" label.
# The value comes from whichever run last filled mean_squared_errors.
print(f"Mean RMSE over folds seed=4: {(1000*np.sqrt(mean_squared_errors)).mean(axis=1)}")

In [None]:
# Overall RMSE (scaled by 1000) under the "MLP2" label.
# The value comes from whichever run last filled mean_squared_errors.
print(f"Mean RMSE MLP2: {(1000*np.sqrt(mean_squared_errors)).mean()}")

In [None]:
# Per-landmark RMSE (scaled by 1000, averaged across folds) under the "MLP2" label.
# The value comes from whichever run last filled mean_squared_errors.
print(f"Mean RMSE over folds MLP2: {(1000*np.sqrt(mean_squared_errors)).mean(axis=1)}")

In [None]:
# Overall RMSE (scaled by 1000) under the "Ablation Study" label.
# The value comes from whichever run last filled mean_squared_errors.
print(f"Ablation Study Mean RMSE: {(1000*np.sqrt(mean_squared_errors)).mean()}")

In [None]:
# Per-landmark RMSE (scaled by 1000, averaged across folds) under the "Ablation Study" label.
# The value comes from whichever run last filled mean_squared_errors.
print(f"Ablation Study Mean RMSE over folds: {(1000*np.sqrt(mean_squared_errors)).mean(axis=1)}")

In [None]:
# Copy the landmark-by-fold MSE table of the last run into entry 3 of the results container.
# NOTE(review): what entry 3 stands for is not visible here - confirm against the cell
# that allocates IXI_Euclidean_Geodesic_MSE.
IXI_Euclidean_Geodesic_MSE[3, :] = mean_squared_errors

In [None]:
# Copy the landmark-by-fold standard deviations of the prediction errors into entry 3.
# NOTE(review): what entry 3 stands for is not visible here - confirm against the cell
# that allocates IXI_Euclidean_Geodesic_std.
IXI_Euclidean_Geodesic_std[3, :] = std_errors

In [None]:
# Copy the projected-error statistics of the last run (landmark-by-fold tables) into
# entry 3 of their respective containers: mean perpendicular distance, mean tangent
# distance, and mean / standard deviation of the |perpendicular/tangent| ratio.
# NOTE(review): what entry 3 stands for is not visible here - confirm against the
# cell that allocates these containers.
IXI_Euclidean_Geodesic_perpendicular_distances[3, :] = perpendicular_distances_mean
IXI_Euclidean_Geodesic_tangent_distances[3, :] = tangent_distances_mean
IXI_Euclidean_Geodesic_distances_ratios[3, :] = distances_ratios_mean
IXI_Euclidean_Geodesic_distances_ratios_std[3, :] = distances_ratios_std

In [None]:
# Store entry 3 of the MSE container and of the distance-ratio container as .npy files
# that share one timestamped file name but live in different folders.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'same_dataset_CG'

# --- 1) MSE table ---
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Prediction_arrays/"+MLP_folder

# Ablation runs carry an extra tag between the experiment type and the timestamp.
ablation_tag = "_ablation_study" if ablation_study else ""
array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}{ablation_tag}_{timestamp_string}"
array_filetype = '.npy'

array_path = array_folder + array_filename + array_filetype

array_to_save = IXI_Euclidean_Geodesic_MSE[3, :]

# Manual switch: 1 writes the array to disk, anything falsy reads it back instead.
if 1:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

# --- 2) Perpendicular/tangent ratio table ---
# NOTE(review): unlike the folder above, this one does not append MLP_folder - confirm
# that is intended.
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Projected_prediction_ratio_arrays/"
array_path = array_folder + array_filename + array_filetype

array_to_save = IXI_Euclidean_Geodesic_distances_ratios[3, :]

# Same manual save/load switch as above.
if 1:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

#### Learn from scratch - all

In [None]:
# Configuration for the "learn from scratch - all" experiment: models are written to
# the "IXI_ALL/" sub-folder, and train_now selects the training path in the cells that read it.
experiment_model_path = f"{trained_folder_path}IXI_ALL/"
train_now = True

In [None]:
# Fix the PyTorch CPU and CUDA seeds before training.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Every metric accumulator is a zero array of shape (selected landmarks, folds).
_metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)

mean_errors = np.zeros(_metrics_shape)
mean_squared_errors = np.zeros(_metrics_shape)
std_errors = np.zeros(_metrics_shape)

mean_errors_mean = np.zeros(_metrics_shape)
mean_squared_errors_mean = np.zeros(_metrics_shape)
std_errors_mean = np.zeros(_metrics_shape)

perpendicular_distances_mean = np.zeros(_metrics_shape)
tangent_distances_mean = np.zeros(_metrics_shape)
distances_ratios_mean = np.zeros(_metrics_shape)
distances_ratios_std = np.zeros(_metrics_shape)

validation_losses = []

# Train one network per selected EEG 10-20 landmark using every valid subject as training data
# (this cell has no held-out split), then optionally write the model weights, the fitted scaler
# and a short documentation file under `experiment_model_path`.
for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

    # Targets: the landmark's coordinate columns, reshaped to rows of 3 values.
    desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
    desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
    desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
    # Keep the rows that contain at least one non-NaN value.
    if np.where(np.isnan(desired_landmark_subjects_coordinates)==True)[0].size>0:
        valid_coordinates_rows = np.unique(np.where(np.isnan(desired_landmark_subjects_coordinates)==False)[0])
    else:
        valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

    valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
    #__________________________________________________________________________________________________________
    y_array = desired_landmark_subjects_coordinates[valid_rows, :]
    train_indices = np.arange(y_array.shape[0])
    y_train = y_array[train_indices, :]
    # Dummy single-row test target: there is no test set in this cell.
    y_test = np.array([-1, -1, -1]).reshape(1, 3)
    output_size = y_test.shape[1]

    # Coordinate features: the rows of the MRI facial landmarks, flattened to one row per
    # group of `output_size` coordinate columns.
    coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)

    coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
    coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

    coordinates_X_array = np.zeros((int(coordinates_features_data_subjects_coordinates.shape[0]/output_size), output_size*len(coordinates_features_to_use)))

    for i in range(coordinates_X_array.shape[0]):
        coordinates_X_array[i, :] = np.array(coordinates_features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
    coordinates_X_array = coordinates_X_array[valid_rows, :]

    # Geodesic features: three values per column of the distances table (row 0, row 1, and
    # the sum of rows 2 onward). A value of 0 marks "missing" for the KNNImputer below.
    geodesic_features_data = skin_geodesic_distances_df
    geodesic_X_array = np.zeros((geodesic_features_data.shape[1], 3))

    for i in range(geodesic_features_data.shape[1]):
        if geodesic_features_data.iloc[0, i]:
            geodesic_X_array[i, 0] = geodesic_features_data.iloc[0, i]
        else:
            geodesic_X_array[i, 0] = 0

        if geodesic_features_data.iloc[1, i]:
            geodesic_X_array[i, 1] = geodesic_features_data.iloc[1, i]
        else:
            geodesic_X_array[i, 1] = 0

        # The summed feature is only kept when every contributing entry is non-zero.
        if np.all(np.array(geodesic_features_data.iloc[2:, i])):
            geodesic_X_array[i, 2] = np.sum(np.array(geodesic_features_data.iloc[2:, i]))
        else:
            geodesic_X_array[i, 2] = 0

    geodesic_X_array = geodesic_X_array[valid_rows, :]

    # NOTE(review): perturbations are added before imputation, so in the ablation study a
    # zero ("missing") coordinate feature becomes non-zero and is no longer imputed - confirm intended.
    X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
    X_test = []

    X_imputer = KNNImputer(missing_values=0, n_neighbors=int(X_train.shape[0]/10), weights="uniform")
    X_train = X_imputer.fit_transform(X_train)

    X_standard_scaler = StandardScaler()
    X_train = X_standard_scaler.fit_transform(X_train)

    if train_now:
        mlp = MLP_nn(X_train.shape[1], output_size)

        # Make sure every parameter is trainable.
        for param in mlp.parameters():
            param.requires_grad = True

        trained_model = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='train', n_jobs_num=1, num_of_epochs=num_of_epochs)
    else:
        model_to_load_folder = "IXI/"
        # NOTE(review): `fold_index` is not set anywhere in this cell, so this call relies on a
        # value left over from a previously executed cell - confirm which fold is intended.
        most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

        model_filename = most_recent_trained_model_filenames[0]
        scaler_filename = most_recent_trained_model_filenames[1]
        documentation_filename = most_recent_trained_model_filenames[2]

        with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
            documentation = [line.rstrip() for line in documentation_file]

        mlp = MLP_nn(X_train.shape[1], output_size)
        mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

        predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

    if save_model and train_now:
        timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
        timestamp_string = timestamp_string.replace('_2022_', '_22_')

        torch.save(trained_model.state_dict(), experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_model')
        # Use a context manager so the scaler file is flushed and closed deterministically.
        with open(experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_scaler.pkl', 'wb') as scaler_file:
            pickle.dump(X_standard_scaler, scaler_file)
        documentation = [
            f"predicted_landmark_name: {desired_landmark_name}",
            f"number_of_training_samples: {train_indices.size}",
            f"model: {mlp}",
        ]
        with open(experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_documentation.txt' , "w") as txt_file:
            txt_file.write("\n".join(documentation))

# ADNI

## Definitions

In [None]:
# Multiplies the ablation-study perturbation magnitudes (annotated there in mm) before they are
# added to the features; presumably converts mm to the dataset's length unit - confirm.
registration_scale_factor = 0.001

In [None]:
# The dataset workbook is read from datasets_folder + current_dataset_name + '/' + dataset_filename.
datasets_folder = media_folder+"/MRI_datasets/"
current_dataset_name = 'ADNI'
dataset_filename = 'Dataset_Chamfer.xlsx'

In [None]:
# Open the dataset workbook and list its sheets.
current_subject_dataframe = pd.ExcelFile(datasets_folder+current_dataset_name+'/'+dataset_filename)
current_sheet_names = current_subject_dataframe.sheet_names
current_num_of_sheets = len(current_sheet_names)

# Position of each required sheet. `list.index` returns the first match and raises a
# ValueError naming the missing sheet if the workbook does not contain it.
skin_coordinates_index = current_sheet_names.index('Skin coordinates')
skin_normals_index = current_sheet_names.index('Skin normals')
skin_geodesic_distances_index = current_sheet_names.index('Skin distances')
inverse_matrices_index = current_sheet_names.index('Inverse transformations')
stats_index = current_sheet_names.index('Stats')

In [None]:
# Read each required sheet of the dataset workbook, using its first column as the row index.
_dataset_workbook_path = datasets_folder+current_dataset_name+'/'+dataset_filename

skin_coordinates_df = pd.read_excel(_dataset_workbook_path, sheet_name=skin_coordinates_index, index_col=0)
skin_normals_df = pd.read_excel(_dataset_workbook_path, sheet_name=skin_normals_index, index_col=0)
skin_geodesic_distances_df = pd.read_excel(_dataset_workbook_path, sheet_name=skin_geodesic_distances_index, index_col=0)
inverse_transformations_df = pd.read_excel(_dataset_workbook_path, sheet_name=inverse_matrices_index, index_col=0)
stats_df = pd.read_excel(_dataset_workbook_path, sheet_name=stats_index, index_col=0)

In [None]:
# Positions of the columns whose name does not contain 'indices' (the coordinate columns).
skin_coordinates_columns_names = list(skin_coordinates_df.columns)
only_coordinates_columns_indices = [
    column_position
    for column_position, column_name in enumerate(skin_coordinates_columns_names)
    if 'indices' not in column_name
]

In [None]:
# Choose which subject rows to keep.
# Branch 1 (active): keep subjects whose first `num_of_light_landmarks` landmarks all lie within
# `max_euclidean_distance` of that landmark's mean position across subjects.
# Branch 2: keep a fraction of subjects ranked by 'unique_correspondence_final_loss'.
if 1:
    max_euclidean_distance = 75e-3 # that's a lot

    relevant_indices = []
    for desired_landmark_index, desired_landmark_name in enumerate(skin_coordinates_df.index[:num_of_light_landmarks]):
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        # Keep the rows that contain at least one non-NaN value.
        if np.where(np.isnan(desired_landmark_subjects_coordinates)==True)[0].size>0:
            valid_coordinates_rows = np.unique(np.where(np.isnan(desired_landmark_subjects_coordinates)==False)[0])
        else:
            valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

        valid_rows = valid_coordinates_rows#np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)

        desired_landmark_coordinates_mean = np.mean(desired_landmark_subjects_coordinates[valid_rows, :], axis=0)
        euclidean_distances = np.linalg.norm(desired_landmark_subjects_coordinates[valid_rows, :]-desired_landmark_coordinates_mean, axis=1)
        # `np.where` yields positions inside the `valid_rows` subset; map them back to row
        # indices of the full table so that results of different landmarks (which may have
        # dropped different rows) can be intersected consistently.
        desired_landmark_relevant_indices = valid_rows[np.where(euclidean_distances<max_euclidean_distance)[0]]
        relevant_indices.append(desired_landmark_relevant_indices)

    # A subject is kept only if it passed the distance test for every landmark.
    only_valid_score_subjects_rows = relevant_indices[0]
    for desired_landmark_relevant_indices in relevant_indices:
        only_valid_score_subjects_rows = np.intersect1d(desired_landmark_relevant_indices, only_valid_score_subjects_rows)
else:
    # score_ratio_threshold = 1 keeps every subject; lower it to keep only the lowest-loss fraction.
    score_ratio_threshold = 1
    only_valid_score_subjects_rows = np.sort(np.argsort(stats_df.loc['unique_correspondence_final_loss', :].values)[:int(score_ratio_threshold*stats_df.shape[1])])
    #score_threshold = 2
    #only_valid_score_subjects_rows = np.where(stats_df.loc['unique_correspondence_final_loss', :].values<score_threshold)[0]

In [None]:
# Path of the stored subject-name list for the current dataset.
array_folder = datasets_folder+current_dataset_name+'/'
array_filename = 'chamfer_distance_subjects_names' # only_valid_score_subjects_names / chamfer_distance_subjects_names
array_filetype = '.npy'
array_path = f"{array_folder}{array_filename}{array_filetype}"

# Manual toggle: 1 overwrites the stored list with the names currently in memory,
# 0 (active) loads the stored list.
if 0:
    with open(array_path, 'wb') as file:
        np.save(file, only_valid_score_subject_names)
else:
    # allow_pickle=True unpickles the file contents: only load files from a trusted source.
    with open(array_path, 'rb') as file:
        only_valid_score_subject_names = np.load(file, allow_pickle=True)

In [None]:
# Every 4th column name of the coordinates table, starting with the first; the lookup below
# expects these to be '<subject name>_indices'.
all_subject_names = np.array(skin_coordinates_df.columns[0::4])

# Position of each selected subject among those columns (first match; fails if a name is absent).
only_valid_score_subjects_rows = np.array([
    np.where(all_subject_names==f'{current_name}_indices')[0][0]
    for current_name in only_valid_score_subject_names
])

# Rebuild the name list from the table, dropping the 8-character '_indices' suffix.
only_valid_score_subject_names = np.array([
    current_subject_name[:-8]
    for current_subject_name in all_subject_names[only_valid_score_subjects_rows]
])

## Predictions

In [None]:
# Cross-validation and model settings for the prediction cells.
n_splits = 5
ablation_study = False
MLP_nn = MLP_3 # MLP_2 / MLP_3 / MLP_4

# Sub-folder matching the chosen architecture; anything that is not MLP_2 or MLP_3 maps to 'MLP_4/'.
if MLP_nn==MLP_2:
    MLP_folder = 'MLP_2/'
else:
    MLP_folder = 'MLP_3/' if MLP_nn==MLP_3 else 'MLP_4/'

In [None]:
# Build the additive feature perturbations: one row per selected subject, 3 columns per facial
# landmark, plus 3 trailing zero columns (no perturbation for the last 3 features).
_subject_count = only_valid_score_subjects_rows.size
_facial_landmark_count = len(MRI_facial_landmarks)

if not ablation_study:
    # No ablation: every perturbation is zero.
    X_perturbations = np.zeros((_subject_count, 3*_facial_landmark_count))
else:
    added_facial_landmarks_noise_norm = 1 #mm

    # Random directions: normalised Gaussian samples.
    # https://stackoverflow.com/questions/6283080/random-unit-vector-in-multi-dimensional-space
    unnormalized_perturbations = np.random.normal(loc=0, scale=1, size=(int(_subject_count*_facial_landmark_count), 3))
    unnormalized_perturbations_magnitudes = np.linalg.norm(unnormalized_perturbations, axis=1)
    normalized_perturbations = unnormalized_perturbations/unnormalized_perturbations_magnitudes.reshape(-1, 1)

    # Random magnitudes drawn around the requested noise norm.
    perturbation_magnitudes = np.random.normal(loc=added_facial_landmarks_noise_norm, scale=0.5, size=int(_subject_count*_facial_landmark_count)).reshape(-1, 1)

    perturbations = normalized_perturbations*1*registration_scale_factor*perturbation_magnitudes

    X_perturbations = perturbations.reshape(-1, _facial_landmark_count*3)

_unperturbed_tail = np.zeros((_subject_count, 3))
X_perturbations = np.concatenate((X_perturbations, _unperturbed_tail), axis=1)

In [None]:
# Seed NumPy's global generator before building the shuffled K-fold split.
np.random.seed(1)

kfold = KFold(n_splits=n_splits, shuffle=True)
kfold_train_indices = []
kfold_test_indices = []

# Split positions 0..(number of selected subjects - 1) and remember each fold's index arrays.
_subject_positions = np.arange(only_valid_score_subjects_rows.size)
for train_indices, test_indices in kfold.split(_subject_positions):
    kfold_train_indices.append(train_indices)
    kfold_test_indices.append(test_indices)

### Euclidean coordinates

In [None]:
# Root folder of the trained coordinate models for the chosen architecture; the prediction cells
# append an experiment sub-folder such as "3DMM/" or "IXI_ALL/".
trained_folder_path = media_folder+"/3DMM/Trained_models/pytorch_MLP/Coordinates/"+MLP_folder

In [None]:
# Training cells write their model files to disk only when this flag (and `train_now`) is true.
save_model = False

In [None]:
# NOTE(review): neither value is referenced by the cells visible in this section - confirm they are still needed.
chosen_features_set_name = 'MRI_facial_landmarks'
#chosen_features_set_index = [idx for idx, key in enumerate(list(feature_sets.items()) ) if key[0] == chosen_features_set_name][0]
desired_decimation_percentage = str(100)

In [None]:
# Result containers for the ADNI experiments.
# Per-experiment arrays have 4 slots; the "_mean" arrays have a single slot.
# The remaining axes are (selected landmarks, folds).
_eeg_landmark_count = len(selected_EEG_10_20_landmark_names)
_per_experiment_shape = (4, _eeg_landmark_count, n_splits)
_single_slot_shape = (1, _eeg_landmark_count, n_splits)

ADNI_Euclidean_MAE = np.zeros(_per_experiment_shape)
ADNI_Euclidean_MAE_mean = np.zeros(_single_slot_shape)

ADNI_Euclidean_MSE = np.zeros(_per_experiment_shape)
ADNI_Euclidean_MSE_mean = np.zeros(_single_slot_shape)

ADNI_Euclidean_std = np.zeros(_per_experiment_shape)
ADNI_Euclidean_std_mean = np.zeros(_single_slot_shape)

ADNI_Euclidean_perpendicular_distances = np.zeros(_per_experiment_shape)
ADNI_Euclidean_tangent_distances = np.zeros(_per_experiment_shape)
ADNI_Euclidean_distances_ratios = np.zeros(_per_experiment_shape)
ADNI_Euclidean_distances_ratios_std = np.zeros(_per_experiment_shape)

#### Synthetic model - test

In [None]:
# Fix the PyTorch CPU and CUDA seeds before evaluation.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Every metric accumulator is a zero array of shape (selected landmarks, folds).
_metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)

# Errors of the model predictions.
mean_errors = np.zeros(_metrics_shape)
mean_squared_errors = np.zeros(_metrics_shape)
std_errors = np.zeros(_metrics_shape)

# Errors of the "mean of the training targets" baseline.
mean_errors_mean = np.zeros(_metrics_shape)
mean_squared_errors_mean = np.zeros(_metrics_shape)
std_errors_mean = np.zeros(_metrics_shape)

# Decomposition of the prediction error relative to the skin normal.
perpendicular_distances_mean = np.zeros(_metrics_shape)
tangent_distances_mean = np.zeros(_metrics_shape)
distances_ratios_mean = np.zeros(_metrics_shape)
distances_ratios_std = np.zeros(_metrics_shape)

validation_losses = []
test_losses = []

# Evaluate the most recent models stored under "3DMM/" (the "synthetic model" experiment) on each
# cross-validation test fold, one model per selected EEG 10-20 landmark. For every
# (landmark, fold) pair the loop records prediction errors, the errors of a
# "mean of the training targets" baseline, and the split of the prediction error into
# components perpendicular and tangent to the skin normal.
for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+"3DMM/", desired_landmark_name)
        model_filename = most_recent_trained_model_filenames[0]
        scaler_filename = most_recent_trained_model_filenames[1]
        documentation_filename = most_recent_trained_model_filenames[2]

        with open(trained_folder_path+"3DMM/"+documentation_filename) as documentation_file:
            documentation = [line.rstrip() for line in documentation_file]

        # Targets: the landmark's coordinate columns, reshaped to rows of 3 values.
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        # Keep the rows that contain at least one non-NaN value.
        if np.where(np.isnan(desired_landmark_subjects_coordinates)==True)[0].size>0:
            valid_coordinates_rows = np.unique(np.where(np.isnan(desired_landmark_subjects_coordinates)==False)[0])
        else:
            valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
        #__________________________________________________________________________________________________________
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        # Features: the rows of the MRI facial landmarks, flattened to one row per group of
        # `output_size` coordinate columns.
        features_to_use = np.array(MRI_facial_landmarks).astype(str)

        features_data = skin_coordinates_df.loc[features_to_use, :]
        features_data_subjects_coordinates = features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        X_array = np.zeros((int(features_data_subjects_coordinates.shape[0]/output_size), output_size*len(features_to_use)))

        for i in range(X_array.shape[0]):
            X_array[i, :] = np.array(features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
        X_array = X_array[valid_rows, :]

        # The last 3 perturbation columns are dropped: they belong to features not used here.
        X_train = X_array[train_indices, :] + X_perturbations[train_indices, :-3]
        X_test = X_array[test_indices, :] + X_perturbations[test_indices, :-3]

        # Zeros are treated as missing values and imputed from the training fold.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=int(X_train.shape[0]/10), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        # Only the test features are scaled, using the scaler saved with the model.
        X_standard_scaler = joblib.load(trained_folder_path+"3DMM/"+scaler_filename) 
        X_test = X_standard_scaler.transform(X_test)

        mlp = MLP_nn(X_test.shape[1], output_size)
        mlp.load_state_dict(change_keys(torch.load(trained_folder_path+"3DMM/"+model_filename)))

        predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)
        test_losses.append(test_loss)

        # Map predictions and targets back to each subject's original space.
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # Model error statistics for this (landmark, fold).
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # Baseline: always predict the mean of the training targets.
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        perpendicular_distances = []
        tangent_distances = []

        # Row of this landmark in the normals table; it does not depend on the subject, so it is
        # looked up once per landmark.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # `current_subject` indexes the filtered arrays (rows `valid_rows` of the full table),
            # so map it back to the subject's position in the full table before slicing that
            # subject's 3 normal components. Slicing with the loop counter `i` would read the
            # normals of the first subjects in the table for every fold.
            # Assumes the normals table holds 3 consecutive columns per subject, in the same
            # subject order as the coordinates table - TODO confirm.
            current_subject_row = valid_rows[current_subject]
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*current_subject_row:3*(current_subject_row+1)].values

            # Subjects with a zero normal are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)
        
    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Average of the per-(landmark, fold) RMSE values, scaled by 1000 (presumably to millimetres - confirm units).
print(f"Mean RMSE: {np.mean(1000*np.sqrt(mean_squared_errors))}")

In [None]:
# One value per landmark: the RMSE (scaled by 1000) averaged over the fold axis.
print(f"Mean RMSE over folds: {np.mean(1000*np.sqrt(mean_squared_errors), axis=1)}")

In [None]:
# Store this experiment's mean errors (model and mean-guess baseline) in slot 0.
ADNI_Euclidean_MAE[0, :] = mean_errors
ADNI_Euclidean_MAE_mean[0, :] = mean_errors_mean

In [None]:
# Store this experiment's mean squared errors (model and mean-guess baseline) in slot 0.
ADNI_Euclidean_MSE[0, :] = mean_squared_errors
ADNI_Euclidean_MSE_mean[0, :] = mean_squared_errors_mean

In [None]:
# Store this experiment's error standard deviations (model and mean-guess baseline) in slot 0.
ADNI_Euclidean_std[0, :] = std_errors
ADNI_Euclidean_std_mean[0, :] = std_errors_mean

In [None]:
# Store this experiment's perpendicular/tangent error statistics in slot 0.
ADNI_Euclidean_perpendicular_distances[0, :] = perpendicular_distances_mean
ADNI_Euclidean_tangent_distances[0, :] = tangent_distances_mean
ADNI_Euclidean_distances_ratios[0, :] = distances_ratios_mean
ADNI_Euclidean_distances_ratios_std[0, :] = distances_ratios_std

In [None]:
# Timestamp used in the file name; the year is shortened only when it is 2022.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'synthetic'
array_filetype = '.npy'

# File name: dataset, model folder name, experiment type, optional ablation tag, timestamp.
_ablation_tag = "ablation_study_" if ablation_study else ""
array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_{_ablation_tag}{timestamp_string}"

# --- Mean squared errors of slot 0 ---
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Prediction_arrays/"+MLP_folder
array_path = array_folder + array_filename + array_filetype
array_to_save = ADNI_Euclidean_MSE[0, :]

# Manual toggle: 1 = write the array, 0 = read it back.
if 1:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

# --- Perpendicular/tangent distance ratios of slot 0 (same file name, different folder) ---
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Projected_prediction_ratio_arrays/"
array_path = array_folder + array_filename + array_filetype
array_to_save = ADNI_Euclidean_distances_ratios[0, :]

# Manual toggle: 1 = write the array, 0 = read it back.
if 1:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

#### IXI model - test

In [None]:
# Fix the PyTorch CPU and CUDA seeds before evaluation.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Every metric accumulator is a zero array of shape (selected landmarks, folds).
_metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)

# Errors of the model predictions.
mean_errors = np.zeros(_metrics_shape)
mean_squared_errors = np.zeros(_metrics_shape)
std_errors = np.zeros(_metrics_shape)

# Errors of the "mean of the training targets" baseline.
mean_errors_mean = np.zeros(_metrics_shape)
mean_squared_errors_mean = np.zeros(_metrics_shape)
std_errors_mean = np.zeros(_metrics_shape)

# Decomposition of the prediction error relative to the skin normal.
perpendicular_distances_mean = np.zeros(_metrics_shape)
tangent_distances_mean = np.zeros(_metrics_shape)
distances_ratios_mean = np.zeros(_metrics_shape)
distances_ratios_std = np.zeros(_metrics_shape)

validation_losses = []
test_losses = []

# Evaluate the most recent models stored under "IXI_ALL/" (the "IXI model" experiment) on each
# cross-validation test fold, one model per selected EEG 10-20 landmark. For every
# (landmark, fold) pair the loop records prediction errors, the errors of a
# "mean of the training targets" baseline, and the split of the prediction error into
# components perpendicular and tangent to the skin normal.
for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")
    
    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+"IXI_ALL/", desired_landmark_name)
        model_filename = most_recent_trained_model_filenames[0]
        scaler_filename = most_recent_trained_model_filenames[1]
        documentation_filename = most_recent_trained_model_filenames[2]

        with open(trained_folder_path+"IXI_ALL/"+documentation_filename) as documentation_file:
            documentation = [line.rstrip() for line in documentation_file]

        # Targets: the landmark's coordinate columns, reshaped to rows of 3 values.
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        # Keep the rows that contain at least one non-NaN value.
        if np.where(np.isnan(desired_landmark_subjects_coordinates)==True)[0].size>0:
            valid_coordinates_rows = np.unique(np.where(np.isnan(desired_landmark_subjects_coordinates)==False)[0])
        else:
            valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
        #__________________________________________________________________________________________________________
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        # Features: the rows of the MRI facial landmarks, flattened to one row per group of
        # `output_size` coordinate columns.
        features_to_use = np.array(MRI_facial_landmarks).astype(str)

        features_data = skin_coordinates_df.loc[features_to_use, :]
        features_data_subjects_coordinates = features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        X_array = np.zeros((int(features_data_subjects_coordinates.shape[0]/output_size), output_size*len(features_to_use)))

        for i in range(X_array.shape[0]):
            X_array[i, :] = np.array(features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
        X_array = X_array[valid_rows, :]

        # The last 3 perturbation columns are dropped: they belong to features not used here.
        X_train = X_array[train_indices, :] + X_perturbations[train_indices, :-3]
        X_test = X_array[test_indices, :] + X_perturbations[test_indices, :-3]

        # Zeros are treated as missing values and imputed from the training fold.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=int(X_train.shape[0]/10), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        # Only the test features are scaled, using the scaler saved with the model.
        X_standard_scaler = joblib.load(trained_folder_path+"IXI_ALL/"+scaler_filename) 
        X_test = X_standard_scaler.transform(X_test)

        mlp = MLP_nn(X_train.shape[1], output_size)
        mlp.load_state_dict(change_keys(torch.load(trained_folder_path+"IXI_ALL/"+model_filename)))

        # Mark every parameter as requiring gradients (kept from the original cell).
        for param in mlp.parameters():
            param.requires_grad = True

        predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        test_losses.append(test_loss)

        # Map predictions and targets back to each subject's original space.
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # Model error statistics for this (landmark, fold).
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # Baseline: always predict the mean of the training targets.
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        perpendicular_distances = []
        tangent_distances = []

        # Row of this landmark in the normals table; it does not depend on the subject, so it is
        # looked up once per landmark.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # `current_subject` indexes the filtered arrays (rows `valid_rows` of the full table),
            # so map it back to the subject's position in the full table before slicing that
            # subject's 3 normal components. Slicing with the loop counter `i` would read the
            # normals of the first subjects in the table for every fold.
            # Assumes the normals table holds 3 consecutive columns per subject, in the same
            # subject order as the coordinates table - TODO confirm.
            current_subject_row = valid_rows[current_subject]
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*current_subject_row:3*(current_subject_row+1)].values

            # Subjects with a zero normal are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)
        
    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Overall RMSE (scaled by 1000), averaged over every (landmark, fold) entry.
rmse_table = 1000*np.sqrt(mean_squared_errors)
print(f"Mean RMSE: {np.mean(rmse_table)}")

In [None]:
# Per-landmark RMSE (scaled by 1000), averaged across the fold axis (axis 1).
rmse_table = 1000*np.sqrt(mean_squared_errors)
print(f"Mean RMSE over folds: {np.mean(rmse_table, axis=1)}")

In [None]:
# Store the (landmark, fold) MSE table of the experiment that just ran in slot 1
# of the cross-experiment results array (slot 1 is saved as 'other_dataset' below).
ADNI_Euclidean_MSE[1, :] = mean_squared_errors

In [None]:
# Store the (landmark, fold) standard deviations of the prediction errors in slot 1.
ADNI_Euclidean_std[1, :] = std_errors

In [None]:
# Store the projected-error statistics of the experiment that just ran in slot 1:
# mean perpendicular distance, mean tangent distance, and the mean / std of the
# |perpendicular / tangent| ratio, each as a (landmark, fold) table.
ADNI_Euclidean_perpendicular_distances[1, :] = perpendicular_distances_mean
ADNI_Euclidean_tangent_distances[1, :] = tangent_distances_mean
ADNI_Euclidean_distances_ratios[1, :] = distances_ratios_mean
ADNI_Euclidean_distances_ratios_std[1, :] = distances_ratios_std

In [None]:
# Save the 'other_dataset' results: the squared-error table and the projected-distance
# ratio table are written to two folders under the same timestamped filename.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'other_dataset'
array_filetype = '.npy'

# Ablation runs carry an extra tag in the filename.
if ablation_study:
    array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_ablation_study_{timestamp_string}"
else:
    array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_{timestamp_string}"

# --- Squared-error table (slot 1) ---
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Prediction_arrays/"+MLP_folder
array_path = array_folder + array_filename + array_filetype
array_to_save = ADNI_Euclidean_MSE[1, :]

# Manual switch: True writes the array to disk, False reads it back from array_path.
if True:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

# --- Perpendicular/tangent ratio table (slot 1) ---
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Projected_prediction_ratio_arrays/"
array_path = array_folder + array_filename + array_filetype
array_to_save = ADNI_Euclidean_distances_ratios[1, :]

# Same manual switch as above.
if True:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

#### Fine-tune Synthetic model - cv

In [None]:
# True: fine-tune the pretrained models now; False: only evaluate already fine-tuned ones.
train_now = True
# Folder that receives the fine-tuned per-fold models.
experiment_model_path = f"{trained_folder_path}3DMM_ADNI/"

In [None]:
# Fine-tune the pretrained "3DMM/" models on this dataset with k-fold cross-validation
# (or, when train_now is False, re-evaluate the models stored under "3DMM_ADNI/"),
# one model per (fold, landmark), and collect error statistics for every pair.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Every statistic below is a (landmark, fold) table.
mean_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
perpendicular_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
tangent_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_std = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
# Loss histories, appended in fold-major order (outer loop: folds, inner loop: landmarks).
validation_losses = []
test_losses = []

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")
    
    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        # Fine-tuning starts from the weights under "3DMM/"; evaluation-only runs
        # load the weights previously saved under "3DMM_ADNI/".
        if train_now:
            model_to_load_folder = "3DMM/"
        else:
            model_to_load_folder = "3DMM_ADNI/"

        most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

        model_filename = most_recent_trained_model_filenames[0]
        # The stored scaler is not loaded; a fresh scaler is fitted on this fold below.
        scaler_filename = most_recent_trained_model_filenames[1]
        documentation_filename = most_recent_trained_model_filenames[2]

        with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
            documentation = [line.rstrip() for line in documentation_file]

        # Target: the landmark's coordinates, reshaped to one (x, y, z) row per subject.
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        # NOTE(review): a row counts as valid if at least one of its three coordinates
        # is not NaN, so partially-NaN rows are kept - confirm such rows cannot occur.
        if np.where(np.isnan(desired_landmark_subjects_coordinates)==True)[0].size>0:
            valid_coordinates_rows = np.unique(np.where(np.isnan(desired_landmark_subjects_coordinates)==False)[0])
        else:
            valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
        #__________________________________________________________________________________________________________
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        #features_to_use = documentation[2].split(': ')[1][1:-1].split(', ')
        features_to_use = np.array(MRI_facial_landmarks).astype(str)

        features_data = skin_coordinates_df.loc[features_to_use, :]
        features_data_subjects_coordinates = features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        # Features: one row per subject, holding the flattened coordinates of all feature landmarks.
        X_array = np.zeros((int(features_data_subjects_coordinates.shape[0]/output_size), output_size*len(features_to_use)))

        for i in range(X_array.shape[0]):
            X_array[i, :] = np.array(features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
        X_array = X_array[valid_rows, :]

        # The last three perturbation columns are not applied to the features.
        X_train = X_array[train_indices, :] + X_perturbations[train_indices, :-3]
        X_test = X_array[test_indices, :] + X_perturbations[test_indices, :-3]

        # Entries equal to 0 are treated as missing and imputed from the training fold.
        # NOTE(review): perturbations are added before imputation - confirm X_perturbations
        # is zero wherever X_array is zero, otherwise missing entries are no longer detected.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=int(X_train.shape[0]/10), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        # Standardization statistics come from the training fold only.
        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        #print(repr(np.std(X_train, axis=0).reshape(-1, 3)))
        #print(repr(np.std(X_test, axis=0).reshape(-1, 3)))

        mlp = MLP_nn(X_train.shape[1], output_size)
        mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

        # Make every parameter trainable.
        for param in mlp.parameters():
            param.requires_grad=True

        if train_now:
            predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='fine_tune', n_jobs_num=1)
            validation_losses.append(validation_loss)
            test_losses.append(test_loss)
        else:
            predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)


        if save_model and train_now:
            timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
            timestamp_string = timestamp_string.replace('_2022_', '_22_')

            # Model, scaler and documentation share one timestamped prefix per (landmark, fold).
            saved_files_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)

            torch.save(lowest_validation_loss_model.state_dict(), saved_files_prefix+'_model')
            # Context manager so the scaler file is flushed and closed right away.
            with open(saved_files_prefix+'_scaler.pkl', 'wb') as scaler_file:
                pickle.dump(X_standard_scaler, scaler_file)
            documentation = [
                f"predicted_landmark_name: {desired_landmark_name}",
                #f"landmark_names_being_used: {features_to_use}",
                f"number_of_training_samples: {train_indices.size}",
                f"model: {mlp}",
            ]
            with open(saved_files_prefix+'_documentation.txt' , "w") as txt_file:
                txt_file.write("\n".join(documentation))

        # Map predictions and targets back to the original space, subject by subject.
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # Euclidean error of the model on the test fold.
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # Baseline: always predict the mean training-fold position of the landmark.
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        # Split each prediction error into perpendicular and tangent parts relative to
        # the plane through the true landmark with the stored skin normal.
        perpendicular_distances = []
        tangent_distances = []

        # The landmark's row in the normals table is the same for every test subject.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal's columns are chosen by position in the test fold (i),
            # not by subject index (current_subject is unused) - confirm the column layout.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Zero-length normals are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)
        
    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Average each landmark's test-loss history over the recorded folds.
# Entry i + j*num_landmarks of test_losses is read as landmark i in fold j, which
# matches a fold-major fill order (outer loop over folds, inner loop over landmarks).
num_landmarks = len(selected_EEG_10_20_landmark_names)
num_recorded_folds = int(len(test_losses)/num_landmarks)

landmarks_mean_test_losses = []
for i in range(num_landmarks):
    # Stack this landmark's histories, one row per fold.
    i_landmark_test_losses = np.vstack([
        np.array(test_losses[i+j*num_landmarks]).reshape(1, -1)
        for j in range(num_recorded_folds)
    ])
    mean_values = np.mean(i_landmark_test_losses, axis=0)
    landmarks_mean_test_losses.append(mean_values)

# One row per landmark.
landmarks_mean_test_losses = np.vstack(landmarks_mean_test_losses)

In [None]:
# Plot the fold-averaged test-loss history of every landmark, preceded by the
# average over all landmarks.
loss_history_length = landmarks_mean_test_losses.shape[1]

fig = go.Figure()

overall_mean_trace = go.Scatter(
    x=np.arange(loss_history_length),
    y=np.mean(landmarks_mean_test_losses, axis=0),
    mode='lines',
    name='Mean'
)
fig.add_trace(overall_mean_trace)

for i, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    landmark_trace = go.Scatter(
        x=np.arange(loss_history_length),
        y=landmarks_mean_test_losses[i, :],
        mode='lines',
        name=desired_landmark_name
    )
    fig.add_trace(landmark_trace)

fig.show()

In [None]:
# Save the fold-averaged test-loss histories of the 'synthetic_fine_tuned' experiment
# under a timestamped filename.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'synthetic_fine_tuned'
array_filetype = '.npy'

# Ablation runs carry an extra tag in the filename.
if ablation_study:
    array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_ablation_study_{timestamp_string}"
else:
    array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_{timestamp_string}"

array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Test_losses_arrays/"+MLP_folder
array_path = array_folder + array_filename + array_filetype
array_to_save = landmarks_mean_test_losses

# Manual switch: True writes the array to disk, False reads it back from array_path.
if True:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

In [None]:
# Store the (landmark, fold) MSE table of the experiment that just ran in slot 2
# of the cross-experiment results array (slot 2 is saved as 'synthetic_fine_tuned' below).
ADNI_Euclidean_MSE[2, :] = mean_squared_errors

In [None]:
# Display the (landmark, fold) MSE table as the cell output.
mean_squared_errors

In [None]:
# Store the (landmark, fold) standard deviations of the prediction errors in slot 2.
ADNI_Euclidean_std[2, :] = std_errors

In [None]:
# Store the projected-error statistics of the experiment that just ran in slot 2:
# mean perpendicular distance, mean tangent distance, and the mean / std of the
# |perpendicular / tangent| ratio, each as a (landmark, fold) table.
ADNI_Euclidean_perpendicular_distances[2, :] = perpendicular_distances_mean
ADNI_Euclidean_tangent_distances[2, :] = tangent_distances_mean
ADNI_Euclidean_distances_ratios[2, :] = distances_ratios_mean
ADNI_Euclidean_distances_ratios_std[2, :] = distances_ratios_std

In [None]:
# Save the 'synthetic_fine_tuned' results: the squared-error table and the
# projected-distance ratio table are written to two folders under the same
# timestamped filename.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'synthetic_fine_tuned'
array_filetype = '.npy'

# Ablation runs carry an extra tag in the filename.
if ablation_study:
    array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_ablation_study_{timestamp_string}"
else:
    array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_{timestamp_string}"

# --- Squared-error table (slot 2) ---
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Prediction_arrays/"+MLP_folder
array_path = array_folder + array_filename + array_filetype
array_to_save = ADNI_Euclidean_MSE[2, :]

# Manual switch: True writes the array to disk, False reads it back from array_path.
if True:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

# --- Perpendicular/tangent ratio table (slot 2) ---
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Projected_prediction_ratio_arrays/"
array_path = array_folder + array_filename + array_filetype
array_to_save = ADNI_Euclidean_distances_ratios[2, :]

# Same manual switch as above.
if True:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

#### Fine-tune Synthetic model - all

In [None]:
# True: fine-tune the pretrained models now; False: only load already fine-tuned ones.
train_now = True
# Folder that receives the models fine-tuned on all subjects.
experiment_model_path = f"{trained_folder_path}3DMM_ADNI_ALL/"

In [None]:
# Fine-tune the pretrained "3DMM/" models on every valid subject (no held-out fold),
# one model per landmark, and save the result under experiment_model_path.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# The statistic tables are reset here but not filled in this cell (there is no test set).
mean_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
perpendicular_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
tangent_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_std = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
validation_losses = []
#test_losses = []

for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

    # Fine-tuning starts from the weights under "3DMM/"; otherwise the weights
    # previously saved under "3DMM_ADNI_ALL/" are loaded.
    if train_now:
        model_to_load_folder = "3DMM/"
    else:
        model_to_load_folder = "3DMM_ADNI_ALL/"

    # NOTE(review): fold_index is not set in this cell; it carries whatever value an
    # earlier cell left behind (the last fold if a cross-validation cell ran just
    # before) - confirm which fold's pretrained weights are intended here.
    most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

    model_filename = most_recent_trained_model_filenames[0]
    # The stored scaler is not loaded; a fresh scaler is fitted below.
    scaler_filename = most_recent_trained_model_filenames[1]
    documentation_filename = most_recent_trained_model_filenames[2]

    with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
        documentation = [line.rstrip() for line in documentation_file]

    # Target: the landmark's coordinates, reshaped to one (x, y, z) row per subject.
    desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
    desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
    desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
    # NOTE(review): a row counts as valid if at least one of its three coordinates
    # is not NaN, so partially-NaN rows are kept - confirm such rows cannot occur.
    if np.where(np.isnan(desired_landmark_subjects_coordinates)==True)[0].size>0:
        valid_coordinates_rows = np.unique(np.where(np.isnan(desired_landmark_subjects_coordinates)==False)[0])
    else:
        valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

    valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
    #__________________________________________________________________________________________________________
    y_array = desired_landmark_subjects_coordinates[valid_rows, :]
    # Every valid subject is used for training.
    train_indices = np.arange(y_array.shape[0])
    y_train = y_array[train_indices, :]
    # Placeholder target: there is no test set in this experiment.
    y_test = np.array([-1, -1, -1]).reshape(1, 3)
    output_size = y_test.shape[1]
    
    #features_to_use = documentation[2].split(': ')[1][1:-1].split(', ')
    features_to_use = np.array(MRI_facial_landmarks).astype(str)

    features_data = skin_coordinates_df.loc[features_to_use, :]
    features_data_subjects_coordinates = features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

    # Features: one row per subject, holding the flattened coordinates of all feature landmarks.
    X_array = np.zeros((int(features_data_subjects_coordinates.shape[0]/output_size), output_size*len(features_to_use)))

    for i in range(X_array.shape[0]):
        X_array[i, :] = np.array(features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
    X_array = X_array[valid_rows, :]

    # The last three perturbation columns are not applied to the features.
    X_train = X_array[train_indices, :] + X_perturbations[train_indices, :-3]
    # Placeholder features: there is no test set in this experiment.
    X_test = []
    
    # Entries equal to 0 are treated as missing and imputed.
    # NOTE(review): perturbations are added before imputation - confirm X_perturbations
    # is zero wherever X_array is zero, otherwise missing entries are no longer detected.
    X_imputer = KNNImputer(missing_values=0, n_neighbors=int(X_train.shape[0]/10), weights="uniform")
    X_train = X_imputer.fit_transform(X_train)
    #X_test = X_imputer.transform(X_test)
    
    X_standard_scaler = StandardScaler()
    X_train = X_standard_scaler.fit_transform(X_train)
    #X_test = X_standard_scaler.transform(X_test)
    
    #print(repr(np.std(X_train, axis=0).reshape(-1, 3)))
    #print(repr(np.std(X_test, axis=0).reshape(-1, 3)))
    
    mlp = MLP_nn(X_train.shape[1], output_size)
    # Fixed: the path previously began with the undefined name `rained_folder_path`.
    mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

    # Make every parameter trainable.
    for param in mlp.parameters():
        param.requires_grad=True

    if train_now:
        predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='fine_tune', n_jobs_num=1)
        validation_losses.append(validation_loss)
        #test_losses.append(test_loss)
    else:
        predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)


    if save_model and train_now:
        timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
        timestamp_string = timestamp_string.replace('_2022_', '_22_')

        # Model, scaler and documentation share one timestamped prefix per landmark
        # (no fold index, unlike the cross-validation experiments).
        saved_files_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name

        torch.save(lowest_validation_loss_model.state_dict(), saved_files_prefix+'_model')
        # Context manager so the scaler file is flushed and closed right away.
        with open(saved_files_prefix+'_scaler.pkl', 'wb') as scaler_file:
            pickle.dump(X_standard_scaler, scaler_file)
        documentation = [
            f"predicted_landmark_name: {desired_landmark_name}",
            #f"landmark_names_being_used: {features_to_use}",
            f"number_of_training_samples: {train_indices.size}",
            f"model: {mlp}",
        ]
        with open(saved_files_prefix+'_documentation.txt' , "w") as txt_file:
            txt_file.write("\n".join(documentation))

#### Learn from scratch - cv

In [None]:
# True: train new models now; False: only evaluate the models already saved under "ADNI/".
train_now = False
# Folder that receives the per-fold models trained from scratch.
experiment_model_path = f"{trained_folder_path}ADNI/"

In [None]:
# Train MLPs from scratch on this dataset with k-fold cross-validation (or, when
# train_now is False, re-evaluate the models stored under "ADNI/"), one model per
# (fold, landmark), and collect error statistics for every pair.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Every statistic below is a (landmark, fold) table.
mean_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
perpendicular_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
tangent_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_std = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
# Loss histories, appended in fold-major order (outer loop: folds, inner loop: landmarks).
validation_losses = []
test_losses = []

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")
    
    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        # Target: the landmark's coordinates, reshaped to one (x, y, z) row per subject.
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        # NOTE(review): a row counts as valid if at least one of its three coordinates
        # is not NaN, so partially-NaN rows are kept - confirm such rows cannot occur.
        if np.where(np.isnan(desired_landmark_subjects_coordinates)==True)[0].size>0:
            valid_coordinates_rows = np.unique(np.where(np.isnan(desired_landmark_subjects_coordinates)==False)[0])
        else:
            valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
        #__________________________________________________________________________________________________________
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        #features_to_use = documentation[2].split(': ')[1][1:-1].split(', ')
        features_to_use = np.array(MRI_facial_landmarks).astype(str)

        features_data = skin_coordinates_df.loc[features_to_use, :]
        features_data_subjects_coordinates = features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        # Features: one row per subject, holding the flattened coordinates of all feature landmarks.
        X_array = np.zeros((int(features_data_subjects_coordinates.shape[0]/output_size), output_size*len(features_to_use)))

        for i in range(X_array.shape[0]):
            X_array[i, :] = np.array(features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
        X_array = X_array[valid_rows, :]

        # The last three perturbation columns are not applied to the features.
        X_train = X_array[train_indices, :] + X_perturbations[train_indices, :-3]
        X_test = X_array[test_indices, :] + X_perturbations[test_indices, :-3]

        # Entries equal to 0 are treated as missing and imputed from the training fold.
        # NOTE(review): perturbations are added before imputation - confirm X_perturbations
        # is zero wherever X_array is zero, otherwise missing entries are no longer detected.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=int(X_train.shape[0]/10), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        # Standardization statistics come from the training fold only.
        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        #print(repr(np.std(X_train, axis=0).reshape(-1, 3)))
        #print(repr(np.std(X_test, axis=0).reshape(-1, 3)))

        if train_now:
            # Fresh, untrained network.
            mlp = MLP_nn(X_train.shape[1], output_size)
            mlp = nn.DataParallel(mlp, device_ids=[0])

            # Make every parameter trainable.
            for param in mlp.parameters():
                param.requires_grad=True

            predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='train', n_jobs_num=1, num_of_epochs=num_of_epochs)
            validation_losses.append(validation_loss)
            test_losses.append(test_loss)
        else:
            # Evaluation only: load the most recent model saved for this (landmark, fold).
            model_to_load_folder = "ADNI/"
            most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

            model_filename = most_recent_trained_model_filenames[0]
            # The stored scaler is not loaded; the scaler fitted above is used.
            scaler_filename = most_recent_trained_model_filenames[1]
            documentation_filename = most_recent_trained_model_filenames[2]

            with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
                documentation = [line.rstrip() for line in documentation_file]

            mlp = MLP_nn(X_train.shape[1], output_size)
            mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

            predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        if save_model and train_now:
            timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
            timestamp_string = timestamp_string.replace('_2022_', '_22_')

            # Model, scaler and documentation share one timestamped prefix per (landmark, fold).
            saved_files_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)

            torch.save(lowest_validation_loss_model.state_dict(), saved_files_prefix+'_model')
            # Context manager so the scaler file is flushed and closed right away.
            with open(saved_files_prefix+'_scaler.pkl', 'wb') as scaler_file:
                pickle.dump(X_standard_scaler, scaler_file)
            documentation = [
                f"predicted_landmark_name: {desired_landmark_name}",
                #f"landmark_names_being_used: {features_to_use}",
                f"number_of_training_samples: {train_indices.size}",
                f"model: {mlp}",
            ]
            with open(saved_files_prefix+'_documentation.txt' , "w") as txt_file:
                txt_file.write("\n".join(documentation))

        # Map predictions and targets back to the original space, subject by subject.
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # Euclidean error of the model on the test fold.
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # Baseline: always predict the mean training-fold position of the landmark.
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        # Split each prediction error into perpendicular and tangent parts relative to
        # the plane through the true landmark with the stored skin normal.
        perpendicular_distances = []
        tangent_distances = []

        # The landmark's row in the normals table is the same for every test subject.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal's columns are chosen by position in the test fold (i),
            # not by subject index (current_subject is unused) - confirm the column layout.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Zero-length normals are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)
        
    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Ablation study: for every fold and every selected EEG 10-20 landmark, train (or load)
# an MLP that predicts the landmark position from the facial-landmark coordinates,
# then store error statistics in (landmark, fold) tables.
print('Ablation Study')
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Result tables: one row per landmark, one column per fold.
# The "*_errors_mean" tables hold the baseline that always guesses the mean training position.
mean_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
perpendicular_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
tangent_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_std = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
# Filled in fold-major order: one entry per (fold, landmark) pair that was trained.
validation_losses = []
test_losses = []

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        # Target: the landmark's coordinates, reshaped to one 3-value row per subject.
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        # Rows that hold at least one non-NaN coordinate.
        # NOTE(review): a row with only some NaN coordinates is still kept - confirm that cannot occur.
        valid_coordinates_rows = np.flatnonzero((~np.isnan(desired_landmark_subjects_coordinates)).any(axis=1))

        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
        #__________________________________________________________________________________________________________
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        # Inputs: coordinates of the facial landmarks, flattened to one row per subject.
        #features_to_use = documentation[2].split(': ')[1][1:-1].split(', ')
        features_to_use = np.array(MRI_facial_landmarks).astype(str)

        features_data = skin_coordinates_df.loc[features_to_use, :]
        features_data_subjects_coordinates = features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        X_array = np.zeros((int(features_data_subjects_coordinates.shape[0]/output_size), output_size*len(features_to_use)))

        for i in range(X_array.shape[0]):
            X_array[i, :] = np.array(features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
        X_array = X_array[valid_rows, :]

        # Add the precomputed perturbations; their last three columns are left out
        # (presumably the geodesic-feature columns that this study omits - confirm).
        X_train = X_array[train_indices, :] + X_perturbations[train_indices, :-3]
        X_test = X_array[test_indices, :] + X_perturbations[test_indices, :-3]

        # Zeros are treated as missing inputs. The neighbour count is a tenth of the
        # training set, but never below 1, which KNNImputer would reject.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, int(X_train.shape[0]/10)), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        # The scaler is fitted on the training fold only and then applied to the test fold.
        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        #print(repr(np.std(X_train, axis=0).reshape(-1, 3)))
        #print(repr(np.std(X_test, axis=0).reshape(-1, 3)))

        if train_now:
            mlp = MLP_nn(X_train.shape[1], output_size)
            mlp = nn.DataParallel(mlp, device_ids=[0])

            # Make every parameter trainable.
            for param in mlp.parameters():
                param.requires_grad=True

            predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='train', n_jobs_num=1, num_of_epochs=num_of_epochs)
            validation_losses.append(validation_loss)
            test_losses.append(test_loss)
        else:
            # Evaluate the most recent model saved for this landmark and fold.
            model_to_load_folder = "ADNI/"
            most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

            model_filename = most_recent_trained_model_filenames[0]
            scaler_filename = most_recent_trained_model_filenames[1]
            documentation_filename = most_recent_trained_model_filenames[2]

            with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
                documentation = [line.rstrip() for line in documentation_file]

            mlp = MLP_nn(X_train.shape[1], output_size)
            mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

            # NOTE(review): test_loss is not appended to test_losses on this path, unlike the training path.
            predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        if save_model and train_now:
            timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')
            # Common prefix of the model, scaler and documentation files of this landmark and fold.
            saved_files_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)

            torch.save(lowest_validation_loss_model.state_dict(), saved_files_prefix+'_model')
            # Write the scaler through a context manager so the file handle is always closed.
            with open(saved_files_prefix+'_scaler.pkl', 'wb') as scaler_file:
                pickle.dump(X_standard_scaler, scaler_file)
            documentation = [
                f"predicted_landmark_name: {desired_landmark_name}",
                #f"landmark_names_being_used: {features_to_use}",
                f"number_of_training_samples: {train_indices.size}",
                f"model: {mlp}",
            ]
            with open(saved_files_prefix+'_documentation.txt' , "w") as txt_file:
                txt_file.write("\n".join(documentation))

        # Map predictions and targets back with transform_to_original_space, one subject at a time.
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # Euclidean error of the model on the test fold.
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # Baseline: always guess the mean training position.
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        # Split every prediction error into parts perpendicular and tangent to the
        # plane through the true point whose normal comes from skin_normals_df.
        perpendicular_distances = []
        tangent_distances = []

        # Row of this landmark in the normals table; it is the same for every test subject.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal is selected by the position i inside the fold, not by
            # current_subject - confirm this matches the column layout of skin_normals_df.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Subjects with an all-zero normal are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)

    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Average the recorded test-loss curves over folds, giving one row per landmark.
# test_losses is filled fold by fold and, inside a fold, landmark by landmark, so the
# curves of landmark p sit at positions p, p+L, p+2L, ... (L = number of landmarks).
landmarks_mean_test_losses = []
for landmark_position in range(len(selected_EEG_10_20_landmark_names)):
    recorded_folds_count = int(len(test_losses)/len(selected_EEG_10_20_landmark_names))
    # Stack this landmark's curve from every recorded fold, one fold per row.
    landmark_fold_curves = np.vstack([
        np.array(test_losses[landmark_position+fold_position*len(selected_EEG_10_20_landmark_names)]).reshape(1, -1)
        for fold_position in range(recorded_folds_count)
    ])
    landmarks_mean_test_losses.append(np.mean(landmark_fold_curves, axis=0))
landmarks_mean_test_losses = np.vstack(landmarks_mean_test_losses)

In [None]:
# Plot the fold-averaged test-loss curve of every landmark, plus the mean over all landmarks.
fig = go.Figure()

# Mean curve over all landmarks.
overall_mean_trace = go.Scatter(
    x=np.arange(landmarks_mean_test_losses.shape[1]),
    y=np.mean(landmarks_mean_test_losses, axis=0),
    mode='lines',
    name='Mean'
)
fig.add_trace(overall_mean_trace)

# One curve per landmark, named after the landmark.
for curve_index, curve_name in enumerate(selected_EEG_10_20_landmark_names):
    landmark_trace = go.Scatter(
        x=np.arange(landmarks_mean_test_losses.shape[1]),
        y=landmarks_mean_test_losses[curve_index, :],
        mode='lines',
        name=curve_name
    )
    fig.add_trace(landmark_trace)

fig.show()

In [None]:
# Save the fold-averaged test-loss curves as a timestamped .npy file.
experiment_type = 'same_dataset'
array_filetype = '.npy'

timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Test_losses_arrays/"+MLP_folder
# The ablation run gets an extra "_ablation_study" tag in its file name.
array_filename = (
    f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_{timestamp_string}"
    if not ablation_study
    else f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_ablation_study_{timestamp_string}"
)
array_path = array_folder + array_filename + array_filetype

array_to_save = landmarks_mean_test_losses

# Manual toggle: 1 writes the array, 0 reads array_path back instead.
if 1:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

In [None]:
# Overall mean of the per-landmark, per-fold RMSE values (scaled by 1000).
rmse_per_landmark_and_fold = 1000*np.sqrt(mean_squared_errors)
print(f"Mean RMSE: {np.mean(rmse_per_landmark_and_fold)}")

In [None]:
# Per-landmark RMSE (scaled by 1000), averaged over the fold axis.
rmse_per_landmark_and_fold = 1000*np.sqrt(mean_squared_errors)
print(f"Mean RMSE over folds: {np.mean(rmse_per_landmark_and_fold, axis=1)}")

In [None]:
# Overall mean of the per-landmark, per-fold RMSE values (scaled by 1000), printed with the ablation label.
rmse_per_landmark_and_fold = 1000*np.sqrt(mean_squared_errors)
print(f"Ablation Study Mean RMSE: {np.mean(rmse_per_landmark_and_fold)}")

In [None]:
# Per-landmark RMSE (scaled by 1000), averaged over the fold axis, printed with the ablation label.
rmse_per_landmark_and_fold = 1000*np.sqrt(mean_squared_errors)
print(f"Ablation Study Mean RMSE over folds: {np.mean(rmse_per_landmark_and_fold, axis=1)}")

In [None]:
# Keep the current (landmark, fold) squared-error table in slot 3 of the summary array.
ADNI_Euclidean_MSE[3, ...] = mean_squared_errors

In [None]:
# Keep the current (landmark, fold) error standard deviations in slot 3 of the summary array.
ADNI_Euclidean_std[3, ...] = std_errors

In [None]:
# Keep the current perpendicular/tangent projection statistics in slot 3 of the summary arrays.
ADNI_Euclidean_perpendicular_distances[3, ...] = perpendicular_distances_mean
ADNI_Euclidean_tangent_distances[3, ...] = tangent_distances_mean
# Mean and standard deviation of the |perpendicular / tangent| ratios.
ADNI_Euclidean_distances_ratios[3, ...] = distances_ratios_mean
ADNI_Euclidean_distances_ratios_std[3, ...] = distances_ratios_std

In [None]:
# Save slot 3 of the squared-error summary and of the distance-ratio summary
# as .npy files that share one timestamped file name.
experiment_type = 'same_dataset'
array_filetype = '.npy'

timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Prediction_arrays/"+MLP_folder
# The ablation run gets an extra "_ablation_study" tag in its file name.
array_filename = (
    f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_{timestamp_string}"
    if not ablation_study
    else f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_ablation_study_{timestamp_string}"
)
array_path = array_folder + array_filename + array_filetype

# First file: the squared-error table.
array_to_save = ADNI_Euclidean_MSE[3, :]

# Manual toggle: 1 writes the array, 0 reads array_path back instead.
if 1:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

# Second file: the distance-ratio table, same file name in a different folder.
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Projected_prediction_ratio_arrays/"
array_path = array_folder + array_filename + array_filetype

array_to_save = ADNI_Euclidean_distances_ratios[3, :]

if 1:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

#### Learn from scratch - all

In [None]:
# Train in the next cell (rather than skip training) ...
train_now = True
# ... and write its models, scalers and documentation files under the "ADNI_ALL/" sub-folder.
experiment_model_path = trained_folder_path+"ADNI_ALL/"

In [None]:
# Train one MLP per selected EEG 10-20 landmark on every valid subject (no held-out fold)
# and, when save_model is set, write the model, its scaler and a short description to disk.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# (landmark, fold) result tables; they are re-initialised here but not written to in this cell.
mean_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
perpendicular_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
tangent_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_std = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
validation_losses = []
#test_losses = []

for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

    # Target: the landmark's coordinates, reshaped to one 3-value row per subject.
    desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
    desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
    desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
    # Rows that hold at least one non-NaN coordinate.
    # NOTE(review): a row with only some NaN coordinates is still kept - confirm that cannot occur.
    valid_coordinates_rows = np.flatnonzero((~np.isnan(desired_landmark_subjects_coordinates)).any(axis=1))

    valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
    #__________________________________________________________________________________________________________
    y_array = desired_landmark_subjects_coordinates[valid_rows, :]
    # Every valid row is used for training.
    train_indices = np.arange(y_array.shape[0])
    y_train = y_array[train_indices, :]
    # Placeholder test target: it supplies output_size and is passed on to model_choose_and_predict.
    y_test = np.array([-1, -1, -1]).reshape(1, 3)
    output_size = y_test.shape[1]

    # Inputs: coordinates of the facial landmarks, flattened to one row per subject.
    #features_to_use = documentation[2].split(': ')[1][1:-1].split(', ')
    features_to_use = np.array(MRI_facial_landmarks).astype(str)

    features_data = skin_coordinates_df.loc[features_to_use, :]
    features_data_subjects_coordinates = features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

    X_array = np.zeros((int(features_data_subjects_coordinates.shape[0]/output_size), output_size*len(features_to_use)))

    for i in range(X_array.shape[0]):
        X_array[i, :] = np.array(features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
    X_array = X_array[valid_rows, :]

    # Add the precomputed perturbations; their last three columns are left out.
    X_train = X_array[train_indices, :] + X_perturbations[train_indices, :-3]
    X_test = []

    # Zeros are treated as missing inputs. The neighbour count is a tenth of the
    # training set, but never below 1, which KNNImputer would reject.
    X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, int(X_train.shape[0]/10)), weights="uniform")
    X_train = X_imputer.fit_transform(X_train)
    #X_test = X_imputer.transform(X_test)

    X_standard_scaler = StandardScaler()
    X_train = X_standard_scaler.fit_transform(X_train)
    #X_test = X_standard_scaler.transform(X_test)

    #print(repr(np.std(X_train, axis=0).reshape(-1, 3)))
    #print(repr(np.std(X_test, axis=0).reshape(-1, 3)))

    if train_now:
        mlp = MLP_nn(X_train.shape[1], output_size)
        mlp = nn.DataParallel(mlp, device_ids=[0])

        # Make every parameter trainable.
        for param in mlp.parameters():
            param.requires_grad=True

        # NOTE(review): the result is used as a model below (state_dict), whereas other cells
        # unpack five values from model_choose_and_predict - confirm what it returns for an empty X_test.
        trained_model = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='train', n_jobs_num=1, num_of_epochs=num_of_epochs)
        #test_losses.append(test_loss)

    if save_model and train_now:
        timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')
        # Common prefix of the model, scaler and documentation files of this landmark.
        saved_files_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name

        torch.save(trained_model.state_dict(), saved_files_prefix+'_model')
        # Write the scaler through a context manager so the file handle is always closed.
        with open(saved_files_prefix+'_scaler.pkl', 'wb') as scaler_file:
            pickle.dump(X_standard_scaler, scaler_file)
        documentation = [
            f"predicted_landmark_name: {desired_landmark_name}",
            #f"landmark_names_being_used: {features_to_use}",
            f"number_of_training_samples: {train_indices.size}",
            f"model: {mlp}",
        ]
        with open(saved_files_prefix+'_documentation.txt' , "w") as txt_file:
            txt_file.write("\n".join(documentation))

### Euclidean coordinates and Geodesic distances

In [None]:
# Root folder of the MLPs trained on coordinates plus geodesic distances; the cells below
# append a sub-folder such as "3DMM/" or "IXI_ALL/" when loading models from it.
trained_folder_path = media_folder+"/3DMM/Trained_models/pytorch_MLP/Coordinates_Geodesic/"+MLP_folder

In [None]:
# Cells that check this flag (`if save_model and train_now`) will not write models to disk.
save_model = False

In [None]:
# Name of the input feature set for this section.
# NOTE(review): neither value is read in the cells visible below - confirm where they are used.
chosen_features_set_name = 'MRI_facial_landmarks'
#chosen_features_set_index = [idx for idx, key in enumerate(list(feature_sets.items()) ) if key[0] == chosen_features_set_name][0]
# Kept as a string ("100"); presumably a percentage used to build names or keys - confirm.
desired_decimation_percentage = str(100)

In [None]:
# Summary arrays for the coordinates + geodesic-distances experiments.
# Axis 0 is the experiment slot (slot 0 is filled after the synthetic-model test below),
# axis 1 the landmark and axis 2 the fold. The "*_mean" arrays hold the single
# mean-guess baseline and therefore have one slot only.
experiment_slots_shape = (4, len(selected_EEG_10_20_landmark_names), n_splits)
baseline_slot_shape = (1, len(selected_EEG_10_20_landmark_names), n_splits)

# Mean absolute (Euclidean) error.
ADNI_Euclidean_Geodesic_MAE = np.zeros(experiment_slots_shape)
ADNI_Euclidean_Geodesic_MAE_mean = np.zeros(baseline_slot_shape)

# Mean squared error.
ADNI_Euclidean_Geodesic_MSE = np.zeros(experiment_slots_shape)
ADNI_Euclidean_Geodesic_MSE_mean = np.zeros(baseline_slot_shape)

# Standard deviation of the error.
ADNI_Euclidean_Geodesic_std = np.zeros(experiment_slots_shape)
ADNI_Euclidean_Geodesic_std_mean = np.zeros(baseline_slot_shape)

# Perpendicular/tangent projection statistics of the prediction error.
ADNI_Euclidean_Geodesic_perpendicular_distances = np.zeros(experiment_slots_shape)
ADNI_Euclidean_Geodesic_tangent_distances = np.zeros(experiment_slots_shape)
ADNI_Euclidean_Geodesic_distances_ratios = np.zeros(experiment_slots_shape)
ADNI_Euclidean_Geodesic_distances_ratios_std = np.zeros(experiment_slots_shape)

#### Synthetic model - test

In [None]:
# Test the most recent models found under "3DMM/" on every fold: for each selected
# EEG 10-20 landmark, predict its position from facial-landmark coordinates plus three
# geodesic features and store error statistics in (landmark, fold) tables.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Result tables: one row per landmark, one column per fold.
# The "*_errors_mean" tables hold the baseline that always guesses the mean training position.
mean_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
perpendicular_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
tangent_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_std = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
validation_losses = []
test_losses = []

# Geodesic input features, built once because they depend on neither fold nor landmark.
# One row per column of skin_geodesic_distances_df: the first two table rows as they are
# and the sum of the remaining rows. Falsy entries become 0, and the sum becomes 0 as soon
# as one of its terms is falsy; the KNNImputer below treats 0 as missing.
geodesic_features_data = skin_geodesic_distances_df
geodesic_X_array_all_rows = np.zeros((geodesic_features_data.shape[1], 3))

for i in range(geodesic_features_data.shape[1]):
    first_distance = geodesic_features_data.iloc[0, i]
    geodesic_X_array_all_rows[i, 0] = first_distance if first_distance else 0

    second_distance = geodesic_features_data.iloc[1, i]
    geodesic_X_array_all_rows[i, 1] = second_distance if second_distance else 0

    remaining_distances = np.array(geodesic_features_data.iloc[2:, i])
    geodesic_X_array_all_rows[i, 2] = np.sum(remaining_distances) if np.all(remaining_distances) else 0

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        # Most recent model, scaler and documentation files saved for this landmark.
        most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+"3DMM/", desired_landmark_name)
        model_filename = most_recent_trained_model_filenames[0]
        # NOTE(review): the saved scaler is located but never loaded; a new scaler is fitted below - confirm this is intended.
        scaler_filename = most_recent_trained_model_filenames[1]
        documentation_filename = most_recent_trained_model_filenames[2]

        with open(trained_folder_path+"3DMM/"+documentation_filename) as documentation_file:
            documentation = [line.rstrip() for line in documentation_file]

        # Target: the landmark's coordinates, reshaped to one 3-value row per subject.
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        # Rows that hold at least one non-NaN coordinate.
        # NOTE(review): a row with only some NaN coordinates is still kept - confirm that cannot occur.
        valid_coordinates_rows = np.flatnonzero((~np.isnan(desired_landmark_subjects_coordinates)).any(axis=1))

        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
        #__________________________________________________________________________________________________________
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        # Coordinate inputs: facial-landmark coordinates flattened to one row per subject.
        coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)

        coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
        coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        coordinates_X_array = np.zeros((int(coordinates_features_data_subjects_coordinates.shape[0]/output_size), output_size*len(coordinates_features_to_use)))

        for i in range(coordinates_X_array.shape[0]):
            coordinates_X_array[i, :] = np.array(coordinates_features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
        coordinates_X_array = coordinates_X_array[valid_rows, :]

        # Geodesic inputs restricted to the same valid rows.
        geodesic_X_array = geodesic_X_array_all_rows[valid_rows, :]

        # Coordinates followed by the three geodesic features, plus the precomputed perturbations.
        X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
        X_test = np.concatenate((coordinates_X_array[test_indices, :], geodesic_X_array[test_indices, :]), axis=1) + X_perturbations[test_indices, :]

        # Zeros are treated as missing inputs. The neighbour count is a tenth of the
        # training set, but never below 1, which KNNImputer would reject.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, int(X_train.shape[0]/10)), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        #print(repr(np.std(X_train, axis=0).reshape(-1, 3)))
        #print(repr(np.std(X_test, axis=0).reshape(-1, 3)))

        mlp = MLP_nn(X_train.shape[1], output_size)
        mlp.load_state_dict(change_keys(torch.load(trained_folder_path+"3DMM/"+model_filename)))

        predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)
        #validation_losses.append(validation_loss)
        test_losses.append(test_loss)

        # Map predictions and targets back with transform_to_original_space, one subject at a time.
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # Euclidean error of the model on the test fold.
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # Baseline: always guess the mean training position.
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        # Split every prediction error into parts perpendicular and tangent to the
        # plane through the true point whose normal comes from skin_normals_df.
        perpendicular_distances = []
        tangent_distances = []

        # Row of this landmark in the normals table; it is the same for every test subject.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal is selected by the position i inside the fold, not by
            # current_subject - confirm this matches the column layout of skin_normals_df.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Subjects with an all-zero normal are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)

    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Overall mean of the per-landmark, per-fold RMSE values (scaled by 1000).
rmse_per_landmark_and_fold = 1000*np.sqrt(mean_squared_errors)
print(f"Mean RMSE: {np.mean(rmse_per_landmark_and_fold)}")

In [None]:
# Per-landmark RMSE (scaled by 1000), averaged over the fold axis.
rmse_per_landmark_and_fold = 1000*np.sqrt(mean_squared_errors)
print(f"Mean RMSE over folds: {np.mean(rmse_per_landmark_and_fold, axis=1)}")

In [None]:
# Keep the mean-error tables of the model and of the mean-guess baseline in slot 0.
ADNI_Euclidean_Geodesic_MAE[0, ...] = mean_errors
ADNI_Euclidean_Geodesic_MAE_mean[0, ...] = mean_errors_mean

In [None]:
# Keep the squared-error tables of the model and of the mean-guess baseline in slot 0.
ADNI_Euclidean_Geodesic_MSE[0, ...] = mean_squared_errors
ADNI_Euclidean_Geodesic_MSE_mean[0, ...] = mean_squared_errors_mean

In [None]:
# Keep the error standard deviations of the model and of the mean-guess baseline in slot 0.
ADNI_Euclidean_Geodesic_std[0, ...] = std_errors
ADNI_Euclidean_Geodesic_std_mean[0, ...] = std_errors_mean

In [None]:
# Keep the perpendicular/tangent projection statistics in slot 0 of the summary arrays.
ADNI_Euclidean_Geodesic_perpendicular_distances[0, ...] = perpendicular_distances_mean
ADNI_Euclidean_Geodesic_tangent_distances[0, ...] = tangent_distances_mean
# Mean and standard deviation of the |perpendicular / tangent| ratios.
ADNI_Euclidean_Geodesic_distances_ratios[0, ...] = distances_ratios_mean
ADNI_Euclidean_Geodesic_distances_ratios_std[0, ...] = distances_ratios_std

In [None]:
# Save slot 0 of the squared-error summary and of the distance-ratio summary
# as .npy files that share one timestamped file name.
experiment_type = 'synthetic_CG'
array_filetype = '.npy'

timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Prediction_arrays/"+MLP_folder
# The ablation run gets an extra "_ablation_study" tag in its file name.
array_filename = (
    f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_{timestamp_string}"
    if not ablation_study
    else f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}_ablation_study_{timestamp_string}"
)
array_path = array_folder + array_filename + array_filetype

# First file: the squared-error table.
array_to_save = ADNI_Euclidean_Geodesic_MSE[0, :]

# Manual toggle: 1 writes the array, 0 reads array_path back instead.
if 1:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

# Second file: the distance-ratio table, same file name in a different folder.
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Projected_prediction_ratio_arrays/"
array_path = array_folder + array_filename + array_filetype

array_to_save = ADNI_Euclidean_Geodesic_distances_ratios[0, :]

if 1:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

#### IXI model - test

In [None]:
# Evaluate the already-trained "IXI_ALL" per-landmark models on every
# cross-validation test split (mode='test_only'); nothing is saved by this cell.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# One (landmark x fold) table per reported metric.
metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
validation_losses = []
test_losses = []

# ---- Feature tables: independent of fold and landmark, so built only once ----
# Landmark coordinates are reshaped to (-1, 3) below, hence 3 values per landmark.
coordinate_dims = 3

coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)
coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

# One row per subject: that subject's 3 coordinate rows, flattened landmark by landmark.
all_coordinates_X_array = np.zeros((int(coordinates_features_data_subjects_coordinates.shape[0]/coordinate_dims), coordinate_dims*len(coordinates_features_to_use)))
for i in range(all_coordinates_X_array.shape[0]):
    subject_block = coordinates_features_data_subjects_coordinates.iloc[coordinate_dims*i:coordinate_dims*(i+1), :]
    all_coordinates_X_array[i, :] = np.array(subject_block.T).reshape(-1)

# Three geodesic features per column of skin_geodesic_distances_df: row 0,
# row 1, and the sum of rows 2.. (forced to 0 if any of those rows is falsy).
geodesic_features_data = skin_geodesic_distances_df
all_geodesic_X_array = np.zeros((geodesic_features_data.shape[1], 3))
for i in range(geodesic_features_data.shape[1]):
    first_distance = geodesic_features_data.iloc[0, i]
    second_distance = geodesic_features_data.iloc[1, i]
    remaining_distances = np.array(geodesic_features_data.iloc[2:, i])

    all_geodesic_X_array[i, 0] = first_distance if first_distance else 0
    all_geodesic_X_array[i, 1] = second_distance if second_distance else 0
    all_geodesic_X_array[i, 2] = np.sum(remaining_distances) if np.all(remaining_distances) else 0

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        # Files of the most recent IXI_ALL training run for this landmark:
        # [0] model weights, [1] fitted scaler, [2] documentation text.
        most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+"IXI_ALL/", desired_landmark_name)
        model_filename = most_recent_trained_model_filenames[0]
        scaler_filename = most_recent_trained_model_filenames[1]
        documentation_filename = most_recent_trained_model_filenames[2]

        with open(trained_folder_path+"IXI_ALL/"+documentation_filename) as documentation_file:
            documentation = [line.rstrip() for line in documentation_file]

        # ---- Targets: the landmark's coordinates, one row of 3 per subject ----
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)

        # NOTE(review): a row counts as valid when at least one of its three
        # values is not NaN, so a partially-NaN row is kept -- confirm intended.
        nan_mask = np.isnan(desired_landmark_subjects_coordinates)
        if nan_mask.any():
            valid_coordinates_rows = np.unique(np.where(~nan_mask)[0])
        else:
            valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)

        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        # ---- Inputs: restrict the precomputed tables to the valid subjects ----
        coordinates_X_array = all_coordinates_X_array[valid_rows, :]
        geodesic_X_array = all_geodesic_X_array[valid_rows, :]

        X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
        X_test = np.concatenate((coordinates_X_array[test_indices, :], geodesic_X_array[test_indices, :]), axis=1) + X_perturbations[test_indices, :]

        # Entries equal to 0 are treated as missing and imputed from the training rows.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=int(X_train.shape[0]/10), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        # Reuse the scaler stored with the trained model; only X_test is scaled
        # here (X_train is passed on unscaled, as in the original cell).
        X_standard_scaler = joblib.load(trained_folder_path+"IXI_ALL/"+scaler_filename)
        X_test = X_standard_scaler.transform(X_test)

        mlp = MLP_nn(X_train.shape[1], output_size)
        mlp.load_state_dict(change_keys(torch.load(trained_folder_path+"IXI_ALL/"+model_filename)))

        for param in mlp.parameters():
            param.requires_grad = True

        predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        test_losses.append(test_loss)

        # ---- Map predictions and targets back with transform_to_original_space ----
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # ---- Error of the model (Euclidean distance per test subject) ----
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # ---- Baseline: always guess the mean training position ----
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        # ---- Split each error into parts normal / tangent to the skin normal ----
        perpendicular_distances = []
        tangent_distances = []

        # The landmark's row in skin_normals_df is the same for every subject.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal's columns are selected with the position
            # `i` inside the test split, not with `current_subject`; confirm
            # against the column layout of skin_normals_df.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Zero-length normals are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)

    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Average, over every landmark and fold, of 1000 * sqrt(per-entry mean squared error).
print(f"Mean RMSE: {(1000*np.sqrt(mean_squared_errors)).mean()}")

In [None]:
# Per-landmark RMSE: 1000 * sqrt(MSE) averaged along the fold axis (axis 1).
print(f"Mean RMSE over folds: {(1000*np.sqrt(mean_squared_errors)).mean(axis=1)}")

In [None]:
# Slot 1 of the results table receives the squared errors of the run above.
ADNI_Euclidean_Geodesic_MSE[1] = mean_squared_errors

In [None]:
# Slot 1 of the results table receives the error standard deviations of the run above.
ADNI_Euclidean_Geodesic_std[1] = std_errors

In [None]:
# Slot 1 of each projection table receives the matching statistic of the run above.
ADNI_Euclidean_Geodesic_perpendicular_distances[1] = perpendicular_distances_mean
ADNI_Euclidean_Geodesic_tangent_distances[1] = tangent_distances_mean
ADNI_Euclidean_Geodesic_distances_ratios[1] = distances_ratios_mean
ADNI_Euclidean_Geodesic_distances_ratios_std[1] = distances_ratios_std

In [None]:
# File stamp month_day_year_hour_minute_second; only a 2022 year is shortened to "22".
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'other_dataset_CG'
array_filetype = '.npy'

# Name: <dataset>_<second-to-last part of trained_folder_path>_<experiment>[_ablation_study]_<stamp>
array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}"
if ablation_study:
    array_filename += "_ablation_study"
array_filename += f"_{timestamp_string}"

# --- 1) squared errors (slot 1) -> Prediction_arrays ---
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Prediction_arrays/"+MLP_folder
array_path = array_folder + array_filename + array_filetype
array_to_save = ADNI_Euclidean_Geodesic_MSE[1, :]

# Manual toggle: True writes the file, False reads it back instead.
if True:
    with open(array_path, 'wb') as array_file:
        np.save(array_file, array_to_save)
else:
    with open(array_path, 'rb') as array_file:
        array_to_save = np.load(array_file)

# --- 2) distance ratios (slot 1) -> Projected_prediction_ratio_arrays, same file name ---
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Projected_prediction_ratio_arrays/"
array_path = array_folder + array_filename + array_filetype
array_to_save = ADNI_Euclidean_Geodesic_distances_ratios[1, :]

if True:
    with open(array_path, 'wb') as array_file:
        np.save(array_file, array_to_save)
else:
    with open(array_path, 'rb') as array_file:
        array_to_save = np.load(array_file)

#### Fine-tune Synthetic model - cv

In [None]:
# Folder for the fine-tuned per-fold models, and the train/evaluate switch
# read by the cross-validation cell below.
train_now = True
experiment_model_path = f"{trained_folder_path}3DMM_ADNI/"

In [None]:
# Cross-validated fine-tuning of the synthetic ("3DMM") per-landmark models.
# With train_now set, each fold starts from the "3DMM/" weights and is
# fine-tuned; otherwise the stored "3DMM_ADNI/" models are only evaluated.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# One (landmark x fold) table per reported metric.
metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
validation_losses = []
test_losses = []

# ---- Feature tables: independent of fold and landmark, so built only once ----
# Landmark coordinates are reshaped to (-1, 3) below, hence 3 values per landmark.
coordinate_dims = 3

coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)
coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

# One row per subject: that subject's 3 coordinate rows, flattened landmark by landmark.
all_coordinates_X_array = np.zeros((int(coordinates_features_data_subjects_coordinates.shape[0]/coordinate_dims), coordinate_dims*len(coordinates_features_to_use)))
for i in range(all_coordinates_X_array.shape[0]):
    subject_block = coordinates_features_data_subjects_coordinates.iloc[coordinate_dims*i:coordinate_dims*(i+1), :]
    all_coordinates_X_array[i, :] = np.array(subject_block.T).reshape(-1)

# Three geodesic features per column of skin_geodesic_distances_df: row 0,
# row 1, and the sum of rows 2.. (forced to 0 if any of those rows is falsy).
geodesic_features_data = skin_geodesic_distances_df
all_geodesic_X_array = np.zeros((geodesic_features_data.shape[1], 3))
for i in range(geodesic_features_data.shape[1]):
    first_distance = geodesic_features_data.iloc[0, i]
    second_distance = geodesic_features_data.iloc[1, i]
    remaining_distances = np.array(geodesic_features_data.iloc[2:, i])

    all_geodesic_X_array[i, 0] = first_distance if first_distance else 0
    all_geodesic_X_array[i, 1] = second_distance if second_distance else 0
    all_geodesic_X_array[i, 2] = np.sum(remaining_distances) if np.all(remaining_distances) else 0

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        # Start from the synthetic model when training, from the already
        # fine-tuned one when only evaluating.
        if train_now:
            model_to_load_folder = "3DMM/"
        else:
            model_to_load_folder = "3DMM_ADNI/"

        # [0] model weights, [1] fitted scaler, [2] documentation text.
        most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

        model_filename = most_recent_trained_model_filenames[0]
        scaler_filename = most_recent_trained_model_filenames[1]
        documentation_filename = most_recent_trained_model_filenames[2]

        with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
            documentation = [line.rstrip() for line in documentation_file]

        # ---- Targets: the landmark's coordinates, one row of 3 per subject ----
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)

        # NOTE(review): a row counts as valid when at least one of its three
        # values is not NaN, so a partially-NaN row is kept -- confirm intended.
        nan_mask = np.isnan(desired_landmark_subjects_coordinates)
        if nan_mask.any():
            valid_coordinates_rows = np.unique(np.where(~nan_mask)[0])
        else:
            valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)

        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        # ---- Inputs: restrict the precomputed tables to the valid subjects ----
        coordinates_X_array = all_coordinates_X_array[valid_rows, :]
        geodesic_X_array = all_geodesic_X_array[valid_rows, :]

        X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
        X_test = np.concatenate((coordinates_X_array[test_indices, :], geodesic_X_array[test_indices, :]), axis=1) + X_perturbations[test_indices, :]

        # Entries equal to 0 are treated as missing and imputed from the training rows.
        X_imputer = KNNImputer(missing_values=0, n_neighbors=int(X_train.shape[0]/10), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        # A fresh scaler is fitted on this fold's training rows (the stored
        # scaler file is located above but not loaded).
        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        mlp = MLP_nn(X_train.shape[1], output_size)
        mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

        # Every parameter is left trainable.
        for param in mlp.parameters():
            param.requires_grad = True

        if train_now:
            predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='fine_tune', n_jobs_num=1)
            test_losses.append(test_loss)
        else:
            predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        if save_model and train_now:
            timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
            timestamp_string = timestamp_string.replace('_2022_', '_22_')

            # Shared prefix of the three files written for this landmark and fold.
            saved_files_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)

            torch.save(lowest_validation_loss_model.state_dict(), saved_files_prefix+'_model')
            # Context manager so the scaler file is flushed and closed right away.
            with open(saved_files_prefix+'_scaler.pkl', 'wb') as scaler_file:
                pickle.dump(X_standard_scaler, scaler_file)
            documentation = [
                f"predicted_landmark_name: {desired_landmark_name}",
                f"number_of_training_samples: {train_indices.size}",
                f"model: {mlp}",
            ]
            with open(saved_files_prefix+'_documentation.txt', "w") as txt_file:
                txt_file.write("\n".join(documentation))

        # ---- Map predictions and targets back with transform_to_original_space ----
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # ---- Error of the model (Euclidean distance per test subject) ----
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # ---- Baseline: always guess the mean training position ----
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        # ---- Split each error into parts normal / tangent to the skin normal ----
        perpendicular_distances = []
        tangent_distances = []

        # The landmark's row in skin_normals_df is the same for every subject.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal's columns are selected with the position
            # `i` inside the test split, not with `current_subject`; confirm
            # against the column layout of skin_normals_df.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Zero-length normals are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)

    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Average the recorded test-loss sequences over folds, one output row per landmark.
# Entry (landmark i, fold j) is read from test_losses[i + j*number_of_landmarks].
landmark_count = len(selected_EEG_10_20_landmark_names)
per_landmark_mean_losses = []
for landmark_position in range(landmark_count):
    fold_loss_rows = [
        np.array(test_losses[landmark_position + fold_position*landmark_count]).reshape(1, -1)
        for fold_position in range(len(test_losses)//landmark_count)
    ]
    # Stack this landmark's folds as rows and average column-wise.
    per_landmark_mean_losses.append(np.mean(np.vstack(fold_loss_rows), axis=0))
landmarks_mean_test_losses = np.vstack(per_landmark_mean_losses)

In [None]:
# Line plot of the fold-averaged test losses: one trace for the mean over
# landmarks, followed by one trace per landmark.
x_positions = np.arange(landmarks_mean_test_losses.shape[1])

fig = go.Figure()
fig.add_trace(go.Scatter(x=x_positions, y=np.mean(landmarks_mean_test_losses, axis=0), mode='lines', name='Mean'))

for i, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    landmark_trace = go.Scatter(x=x_positions, y=landmarks_mean_test_losses[i, :], mode='lines', name=desired_landmark_name)
    fig.add_trace(landmark_trace)

fig.show()

In [None]:
# File stamp month_day_year_hour_minute_second; only a 2022 year is shortened to "22".
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'synthetic_fine_tuned_CG'
array_filetype = '.npy'

# Name: <dataset>_<second-to-last part of trained_folder_path>_<experiment>[_ablation_study]_<stamp>
array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}"
if ablation_study:
    array_filename += "_ablation_study"
array_filename += f"_{timestamp_string}"

array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Test_losses_arrays/"+MLP_folder
array_path = array_folder + array_filename + array_filetype
array_to_save = landmarks_mean_test_losses

# Manual toggle: True writes the file, False reads it back instead.
if True:
    with open(array_path, 'wb') as array_file:
        np.save(array_file, array_to_save)
else:
    with open(array_path, 'rb') as array_file:
        array_to_save = np.load(array_file)

In [None]:
# Slot 2 of the results table receives the squared errors of the fine-tuning run.
ADNI_Euclidean_Geodesic_MSE[2] = mean_squared_errors

In [None]:
# Slot 2 of the results table receives the error standard deviations of the fine-tuning run.
ADNI_Euclidean_Geodesic_std[2] = std_errors

In [None]:
# Slot 2 of each projection table receives the matching statistic of the fine-tuning run.
ADNI_Euclidean_Geodesic_perpendicular_distances[2] = perpendicular_distances_mean
ADNI_Euclidean_Geodesic_tangent_distances[2] = tangent_distances_mean
ADNI_Euclidean_Geodesic_distances_ratios[2] = distances_ratios_mean
ADNI_Euclidean_Geodesic_distances_ratios_std[2] = distances_ratios_std

In [None]:
# File stamp month_day_year_hour_minute_second; only a 2022 year is shortened to "22".
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'synthetic_fine_tuned_CG'
array_filetype = '.npy'

# Name: <dataset>_<second-to-last part of trained_folder_path>_<experiment>[_ablation_study]_<stamp>
array_filename = f"{current_dataset_name}_{trained_folder_path.split('/')[-2]}_{experiment_type}"
if ablation_study:
    array_filename += "_ablation_study"
array_filename += f"_{timestamp_string}"

# --- 1) squared errors (slot 2) -> Prediction_arrays ---
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Prediction_arrays/"+MLP_folder
array_path = array_folder + array_filename + array_filetype
array_to_save = ADNI_Euclidean_Geodesic_MSE[2, :]

# Manual toggle: True writes the file, False reads it back instead.
if True:
    with open(array_path, 'wb') as array_file:
        np.save(array_file, array_to_save)
else:
    with open(array_path, 'rb') as array_file:
        array_to_save = np.load(array_file)

# --- 2) distance ratios (slot 2) -> Projected_prediction_ratio_arrays, same file name ---
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Projected_prediction_ratio_arrays/"
array_path = array_folder + array_filename + array_filetype
array_to_save = ADNI_Euclidean_Geodesic_distances_ratios[2, :]

if True:
    with open(array_path, 'wb') as array_file:
        np.save(array_file, array_to_save)
else:
    with open(array_path, 'rb') as array_file:
        array_to_save = np.load(array_file)

#### Fine-tune Synthetic model - all

In [None]:
# Folder for the models fine-tuned on all subjects, and the train/evaluate
# switch read by the cell below.
train_now = True
experiment_model_path = f"{trained_folder_path}3DMM_ADNI_ALL/"

In [None]:
# Fine-tune the synthetic ("3DMM") per-landmark models on ALL valid subjects
# (no held-out split): y_test / X_test are placeholders only.
# The loop body is indented with spaces throughout; the previous mix of tabs
# and spaces is rejected by Python 3 with a TabError.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# Metric tables are re-initialised here but not filled by this cell.
metrics_shape = (len(selected_EEG_10_20_landmark_names), n_splits)
mean_errors = np.zeros(metrics_shape)
mean_squared_errors = np.zeros(metrics_shape)
std_errors = np.zeros(metrics_shape)
mean_errors_mean = np.zeros(metrics_shape)
mean_squared_errors_mean = np.zeros(metrics_shape)
std_errors_mean = np.zeros(metrics_shape)
perpendicular_distances_mean = np.zeros(metrics_shape)
tangent_distances_mean = np.zeros(metrics_shape)
distances_ratios_mean = np.zeros(metrics_shape)
distances_ratios_std = np.zeros(metrics_shape)
validation_losses = []

# ---- Feature tables: independent of the landmark, so built only once ----
# Landmark coordinates are reshaped to (-1, 3) below, hence 3 values per landmark.
coordinate_dims = 3

coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)
coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

# One row per subject: that subject's 3 coordinate rows, flattened landmark by landmark.
all_coordinates_X_array = np.zeros((int(coordinates_features_data_subjects_coordinates.shape[0]/coordinate_dims), coordinate_dims*len(coordinates_features_to_use)))
for i in range(all_coordinates_X_array.shape[0]):
    subject_block = coordinates_features_data_subjects_coordinates.iloc[coordinate_dims*i:coordinate_dims*(i+1), :]
    all_coordinates_X_array[i, :] = np.array(subject_block.T).reshape(-1)

# Three geodesic features per column of skin_geodesic_distances_df: row 0,
# row 1, and the sum of rows 2.. (forced to 0 if any of those rows is falsy).
geodesic_features_data = skin_geodesic_distances_df
all_geodesic_X_array = np.zeros((geodesic_features_data.shape[1], 3))
for i in range(geodesic_features_data.shape[1]):
    first_distance = geodesic_features_data.iloc[0, i]
    second_distance = geodesic_features_data.iloc[1, i]
    remaining_distances = np.array(geodesic_features_data.iloc[2:, i])

    all_geodesic_X_array[i, 0] = first_distance if first_distance else 0
    all_geodesic_X_array[i, 1] = second_distance if second_distance else 0
    all_geodesic_X_array[i, 2] = np.sum(remaining_distances) if np.all(remaining_distances) else 0

for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

    # Start from the synthetic model when training, from the already
    # fine-tuned one when only evaluating.
    if train_now:
        model_to_load_folder = "3DMM/"
    else:
        model_to_load_folder = "3DMM_ADNI_ALL/"

    # NOTE(review): fold_index is not set anywhere in this cell; it is whatever
    # value an earlier cell left behind. Confirm which fold's files should be
    # used as the starting point here.
    # [0] model weights, [1] fitted scaler, [2] documentation text.
    most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

    model_filename = most_recent_trained_model_filenames[0]
    scaler_filename = most_recent_trained_model_filenames[1]
    documentation_filename = most_recent_trained_model_filenames[2]

    with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
        documentation = [line.rstrip() for line in documentation_file]

    # ---- Targets: the landmark's coordinates, one row of 3 per subject ----
    desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
    desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
    desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)

    # NOTE(review): a row counts as valid when at least one of its three
    # values is not NaN, so a partially-NaN row is kept -- confirm intended.
    nan_mask = np.isnan(desired_landmark_subjects_coordinates)
    if nan_mask.any():
        valid_coordinates_rows = np.unique(np.where(~nan_mask)[0])
    else:
        valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

    valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)

    # Every valid subject is a training sample; y_test is a one-row dummy.
    y_array = desired_landmark_subjects_coordinates[valid_rows, :]
    train_indices = np.arange(y_array.shape[0])
    y_train = y_array[train_indices, :]
    y_test = np.array([-1, -1, -1]).reshape(1, 3)
    output_size = y_test.shape[1]

    # ---- Inputs: restrict the precomputed tables to the valid subjects ----
    coordinates_X_array = all_coordinates_X_array[valid_rows, :]
    geodesic_X_array = all_geodesic_X_array[valid_rows, :]

    X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
    X_test = []

    # Entries equal to 0 are treated as missing and imputed from the training rows.
    X_imputer = KNNImputer(missing_values=0, n_neighbors=int(X_train.shape[0]/10), weights="uniform")
    X_train = X_imputer.fit_transform(X_train)

    X_standard_scaler = StandardScaler()
    X_train = X_standard_scaler.fit_transform(X_train)

    mlp = MLP_nn(X_train.shape[1], output_size)
    mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

    # Every parameter is left trainable.
    for param in mlp.parameters():
        param.requires_grad = True

    if train_now:
        predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='fine_tune', n_jobs_num=1)
        validation_losses.append(validation_loss)
    else:
        predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

    if save_model and train_now:
        timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
        timestamp_string = timestamp_string.replace('_2022_', '_22_')

        # Shared prefix of the three files written for this landmark.
        saved_files_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name

        torch.save(lowest_validation_loss_model.state_dict(), saved_files_prefix+'_model')
        # Context manager so the scaler file is flushed and closed right away.
        with open(saved_files_prefix+'_scaler.pkl', 'wb') as scaler_file:
            pickle.dump(X_standard_scaler, scaler_file)
        documentation = [
            f"predicted_landmark_name: {desired_landmark_name}",
            f"number_of_training_samples: {train_indices.size}",
            f"model: {mlp}",
        ]
        with open(saved_files_prefix+'_documentation.txt', "w") as txt_file:
            txt_file.write("\n".join(documentation))

#### Learn from scratch - cv

In [None]:
# Folder for the models of the learn-from-scratch experiment, and the
# train/evaluate switch (off here).
train_now = False
experiment_model_path = f"{trained_folder_path}ADNI/"

In [None]:
# k-fold cross-validation over the selected EEG 10-20 landmarks.
# For every fold and every landmark this cell builds the feature matrix, trains a new
# MLP (train_now=True) or loads the most recent saved one (train_now=False), predicts
# the held-out subjects and stores error statistics per (landmark, fold).

# Fix the torch CPU and CUDA seeds before any model is created.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# One result table per metric, shaped (number of landmarks, n_splits).
mean_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
perpendicular_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
tangent_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_std = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
# Loss records returned by training; they are only appended to when train_now is True.
validation_losses = []
test_losses = []

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        # ---- Targets: the landmark's coordinates, reshaped to one 3-value row per subject.
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        # NOTE(review): when NaNs exist this keeps every row holding at least one non-NaN
        # value, so a partially missing row would still count as valid -- confirm that
        # missing landmarks are always missing in all three coordinates.
        if np.where(np.isnan(desired_landmark_subjects_coordinates)==True)[0].size>0:
            valid_coordinates_rows = np.unique(np.where(np.isnan(desired_landmark_subjects_coordinates)==False)[0])
        else:
            valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

        # Keep only subjects that are valid here and also listed in only_valid_score_subjects_rows.
        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
        #__________________________________________________________________________________________________________
        # The fold indices address rows of the filtered (valid_rows) arrays.
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        # ---- Coordinate features: the MRI facial landmarks' coordinates, flattened per subject.
        coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)

        coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
        coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        coordinates_X_array = np.zeros((int(coordinates_features_data_subjects_coordinates.shape[0]/output_size), output_size*len(coordinates_features_to_use)))

        # Each subject owns output_size consecutive rows of the transposed frame; flatten
        # them landmark by landmark into a single feature row.
        for i in range(coordinates_X_array.shape[0]):
            coordinates_X_array[i, :] = np.array(coordinates_features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
        coordinates_X_array = coordinates_X_array[valid_rows, :]

        # ---- Geodesic features: three values per column of skin_geodesic_distances_df.
        geodesic_features_data = skin_geodesic_distances_df
        geodesic_X_array = np.zeros((geodesic_features_data.shape[1], 3))

        for i in range(geodesic_features_data.shape[1]):
            # Features 0 and 1 copy rows 0 and 1; falsy entries are stored as 0.
            if geodesic_features_data.iloc[0, i]:
                geodesic_X_array[i, 0] = geodesic_features_data.iloc[0, i]
            else:
                geodesic_X_array[i, 0] = 0

            if geodesic_features_data.iloc[1, i]:
                geodesic_X_array[i, 1] = geodesic_features_data.iloc[1, i]
            else:
                geodesic_X_array[i, 1] = 0

            # Feature 2 is the sum of rows 2 onward, or 0 as soon as any of them is falsy.
            if np.all(np.array(geodesic_features_data.iloc[2:, i])):
                geodesic_X_array[i, 2] = np.sum(np.array(geodesic_features_data.iloc[2:, i]))
            else:
                geodesic_X_array[i, 2] = 0

        geodesic_X_array = geodesic_X_array[valid_rows, :]

        # NOTE(review): X_perturbations is added before imputation, so an entry that was 0
        # (the imputer's missing-value marker) is only imputed if its perturbation is 0.
        X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
        X_test = np.concatenate((coordinates_X_array[test_indices, :], geodesic_X_array[test_indices, :]), axis=1) + X_perturbations[test_indices, :]

        # Zeros mark missing features. The neighbour count is a tenth of the training set,
        # clamped to at least 1 because KNNImputer rejects n_neighbors=0 (fewer than 10 rows).
        X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, int(X_train.shape[0]/10)), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        # Imputer and scaler are fitted on the training rows only and then applied to the test rows.
        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        if train_now:
            mlp = MLP_nn(X_train.shape[1], output_size)

            # Train every parameter of the freshly created network.
            for param in mlp.parameters():
                param.requires_grad = True

            predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='train', n_jobs_num=1)
            validation_losses.append(validation_loss)
            test_losses.append(test_loss)
        else:
            # Load the most recent artifacts saved for this landmark and fold.
            model_to_load_folder = "ADNI/"
            most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

            model_filename = most_recent_trained_model_filenames[0]
            # NOTE(review): the saved scaler file is located but never loaded; the scaler
            # fitted above on the current training rows is what X_test went through.
            scaler_filename = most_recent_trained_model_filenames[1]
            documentation_filename = most_recent_trained_model_filenames[2]

            with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
                documentation = [line.rstrip() for line in documentation_file]

            mlp = MLP_nn(X_train.shape[1], output_size)
            mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

            predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        if save_model and train_now:
            timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
            # Only the year 2022 is shortened to two digits; other years keep four.
            timestamp_string = timestamp_string.replace('_2022_', '_22_')

            # Shared prefix of the three artifacts written for this landmark and fold.
            artifact_path_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)

            torch.save(lowest_validation_loss_model.state_dict(), artifact_path_prefix+'_model')
            # Use a context manager so the scaler file is flushed and closed deterministically.
            with open(artifact_path_prefix+'_scaler.pkl', 'wb') as scaler_file:
                pickle.dump(X_standard_scaler, scaler_file)
            documentation = [
                f"predicted_landmark_name: {desired_landmark_name}",
                f"number_of_training_samples: {train_indices.size}",
                f"model: {mlp}",
            ]
            with open(artifact_path_prefix+'_documentation.txt' , "w") as txt_file:
                txt_file.write("\n".join(documentation))

        # ---- Map predictions and targets through transform_to_original_space (defined
        # elsewhere), passing each row's fold index as the second argument.
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # ---- Model error: Euclidean distance between prediction and target per test subject.
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # ---- Baseline error: always guessing the mean training target.
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        # ---- Split each error using the landmark's normal stored in skin_normals_df.
        perpendicular_distances = []
        tangent_distances = []

        # The landmark's row position does not depend on the subject, so look it up once.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal is sliced by the loop position i, not by
            # current_subject (which is unused) -- confirm this matches the column
            # layout of skin_normals_df.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Zero-length normals are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        # A tangent distance of exactly 0 produces inf/nan in the ratio statistics.
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)

    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Ablation-study run of the k-fold cross-validation over the selected EEG 10-20 landmarks.
# For every fold and every landmark this cell builds the feature matrix, trains a new
# MLP (train_now=True) or loads the most recent saved one (train_now=False), predicts
# the held-out subjects and stores error statistics per (landmark, fold).
print('Ablation Study')
# Fix the torch CPU and CUDA seeds before any model is created.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# One result table per metric, shaped (number of landmarks, n_splits).
mean_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
perpendicular_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
tangent_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_std = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
# Loss records returned by training; they are only appended to when train_now is True.
validation_losses = []
test_losses = []

for fold_index in range(n_splits):
    train_indices = kfold_train_indices[fold_index]
    test_indices = kfold_test_indices[fold_index]
    print(f"Started fold {fold_index+1}/{n_splits}")

    for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
        print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

        # ---- Targets: the landmark's coordinates, reshaped to one 3-value row per subject.
        desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
        desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
        desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
        # NOTE(review): when NaNs exist this keeps every row holding at least one non-NaN
        # value, so a partially missing row would still count as valid -- confirm that
        # missing landmarks are always missing in all three coordinates.
        if np.where(np.isnan(desired_landmark_subjects_coordinates)==True)[0].size>0:
            valid_coordinates_rows = np.unique(np.where(np.isnan(desired_landmark_subjects_coordinates)==False)[0])
        else:
            valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

        # Keep only subjects that are valid here and also listed in only_valid_score_subjects_rows.
        valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
        #__________________________________________________________________________________________________________
        # The fold indices address rows of the filtered (valid_rows) arrays.
        y_array = desired_landmark_subjects_coordinates[valid_rows, :]
        y_train = y_array[train_indices, :]
        y_test = y_array[test_indices, :]
        output_size = y_test.shape[1]

        # ---- Coordinate features: the MRI facial landmarks' coordinates, flattened per subject.
        coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)

        coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
        coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

        coordinates_X_array = np.zeros((int(coordinates_features_data_subjects_coordinates.shape[0]/output_size), output_size*len(coordinates_features_to_use)))

        # Each subject owns output_size consecutive rows of the transposed frame; flatten
        # them landmark by landmark into a single feature row.
        for i in range(coordinates_X_array.shape[0]):
            coordinates_X_array[i, :] = np.array(coordinates_features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
        coordinates_X_array = coordinates_X_array[valid_rows, :]

        # ---- Geodesic features: three values per column of skin_geodesic_distances_df.
        geodesic_features_data = skin_geodesic_distances_df
        geodesic_X_array = np.zeros((geodesic_features_data.shape[1], 3))

        for i in range(geodesic_features_data.shape[1]):
            # Features 0 and 1 copy rows 0 and 1; falsy entries are stored as 0.
            if geodesic_features_data.iloc[0, i]:
                geodesic_X_array[i, 0] = geodesic_features_data.iloc[0, i]
            else:
                geodesic_X_array[i, 0] = 0

            if geodesic_features_data.iloc[1, i]:
                geodesic_X_array[i, 1] = geodesic_features_data.iloc[1, i]
            else:
                geodesic_X_array[i, 1] = 0

            # Feature 2 is the sum of rows 2 onward, or 0 as soon as any of them is falsy.
            if np.all(np.array(geodesic_features_data.iloc[2:, i])):
                geodesic_X_array[i, 2] = np.sum(np.array(geodesic_features_data.iloc[2:, i]))
            else:
                geodesic_X_array[i, 2] = 0

        geodesic_X_array = geodesic_X_array[valid_rows, :]

        # NOTE(review): X_perturbations is added before imputation, so an entry that was 0
        # (the imputer's missing-value marker) is only imputed if its perturbation is 0.
        X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
        X_test = np.concatenate((coordinates_X_array[test_indices, :], geodesic_X_array[test_indices, :]), axis=1) + X_perturbations[test_indices, :]

        # Zeros mark missing features. The neighbour count is a tenth of the training set,
        # clamped to at least 1 because KNNImputer rejects n_neighbors=0 (fewer than 10 rows).
        X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, int(X_train.shape[0]/10)), weights="uniform")
        X_train = X_imputer.fit_transform(X_train)
        X_test = X_imputer.transform(X_test)

        # Imputer and scaler are fitted on the training rows only and then applied to the test rows.
        X_standard_scaler = StandardScaler()
        X_train = X_standard_scaler.fit_transform(X_train)
        X_test = X_standard_scaler.transform(X_test)

        if train_now:
            mlp = MLP_nn(X_train.shape[1], output_size)

            # Train every parameter of the freshly created network.
            for param in mlp.parameters():
                param.requires_grad = True

            predictions, validation_loss, test_loss, lowest_validation_loss_model, lowest_validation_loss_epoch = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='train', n_jobs_num=1)
            validation_losses.append(validation_loss)
            test_losses.append(test_loss)
        else:
            # Load the most recent artifacts saved for this landmark and fold.
            model_to_load_folder = "ADNI/"
            most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

            model_filename = most_recent_trained_model_filenames[0]
            # NOTE(review): the saved scaler file is located but never loaded; the scaler
            # fitted above on the current training rows is what X_test went through.
            scaler_filename = most_recent_trained_model_filenames[1]
            documentation_filename = most_recent_trained_model_filenames[2]

            with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
                documentation = [line.rstrip() for line in documentation_file]

            mlp = MLP_nn(X_train.shape[1], output_size)
            mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

            predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

        if save_model and train_now:
            timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
            # Only the year 2022 is shortened to two digits; other years keep four.
            timestamp_string = timestamp_string.replace('_2022_', '_22_')

            # Shared prefix of the three artifacts written for this landmark and fold.
            artifact_path_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name+'_'+str(fold_index)

            torch.save(lowest_validation_loss_model.state_dict(), artifact_path_prefix+'_model')
            # Use a context manager so the scaler file is flushed and closed deterministically.
            with open(artifact_path_prefix+'_scaler.pkl', 'wb') as scaler_file:
                pickle.dump(X_standard_scaler, scaler_file)
            documentation = [
                f"predicted_landmark_name: {desired_landmark_name}",
                f"number_of_training_samples: {train_indices.size}",
                f"model: {mlp}",
            ]
            with open(artifact_path_prefix+'_documentation.txt' , "w") as txt_file:
                txt_file.write("\n".join(documentation))

        # ---- Map predictions and targets through transform_to_original_space (defined
        # elsewhere), passing each row's fold index as the second argument.
        predictions_original_space = np.zeros((predictions.shape))
        for i in range(predictions_original_space.shape[0]):
            current_prediction = predictions[i, :].to("cpu")
            predictions_original_space[i, :] = transform_to_original_space(current_prediction, test_indices[i]).squeeze()

        y_train_original_space = np.zeros((y_train.shape))
        for i in range(y_train_original_space.shape[0]):
            current_y = y_train[i, :]
            y_train_original_space[i, :] = transform_to_original_space(current_y, train_indices[i]).squeeze()

        y_test_original_space = np.zeros((y_test.shape))
        for i in range(y_test_original_space.shape[0]):
            current_y = y_test[i, :]
            y_test_original_space[i, :] = transform_to_original_space(current_y, test_indices[i]).squeeze()

        # ---- Model error: Euclidean distance between prediction and target per test subject.
        prediction_errors = np.linalg.norm(predictions_original_space-y_test_original_space, axis=1)
        mean_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors)
        mean_squared_errors[desired_landmark_index, fold_index] = np.mean(prediction_errors**2)
        print(f"RMSE: {1000*np.sqrt(mean_squared_errors[desired_landmark_index, fold_index])}")

        std_errors[desired_landmark_index, fold_index] = np.std(prediction_errors)

        # ---- Baseline error: always guessing the mean training target.
        prediction_errors_mean = np.linalg.norm(np.mean(y_train_original_space, axis=0)-y_test_original_space, axis=1)
        mean_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean)
        mean_squared_errors_mean[desired_landmark_index, fold_index] = np.mean(prediction_errors_mean**2)
        print(f"Mean guess RMSE: {1000*np.sqrt(mean_squared_errors_mean[desired_landmark_index, fold_index])}")

        std_errors_mean[desired_landmark_index, fold_index] = np.std(prediction_errors_mean)

        # ---- Split each error using the landmark's normal stored in skin_normals_df.
        perpendicular_distances = []
        tangent_distances = []

        # The landmark's row position does not depend on the subject, so look it up once.
        current_landmark_index = list(skin_normals_df.index).index(desired_landmark_name)

        for i, current_subject in enumerate(test_indices):
            current_plane_point = y_test_original_space[i, :]
            current_other_point = predictions_original_space[i, :]

            # NOTE(review): the normal is sliced by the loop position i, not by
            # current_subject (which is unused) -- confirm this matches the column
            # layout of skin_normals_df.
            current_plane_normal = skin_normals_df.iloc[current_landmark_index, 3*i:3*(i+1)].values

            # Zero-length normals are skipped.
            if np.linalg.norm(current_plane_normal)>0:
                current_perpendicular_distance, current_tangent_distance = project_distances(current_plane_normal, current_plane_point, current_other_point)
                perpendicular_distances.append(current_perpendicular_distance)
                tangent_distances.append(current_tangent_distance)

        perpendicular_distances = np.array(perpendicular_distances)
        tangent_distances = np.array(tangent_distances)

        perpendicular_distances_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_distances)
        tangent_distances_mean[desired_landmark_index, fold_index] = np.mean(tangent_distances)
        # A tangent distance of exactly 0 produces inf/nan in the ratio statistics.
        perpendicular_ratios = np.abs(perpendicular_distances/tangent_distances)
        distances_ratios_mean[desired_landmark_index, fold_index] = np.mean(perpendicular_ratios)
        distances_ratios_std[desired_landmark_index, fold_index] = np.std(perpendicular_ratios)

    print(f"Fold index {fold_index+1} mean RMSE across landmarks: {np.mean(1000*np.sqrt(mean_squared_errors[:, fold_index]))}")

In [None]:
# Average the recorded test losses over folds, separately for every landmark.
# The cross-validation cell appends to test_losses with the fold loop outside and the
# landmark loop inside, so landmark i of fold j sits at index i + j*number_of_landmarks.
number_of_landmarks = len(selected_EEG_10_20_landmark_names)
number_of_recorded_folds = len(test_losses)//number_of_landmarks if number_of_landmarks else 0

# test_losses is only filled when the cross-validation cell ran with train_now=True.
# Fail with an explicit message instead of an opaque np.vstack error on empty input.
if number_of_recorded_folds == 0:
    raise ValueError(
        f"test_losses holds {len(test_losses)} entries, fewer than one full fold of "
        f"{number_of_landmarks} landmarks; run the cross-validation cell with train_now=True first"
    )
# Entries of an incomplete trailing fold used to be dropped silently; refuse them instead.
if len(test_losses) != number_of_recorded_folds*number_of_landmarks:
    raise ValueError(
        f"test_losses holds {len(test_losses)} entries, which is not a multiple of the "
        f"{number_of_landmarks} landmarks; the last fold looks incomplete"
    )

landmarks_mean_test_losses = []
for i in range(number_of_landmarks):
    # One row per fold holding this landmark's recorded test loss entry.
    i_landmark_test_losses = np.vstack([
        np.array(test_losses[i+j*number_of_landmarks]).reshape(1, -1)
        for j in range(number_of_recorded_folds)
    ])
    # Mean over folds (axis 0), keeping the per-entry layout of each loss record.
    mean_values = np.mean(i_landmark_test_losses, axis=0)
    landmarks_mean_test_losses.append(mean_values)
# Final table: one row per landmark.
landmarks_mean_test_losses = np.vstack(landmarks_mean_test_losses)

In [None]:
# Plot the fold-averaged test losses: first the average over all landmarks,
# then one line per landmark. The x axis is the column index of the loss table.
fig = go.Figure()

loss_column_indices = np.arange(landmarks_mean_test_losses.shape[1])
mean_over_landmarks_trace = go.Scatter(
    x=loss_column_indices,
    y=np.mean(landmarks_mean_test_losses, axis=0),
    mode='lines',
    name='Mean'
)
fig.add_trace(mean_over_landmarks_trace)

for i, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    # Row i of the table belongs to the i-th selected landmark.
    landmark_trace = go.Scatter(
        x=np.arange(landmarks_mean_test_losses.shape[1]),
        y=landmarks_mean_test_losses[i, :],
        mode='lines',
        name=desired_landmark_name
    )
    fig.add_trace(landmark_trace)

fig.show()

In [None]:
# Write the fold-averaged test-loss table to a timestamped .npy file.
# Only the year 2022 is shortened to two digits in the timestamp; other years keep four.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'same_dataset_CG'

array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Test_losses_arrays/"+MLP_folder
# File name: dataset, name of the trained-models folder, experiment type, an optional
# "_ablation_study" tag, and the timestamp.
trained_folder_name = trained_folder_path.split('/')[-2]
ablation_tag = "_ablation_study" if ablation_study else ""
array_filename = f"{current_dataset_name}_{trained_folder_name}_{experiment_type}{ablation_tag}_{timestamp_string}"
array_filetype = '.npy'

array_path = array_folder + array_filename + array_filetype

array_to_save = landmarks_mean_test_losses

# Manual toggle: True writes the array, False reads it back from array_path instead.
write_array_to_disk = True
if write_array_to_disk:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

In [None]:
# RMSE (scaled by 1000) per landmark and fold, averaged over the whole table.
rmse_per_landmark_and_fold = 1000*np.sqrt(mean_squared_errors)
print(f"Mean RMSE: {np.mean(rmse_per_landmark_and_fold)}")

In [None]:
# RMSE (scaled by 1000) averaged along axis 1, the fold axis: one value per landmark.
rmse_per_landmark_and_fold = 1000*np.sqrt(mean_squared_errors)
print(f"Mean RMSE over folds: {np.mean(rmse_per_landmark_and_fold, axis=1)}")

In [None]:
# Same summary as the plain "Mean RMSE" cell, labelled for the ablation-study run.
rmse_per_landmark_and_fold = 1000*np.sqrt(mean_squared_errors)
print(f"Ablation Study Mean RMSE: {np.mean(rmse_per_landmark_and_fold)}")

In [None]:
# Ablation-study RMSE (scaled by 1000) averaged along axis 1, the fold axis: one value per landmark.
rmse_per_landmark_and_fold = 1000*np.sqrt(mean_squared_errors)
print(f"Ablation Study Mean RMSE over folds: {np.mean(rmse_per_landmark_and_fold, axis=1)}")

In [None]:
# Store the (landmark, fold) MSE table of the latest run at index 3 of the results array.
# NOTE(review): presumably index 3 identifies this experiment variant -- confirm where the array is allocated.
ADNI_Euclidean_Geodesic_MSE[3, :] = mean_squared_errors

In [None]:
# Store the (landmark, fold) standard deviations of the prediction errors at index 3 of the results array.
ADNI_Euclidean_Geodesic_std[3, :] = std_errors

In [None]:
# Store the latest run's projected-distance statistics at index 3 of each results array:
# mean perpendicular distance, mean tangent distance, and the mean and standard
# deviation of the |perpendicular/tangent| ratio, all per (landmark, fold).
ADNI_Euclidean_Geodesic_perpendicular_distances[3, :] = perpendicular_distances_mean
ADNI_Euclidean_Geodesic_tangent_distances[3, :] = tangent_distances_mean
ADNI_Euclidean_Geodesic_distances_ratios[3, :] = distances_ratios_mean
ADNI_Euclidean_Geodesic_distances_ratios_std[3, :] = distances_ratios_std

In [None]:
# Write two result tables of this experiment (index 3) to timestamped .npy files:
# the MSE table and the |perpendicular/tangent| ratio table. Both share one file name
# and differ only in the destination folder.
# Only the year 2022 is shortened to two digits in the timestamp; other years keep four.
timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S").replace('_2022_', '_22_')

experiment_type = 'same_dataset_CG'

array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Prediction_arrays/"+MLP_folder
# File name: dataset, name of the trained-models folder, experiment type, an optional
# "_ablation_study" tag, and the timestamp.
trained_folder_name = trained_folder_path.split('/')[-2]
ablation_tag = "_ablation_study" if ablation_study else ""
array_filename = f"{current_dataset_name}_{trained_folder_name}_{experiment_type}{ablation_tag}_{timestamp_string}"
array_filetype = '.npy'

array_path = array_folder + array_filename + array_filetype

array_to_save = ADNI_Euclidean_Geodesic_MSE[3, :]

# Manual toggle: True writes the arrays, False reads them back from disk instead.
write_arrays_to_disk = True
if write_arrays_to_disk:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

# NOTE(review): unlike the folder above, this one does not append MLP_folder -- confirm that is intended.
array_folder = media_folder+"/Cranium_estimation_paper/Figures/Predictions/Projected_prediction_ratio_arrays/"
array_path = array_folder + array_filename + array_filetype

array_to_save = ADNI_Euclidean_Geodesic_distances_ratios[3, :]

if write_arrays_to_disk:
    with open(array_path, 'wb') as file:
        np.save(file, array_to_save)
else:
    with open(array_path, 'rb') as file:
        array_to_save = np.load(file)

#### Learn from scratch - all

In [None]:
# Configuration for the train-on-all-subjects experiment.
# Trained artifacts for this experiment are written to the "ADNI_ALL/" sub-folder.
experiment_model_path = f"{trained_folder_path}ADNI_ALL/"
# True: the next cell trains new models rather than loading saved ones.
train_now = True

In [None]:
# Train one MLP per selected EEG 10-20 landmark on ALL valid subjects (no held-out fold)
# and, when save_model is set, write the model weights, the fitted scaler and a short
# documentation file for each landmark.

# Fix the torch CPU and CUDA seeds before any model is created.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# NOTE(review): these tables are re-created as zeros but never written in this cell;
# running it overwrites the cross-validation results held under the same names.
mean_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
mean_squared_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
std_errors_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
perpendicular_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
tangent_distances_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_mean = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
distances_ratios_std = np.zeros((len(selected_EEG_10_20_landmark_names), n_splits))
validation_losses = []
# test_losses is deliberately left untouched so the cross-validation records survive.
#test_losses = []

for desired_landmark_index, desired_landmark_name in enumerate(selected_EEG_10_20_landmark_names):
    print(f"Started {desired_landmark_name}, {desired_landmark_index+1}/{len(selected_EEG_10_20_landmark_names)}")

    # ---- Targets: the landmark's coordinates, reshaped to one 3-value row per subject.
    desired_landmark_data = skin_coordinates_df.loc[desired_landmark_name, :]
    desired_landmark_subjects_coordinates = desired_landmark_data.iloc[np.array(only_coordinates_columns_indices)]
    desired_landmark_subjects_coordinates = np.array(desired_landmark_subjects_coordinates).reshape(-1, 3)
    # NOTE(review): when NaNs exist this keeps every row holding at least one non-NaN
    # value, so a partially missing row would still count as valid -- confirm that
    # missing landmarks are always missing in all three coordinates.
    if np.where(np.isnan(desired_landmark_subjects_coordinates)==True)[0].size>0:
        valid_coordinates_rows = np.unique(np.where(np.isnan(desired_landmark_subjects_coordinates)==False)[0])
    else:
        valid_coordinates_rows = np.arange(desired_landmark_subjects_coordinates.shape[0])

    # Keep only subjects that are valid here and also listed in only_valid_score_subjects_rows.
    valid_rows = np.intersect1d(valid_coordinates_rows, only_valid_score_subjects_rows)
    #__________________________________________________________________________________________________________
    y_array = desired_landmark_subjects_coordinates[valid_rows, :]
    # Every valid subject is a training sample.
    train_indices = np.arange(y_array.shape[0])
    y_train = y_array[train_indices, :]
    # Placeholder test target: there is no held-out data in this experiment.
    y_test = np.array([-1, -1, -1]).reshape(1, 3)
    output_size = y_test.shape[1]

    # ---- Coordinate features: the MRI facial landmarks' coordinates, flattened per subject.
    coordinates_features_to_use = np.array(MRI_facial_landmarks).astype(str)

    coordinates_features_data = skin_coordinates_df.loc[coordinates_features_to_use, :]
    coordinates_features_data_subjects_coordinates = coordinates_features_data.iloc[:, np.array(only_coordinates_columns_indices)].T

    coordinates_X_array = np.zeros((int(coordinates_features_data_subjects_coordinates.shape[0]/output_size), output_size*len(coordinates_features_to_use)))

    # Each subject owns output_size consecutive rows of the transposed frame; flatten
    # them landmark by landmark into a single feature row.
    for i in range(coordinates_X_array.shape[0]):
        coordinates_X_array[i, :] = np.array(coordinates_features_data_subjects_coordinates.iloc[output_size*i:output_size*(i+1), :].T).reshape(-1, 1).T
    coordinates_X_array = coordinates_X_array[valid_rows, :]

    # ---- Geodesic features: three values per column of skin_geodesic_distances_df.
    geodesic_features_data = skin_geodesic_distances_df
    geodesic_X_array = np.zeros((geodesic_features_data.shape[1], 3))

    for i in range(geodesic_features_data.shape[1]):
        # Features 0 and 1 copy rows 0 and 1; falsy entries are stored as 0.
        if geodesic_features_data.iloc[0, i]:
            geodesic_X_array[i, 0] = geodesic_features_data.iloc[0, i]
        else:
            geodesic_X_array[i, 0] = 0

        if geodesic_features_data.iloc[1, i]:
            geodesic_X_array[i, 1] = geodesic_features_data.iloc[1, i]
        else:
            geodesic_X_array[i, 1] = 0

        # Feature 2 is the sum of rows 2 onward, or 0 as soon as any of them is falsy.
        if np.all(np.array(geodesic_features_data.iloc[2:, i])):
            geodesic_X_array[i, 2] = np.sum(np.array(geodesic_features_data.iloc[2:, i]))
        else:
            geodesic_X_array[i, 2] = 0

    geodesic_X_array = geodesic_X_array[valid_rows, :]

    # NOTE(review): X_perturbations is added before imputation, so an entry that was 0
    # (the imputer's missing-value marker) is only imputed if its perturbation is 0.
    X_train = np.concatenate((coordinates_X_array[train_indices, :], geodesic_X_array[train_indices, :]), axis=1) + X_perturbations[train_indices, :]
    # No test features exist in this experiment.
    X_test = []

    # Zeros mark missing features. The neighbour count is a tenth of the training set,
    # clamped to at least 1 because KNNImputer rejects n_neighbors=0 (fewer than 10 rows).
    X_imputer = KNNImputer(missing_values=0, n_neighbors=max(1, int(X_train.shape[0]/10)), weights="uniform")
    X_train = X_imputer.fit_transform(X_train)
    #X_test = X_imputer.transform(X_test)

    X_standard_scaler = StandardScaler()
    X_train = X_standard_scaler.fit_transform(X_train)
    #X_test = X_standard_scaler.transform(X_test)

    if train_now:
        mlp = MLP_nn(X_train.shape[1], output_size)

        # Train every parameter of the freshly created network.
        for param in mlp.parameters():
            param.requires_grad = True

        # Here the helper's return value is used as a single trained model (its
        # state_dict is saved below), unlike the 5-tuple unpacked in the cv cells.
        trained_model = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='train', n_jobs_num=1, num_of_epochs=num_of_epochs)
    else:
        # NOTE(review): this branch looks carried over from the cross-validation cell:
        # it reads from "ADNI/", relies on a fold_index left over from an earlier cell,
        # and evaluates on the empty X_test placeholder -- verify before setting train_now=False.
        model_to_load_folder = "ADNI/"
        most_recent_trained_model_filenames = find_most_recent_trained_files(trained_folder_path+model_to_load_folder, desired_landmark_name, fold_index)

        model_filename = most_recent_trained_model_filenames[0]
        scaler_filename = most_recent_trained_model_filenames[1]
        documentation_filename = most_recent_trained_model_filenames[2]

        with open(trained_folder_path+model_to_load_folder+documentation_filename) as documentation_file:
            documentation = [line.rstrip() for line in documentation_file]

        mlp = MLP_nn(X_train.shape[1], output_size)
        mlp.load_state_dict(change_keys(torch.load(trained_folder_path+model_to_load_folder+model_filename)))

        predictions, _, test_loss, _, _ = model_choose_and_predict(mlp, X_train, y_train, X_test, y_test, mode='test_only', n_jobs_num=1)

    if save_model and train_now:
        timestamp_string = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
        # Only the year 2022 is shortened to two digits; other years keep four.
        timestamp_string = timestamp_string.replace('_2022_', '_22_')

        # Shared prefix of the three artifacts written for this landmark.
        artifact_path_prefix = experiment_model_path+timestamp_string+'_'+desired_landmark_name

        torch.save(trained_model.state_dict(), artifact_path_prefix+'_model')
        # Use a context manager so the scaler file is flushed and closed deterministically.
        with open(artifact_path_prefix+'_scaler.pkl', 'wb') as scaler_file:
            pickle.dump(X_standard_scaler, scaler_file)
        documentation = [
            f"predicted_landmark_name: {desired_landmark_name}",
            f"number_of_training_samples: {train_indices.size}",
            f"model: {mlp}",
        ]
        with open(artifact_path_prefix+'_documentation.txt' , "w") as txt_file:
            txt_file.write("\n".join(documentation))