In [1]:
from pcntoolkit.util.hbr_utils import *
import scipy

import os
import pickle
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import pcntoolkit as ptk
from itertools import product
from functools import reduce
from pcntoolkit.model.SHASH import SHASHb, SHASH, SHASHo
from pcntoolkit.util.utils import scaler
from scipy.stats import gaussian_kde
import pymc3 as pm
import scipy.special as spp
import arviz as av
from scipy.stats import skew, kurtosis
import seaborn as sns
import time
sns.set(style='darkgrid')

In [2]:
# Location of the 10-fold split data: this directory holds the patient
# pickles and one `fold_<i>` sub-directory of training pickles per fold.
data_dir = '/project_cephfs/3022017.02/projects/stijdboe/Data'
folds_dir = os.path.join(data_dir, '10_folds_sexcov')

# Root of the per-fold results tree. Fitted models are read from below this
# directory and the MCMC z-scores are written back next to them.
projdir = '/project_cephfs/3022017.02/projects/stijdboe/make_results/10_folds_results/'

In [None]:
    
# Compute MCMC z-scores of the patient data under every fitted model
# (10 folds x 6 features x 3 likelihoods) and pickle them next to the models.

def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    NOTE(security): ``pickle.load`` can execute arbitrary code while
    deserialising. Only point this at files produced by this project.
    """
    with open(path, 'rb') as file:
        return pickle.load(file)


n_folds = 10

# Features to score. The position in this list matters: the model for the
# feature at index ``i_f`` is looked up in ``batch_{i_f + 1}`` and ``i_f`` is
# part of the output file name.
features = ['Right-Cerebellum-White-Matter',
            'EstimatedTotalIntraCranialVol',
            'Right-Lateral-Ventricle',
            'WM-hypointensities',
            'rh_S_interm_prim-Jensen_thickness',
            'Brain-Stem']

model_types = ['SHASHo', 'SHASHb', 'Normal']

# Load the patient (test) data once; it is the same for every fold.
# `folds_dir` comes from the path-configuration cell.
p_dir = folds_dir
X_p = _load_pickle(os.path.join(p_dir, 'X_patient.pkl'))
Y_p = _load_pickle(os.path.join(p_dir, 'Y_patient.pkl'))
Z_p = _load_pickle(os.path.join(p_dir, 'Z_patient.pkl'))

# These conversions do not depend on fold, feature or model.
X_p_values = X_p.to_numpy()
this_Z_test = Z_p.to_numpy()

# For each fold
for i_fold in range(n_folds):

    # Load the training data of this fold
    fold_data_dir = os.path.join(folds_dir, f'fold_{i_fold}')
    X_train = _load_pickle(os.path.join(fold_data_dir, 'X_train.pkl'))
    Y_train = _load_pickle(os.path.join(fold_data_dir, 'Y_train.pkl'))
    # Z_train is not used in this cell; the load is kept in case later cells
    # rely on the variable being defined.
    Z_train = _load_pickle(os.path.join(fold_data_dir, 'Z_train.pkl'))

    # The input scaler is fitted on this fold's training covariates and applied
    # to the patient covariates. It only depends on the fold, so it is done
    # here rather than once per feature.
    this_X_train = X_train.to_numpy()
    inscaler = scaler('standardize')
    inscaler.fit(this_X_train)
    this_X_test = inscaler.transform(X_p_values)

    # For each feature
    for i_f, feature in enumerate(features):

        # The output scaler is fitted on the training responses of this
        # feature and applied to the patient responses.
        this_Y_train = Y_train[feature].to_numpy()
        outscaler = scaler('standardize')
        outscaler.fit(this_Y_train)
        this_Y_test = outscaler.transform(Y_p[feature].to_numpy())

        batch_dir = f'batch_{i_f+1}'

        # For each model type:
        for model_type in model_types:
            fold_dir = f'fold_{i_fold}_{model_type}_sexcov'
            nm_name = f'NM_0_0_fold{i_fold}{model_type}sexcov.pkl'
            model_path = os.path.join(projdir, fold_dir, batch_dir, 'Models', nm_name)

            # Not every fold/feature/model combination has to exist on disk;
            # report the ones that are skipped instead of passing silently.
            if not os.path.exists(model_path):
                print(f"Model not found, skipping: {model_path}")
                continue

            # Load the model
            norm_hbr_model = _load_pickle(model_path)

            # Find the MCMC z-scores. The covariates are expanded with the
            # B-spline basis stored on the model; `[:, None]` turns the
            # responses into a column before they are passed on.
            X_test_transformed = bspline_transform(this_X_test, norm_hbr_model.hbr.bsp)
            print(f"Computing Z-scores for {nm_name}")
            start = time.perf_counter()
            MCMC_z_scores = np.squeeze(
                get_MCMC_zscores(np.squeeze(X_test_transformed),
                                 this_Y_test[:, None],
                                 this_Z_test,
                                 norm_hbr_model))
            end = time.perf_counter()
            print("Found MCMC_zscores in ", end-start)

            # Save the z-scores; makedirs with exist_ok avoids the
            # check-then-create race and also creates missing parents.
            mcmc_zscoresdir = os.path.join(projdir, fold_dir, batch_dir, 'MCMC_zscores')
            os.makedirs(mcmc_zscoresdir, exist_ok=True)
            zscores_path = os.path.join(
                mcmc_zscoresdir,
                f'MCMC_patient_zscores_{i_f}_{model_type}_fold{i_fold}_sexcov.pkl')
            with open(zscores_path, 'wb') as file:
                pickle.dump(MCMC_z_scores, file)



Computing Z-scores for NM_0_0_fold0SHASHosexcov.pkl
Found MCMC_zscores in  740.3534564971924
Computing Z-scores for NM_0_0_fold0SHASHbsexcov.pkl
