In [1]:
import os
import pickle
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import pcntoolkit as ptk
from itertools import product
from functools import reduce
from pcntoolkit.model.SHASH import SHASHb, SHASH, SHASHo
from scipy.stats import gaussian_kde
import pymc as pm
import scipy.special as spp
import arviz as av
from scipy.stats import skew, kurtosis
import seaborn as sns
# Apply seaborn's 'darkgrid' style globally to plots made after this point.
sns.set(style='darkgrid')

In [2]:
# --- Filesystem layout -------------------------------------------------------
# NOTE: every path below is an absolute location on the original author's
# cluster; adjust them before running this notebook anywhere else.

# Input data: the data root and, beneath it, the directory holding the folds.
data_dir = '/project_cephfs/3022017.02/projects/stijdboe/Data'
folds_dir = os.path.join(data_dir, '10_folds_sexcov')

# Output root; one sub-directory per (fold, model) job is created under it.
# The trailing slash is intentional and kept as-is.
projdir = '/project_cephfs/3022017.02/projects/stijdboe/make_results/10_folds_results/'

# Python interpreter and the PCNtoolkit `normative.py` script handed to the
# job submitter.
python_path = '/home/preclineu/stijdboe/.conda/envs/docker_venv/bin/python'
normative_path = '/home/preclineu/stijdboe/.conda/envs/docker_venv/lib/python3.9/site-packages/pcntoolkit-0.28-py3.9.egg/pcntoolkit/normative.py'

In [3]:
# --- Job / sampler settings --------------------------------------------------
# Apart from `batch_size`, every value is deliberately kept as a *string*:
# they are forwarded verbatim as keyword arguments to the job submitter.

# Cluster resources requested per job.
batch_size = 1
n_cores_per_batch = '4'   # number of cores requested for the job
memory = '30gb'

# MCMC sampling.
cores = '4'               # number of cores used by the PyMC sampler
n_chains = '2'
n_samples = '1000'
n_tuning = '500'
target_accept = '0.99'

# Scaling applied to covariates (in) and responses (out).
inscaler = 'standardize'
outscaler = 'standardize'

# Model structure shared by all likelihoods.
method = 'bspline'
linear_mu = 'True'
random_intercept_mu = 'True'
linear_sigma = 'True'


In [4]:
# Submit one HBR fitting job per (model, fold) combination.
#
# For every model name and every cross-validation fold this cell
#   1. makes sure `<projdir>/fold_<i>_<model>/` and its `log/` sub-directory exist,
#   2. points at that fold's X/Y/Z training pickles, and
#   3. calls `ptk.normative_parallel.execute_nm` with the model-specific settings.
#
# Relies on names defined in earlier cells: `projdir`, `folds_dir`,
# `python_path`, `normative_path` and the job / sampler settings.

# Models to fit; trim this list to re-run only a subset.
model_names = ['SHASHo', 'SHASHb_1', 'SHASHb_2', 'Normal']

# Number of folds to iterate over (folds are expected as fold_0 ... fold_{n-1}).
n_folds = 10

# Model name -> likelihood string passed to the toolkit.
likelihood_map = {'SHASHb_1':'SHASHb','SHASHb_2':'SHASHb','SHASHo':'SHASHo','Normal':'Normal'}
# Likelihood -> value passed as `duration`
# (presumably the job's wall-time limit as HH:MM:SS -- confirm against the scheduler).
durationmap = {'Normal':'12:00:00','SHASHb':'50:00:00','SHASHo':'25:00:00'}
# Model name -> string flags forwarded as `linear_epsilon` / `linear_delta`.
# Only 'SHASHb_2' switches them on.
epsilon_linear_map = {'SHASHb_1':'False','SHASHb_2':'True','Normal':'False','SHASHo':'False'}
delta_linear_map = {'SHASHb_1':'False','SHASHb_2':'True','Normal':'False','SHASHo':'False'}

for model_name in model_names:
    likelihood = likelihood_map[model_name]
    duration = durationmap[likelihood]
    linear_epsilon = epsilon_linear_map[model_name]
    linear_delta = delta_linear_map[model_name]

    for i in range(n_folds):
        this_identifier = f"fold_{i}_{model_name}"
        job_name = this_identifier
        fold_dir = os.path.join(folds_dir, f'fold_{i}')

        # The trailing '/' on the processing dir is kept exactly as before.
        processing_dir = os.path.join(projdir, this_identifier + '/')
        log_dir = os.path.join(processing_dir, 'log')

        # `exist_ok=True` makes the cell safe to re-run and avoids the
        # check-then-create race; missing parent directories are created too.
        os.makedirs(processing_dir, exist_ok=True)
        os.makedirs(log_dir, exist_ok=True)

        # The paths to this fold's training data
        X_path = os.path.join(fold_dir, 'X_train.pkl')
        Y_path = os.path.join(fold_dir, 'Y_train.pkl')
        Z_path = os.path.join(fold_dir, 'Z_train.pkl')

        ptk.normative_parallel.execute_nm(
            # Job / cluster configuration
            processing_dir=processing_dir,
            python_path=python_path,
            normative_path=normative_path,
            job_name=job_name,
            n_cores_per_batch=n_cores_per_batch,
            cores=cores,
            memory=memory,
            duration=duration,
            batch_size=batch_size,

            # Output handling
            savemodel='True',
            outputsuffix='estimate',
            log_path=log_dir,
            binary=True,

            # Input files
            covfile_path=X_path,
            respfile_path=Y_path,
            trbefile=Z_path,

            # Algorithm and scaling
            alg='hbr',
            func='fit',
            inscaler=inscaler,
            outscaler=outscaler,
            model_type=method,

            # Model structure
            likelihood=likelihood,
            linear_mu=linear_mu,
            random_intercept_mu=random_intercept_mu,
            random_slope_mu='False',
            random_sigma='False',
            random_intercept_sigma='False',
            random_slope_sigma='False',
            linear_sigma=linear_sigma,
            linear_epsilon=linear_epsilon,
            linear_delta=linear_delta,
            target_accept=target_accept,

            # Sampler
            n_samples=n_samples,
            n_tuning=n_tuning,
            n_chains=n_chains,
            interactive=False,
        )