In [10]:

import os
# Must be set before anything imports pytensor (pymc and pcntoolkit do so below).
# Keys in PYTENSOR_FLAGS are the bare option names: `profile`, not `config.profile`.
# With the `config.` prefix the flags are not recognised and profiling stays off.
os.environ['PYTENSOR_FLAGS'] = 'floatX=float32,profile=True,profile_memory=True'
import numpy as np

from pcntoolkit.normative_model.norm_utils import norm_init
from pcntoolkit.util.utils import simulate_data
import matplotlib.pyplot as plt
from pcntoolkit.normative import estimate
from warnings import filterwarnings
filterwarnings('ignore')
import pymc as pm
import pytensor
import cProfile
import timeit
import pickle
from pytensor.compile.profiling import ProfileStats



In [11]:


# ---------------------------- Experiment settings ----------------------------

# Seed and output location.
random_state = 29
working_dir = '/home/guus/tmp'  # Working directory in which data and results are saved.

# Layout of the simulated data set.
simulation_method = 'linear'
n_features = 1    # Number of input features of X.
n_grps = 10       # Number of batches in the data.
n_samples = 2500  # Samples per group (pass a list to give each batch its own size).

# Model to try: 'linear', 'polynomial' or 'bspline'.
model_type = 'bspline'



In [12]:


# ############################## Data Simulation ################################


# X_train, Y_train, grp_id_train, X_test, Y_test, grp_id_test, coef = \
#     simulate_data(simulation_method, n_samples, n_features, n_grps,
#                   working_dir=working_dir, plot=True, noise='heteroscedastic_nongaussian', 
#                   random_state=random_state)


In [13]:
# train_sex_mask = np.random.choice([0, 1], size=X_train.shape[0])
# Y_train[train_sex_mask == 0] += 0.2
# grp_id_train = np.stack([grp_id_train[:,0], train_sex_mask],axis=1)
# print(grp_id_train.shape)


# test_sex_mask = np.random.choice([0, 1], size=X_test.shape[0])
# Y_test[test_sex_mask == 0] += 0.2
# grp_id_test = np.stack([grp_id_test[:,0], test_sex_mask],axis=1)
# print(grp_id_test.shape)


In [14]:
# savedir = "/home/guus/Desktop/pcn_profile_data"
# with open(os.path.join(savedir, 'train_data'), 'wb') as f:
#     pickle.dump((X_train, Y_train, grp_id_train), f)
# with open(os.path.join(savedir, 'test_data'), 'wb') as f:
#     pickle.dump((X_test, Y_test, grp_id_test), f)
    

In [15]:
# Root directory for the pickled train/test data and the per-version ('v30')
# profiling artefacts read and written further down. Set the
# PCN_PROFILE_DATA_DIR environment variable to relocate it; the fallback is the
# path this notebook was originally run with.
savedir = os.environ.get("PCN_PROFILE_DATA_DIR", "/home/guus/Desktop/pcn_profile_data")


In [16]:

def load_data(data_dir=None):
    """Load the pickled train and test splits.

    Parameters
    ----------
    data_dir : str or path-like, optional
        Directory containing the ``train_data`` and ``test_data`` pickle files.
        When omitted, the notebook-level ``savedir`` is used, which is what the
        original zero-argument call did.

    Returns
    -------
    tuple
        ``(X_train, Y_train, grp_id_train, X_test, Y_test, grp_id_test)``.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while deserialising; only point
    this at files you created yourself.
    """
    if data_dir is None:
        data_dir = savedir

    def _read_split(name):
        # Each file holds one pickled 3-tuple: (X, Y, group ids).
        with open(os.path.join(data_dir, name), 'rb') as f:
            return pickle.load(f)

    X_train, Y_train, grp_id_train = _read_split('train_data')
    X_test, Y_test, grp_id_test = _read_split('test_data')
    return X_train, Y_train, grp_id_train, X_test, Y_test, grp_id_test

In [17]:
# Bring the cached train/test splits into the notebook namespace.
(X_train, Y_train, grp_id_train,
 X_test, Y_test, grp_id_test) = load_data()

In [20]:

################################# Fitting and Predicting ###############################
# model_confs = {"M4":{"random_intercept_mu":'True',"likelihood":"SHASHb"}}

# Model variants to compare. Option values are given as the strings
# 'True'/'False', in the same form as the fixed norm_init arguments below.
model_confs = {"M1":{"random_intercept_mu":'False',"likelihood":"Normal"},
               "M2":{"random_intercept_mu":'True',"likelihood":"Normal"},
               "M3":{"random_intercept_mu":'True',"likelihood":"SHASHo"},
               "M4":{"random_intercept_mu":'True',"likelihood":"SHASHb"}}

for model_name, model_conf in model_confs.items():
    print(f"Model: {model_name}")
    print("Initializing the model")
    # Options shared by every variant; model_conf supplies the two that differ
    # (random_intercept_mu and likelihood).
    nm = norm_init(X_train, Y_train,
                   alg='hbr',
                   model_type=model_type,
                   linear_mu='True',
                   random_slope_mu='False',
                   random_sigma='False',
                   linear_sigma='False',
                   linear_epsilon='False',
                   linear_delta='False',
                   **model_conf)

    # Per-model output directory for the profiling artefacts.
    os.makedirs(os.path.join(savedir, 'v30', model_name), exist_ok=True)

    print("Getting the model")
    model = nm.hbr.get_model(X_train, Y_train, grp_id_train)
    with model:
        # Seed the sampler from the experiment settings so repeated runs draw
        # the same chain; the draws themselves are discarded, since this run
        # only exercises the sampler.
        pm.sample(1000, tune=500, chains=1, cores=1, random_seed=random_state,
                  compute_convergence_checks=False, progressbar=False)

    # NOTE(review): the disabled code below uses `point`, which is not defined
    # anywhere in the visible notebook; define it before re-enabling. The
    # logp_profile.pkl / grad_profile.pkl files loaded further down are written
    # only by this disabled code, as far as the visible cells show.

    # print("Profiling logp")
    # logp_profile = model.profile(model.logp(), point=point)
    # print("Saving profile as text")
    # with open(os.path.join(savedir, 'v30', model_name, 'logp_profile.txt'), 'wt') as a:
    #     logp_profile.summary(a)
    # print("Saving profile as pickle")
    # with open(os.path.join(savedir, 'v30', model_name, 'logp_profile.pkl'), 'wb') as a:
    #     pickle.dump(logp_profile, a)

    # print("Profiling gradient")
    # grad_profile = model.profile(pm.gradient(model.logp()), point=point) 
    # print("Saving profile as text")
    # with open(os.path.join(savedir, 'v30', model_name, 'grad_profile.txt'), 'wt') as a:
    #     grad_profile.summary(a)
    # print("Saving profile as pickle")
    # with open(os.path.join(savedir, 'v30', model_name, 'grad_profile.pkl'), 'wb') as a:
    #     pickle.dump(grad_profile, a)
        
    # print("Saving visualization of logp")
    # pytensor.printing.pydotprint(model.logp(), os.path.join(savedir,'v30', model_name, 'logp.png'), var_with_name_simple=True)
    # print("Saving visualization of logp gradient")
    # pytensor.printing.pydotprint(pm.gradient(model.logp()), os.path.join(savedir,'v30', model_name, 'logp_grad.png'), var_with_name_simple=True)

Model: M1
Initializing the model
Getting the model


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [slope_mu, intercept_mu, sigma]
Sampling 1 chain for 500 tune and 156 draw iterations (500 + 156 draws total) took 267 seconds.


Model: M2
Initializing the model
Getting the model


In [14]:
def get_string_max_of(thing):
    """Return ``(str(key), value)`` for the entry of ``thing`` with the largest value.

    ``thing`` is any mapping exposing ``.items()``. When several entries share
    the largest value, the first one in iteration order is returned. An empty
    mapping raises ``ValueError`` (from ``max``).
    """
    def _value_of(entry):
        return entry[1]

    top_key, top_value = max(thing.items(), key=_value_of)
    return str(top_key), top_value

In [15]:
def extract_data(data:ProfileStats):
    """Condense a profile into a flat dict of headline numbers.

    For each of ``data.op_time()``, ``data.class_time()``, ``data.apply_time``
    and ``data.compute_total_times()``, the entry with the largest value is
    recorded as a ``max_<source>_name`` / ``max_<source>`` pair (name stringified
    by ``get_string_max_of``). ``compile_time``, ``nb_nodes`` and
    ``fct_call_time`` are copied over unchanged.
    """
    # op_time, class_time and compute_total_times are called as methods;
    # apply_time is read as a plain attribute.
    op_name, op_max = get_string_max_of(data.op_time())
    class_name, class_max = get_string_max_of(data.class_time())
    apply_name, apply_max = get_string_max_of(data.apply_time)
    total_name, total_max = get_string_max_of(data.compute_total_times())

    return {
        'compile_time': data.compile_time,
        'max_op_time_name': op_name,
        'max_op_time': op_max,
        'max_class_time_name': class_name,
        'max_class_time': class_max,
        'max_apply_time_name': apply_name,
        'max_apply_time': apply_max,
        'max_compute_total_times_name': total_name,
        'max_compute_total_times': total_max,
        'nb_nodes': data.nb_nodes,
        'fct_call_time': data.fct_call_time,
    }

In [16]:
# Read back the pickled logp and gradient profiles of every model variant and
# keep only the summary that extract_data produces for each.
model_confs = {
    "M1": {"random_intercept_mu": 'False', "likelihood": "Normal"},
    "M2": {"random_intercept_mu": 'True', "likelihood": "Normal"},
    "M3": {"random_intercept_mu": 'True', "likelihood": "SHASHo"},
    "M4": {"random_intercept_mu": 'True', "likelihood": "SHASHb"},
}

# Keyed by (version label, model name, profile kind).
data_dict = {}

for model_name, model_conf in model_confs.items():
    profile_dir = os.path.join(savedir, 'v30', model_name)

    # Both files are opened before anything is printed, so a missing profile
    # fails before the "Model: ..." line for that variant appears.
    with open(os.path.join(profile_dir, 'logp_profile.pkl'), 'rb') as a:
        logp_profile = pickle.load(a)
    with open(os.path.join(profile_dir, 'grad_profile.pkl'), 'rb') as a:
        grad_profile = pickle.load(a)

    print(f"Model: {model_name}")
    for profile_kind, profile in (('logp_profile', logp_profile),
                                  ('grad_profile', grad_profile)):
        data_dict[('v0.30', model_name, profile_kind)] = extract_data(profile)
    


    
    

Model: M1
Model: M2
Model: M3
Model: M4


In [17]:
# Persist the collected profile summaries next to the per-model folders.
data_dict_path = os.path.join(savedir, 'v30', 'data_dict.pkl')
with open(data_dict_path, 'wb') as out_file:
    pickle.dump(data_dict, out_file)
    