In [None]:
import generation as gen
import imputation as imp
import prediction
import utils
import files
import os
import numpy as np
from tqdm.autonotebook import tqdm

In [None]:
# --- Experiment configuration ------------------------------------------------
# Every name defined here is read by the data, imputation and prediction cells.

alpha = 0.1          # forwarded as `alpha` to prediction.run_experiments
d = 10               # forwarded as `dim` / `d` to the generation and naming helpers
phi = 0.8
regression = 'Linear'
n_rep = 100          # forwarded as `n_rep` to the generator and as `seed` to the naming helpers
beta = np.array([1, 2, -1, 3, -0.5, -1, 0.3, 1.7, 0.4, -0.3])  # 10 entries, same as d

# Sample sizes and test-set settings; the raw settings dict is handed
# straight to gen.process_test and only its return value is kept.
train_size = 500
cal_size = 250
params_test = gen.process_test(
    {
        'iid': {'test_size': 2000},
        'fixed_nb_sample_pattern': {'nb_sample_pattern': 100},
        'fixed_nb_sample_pattern_size': {'nb_sample_pattern': 100},
    },
    d=d,
)

# Parameter bundles passed as keyword arguments to the project helpers.
params_reg = {
    'regression': regression,
    'beta': beta,
    'phi': phi,
}
params_noise = {'noise': 'Gaussian'}

prob_missing = 0.2
var_missing = np.full(d, 1)
params_missing = {
    'prob_missing': prob_missing,
    'var_missing': var_missing,
    'mechanism': 'MCAR',
}

# Imputation strategies iterated over by the experiment loop.
imputations = np.array(['iterative_ridge'])

# Option lists forwarded to prediction.run_experiments.
methods = ['QR', 'QR_TrainCal', 'CQR', 'CQR_MDA']
basemodels = ['NNet']
masks = ['Yes']
protections = ['No']  # values left disabled in the original cell: 'Pattern', 'Pattern_size'
exacts = [False, True]

cores = 1
params_basemodel = {'cores': cores}

In [None]:
# File name under which the data set for this configuration is stored.
# n_rep is what gets passed as the `seed` argument of the naming helper.
name = files.get_name_data(
    train_size, cal_size, params_test,
    dim=d,
    params_reg=params_reg,
    params_noise=params_noise,
    params_missing=params_missing,
    seed=n_rep,
)

if not os.path.isfile('data/'+name+'.xz'):
    # No stored file: generate the data, then store it under `name`.
    print('data not found')
    # NOTE(review): params_missing is rebound from the generator's return value
    # on this branch only; when the file already exists it keeps the value from
    # the configuration cell. Confirm later cells do not depend on the difference.
    X, X_missing, M, Y, params_missing = gen.generate_multiple_data(
        train_size, cal_size, params_test,
        n_rep=n_rep,
        dim=d,
        params_reg=params_reg,
        params_noise=params_noise,
        params_missing=params_missing,
    )
    data = {'X': X, 'X_missing': X_missing, 'M': M, 'Y': Y}
    files.write_file('data', name, 'xz', data)
else:
    # Stored file exists: load it instead of regenerating.
    print('data found')
    data = files.load_file('data', name, 'xz')

In [None]:
# For each imputation strategy: obtain the imputed data (load it if a stored
# file exists, otherwise compute and store it), run the experiments, and write
# one results file per method that was run.
for imputation in tqdm(imputations):

    name_imputed = files.get_name_data_imputed(
        train_size, cal_size, params_test, imputation,
        dim=d,
        params_reg=params_reg,
        params_noise=params_noise,
        params_missing=params_missing,
        seed=n_rep,
    )

    if not os.path.isfile('data/'+name_imputed+'.xz'):
        print('imputation not found')
        # 'complete' bypasses imp.impute and uses data['X'] directly.
        X_imp = data['X'] if imputation == 'complete' else imp.impute(data, imputation)
        files.write_file('data', name_imputed, 'xz', X_imp)
    else:
        print('imputation found')
        X_imp = files.load_file('data', name_imputed, 'xz')

    # Same entries as `data`, plus the imputed values under 'X_imp'.
    data_imputed = {
        'X': data['X'],
        'X_missing': data['X_missing'],
        'X_imp': X_imp,
        'M': data['M'],
        'Y': data['Y'],
    }

    # params_noise is not forwarded here (it was commented out of this call).
    results, methods_ran = prediction.run_experiments(
        data_imputed,
        alpha=alpha,
        methods=methods,
        basemodels=basemodels,
        params_basemodel=params_basemodel,
        masks=masks,
        protections=protections,
        exacts=exacts,
        imputation=imputation,
        params_reg=params_reg,
    )

    # Persist each method's results under results/<name_dir>/.
    for method in methods_ran:
        name_dir, name_method = files.get_name_results(
            method, train_size, cal_size, n_rep,
            d=d,
            imputation=imputation,
            params_reg=params_reg,
            params_noise=params_noise,
            params_missing=params_missing,
        )
        results_method = results[method]
        files.write_file('results/'+name_dir, name_method, 'xz', results_method)