# Config (2 I-Nets)

### Logistic Regression

In [96]:
# Configuration for the logistic-regression (LR) experiment.
# The 'data' entries are forwarded to sklearn.datasets.make_classification further
# down in this notebook. The whole dict is also handed to the utilities_LR path
# helpers (data_path_LR / lambda_path_LR / inet_path_LR), and the printed model path
# encodes these values, so changing a value points the notebook at another directory.
config_LR = {
    'data': {
        'n_datasets': 10_000, # the number of datasets
        
        'n_samples': 5_000, # the number of samples per dataset
        
        'n_features': 20, 
        # The total number of features. 
        # These comprise n_informative informative features, n_redundant redundant features, n_repeated duplicated features and 
        # n_features-n_informative-n_redundant-n_repeated useless features drawn at random.
        
        'n_informative': 10,
        # The number of informative features. Each class is composed of a number of gaussian clusters each located around the vertices 
        # of a hypercube in a subspace of dimension n_informative. For each cluster, informative features are drawn independently 
        # from N(0, 1) and then randomly linearly combined within each cluster in order to add covariance. The clusters are then 
        # placed on the vertices of the hypercube.
        
        'n_targets': 2,
        # The number of targets (or labels) of the classification problem.
        # Passed to make_classification as `n_classes`.
    
        'n_clusters_per_class': 2,
        # The number of clusters per class.
        
        'class_sep': 1.0,
        # float, default=1.0
        # The factor multiplying the hypercube size. Larger values spread out the clusters/classes and make the classification task 
        # easier.
        
        'noise': 0.01,
        # Passed to make_classification as `flip_y` (fraction of samples whose class is assigned randomly).
        
        'shuffle': True,
        # Shuffle the samples and the features.
        
        'random_state': None,
        # Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls.
        # NOTE(review): with None, every generated dataset differs between runs.
    },    
    'lambda': {
        'data_prep': {
            'train_test_val_split': { # refer to sklearn doc
                'test_size': 0.2,
                'val_size': 0.1,
                'random_state': None,
                'shuffle': True,
                'stratify': None
            }
        },
        'model_compile': {
            
        },
        'model_fit': { # refer to keras API
            'batch_size': 32,
            'epochs': 150,
            'verbose': 0,
            'callbacks': None,
            'shuffle': True,
            'class_weight': None,
            'sample_weight': None,
            'initial_epoch': 0,
            'steps_per_epoch': None,
            'validation_steps': None,
            'validation_batch_size': None,
            'validation_freq': 1
        }
    },
    'inets': {
        'data_prep': {
            'train_test_val_split': { # refer to sklearn doc
                'test_size': 0.2,
                'val_size': 0.1,
                'random_state': None,
                'shuffle': True,
                'stratify': None
            }
        },
        'model_compile': {
            
        },
        'model_fit': { # refer to keras API
            'batch_size': 32,
            'epochs': 1000,
            'verbose': 'auto',
            'callbacks': None,
            'shuffle': True,
            'class_weight': None,
            'sample_weight': None,
            'initial_epoch': 0,
            'steps_per_epoch': None,
            'validation_steps': None,
            'validation_batch_size': None,
            'validation_freq': 1
        }
    },
    'computation':{
        'n_jobs': 200, # used as joblib Parallel(n_jobs=...) in the evaluation cells below
        'use_gpu': False,
        'gpu_numbers': '4',
        'RANDOM_SEED': 1,   
    }
}

### Decision Tree

In [45]:

# Configuration for the decision-tree (DT) I-Net.
# In this notebook it is passed to generate_base_model and to the inet_*_wrapper
# factories that rebuild the custom loss / metrics needed to load the saved model.
config_DT = {
    'function_family': {
        'maximum_depth': 3,
        'beta': 1,
        'decision_sparsity': 1,
        'fully_grown': True,    
        'dt_type': 'vanilla', # one of: 'SDT', 'vanilla'
    },
    'data': {
        'number_of_variables': 15, 
        'num_classes': 2,
        'categorical_indices': [],
        
        'use_distribution_list': True,
        'random_parameters_distribution': True, # original note: "MAKEPATH DIFFERENT FILES" (meaning unclear -- TODO confirm)
        'max_distributions_per_class': 1, # None; 0; int >= 1  
        'exclude_linearly_seperable': True,
        'data_generation_filtering': False,
        'fixed_class_probability': False,
        'balanced_data': True,
        'weighted_data_generation': False,
        'shift_distrib': False,
        
        'dt_type_train': 'vanilla', # one of: None, 'vanilla', 'SDT'
        'maximum_depth_train': 3, # None or int
        'decision_sparsity_train': 1, # None or int
        
        'function_generation_type': 'distribution',# 'make_classification_distribution', 'make_classification_distribution_trained', 'distribution', 'distribution_trained', 'make_classification', 'make_classification_trained', 'random_decision_tree', 'random_decision_tree_trained'
        'distrib_by_feature': True,
        'distribution_list': ['uniform', 'normal', 'gamma', 'beta', 'poisson'],# alternatives tried: ['uniform', 'gamma', 'poisson', 'exponential', 'weibull'], ['uniform', 'normal', 'gamma', 'exponential', 'beta', 'binomial', 'poisson']
        'distribution_list_eval': ['uniform', 'normal', 'gamma', 'beta', 'poisson'],# alternative tried: ['uniform', 'gamma', 'poisson', 'exponential', 'weibull']
        
        'objective': 'classification', # alternative: 'regression'
        
        'x_max': 1,
        'x_min': 0,
        'x_distrib': 'uniform', # one of: 'normal', 'uniform'
                
        'lambda_dataset_size': 5000, # number of samples per function
        'number_of_generated_datasets': 100,
        
        'noise_injected_level': 0, 
        'noise_injected_type': 'flip_percentage', # one of: '', 'normal', 'uniform', 'normal_range', 'uniform_range', 'flip_percentage'
        
        'data_noise': 0, # None or float
        
        'distrib_param_max': 5,
    }, 
    'lambda_net': {
        'epochs_lambda': 1000,
        'early_stopping_lambda': True, 
        'early_stopping_min_delta_lambda': 1e-3,
        'restore_best_weights': True,
        'patience_lambda': 50,
        
        'batch_lambda': 64,
        'dropout_lambda': 0,
        'lambda_network_layers': [128],
        'use_batchnorm_lambda': False,
        
        'optimizer_lambda': 'adam',
        'loss_lambda': 'binary_crossentropy', # alternative: categorical_crossentropy
        
        'number_of_lambda_weights': None,
        
        'number_initializations_lambda': 1, 
        
        'number_of_trained_lambda_nets': 100,
    },     
    
    'i_net': {
        # alternative tried: [1024, 1024, 256, 2048, 2048]
        'dense_layers': [1792, 512, 512],
        
        # alternatives tried: [0, 0, 0, 0, 0.3], [0.3, 0.3, 0.3, 0.3, 0.3]
        'dropout': [0, 0, 0.5],

        # alternatives tried: 'relu', 'swish'
        'hidden_activation': 'sigmoid',

        # alternative tried: 'rmsprop'
        'optimizer': 'adam', 
        
        'learning_rate': 0.001,
        
        'separate_weight_bias': False,
        
        'convolution_layers': None,
        'lstm_layers': None,        
        'additional_hidden': False,
        
        'loss': 'binary_crossentropy', # mse; binary_crossentropy; 'binary_accuracy'
        'metrics': ['binary_accuracy'], # original note: "soft_ or _penalized" (presumably optional name prefixes/suffixes -- TODO confirm)
        
        'epochs': 500, 
        'early_stopping': True,
        'batch_size': 256,

        'interpretation_dataset_size': 10000,
                
        'test_size': 5, # float for a fraction, int for an absolute number
        'evaluate_distribution': True,
        'force_evaluate_real_world': False,
        
        # 1=standard representation; 2=sparse representation with classification for variables; 3=softmax to select classes (n top probabilities)
        # NOTE(review): the configured value 5 is not described by the list above -- TODO document it.
        'function_representation_type': 5,
        'normalize_lambda_nets': False,

        'optimize_decision_function': True, # alternative: False
        'function_value_loss': True, # alternative: False
                      
        'data_reshape_version': None, # options: None, 0, 1, 2, 3 (3 = autoencoder dimensionality reduction)
        
        'resampling_strategy': None,# 'ADASYN', 'SMOTE', None
        'resampling_threshold': 0.25,# alternative tried: 0.2
        
        'nas': False,
        'nas_type': 'SEQUENTIAL', # options: None, 'SEQUENTIAL', 'CNN', 'LSTM', 'CNN-LSTM', 'CNN-LSTM-parallel'
        'nas_trials': 60,
        'nas_optimizer': 'greedy' # options: 'hyperband', 'bayesian', 'greedy', 'random'
    },    
    
    'evaluation': {   
        #'inet_holdout_seed_evaluation': False,
        
        'number_of_random_evaluations_per_distribution': 10,
        'random_evaluation_dataset_size_per_distribution': 10_000, 
        'optimize_sampling': True,
            
        'random_evaluation_dataset_size': 500, 
        'random_evaluation_dataset_distribution': 'uniform', 
        
        'per_network_optimization_dataset_size': 5000,

        #'sklearn_dt_benchmark': False,
        #'sdt_benchmark': False,
        
        'different_eval_data': False,
        
        'eval_data_description': {
            ######### data #########
            'eval_data_function_generation_type': 'make_classification',
            'eval_data_lambda_dataset_size': 5000, # number of samples per function
            'eval_data_noise_injected_level': 0, 
            'eval_data_noise_injected_type': 'flip_percentage', # one of: '', 'normal', 'uniform', 'normal_range', 'uniform_range', 'flip_percentage'
            ######### lambda_net #########
            'eval_data_number_of_trained_lambda_nets': 100,
            ######### i_net #########
            'eval_data_interpretation_dataset_size': 100,
        }
        
    },    
    
    'computation':{
        'load_model': False,
        'n_jobs': 15,
        'use_gpu': False,
        'gpu_numbers': '2',
        'RANDOM_SEED': 42,   # used below to seed numpy
        'verbosity': 0
    }
}


In [46]:
# Stand-alone computation settings: worker count, GPU selection, random seed
# and verbosity.
# NOTE(review): `use_gpu` is True here but False in config_DT['computation'];
# confirm which one downstream code is supposed to read.
computation_config = dict(
    n_jobs=15,
    use_gpu=True,
    gpu_numbers='2',
    RANDOM_SEED=42,
    verbosity=0,
)

# Imports

In [47]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from keras import models

from utilities.InterpretationNet import *
from utilities.LambdaNet import *
from utilities.metrics import *
from utilities.utility_functions import *
from utilities.DecisionTree_BASIC import *

import utilities_LR

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import sklearn

import matplotlib.pyplot as plt

In [48]:
# Rebuild the custom loss function and metric list of the decision-tree I-Net.
# load_inet_custom() registers these callables (by __name__) as Keras custom
# objects when it deserializes the saved model.
random_model = generate_base_model(config_DT)

# NOTE(review): the numpy seed is set only after the base model was generated,
# so it cannot influence generate_base_model -- confirm this order is intended.
np.random.seed(config_DT['computation']['RANDOM_SEED'])

random_network_parameters = random_model.get_weights()
network_parameters_structure = [network_parameter.shape for network_parameter in random_network_parameters]

# Distribution lists are only used when a per-class distribution limit is set.
# A missing 'use_distribution_list' key defaults to True (this replaces the
# former bare `except:` fallback, which produced the same values).
use_distribution_list = (
    config_DT['data']['max_distributions_per_class'] is not None
    and config_DT['data'].get('use_distribution_list', True)
)


def _decision_function_loss():
    """Build the function-value loss evaluated on the decision function."""
    return inet_decision_function_fv_loss_wrapper(
        random_model, network_parameters_structure, config_DT,
        use_distribution_list=use_distribution_list)


def _decision_function_metric(metric_name):
    """Build the function-value metric `metric_name` evaluated on the decision function."""
    return inet_decision_function_fv_metric_wrapper(
        random_model, network_parameters_structure, config_DT, metric_name,
        use_distribution_list=use_distribution_list)


metrics = []
loss_function = None

if config_DT['i_net']['function_value_loss']:
    if config_DT['i_net']['optimize_decision_function']:
        # Train on the decision-function loss; track the configured metrics on it.
        loss_function = _decision_function_loss()
        for metric in config_DT['i_net']['metrics']:
            metrics.append(_decision_function_metric(metric))
    else:
        # Train on the target-function loss; track the decision-function loss
        # plus both metric variants.
        loss_function = inet_target_function_fv_loss_wrapper(config_DT)
        metrics.append(_decision_function_loss())
        for metric in config_DT['i_net']['metrics']:
            metrics.append(inet_target_function_fv_metric_wrapper(config_DT, metric))
            metrics.append(_decision_function_metric(metric))
else:
    if config_DT['i_net']['function_representation_type'] >= 3:
        # NOTE(review): when optimize_decision_function is False, loss_function
        # stays None and metrics stays empty here (unchanged from the original).
        if config_DT['i_net']['optimize_decision_function']:
            loss_function = inet_decision_function_fv_loss_wrapper_parameters(config_DT)

            metrics.append(_decision_function_loss())
            for metric in config_DT['i_net']['metrics']:
                metrics.append(_decision_function_metric(metric))
    else:
        raise SystemExit('Coefficient Loss not implemented for configuration')

# Load Models

In [49]:
def load_LR_inet():
    """Load the saved Keras I-Net of the logistic-regression experiment.

    The base directory comes from ``utilities_LR.inet_path_LR(config_LR)``; the
    model itself is read from its ``modelKeras`` sub-directory. The base
    directory is printed once loading has succeeded.

    Returns:
        The model object returned by ``keras.models.load_model``.
    """
    inet_dir = utilities_LR.inet_path_LR(config_LR)
    loaded_inet = keras.models.load_model(inet_dir + '/modelKeras')
    print(inet_dir)
    return loaded_inet

In [50]:
#def load_DT_inet():
#    model = load_inet(loss_function=dill.dumps(loss_function), metrics=dill.dumps(metrics), config=config_DT)

In [51]:
def load_inet_custom():
    """Load the saved decision-tree I-Net together with its custom loss and metrics.

    The module-level ``loss_function`` and ``metrics`` are round-tripped through
    dill and registered under their ``__name__`` as Keras custom objects, so the
    saved model can be deserialized.

    Fix: each metric name is now mapped to its own callable. The previous code
    mapped every metric name to the whole metrics list.

    Returns:
        The model object returned by ``tf.keras.models.load_model``.
    """
    # NOTE(review): hard-coded location of one specific training run -- it has
    # to be edited by hand whenever config_DT changes.
    path = './data/saved_models/lNetSize5000_numLNets100_var15_class2_distribution_xMax1_xMin0_xDistuniform_dNoise0_randParamDist_maxDistClass1_distribParamMax5_randClassProb_exLinSepun-no-ga-be-po_depth3_beta1_decisionSpars1_vanilla_fullyGrown/128_e1000ES0.001_b64_drop0_adam_binary_crossentropy_fixedInit1-seed42/inet_dense1792-512-512_drop0-0-0.5e500b256_adam_funcRep5_reshapeNone_depth3_beta1_decisionSpars1_vanilla_reshapeNone'

    # Serialize and restore the callables with dill (kept from the original;
    # presumably mirrors how the project's own loader receives them -- TODO confirm).
    loss_function_local = dill.loads(dill.dumps(loss_function))
    metrics_local = dill.loads(dill.dumps(metrics))

    custom_object_dict = {loss_function.__name__: loss_function_local}
    for metric in metrics_local:
        custom_object_dict[metric.__name__] = metric

    return tf.keras.models.load_model(path, custom_objects=custom_object_dict)


In [52]:
# Decision-tree I-Net, loaded from the hard-coded path inside load_inet_custom().
model_DT = load_inet_custom()

In [53]:
# Logistic-regression I-Net; load_LR_inet() prints the directory it was read from.
model_LR = load_LR_inet()

data_LR/nda10000_nsa5000_nfe20_nin10_nta2_ncc2_sep1.0_noi0.01_shuTrue_ranNone/tsi0.2_vsi0.1_ranNone_shuTrue_strNone_bat32_epo150_shuTrue_claNone_samNone_ini0_steNone_vstNone_vbsNone_vfr1/tsi0.2_vsi0.1_ranNone_shuTrue_strNone_bat32_epo1000_shuTrue_claNone_samNone_ini0_steNone_vstNone_vbsNone_vfr1


In [54]:
model_DT.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input (InputLayer)             [(None, 2177)]       0           []                               
                                                                                                  
 hidden1_1792 (Dense)           (None, 1792)         3902976     ['input[0][0]']                  
                                                                                                  
 activation1_sigmoid (Activatio  (None, 1792)        0           ['hidden1_1792[0][0]']           
 n)                                                                                               
                                                                                                  
 hidden2_512 (Dense)            (None, 512)          918016      ['activation1_sigmoid[0][0]']

In [55]:
model_LR.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1000)              8363000   
                                                                 
 re_lu (ReLU)                (None, 1000)              0         
                                                                 
 dense_1 (Dense)             (None, 20)                20020     
                                                                 
Total params: 8,383,020
Trainable params: 8,383,020
Non-trainable params: 0
_________________________________________________________________


# Load Testing Data

### Logistic Regression

In [56]:
# Directory holding the generated LR datasets for the current config_LR.
directory = utilities_LR.data_path_LR(config_LR)

# Coefficient targets; used below as `y` when evaluating the LR I-Net.
# NOTE(review): allow_pickle=True can execute code embedded in the file -- only
# load files produced by this project.
y_coef_truth_test_data_LR = np.load(directory + '/coef_list_targetForInet.npy', allow_pickle=True)

In [57]:
# Directory holding the trained lambda-net artefacts for the current config_LR.
directory = utilities_LR.lambda_path_LR(config_LR)

# Flattened lambda-net weights; used below as `x` (the I-Net input).
# NOTE(review): allow_pickle=True can execute code embedded in the file -- only
# load files produced by this project.
x_lambda_weights_test_data_LR = np.load(directory + '/lambda_weights_list.npy', allow_pickle=True)

In [58]:
x_lambda_weights_test_data_LR.shape

(10000, 8362)

In [59]:
x_lambda_weights_test_data_LR.shape

(10000, 8362)

In [60]:
# Pre-allocate the LR dataset containers.
# NOTE(review): X_datasets_list_LR and y_datasets_list_LR are replaced by np.load
# in the next cell, so this allocation (several GB with the current config) only
# matters for coef_list_LR.
data_cfg_LR = config_LR['data']

X_datasets_list_LR = np.zeros([data_cfg_LR['n_datasets'], data_cfg_LR['n_samples'], data_cfg_LR['n_features']])

# Binary problems (n_targets == 2) carry one label per sample, so they take the
# flat layout. The threshold is `< 3`, consistent with the validation-data cell
# further down; the former `< 2` gave binary labels a trailing axis of size 2.
if data_cfg_LR['n_targets'] < 3:
    y_datasets_list_LR = np.zeros([data_cfg_LR['n_datasets'], data_cfg_LR['n_samples']])
    coef_list_LR = np.zeros([data_cfg_LR['n_datasets'], data_cfg_LR['n_features']])
else:
    y_datasets_list_LR = np.zeros([data_cfg_LR['n_datasets'], data_cfg_LR['n_samples'], data_cfg_LR['n_targets']])
    coef_list_LR = np.zeros([data_cfg_LR['n_datasets'], data_cfg_LR['n_features'], data_cfg_LR['n_targets']])

In [61]:
# Load the datasets the lambda nets were trained on (features X and labels y).
directory = utilities_LR.data_path_LR(config_LR)

# NOTE(review): allow_pickle=True can execute code embedded in the file -- only
# load files produced by this project.
with open(directory + '/X_datasets_list_dataForLambda.npy', "rb") as f:
    X_datasets_list_LR = np.load(f, allow_pickle=True)
with open(directory + '/y_datasets_list_dataForLambda.npy', "rb") as f:
    y_datasets_list_LR = np.load(f, allow_pickle=True)

In [62]:
X_datasets_list_LR[0]

array([[ 0.8401008 ,  0.92280326,  2.23989907, ..., -0.55136766,
        -0.60520431, -0.63277389],
       [-1.2511341 ,  0.71312782,  0.32677291, ...,  0.18646706,
        -1.57381743, -1.52085394],
       [-0.37001888, -1.40234081, -0.18360112, ...,  0.72612712,
        -0.85788755, -0.80915735],
       ...,
       [-0.12935821,  0.83558003,  0.99822303, ..., -1.62740263,
        -1.58153914, -0.72611333],
       [ 0.72202123,  0.53020562, -1.87553047, ...,  0.81279277,
        -0.14161651,  0.50097359],
       [ 0.24799997,  0.28745629,  0.86334372, ...,  0.86426244,
         0.61354275, -3.11922997]])

In [63]:
y_datasets_list_LR[0]

array([0., 0., 0., ..., 1., 0., 1.])

### Decision Trees

In [64]:
# globals().update(generate_paths(config_DT, path_type='lambda_net'))

# directory = './data/saved_function_lists/functions_' + path_identifier_function_data + '.csv'

#directory = './data/saved_function_lists/functions_lNetSize5000_numDatasets100_var15_class2_distribution_xMax1_xMin0_xDistuniform_dNoise0_randParamDist_maxDistClass1_distribParamMax5_randClassProb_exLinSepun-no-ga-be-po_depth3_beta1_decisionSpars1_vanilla_fullyGrown.csv'
#
#function_df = pd.read_csv(directory)

In [65]:
# function_df.describe()

In [66]:
# y_test_data_DT = 

# Evaluate Inet for LR

In [67]:
def precision(tp, fp, tn, fn):
    """Precision ``tp / (tp + fp)`` from confusion-matrix counts.

    Args:
        tp: Number of true positives.
        fp: Number of false positives.
        tn: Number of true negatives (unused; kept for a uniform signature).
        fn: Number of false negatives (unused; kept for a uniform signature).

    Returns:
        The precision, or ``0.0`` when nothing was predicted positive
        (``tp + fp == 0``), instead of dividing by zero.
    """
    predicted_positive = tp + fp
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive

In [68]:
def recall(tp, fp, tn, fn):
    """Recall ``tp / (tp + fn)`` from confusion-matrix counts.

    Args:
        tp: Number of true positives.
        fp: Number of false positives (unused; kept for a uniform signature).
        tn: Number of true negatives (unused; kept for a uniform signature).
        fn: Number of false negatives.

    Returns:
        The recall, or ``0.0`` when there are no actual positives
        (``tp + fn == 0``), instead of dividing by zero.
    """
    actual_positive = tp + fn
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive

In [69]:
def f1(tp, fp, tn, fn):
    """F1 score (harmonic mean of precision and recall) from confusion-matrix counts.

    Computed in the closed form ``2*tp / (2*tp + fp + fn)``, which equals
    ``2 * pre * rec / (pre + rec)`` wherever the latter is defined.

    Fix: the previous version evaluated the formula but never returned it, so
    every call yielded ``None``.

    Args:
        tp: Number of true positives.
        fp: Number of false positives.
        tn: Number of true negatives (unused; kept for a uniform signature).
        fn: Number of false negatives.

    Returns:
        The F1 score, or ``0.0`` when ``tp``, ``fp`` and ``fn`` are all zero.
    """
    denominator = 2 * tp + fp + fn
    if denominator == 0:
        return 0.0
    return 2 * tp / denominator

In [70]:
# Evaluate the LR I-Net on all lambda-net weight vectors against the stored
# coefficient targets. With return_dict=False the result is a plain list.
# NOTE(review): per the Keras docs, `workers` / `use_multiprocessing` apply to
# generator / Sequence inputs -- confirm they have any effect for array inputs.
score = model_LR.evaluate(x=x_lambda_weights_test_data_LR,
    y=y_coef_truth_test_data_LR,
    batch_size=32,
    verbose=1,
    sample_weight=None,
    steps=None,
    callbacks=None,
    max_queue_size=10,
    workers=10,
    use_multiprocessing=True,
    return_dict=False
                         )



In [71]:
x_lambda_weights_test_data_LR.shape

(10000, 8362)

In [72]:
x_lambda_weights_test_data_LR[0]

array([0.8486591 , 0.885742  , 1.5397259 , ..., 0.36521238, 0.04474328,
       0.03070703], dtype=float32)

In [73]:
score

[0.07568378001451492, 0.0595974437892437, 0.2441258579492569]

In [74]:
def get_LR(X, y):
    """Fit a scikit-learn ``LogisticRegression`` on ``(X, y)`` and return it.

    Every hyper-parameter is spelled out explicitly, so the fitted baseline does
    not depend on whichever defaults the installed scikit-learn ships with.

    Args:
        X: Feature matrix handed to ``LogisticRegression.fit``.
        y: Label vector handed to ``LogisticRegression.fit``.

    Returns:
        The fitted ``LogisticRegression`` instance.
    """
    lr_params = dict(
        penalty='l2',
        dual=False,
        tol=0.0001,
        C=1.0,
        fit_intercept=True,
        intercept_scaling=1,
        class_weight=None,
        random_state=None,
        solver='lbfgs',
        max_iter=100,
        multi_class='auto',
        verbose=0,
        warm_start=False,
        n_jobs=None,
        l1_ratio=None,
    )
    classifier = LogisticRegression(**lr_params)
    classifier.fit(X, y)
    return classifier

In [89]:


def confusionMatrixAggregated_SingleSample(i):
    """Confusion-matrix counts comparing the I-Net-predicted LR model with a fitted LR model.

    For dataset ``i`` the I-Net predicts LR coefficients from the lambda-net
    weights. Those coefficients are placed into a ``LogisticRegression`` shell
    (intercept fixed to 0). A reference model is fitted on the dataset with
    ``get_LR``. Both models then classify the dataset; the reference model's
    predictions act as the "truth" side of the confusion matrix.

    Changes against the previous version:
      * the input row is reshaped with ``reshape(1, -1)`` instead of a
        hard-coded width of 8362;
      * ``labels`` is passed to ``confusion_matrix`` so the result is always
        2x2 -- without it the 4-way unpack fails when only one class occurs;
      * unused computations (MSE and both accuracy scores) were removed.

    Args:
        i: Index of the dataset / lambda net to evaluate.

    Returns:
        tuple: ``(tn, fp, fn, tp)``.
    """
    x_lambda_weights = x_lambda_weights_test_data_LR[i, :].reshape(1, -1)
    y_coef_pred = model_LR.predict(x=x_lambda_weights,
        batch_size=None,
        verbose=0,
        steps=None,
        callbacks=None,
        max_queue_size=10,
        workers=1,
        use_multiprocessing=False,
                    )

    model_groundTruth = get_LR(X_datasets_list_LR[i], y_datasets_list_LR[i])

    # Build a predictor from the I-Net output without fitting it.
    # NOTE(review): the intercept is forced to 0 -- presumably the I-Net target
    # carries no intercept; confirm.
    model_pred = LogisticRegression()
    model_pred.coef_ = y_coef_pred
    model_pred.intercept_ = 0
    model_pred.classes_ = model_groundTruth.classes_

    y_truth_set = model_groundTruth.predict(X_datasets_list_LR[i])
    y_pred_set = model_pred.predict(X_datasets_list_LR[i])

    # classes_ is sorted ascending (negative class first), so ravel() yields
    # tn, fp, fn, tp.
    tn, fp, fn, tp = confusion_matrix(y_truth_set, y_pred_set, labels=model_groundTruth.classes_).ravel()

    return tn, fp, fn, tp


In [90]:
# Compute the confusion-matrix counts for every dataset in parallel worker
# processes (joblib, 'loky' backend). The result is a list of (tn, fp, fn, tp) tuples.
# NOTE(review): `Parallel` / `delayed` are not imported explicitly in this
# notebook -- presumably they arrive through one of the `from utilities... import *`
# lines; confirm.
parallel = Parallel(n_jobs=config_LR['computation']['n_jobs'], verbose=10, backend='loky') #loky
# Single-process variant for debugging:
#parallel = Parallel(n_jobs=1, verbose=10, backend='loky') #loky

confusion_Matrix_Array = parallel(delayed(confusionMatrixAggregated_SingleSample)(i) for i in range(x_lambda_weights_test_data_LR.shape[0]))
                                  
# NOTE(review): this prints one tuple per dataset; consider printing a slice only.
print(confusion_Matrix_Array)
    
# Disabled: plot of the aggregated confusion matrix (needs the *_sum counters).
#disp = ConfusionMatrixDisplay(np.array([[tn_sum, fp_sum],[fn_sum, tp_sum]]))
#disp.plot()
#plt.show()

[Parallel(n_jobs=100)]: Using backend LokyBackend with 100 concurrent workers.
[Parallel(n_jobs=100)]: Done  21 tasks      | elapsed:   37.5s
[Parallel(n_jobs=100)]: Done  42 tasks      | elapsed:   57.3s
[Parallel(n_jobs=100)]: Done  65 tasks      | elapsed:  1.3min
[Parallel(n_jobs=100)]: Done  88 tasks      | elapsed:  1.6min
[Parallel(n_jobs=100)]: Done 113 tasks      | elapsed:  1.9min
[Parallel(n_jobs=100)]: Done 138 tasks      | elapsed:  2.3min
[Parallel(n_jobs=100)]: Done 165 tasks      | elapsed:  2.7min
[Parallel(n_jobs=100)]: Done 192 tasks      | elapsed:  3.0min
[Parallel(n_jobs=100)]: Done 221 tasks      | elapsed:  3.4min
[Parallel(n_jobs=100)]: Done 250 tasks      | elapsed:  3.9min
[Parallel(n_jobs=100)]: Done 281 tasks      | elapsed:  4.3min
[Parallel(n_jobs=100)]: Done 312 tasks      | elapsed:  4.8min
[Parallel(n_jobs=100)]: Done 345 tasks      | elapsed:  5.3min
[Parallel(n_jobs=100)]: Done 378 tasks      | elapsed:  5.8min
[Parallel(n_jobs=100)]: Done 413 tasks 

[(996, 1471, 939, 1594), (922, 1581, 634, 1863), (1458, 1108, 792, 1642), (1540, 988, 742, 1730), (1165, 1334, 913, 1588), (2034, 376, 1104, 1486), (854, 1639, 1289, 1218), (1731, 904, 1118, 1247), (2110, 430, 211, 2249), (988, 1469, 1330, 1213), (1957, 392, 855, 1796), (1097, 1434, 810, 1659), (883, 1727, 1972, 418), (2277, 226, 349, 2148), (765, 1720, 1251, 1264), (1533, 1040, 1321, 1106), (1445, 1081, 1522, 952), (1659, 835, 528, 1978), (2228, 229, 896, 1647), (2244, 208, 471, 2077), (2161, 331, 485, 2023), (723, 1724, 1843, 710), (1280, 1268, 930, 1522), (2023, 561, 272, 2144), (1910, 558, 850, 1682), (2501, 23, 350, 2126), (1515, 906, 667, 1912), (1622, 793, 1170, 1415), (1938, 571, 254, 2237), (2365, 213, 1062, 1360), (2139, 409, 826, 1626), (1241, 1156, 871, 1732), (911, 1554, 633, 1902), (227, 2195, 1275, 1303), (1511, 980, 1292, 1217), (1849, 772, 674, 1705), (1906, 750, 890, 1454), (1440, 1088, 301, 2171), (2422, 154, 906, 1518), (1330, 1151, 600, 1919), (1603, 928, 1113, 135

In [None]:
# joblib returns a plain list of (tn, fp, fn, tp) tuples, which has no `.shape`;
# view it as an array to report (number of datasets, 4).
print(np.asarray(confusion_Matrix_Array).shape)

# Evaluate on arbitrary Data

In [91]:
# Pre-allocate the containers for the freshly generated validation datasets.
data_cfg_valid = config_LR['data']
valid_base_shape = [data_cfg_valid['n_datasets'], data_cfg_valid['n_samples']]
valid_coef_shape = [data_cfg_valid['n_datasets'], data_cfg_valid['n_features']]

X_datasets_list_valid = np.zeros(valid_base_shape + [data_cfg_valid['n_features']])

# Up to two targets: flat layout (one label per sample, one coefficient per
# feature). Otherwise a trailing axis of length n_targets is appended.
valid_target_axis = [] if data_cfg_valid['n_targets'] < 3 else [data_cfg_valid['n_targets']]
y_datasets_list_valid = np.zeros(valid_base_shape + valid_target_axis)
coef_list_valid = np.zeros(valid_coef_shape + valid_target_axis)

In [92]:
# Generate one synthetic classification dataset per slot and fit a default
# LogisticRegression on it; its coefficients are stored in coef_list_valid.
# The generator arguments do not change between iterations, so they are
# collected once up front.
make_classification_kwargs = dict(
    n_samples=config_LR['data']['n_samples'],
    n_features=config_LR['data']['n_features'],
    n_informative=config_LR['data']['n_informative'],
    n_classes=config_LR['data']['n_targets'],
    n_clusters_per_class=config_LR['data']['n_clusters_per_class'],
    weights=None,
    flip_y=config_LR['data']['noise'],
    class_sep=config_LR['data']['class_sep'],
    shuffle=config_LR['data']['shuffle'],
    random_state=config_LR['data']['random_state'],
)

for i in range(config_LR['data']['n_datasets']):
    X_generated, y_generated = sklearn.datasets.make_classification(**make_classification_kwargs)
    X_datasets_list_valid[i] = X_generated
    y_datasets_list_valid[i] = y_generated

    model_valid = LogisticRegression()
    model_valid.fit(X_datasets_list_valid[i], y_datasets_list_valid[i])
    coef_list_valid[i] = model_valid.coef_

In [93]:
# Empty results table; one row per evaluated dataset is filled in further down.
result_columns = [
    "index_0=aggregated",
    "scoreOnClassfication_BaseModel",
    "scoreOnClassfication_PredictedModel",
    "mse",
    "tp",
    "fn",
    "fp",
    "tn",
    "precision",
    "recall",
    "f1",
]
results = pd.DataFrame(columns=result_columns)

In [94]:
def evaluateSingleSampleOnValidSet(i):
    """Evaluate the I-Net-predicted LR model for dataset ``i`` against a fitted LR model.

    The I-Net predicts LR coefficients from the lambda-net weights. Those
    coefficients are placed into a ``LogisticRegression`` shell (intercept fixed
    to 0) and compared with a reference model fitted by ``get_LR``. The
    reference model's predictions act as the "truth" side of the confusion matrix.

    Fixes against the previous version:
      * ``confusion_matrix(..., labels=[1, 0]).ravel()`` yields
        ``tp, fn, fp, tn``. It used to be unpacked as ``tn, fp, fn, tp``, which
        swapped all four counts and corrupted precision / recall / F1.
      * the input row is reshaped with ``reshape(1, -1)`` instead of a
        hard-coded width of 8362.

    NOTE(review): despite the name, this function evaluates on
    ``X_datasets_list_LR[i]`` and not on ``X_datasets_list_valid`` -- confirm
    which one is intended.

    Args:
        i: Index of the dataset / lambda net to evaluate.

    Returns:
        tuple: ``(i, score_groundTruthModel, score_predModel, mse, tp, fn, fp,
        tn, pre, rec, fone)`` -- the column order of the ``results`` frame.
    """
    x_lambda_weights = x_lambda_weights_test_data_LR[i, :].reshape(1, -1)
    y_coef_truth = y_coef_truth_test_data_LR[i, :]

    y_coef_pred = model_LR.predict(x=x_lambda_weights,
        batch_size=None,
        verbose=0,
        steps=None,
        callbacks=None,
        max_queue_size=10,
        workers=1,
        use_multiprocessing=False,
                    )
    # Coefficient-space error between the stored target and the I-Net output.
    mse = tf.keras.metrics.mean_squared_error(
        y_coef_truth, y_coef_pred
    )

    model_groundTruth = get_LR(X_datasets_list_LR[i], y_datasets_list_LR[i])

    # Build a predictor from the I-Net output without fitting it.
    # NOTE(review): the intercept is forced to 0 -- presumably the I-Net target
    # carries no intercept; confirm.
    model_pred = LogisticRegression()
    model_pred.coef_ = y_coef_pred
    model_pred.intercept_ = 0
    model_pred.classes_ = model_groundTruth.classes_

    score_groundTruthModel = model_groundTruth.score(X_datasets_list_LR[i], y_datasets_list_LR[i])
    score_predModel = model_pred.score(X_datasets_list_LR[i], y_datasets_list_LR[i])

    y_truth_set = model_groundTruth.predict(X_datasets_list_LR[i])
    y_pred_set = model_pred.predict(X_datasets_list_LR[i])

    # With labels=[1, 0] the positive class comes first: rows are the true
    # class, columns the predicted class -> [[tp, fn], [fp, tn]].
    tp, fn, fp, tn = confusion_matrix(y_truth_set, y_pred_set, labels=[1, 0]).ravel()

    pre = precision(tp, fp, tn, fn)
    rec = recall(tp, fp, tn, fn)
    fone = f1(tp, fp, tn, fn)

    return i, score_groundTruthModel, score_predModel, mse, tp, fn, fp, tn, pre, rec, fone

In [None]:
# Evaluate every dataset in parallel worker processes (joblib, 'loky' backend);
# each call returns one tuple in the column order used for `results` below.
# NOTE(review): `Parallel` / `delayed` are not imported explicitly in this
# notebook -- presumably they arrive through one of the `from utilities... import *`
# lines; confirm.
parallel = Parallel(n_jobs=config_LR['computation']['n_jobs'], verbose=10, backend='loky') #loky

result_list = parallel(delayed(evaluateSingleSampleOnValidSet)(i) for i in range(x_lambda_weights_test_data_LR.shape[0]))
                   
    
# One row per dataset; replaces the empty `results` frame created earlier.
results = pd.DataFrame(columns=["index_0=aggregated", "scoreOnClassfication_BaseModel", "scoreOnClassfication_PredictedModel" , "mse",  "tp", "fn", "fp", "tn", "precision", "recall", "f1"], data=result_list)
    
# NOTE(review): this prints one tuple per dataset; consider showing results.head() instead.
print(result_list)

[Parallel(n_jobs=200)]: Using backend LokyBackend with 200 concurrent workers.
[Parallel(n_jobs=200)]: Done  21 tasks      | elapsed:   46.5s
[Parallel(n_jobs=200)]: Done  50 tasks      | elapsed:  1.2min
[Parallel(n_jobs=200)]: Done  81 tasks      | elapsed:  1.7min
[Parallel(n_jobs=200)]: Done 112 tasks      | elapsed:  2.1min
[Parallel(n_jobs=200)]: Done 145 tasks      | elapsed:  2.6min
[Parallel(n_jobs=200)]: Done 178 tasks      | elapsed:  3.1min
[Parallel(n_jobs=200)]: Done 213 tasks      | elapsed:  3.5min
[Parallel(n_jobs=200)]: Done 248 tasks      | elapsed:  4.0min
[Parallel(n_jobs=200)]: Done 285 tasks      | elapsed:  4.5min
[Parallel(n_jobs=200)]: Done 322 tasks      | elapsed:  5.0min
[Parallel(n_jobs=200)]: Done 361 tasks      | elapsed:  5.6min
[Parallel(n_jobs=200)]: Done 400 tasks      | elapsed:  6.1min
[Parallel(n_jobs=200)]: Done 441 tasks      | elapsed:  6.8min
[Parallel(n_jobs=200)]: Done 482 tasks      | elapsed:  7.3min
[Parallel(n_jobs=200)]: Done 525 tasks 

In [None]:
def save_eval_res(df, path=None):
    """Write the evaluation results to ``<path>/evalRes.csv`` and print ``path``.

    Fixes: the ``def`` line was missing its colon (SyntaxError) and the frame
    was written with the non-existent method ``to_csvl`` instead of ``to_csv``.

    Args:
        df: pandas DataFrame holding the evaluation results.
        path: Target directory. When omitted, the LR I-Net directory
            ``utilities_LR.inet_path_LR(config_LR)`` is used, as before.
    """
    if path is None:
        path = utilities_LR.inet_path_LR(config_LR)

    df.to_csv(path + '/evalRes.csv')
    print(path)

In [None]:
# Persist the per-dataset evaluation table as evalRes.csv.
save_eval_res(results)