In [None]:
import ipywidgets as widgets

# Make the notebook full screen
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import os
import pandas as pd
import importlib
import sys 

pd.set_option('display.max_columns', 100)

# Resolve the folders used by the rest of the notebook relative to the current
# working directory. The two branches assume different folder layouts.
# NOTE(review): this file uses f-strings further down, so the legacy branch can
# never actually run on an interpreter that parses the file - confirm whether it
# is still needed.
if sys.version_info[:3] < (3, 4):
    # Legacy interpreters without pathlib.
    code_dir = os.path.dirname(os.getcwd())
    project_dir = os.path.dirname(os.path.dirname(os.getcwd()))
    data_path = os.path.join(code_dir, "data")
    functions_path = os.path.join(project_dir, "functions")
else:
    from pathlib import Path
    # str() keeps os.path.dirname working on interpreters without os.PathLike support.
    current_directory = os.path.dirname(str(Path.cwd()))
    code_dir = os.path.dirname(os.path.dirname(current_directory))
    # Join the components one by one so the separator matches the host OS
    # (a literal "\\" only works on Windows).
    project_dir = os.path.join(code_dir, "2_Supervised_Modeling", "Logistic_Regression")
    data_path = os.path.join(project_dir, "data")
    functions_path = os.path.join(code_dir, 'functions')
    
# General Python modules
import time
import json

# Set the path for the library
import sys
sys.path.insert(0, functions_path)
import data_transformation as dtran
import variable_reduction as vr
import feature_elimination as fe
import machine_learning as ml
import reports as rp
import useful_functions as ufun
from load_data import load_data
from decorators import time_function 

#################################################################################################

# Output widget
output1 = widgets.Output()

def load_data_fun(Load_from, table_name, sam):
    """Load the input table into the global ``data_full`` and preview it in ``output1``.

    Args:
        Load_from: Forwarded to ``load_data`` as ``method``.
        table_name: Forwarded to ``load_data`` as ``table_name``.
        sam: Python expression given as a string; its evaluated value is
            forwarded to ``load_data`` as ``sample``.

    Returns:
        None. On success the global ``data_full`` is rebound to the loaded data.
        Any failure, including a malformed ``sam``, is reported in ``output1``.
    """
    global data_full

    try:
        # NOTE(security): eval() executes arbitrary code. This is acceptable only
        # while the text comes from a trusted notebook user; consider
        # ast.literal_eval if only literals are expected.
        # Parsed inside the try so a malformed value is shown in the widget.
        sample = eval(sam)

        start_time = time.time()

        data_location = '{}/input'.format(data_path)
        data_full = load_data(method=Load_from,
                              data_path=data_location,
                              table_name=table_name,
                              sample=sample)

        total_time = time.time() - start_time

        with output1:
            output1.clear_output()
            print("✅ Data loaded.")
            # DataFrame.info() prints its report itself and returns None, so it
            # must not be wrapped in print() (that would emit a stray "None").
            data_full.info()
            display(data_full.head())
            print('This code took %.2fs. to run' % total_time)
    except Exception as e:
        with output1:
            output1.clear_output()
            print(f"❌ Error data path: {e}")

#################################################################################################

# Output widget
output2 = widgets.Output()

def data_preprocessing(original_candidate_variables_num, original_candidate_variables_char, target_var, weight_var, 
                       sample_var, sample_val, amount_var):
    """Prepare ``data_full`` for modelling and split it by sample.

    The steps run in this order: rename columns, create the weight, sample and
    amount variables, keep only the rows of the selected samples, convert the
    character and numeric candidates, and split the data by sample. Results are
    published through module-level globals and a summary is printed in ``output2``.

    Args:
        original_candidate_variables_num: Python expression (string) evaluating
            to the list of numeric candidate variable names.
        original_candidate_variables_char: Python expression (string) evaluating
            to the list of character candidate variable names.
        target_var: Name of the target variable.
        weight_var: Name of the weight variable; ``'None'`` or ``''`` means none.
        sample_var: Name of the sample variable; ``'None'`` or ``''`` means none.
        sample_val: Python expression (string) evaluating to the sample values;
            ``'None'`` or ``''`` means none.
        amount_var: Name of the amount variable; ``'None'`` or ``''`` means none.

    Returns:
        None. Any failure, including malformed input expressions, is reported
        in ``output2``.
    """
    global data_full, character_variables_list, numeric_variables_list, weight_variable_name_solution, data, \
        sample_values_dict, amount_variable_name_solution, original_candidate_variables, sample_values_solution, \
        original_candidate_variables_numeric, original_candidate_variables_character, \
        sample_variable_name_solution, amount_variable_name, target_variable_name

    # Widget text that stands for "not provided".
    blank_inputs = ('None', '')

    try:
        # NOTE(security): eval() executes arbitrary code. This is acceptable only
        # while the text comes from a trusted notebook user; consider
        # ast.literal_eval if only literals are expected.
        # Parsed inside the try so malformed values are shown in the widget.
        original_candidate_variables_numeric = eval(original_candidate_variables_num)
        original_candidate_variables_character = eval(original_candidate_variables_char)
        target_variable_name = target_var
        weight_variable_name = None if weight_var in blank_inputs else weight_var
        sample_variable_name = None if sample_var in blank_inputs else sample_var
        sample_values = None if sample_val in blank_inputs else eval(sample_val)
        amount_variable_name = None if amount_var in blank_inputs else amount_var

        start_time = time.time()

        # Commas in column names are replaced by "/", and the candidate lists are
        # renamed the same way so they keep matching the columns.
        data_full.columns = data_full.columns.str.replace(",", "/")
        original_candidate_variables_numeric = [item.replace(",", "/") for item in original_candidate_variables_numeric]
        original_candidate_variables_character = [item.replace(",", "/") for item in original_candidate_variables_character]

        # Create the weight variable, if it doesn't exist.
        data_full, weight_variable_name_solution = dtran.weight_var_assignment(
            input_data=data_full,
            weight_variable=weight_variable_name)

        # Create the sample variable, if it doesn't exist.
        data_full, sample_values_solution, sample_variable_name_solution = dtran.sample_var_assignment(
            input_data=data_full,
            sample_variable=sample_variable_name,
            sample_values=sample_values)

        # Create the amount variable, if it doesn't exist.
        data_full, amount_variable_name_solution = dtran.amount_var_assignment(
            input_data=data_full,
            amount_variable=amount_variable_name)

        # Keep only the rows that belong to one of the selected samples.
        data_full = data_full[data_full[sample_variable_name_solution].isin(sample_values_solution)]

        # Convert character variables
        data_full, character_variables_list = dtran.convert_character_var(
            input_data=data_full,
            character_variables=original_candidate_variables_character,
            sample_variable=sample_variable_name_solution)

        # Convert numeric variables
        data_full, numeric_variables_list = dtran.convert_numeric_var(
            input_data=data_full,
            numeric_variables=original_candidate_variables_numeric,
            weight_variable=weight_variable_name_solution,
            amount_variable=amount_variable_name_solution,
            target_variable=target_variable_name)

        # Split sample data
        data, sample_values_dict = dtran.split_sample_data(
            input_data=data_full,
            sample_values_solution=sample_values_solution,
            sample_variable_name_solution=sample_variable_name_solution)

        # Set the original candidate variables
        original_candidate_variables = original_candidate_variables_character + original_candidate_variables_numeric

        total_time = time.time() - start_time

        with output2:
            output2.clear_output()
            print("✅ Data preprocess completed successfully.")
            print(ufun.color.BLUE + 'Character variables: ' + ufun.color.END + str(character_variables_list))
            print(ufun.color.BLUE + 'Numeric variables: ' + ufun.color.END + str(numeric_variables_list))
            print(ufun.color.BLUE + 'Original candidate variables: ' + ufun.color.END + str(original_candidate_variables))
            print('This code took %.2fs. to run' % total_time)

    except Exception as e:
        with output2:
            output2.clear_output()
            print(f"❌ Error Data preprocess: {e}")

#################################################################################################

# Output widget
output3 = widgets.Output()

def data_quality():
    """Produce the data quality report for ``data_full`` and show it in ``output3``.

    Reads the globals ``data_path``, ``data_full``, ``character_variables_list``,
    ``numeric_variables_list`` and ``weight_variable_name_solution``. Any failure
    is reported in ``output3``.
    """
    try:
        started = time.time()

        # Create folder, if it doesn't exist
        ufun.create_folder(data_path=data_path, folder_name='output')

        report_variables = character_variables_list + numeric_variables_list
        dq = rp.dq_report(
            input_data=data_full,
            data_path=data_path,
            variables=report_variables,
            weight_variable=weight_variable_name_solution,
            dq_report_file='data_quality_report.csv',
        )

        elapsed = time.time() - started

        with output3:
            output3.clear_output()
            print("✅ Data quality completed successfully.")
            display(dq)
            print('This code took %.2fs. to run' % elapsed)

    except Exception as e:
        with output3:
            output3.clear_output()
            print(f"❌ Error Data quality: {e}")


#################################################################################################
# Output widget
output4 = widgets.Output()

def vr_missing(select_miss_variables_to_drop_threshold):
    """Build the global ``excluded_variables`` list and show it in ``output4``.

    The list holds the columns of the first sample's data that are not original
    candidates, followed by the variables returned by ``vr.missing_values_vars``.

    Args:
        select_miss_variables_to_drop_threshold: Python expression (string)
            evaluating to the threshold forwarded to ``vr.missing_values_vars``.

    Returns:
        None. Any failure, including a malformed threshold, is reported in
        ``output4``.
    """
    global excluded_variables

    try:
        # NOTE(security): eval() executes arbitrary code. This is acceptable only
        # while the text comes from a trusted notebook user; consider
        # ast.literal_eval if only literals are expected.
        # Parsed inside the try so a malformed value is shown in the widget.
        select_missing_variables_to_drop_threshold = eval(select_miss_variables_to_drop_threshold)

        start_time = time.time()

        # Variables excluded from the non-predictive features: keys, target, sample, etc
        first_sample_data = data['data_{}'.format(sample_values_solution[0])]
        excluded_variables = [x for x in first_sample_data.columns if x not in original_candidate_variables]

        # Produce and save the missing values table to review
        _, missing_variables = vr.missing_values_vars(
            sample_values_dict=sample_values_dict,
            data_path=data_path,
            input_data=data,
            weight_variable_name_solution=weight_variable_name_solution,
            select_missing_variables_to_drop_threshold=select_missing_variables_to_drop_threshold
            )

        # Create the variables to remove: non-predictors + variables with too many missing information
        excluded_variables = excluded_variables + missing_variables

        total_time = time.time() - start_time

        with output4:
            output4.clear_output()
            print("✅ Variable reduction: high missing values completed successfully.")
            print(ufun.color.BLUE + 'Variables to remove from the remainder of the analysis: ' + ufun.color.END + str(excluded_variables))
            print('This code took %.2fs. to run' % total_time)

    except Exception as e:
        with output4:
            output4.clear_output()
            print(f"❌ Error Variable reduction: high missing values: {e}")

#################################################################################################
# Output widget
output5 = widgets.Output()

def vr_char_many_levels(character_classification_thresh):
    """Run ``vr.character_var_levels`` and store the kept character variables.

    The first returned value is published as the global ``keep_char_vars_levels``;
    the second (the dropped character variables) is printed in ``output5``.

    Args:
        character_classification_thresh: Python expression (string) evaluating to
            the threshold forwarded as ``character_classification_threshold``.

    Returns:
        None. Any failure, including a malformed threshold, is reported in
        ``output5``.
    """
    global keep_char_vars_levels

    try:
        # NOTE(security): eval() executes arbitrary code. This is acceptable only
        # while the text comes from a trusted notebook user; consider
        # ast.literal_eval if only literals are expected.
        # Parsed inside the try so a malformed value is shown in the widget.
        character_classification_threshold = eval(character_classification_thresh)

        start_time = time.time()

        keep_char_vars_levels, excl_char_vars = vr.character_var_levels(
            input_data=data,
            data_path=data_path,
            sample_values_solution=sample_values_solution,
            excluded_variables=excluded_variables,
            character_classification_threshold=character_classification_threshold
            )

        total_time = time.time() - start_time

        with output5:
            output5.clear_output()
            print("✅ Variable reduction: characters with many levels completed successfully.")
            print(ufun.color.PURPLE + 'Character variables dropped: ' + ufun.color.END + str(excl_char_vars))
            print('This code took %.2fs. to run' % total_time)

    except Exception as e:
        with output5:
            output5.clear_output()
            print(f"❌ Error Variable reduction: characters with many levels: {e}")

#################################################################################################
# Output widget
output6 = widgets.Output()

def vr_outliers(iqr_cf):
    """Apply ``dtran.replace_outliers`` to ``data_full`` and re-split it by sample.

    Only numeric candidates that are not in ``excluded_variables`` are passed to
    ``dtran.replace_outliers``. The globals ``data_full`` and ``data`` are rebound
    and the returned outlier information is displayed in ``output6``.

    Args:
        iqr_cf: Python expression (string) evaluating to the value forwarded as
            ``iqr_coef``.

    Returns:
        None. Any failure, including a malformed ``iqr_cf``, is reported in
        ``output6``.
    """
    global data_full, data

    try:
        # NOTE(security): eval() executes arbitrary code. This is acceptable only
        # while the text comes from a trusted notebook user; consider
        # ast.literal_eval if only literals are expected.
        # Parsed inside the try so a malformed value is shown in the widget.
        iqr_coef = eval(iqr_cf)

        start_time = time.time()

        outlier_variables = [i for i in original_candidate_variables_numeric if i not in excluded_variables]
        data_full, outlier_info = dtran.replace_outliers(
            input_data=data_full,
            variables=outlier_variables,
            weight_variable=weight_variable_name_solution,
            data_path=data_path,
            outlier_info_file='outlier_info.csv',
            iqr_coef=iqr_coef
            )

        # Split sample data again so the per-sample frames reflect the updated
        # data_full; the second return value is not needed here.
        data, _ = dtran.split_sample_data(
            input_data=data_full,
            sample_values_solution=sample_values_solution,
            sample_variable_name_solution=sample_variable_name_solution
            )

        total_time = time.time() - start_time

        with output6:
            output6.clear_output()
            print("✅ Variable reduction: removing outliers completed successfully.")
            display(outlier_info)
            print('This code took %.2fs. to run' % total_time)

    except Exception as e:
        with output6:
            output6.clear_output()
            print(f"❌ Error Variable reduction: removing outliers: {e}")

#################################################################################################
# Output widget
output7 = widgets.Output()

def vr_binary(drop_mthd, protected_clss):
    """Convert the kept character variables to binary columns and list the numeric features.

    Rebinds the globals ``data_full``, ``data`` and ``keep_num_vars`` and prints
    the kept features in ``output7``.

    Args:
        drop_mthd: Forwarded as ``drop``. Specifies which value to drop from the
            one hot encoder: None returns binary variables for all categories,
            'first' drops the most populated category, 'last' drops the least
            populated category. ``'None'`` or ``''`` is turned into None.
        protected_clss: Forwarded as ``protected_class_valid_values``. Specifies
            accepted values for the protected class column; ``'None'`` or ``''``
            is turned into None (use it for non-protected class conversions).

    Returns:
        None. Any failure inside the processing is reported in ``output7``.
    """
    global data_full, data, keep_num_vars

    # Widget text that stands for "not provided".
    blank_inputs = ('None', '')
    drop_method = None if drop_mthd in blank_inputs else drop_mthd
    protected_class = None if protected_clss in blank_inputs else protected_clss

    try:
        started = time.time()

        data_full = dtran.character_to_binary(
            input_data=data_full,
            input_variable_list=keep_char_vars_levels,
            drop=drop_method,
            protected_class_valid_values=protected_class,
        )

        # Split sample data
        data, _ = dtran.split_sample_data(
            input_data=data_full,
            sample_values_solution=sample_values_solution,
            sample_variable_name_solution=sample_variable_name_solution,
        )

        # Keep all numeric variables, including those that were one-hot encoded
        first_sample_key = 'data_{}'.format(sample_values_solution[0])
        keep_num_vars = ufun.identify_numeric_variables(input_data=data[first_sample_key])
        keep_num_vars = [name for name in keep_num_vars if name not in excluded_variables]

        elapsed = time.time() - started

        with output7:
            output7.clear_output()
            print("✅ Variable reduction: convert character to binary completed successfully.")
            print(ufun.color.PURPLE + 'Keeping the following features: ' + ufun.color.END + str(keep_num_vars))
            print(ufun.color.PURPLE + 'Number of features to keep: ' + ufun.color.END + str(len(keep_num_vars)))
            print('This code took %.2fs. to run' % elapsed)

    except Exception as e:
        with output7:
            output7.clear_output()
            print(f"❌ Error Variable reduction: convert character to binary: {e}")

#################################################################################################
# Output widget
output8 = widgets.Output()

def vr_impute(impute_missing_imputation_strtgy):
    """Impute missing values in the kept numeric features of every sample.

    The imputer is fitted on the first sample's data and then applied to every
    sample in ``sample_values_dict``. Afterwards the weighted percentage of
    values still missing is printed per sample in ``output8``.

    Args:
        impute_missing_imputation_strtgy: ``'mean'`` or ``'median'``, used as
            is; any other text is evaluated as a Python expression and the
            result is forwarded as ``imputation_strategy``.

    Returns:
        None. Any failure, including a malformed strategy expression, is
        reported in ``output8``.
    """
    try:
        if impute_missing_imputation_strtgy in ('mean', 'median'):
            impute_missing_imputation_strategy = impute_missing_imputation_strtgy
        else:
            # NOTE(security): eval() executes arbitrary code. This is acceptable
            # only while the text comes from a trusted notebook user; consider
            # ast.literal_eval if only literals are expected.
            # Parsed inside the try so a malformed value is shown in the widget.
            impute_missing_imputation_strategy = eval(impute_missing_imputation_strtgy)

        start_time = time.time()

        variables_with_missing_dict = vr.select_missing_variables_to_drop_dict(
            sample_values_dict=sample_values_dict,
            data_path=data_path)

        # Select numeric features with missing values. Imputation will be applied to only these
        # features, in order to improve the performance of the code.
        # dict.fromkeys de-duplicates across samples and keeps first-seen order.
        variables_with_missing = dict.fromkeys(sum(variables_with_missing_dict.values(), []))
        num_variables_with_missing = [i for i in keep_num_vars if i in variables_with_missing]

        # Impute missing values
        impute_missing = dtran.impute_missing(
            variables=num_variables_with_missing,
            imputation_strategy=impute_missing_imputation_strategy)
        impute_missing.imputation_fit_weight(
            input_data=data['data_{}'.format(sample_values_solution[0])],
            weight_variable=weight_variable_name_solution)

        # NOTE(review): the return value is discarded, so this presumably
        # modifies each sample's data in place - confirm against dtran.
        for sample_name in sample_values_dict:
            impute_missing.imputation_transform(input_data=data['data_{}'.format(sample_name)])

        total_time = time.time() - start_time

        with output8:
            output8.clear_output()
            print("✅ Variable reduction: missing values imputation completed successfully.")
            print(ufun.color.PURPLE + 'Features with missing values: ' + ufun.color.END + str(num_variables_with_missing))
            # Check missing values for imputed variables
            for sample_name in sample_values_dict:
                print(ufun.color.BOLD + ufun.color.PURPLE + ufun.color.UNDERLINE + 'SAMPLE ' + sample_name + ufun.color.END)
                if num_variables_with_missing:
                    sample_data = data['data_{}'.format(sample_name)]
                    # The total weight is the same for every column, so compute it once.
                    total_weight = sum(sample_data[weight_variable_name_solution])
                    # Weighted share (in %) of rows still missing, per imputed column.
                    missing_pct = sample_data[num_variables_with_missing].apply(
                        lambda x: (sum(sample_data[x.isnull()][weight_variable_name_solution])
                                   / total_weight) * 100,
                        axis=0)
                    print(missing_pct.sort_values(ascending=False))
                else:
                    print('There are no variables with missing values to impute')

            print('This code took %.2fs. to run' % total_time)

    except Exception as e:
        with output8:
            output8.clear_output()
            print(f"❌ Error Variable reduction: missing values imputation: {e}")

#################################################################################################
# Output widget
output9 = widgets.Output()

def vr_one_val():
    """Filter ``keep_num_vars`` with ``vr.keep_num_variables_one_value``.

    The result is stored in the global ``keep_num_vars_one_v`` and printed,
    together with its length, in ``output9``. Any failure is reported in
    ``output9``.
    """
    global keep_num_vars_one_v

    try:
        started = time.time()

        keep_num_vars_one_v = vr.keep_num_variables_one_value(
            keep_num_vars=keep_num_vars,
            data_path=data_path,
            dq_report='data_quality_report.csv',
        )

        elapsed = time.time() - started

        with output9:
            output9.clear_output()
            print("✅ Variable reduction: drop one level features completed successfully.")
            print(ufun.color.PURPLE + 'Keeping the following features: ' + ufun.color.END + str(keep_num_vars_one_v))
            print(ufun.color.PURPLE + 'Number of features to keep: ' + ufun.color.END + str(len(keep_num_vars_one_v)))
            print('This code took %.2fs. to run' % elapsed)

    except Exception as e:
        with output9:
            output9.clear_output()
            print(f"❌ Error Variable reduction: drop one level features: {e}")

#################################################################################################
# Output widget
output10 = widgets.Output()

def vr_gini(gini_thresh):
    """Keep the features whose Gini coefficient reaches the threshold.

    The Gini table is computed by ``fe.gini_values_weight`` on the first sample's
    data. The kept variable names are stored in the global ``keep_num_vars_gini``
    and the table is displayed in ``output10``.

    Args:
        gini_thresh: Python expression (string) evaluating to the minimum
            ``'Gini coefficient'`` a feature needs in order to be kept.

    Returns:
        None. Any failure, including a malformed threshold, is reported in
        ``output10``.
    """
    global keep_num_vars_gini

    try:
        # NOTE(security): eval() executes arbitrary code. This is acceptable only
        # while the text comes from a trusted notebook user; consider
        # ast.literal_eval if only literals are expected.
        # Parsed inside the try so a malformed value is shown in the widget.
        gini_threshold = eval(gini_thresh)

        start_time = time.time()

        gini_table = fe.gini_values_weight(
            feats=keep_num_vars_one_v,
            input_data=data['data_{}'.format(sample_values_solution[0])],
            target_variable=target_variable_name,
            weight_variable=weight_variable_name_solution,
            data_path=data_path,
            gini_info_file='gini_info.csv',
            n_bands=10)
        keeps_feature = gini_table['Gini coefficient'] >= gini_threshold
        keep_num_vars_gini = list(gini_table.loc[keeps_feature, 'variable'].values)

        total_time = time.time() - start_time

        with output10:
            output10.clear_output()
            print("✅ Variable reduction: drop low Gini features completed successfully.")
            display(gini_table)
            # NOTE(review): the message says "Gini >" while the filter above
            # uses ">="; confirm which one is intended.
            print(ufun.color.PURPLE + 'Keeping the following variables with Gini > ' + str(gini_threshold) + ': ' + ufun.color.END + str(keep_num_vars_gini))
            print(ufun.color.PURPLE + 'Number of features to keep: ' + ufun.color.END + str(len(keep_num_vars_gini)))
            print('This code took %.2fs. to run' % total_time)

    except Exception as e:
        with output10:
            output10.clear_output()
            print(f"❌ Error Variable reduction: drop low Gini features: {e}")

#################################################################################################
# Output widget
output11 = widgets.Output()

def vr_corr(corr_thresh):
    """Run the correlation-based elimination on the Gini-selected features.

    Correlations are computed on the first sample's data and passed to
    ``fe.correlation_elimination``; its second return value is stored in the
    global ``remaining_predictors``. The work runs inside ``output11``.

    Args:
        corr_thresh: Python expression (string) evaluating to the value
            forwarded as ``corr_threshold``.

    Returns:
        None. A malformed threshold is reported in ``output11``.
    """
    global remaining_predictors

    try:
        # NOTE(security): eval() executes arbitrary code. This is acceptable only
        # while the text comes from a trusted notebook user; consider
        # ast.literal_eval if only literals are expected.
        # Parsed inside the try so a malformed value is shown in the widget.
        corr_threshold = eval(corr_thresh)

        with output11:
            output11.clear_output()

            train_data = data['data_{}'.format(sample_values_solution[0])]

            corrs = fe.calculate_correlations(
                input_data=train_data,
                features=keep_num_vars_gini,
                corr_threshold=corr_threshold,
                weight_variable_name=weight_variable_name_solution
                )

            # Only the surviving predictors are needed; the eliminated list is discarded.
            _, remaining_predictors = fe.correlation_elimination(
                method='correlation',
                features=keep_num_vars_gini,
                input_data=train_data,
                data_path=data_path,
                corr_threshold=corr_threshold,
                top_n=10,
                weight_variable_name=weight_variable_name_solution,
                correlations=corrs
                )

            print("✅ Variable reduction: drop highly correlated features completed successfully.")

    except Exception as e:
        with output11:
            output11.clear_output()
            print(f"❌ Error Variable reduction: drop highly correlated features: {e}")

#################################################################################################
# Output widget
output12 = widgets.Output()

def vr_vif(VIF_reduct, first_vif_thresh):
    """Run ``fe.run_VIF`` on ``remaining_predictors`` and store the survivors.

    The global ``remaining_predictors`` is both the input feature list and the
    place where the second return value of ``fe.run_VIF`` is stored. The work
    runs inside ``output12``.

    Args:
        VIF_reduct: Python expression (string) evaluating to the value forwarded
            as ``VIF_reduction``.
        first_vif_thresh: Python expression (string) evaluating to the value
            forwarded as ``vif_threshold``.

    Returns:
        None. Malformed input expressions are reported in ``output12``.
    """
    global remaining_predictors

    try:
        # NOTE(security): eval() executes arbitrary code. This is acceptable only
        # while the text comes from a trusted notebook user; consider
        # ast.literal_eval if only literals are expected.
        # Parsed inside the try so malformed values are shown in the widget.
        VIF_reduction = eval(VIF_reduct)
        first_vif_threshold = eval(first_vif_thresh)

        with output12:
            output12.clear_output()

            # Only the surviving predictors are needed; the eliminated list is discarded.
            _, remaining_predictors = fe.run_VIF(
                VIF_reduction=VIF_reduction,
                features=remaining_predictors,
                input_data=data['data_{}'.format(sample_values_solution[0])],
                data_path=data_path,
                vif_threshold=first_vif_threshold,
                corr_threshold=0,
                weight_variable_name=weight_variable_name_solution
                )

            print("✅ Variable reduction: VIF elimination completed successfully.")

    except Exception as e:
        with output12:
            output12.clear_output()
            print(f"❌ Error Variable reduction: VIF elimination: {e}")

#################################################################################################
# Output widget
output13 = widgets.Output()

def fe_lasso(LogisticRegression_solver, c_mn, c_mx, n, second_vif_thresh, lasso_criterion, early_st):
    """Run the Lasso feature elimination and store the result in ``final_vars``.

    Calls ``fe.perform_lasso``, takes the first entry of the returned dict, and
    on that object calls ``best_vars``, ``calculate_vifs`` and
    ``remaining_predictors``. The last result is stored in the global
    ``final_vars``. The work runs inside ``output13``.

    Args:
        LogisticRegression_solver: Forwarded unchanged to ``fe.perform_lasso``.
        c_mn: Python expression (string) evaluating to ``c_min``.
        c_mx: Python expression (string) evaluating to ``c_max``.
        n: Python expression (string) evaluating to ``num``.
        second_vif_thresh: Python expression (string) evaluating to
            ``vif_threshold``.
        lasso_criterion: Forwarded unchanged to ``fe.perform_lasso``.
        early_st: Python expression (string) evaluating to ``early_stop``.

    Returns:
        None. Malformed input expressions are reported in ``output13``.
    """
    global final_vars

    try:
        # NOTE(security): eval() executes arbitrary code. This is acceptable only
        # while the text comes from a trusted notebook user; consider
        # ast.literal_eval if only literals are expected.
        # Parsed inside the try so malformed values are shown in the widget.
        c_min = eval(c_mn)
        c_max = eval(c_mx)
        num = eval(n)
        second_vif_threshold = eval(second_vif_thresh)
        early_stop = eval(early_st)

        with output13:
            output13.clear_output()

            bic_dict = fe.perform_lasso(
                sample_values_dict=sample_values_dict,
                sample_values_solution=sample_values_solution,
                data=data,
                target_variable_name=target_variable_name,
                predictor_variables=remaining_predictors,
                data_path=data_path,
                LogisticRegression_solver=LogisticRegression_solver,
                early_stop=early_stop,
                weight_variable_name=weight_variable_name_solution,
                standardization=False,
                c_min=c_min,
                c_max=c_max,
                num=num,
                vif_threshold=second_vif_threshold,
                random_state=42,
                lasso_criterion=lasso_criterion
                )

            # Work with the first entry of the returned dict.
            lasso = bic_dict[next(iter(bic_dict))]
            # Obtain the best C value based on the criterion selected by the user
            lasso.best_vars()
            # Running the second VIF using the lasso_features from the best_vars function.
            # The return value is not used here; the call is kept with silent=False.
            lasso.calculate_vifs(lasso.lasso_features, weight_variable_name=weight_variable_name_solution, silent=False)

            # Obtain the final list of features after the second VIF threshold calculation
            final_vars = lasso.remaining_predictors()

            print("✅ Feature elimination: Lasso elimination completed successfully.")

    except Exception as e:
        with output13:
            output13.clear_output()
            print(f"❌ Error Feature elimination: Lasso elimination: {e}")

#################################################################################################
# Output widget
output14 = widgets.Output()

def logistic_reg(
        stepwise_method,
        number_of_feat,
        significance_lvl
    ):
    """Select features stepwise and fit the logistic regression on them.

    A first ``ml.logistic_regression`` object built on ``final_vars`` runs
    ``stepwise_fun``; the result is stored in the global ``stepwise_features``.
    A second object built on those features then runs ``glm_bin`` and
    ``glm_report``. The work runs inside ``output14``.

    Args:
        stepwise_method: Possible values: 'backward', 'forward', 'combined'.
            Anything else will retain the features from Lasso selection.
        number_of_feat: Python expression (string) for the number of features to
            be used in the final regression model; ``'None'`` or ``''`` means
            None, which allows feature selection using the p-value.
        significance_lvl: Python expression (string) for the significance level;
            ``'None'`` or ``''`` means None. Features with p-value greater than
            this threshold will not be included in the selected features.

    Returns:
        None. Malformed input expressions are reported in ``output14``.
    """
    global stepwise_features

    # Widget text that stands for "not provided".
    blank_inputs = ('None', '')

    try:
        # NOTE(security): eval() executes arbitrary code. This is acceptable only
        # while the text comes from a trusted notebook user; consider
        # ast.literal_eval if only literals are expected.
        # Parsed inside the try so malformed values are shown in the widget.
        number_of_features = None if number_of_feat in blank_inputs else eval(number_of_feat)
        significance_level = None if significance_lvl in blank_inputs else eval(significance_lvl)

        with output14:
            output14.clear_output()

            # Remove features based on p-value information
            logistic_regression_pre = ml.logistic_regression(
                input_data=data,
                final_feats=final_vars,
                target_variable=target_variable_name,
                weight_variable_name=weight_variable_name_solution,
                data_path=data_path
                )

            stepwise_features = logistic_regression_pre.stepwise_fun(
                sample_values_solution=sample_values_solution,
                method=stepwise_method,
                number_of_features=number_of_features,
                significance_level=significance_level
                )

            # Execute Logistic regression based on the remaining features
            logistic_regression = ml.logistic_regression(
                input_data=data,
                final_feats=stepwise_features,
                target_variable=target_variable_name,
                weight_variable_name=weight_variable_name_solution,
                data_path=data_path
                )

            # The return values of both calls are not used here.
            logistic_regression.glm_bin(
                sample_values_solution=sample_values_solution
                )

            logistic_regression.glm_report()

            print("✅ Logistic regression training completed successfully.")

    except Exception as e:
        with output14:
            output14.clear_output()
            print(f"❌ Error Logistic regression training: {e}")

#################################################################################################
# Output widget that shows the status of the logistic regression report step
output15 = widgets.Output()


def _parse_optional_input(raw_value):
    """Convert the text of an optional widget field into a Python value.

    ``None``, blank (or whitespace-only) text and the literal text ``None``
    mean "not provided" and yield ``None``. Anything else is evaluated as a
    Python expression, exactly as the original code did.
    """
    if raw_value is None:
        return None
    text = raw_value.strip() if isinstance(raw_value, str) else raw_value
    if text in ['None', '']:
        return None
    # NOTE(review): eval() executes whatever text is typed into the widget.
    # If this app is ever served to untrusted users, replace it with
    # ast.literal_eval (which only accepts literals).
    return eval(text)


def logistic_reg_report(
        select_top_pct, # Int. Top percent of data ranked by score selected to produce the reports. Use 100 to select the full dataset. 
        n_bnds, # Int. Number of quantiles to divide the ranking data based on score. 
        cost_fpt, # Cost of blocking a legitimate customer. Leave blank or 'None' without quotes if not available. 
        cost_fnt # Cost of missing a fraud/credit risk customer. Leave blank or 'None' without quotes if not available. 
    ):
    """Fit the logistic regression on the stepwise features and produce the reports.

    All four arguments arrive as text from the widgets and are parsed here;
    blank text or ``None`` means "not provided". The function reads the
    module-level state prepared by the earlier steps (``data``,
    ``stepwise_features``, ``sample_values_dict`` ...), writes the evaluation
    metrics, lift tables and graphs through ``rp.binary_regression_report``,
    and prints a success or error message in ``output15``. Nothing is returned.
    """
    global data, stepwise_features, target_variable_name, weight_variable_name_solution, data_path, sample_values_solution, sample_values_dict, \
            amount_variable_name_solution

    try:
        # Parsing happens inside the try block so that a malformed entry is
        # reported in the output widget instead of raising out of the callback.
        select_top_percent = _parse_optional_input(select_top_pct)
        n_bands = _parse_optional_input(n_bnds)
        cost_fp = _parse_optional_input(cost_fpt)
        cost_fn = _parse_optional_input(cost_fnt)

        with output15:
            output15.clear_output()

            # Execute Logistic regression based on the remaining features
            logistic_regression = ml.logistic_regression(
                input_data = data, 
                final_feats = stepwise_features, 
                target_variable = target_variable_name, 
                weight_variable_name = weight_variable_name_solution, 
                data_path = data_path
                )

            # The fitted model and summary are not used here; the call is kept
            # because create_predictions() below is invoked on the same object.
            logistic_regression.glm_bin(
                sample_values_solution = sample_values_solution 
                )

            # Create the dataframes dictionary with the predicted variables that will be used as input to other reports
            predictions_dict = logistic_regression.create_predictions(
                    sample_values_dict=sample_values_dict, 
                    amount_variable_name = amount_variable_name_solution)

            binary_regression_report_class = rp.binary_regression_report(
                predictions_dictionary = predictions_dict, 
                target_variable = target_variable_name, 
                predicted_score_numeric = 'predicted_score_numeric', 
                amount_variable_name = amount_variable_name_solution, 
                weight_variable_name = weight_variable_name_solution, 
                sample_values_dict = sample_values_dict, 
                select_top_percent = select_top_percent, 
                n_bands = n_bands, 
                rows = n_bands, 
                data_path = data_path
                )

            # Evaluation metrics
            binary_regression_report_class.get_evaluation(
                predicted_score_binary = 'predicted_score_binary', 
                filename = 'evaluation_metrics.csv')

            # Create Lift table
            binary_regression_report_class.create_lift_table(filename = 'lift_table_')

            # Create folder, if it doesn't exist
            folder_name = 'graphs_LR'
            ufun.create_folder(data_path = data_path, 
                               folder_name = 'output/{}'.format(folder_name))

            # The four quantile plots share the same arguments
            for plot_quantile in (
                    binary_regression_report_class.plot_ADR_Quantile,
                    binary_regression_report_class.plot_cADR_Quantile,
                    binary_regression_report_class.plot_FPR_Quantile,
                    binary_regression_report_class.plot_cFPR_Quantile):
                plot_quantile(
                    folder_name = folder_name,
                    xlim=None, 
                    ylim=None
                    )

            binary_regression_report_class.plot_ROC_curve(folder_name = folder_name)

            binary_regression_report_class.plot_precision_recall_curve(folder_name = folder_name)

            binary_regression_report_class.plot_cutoffs(
                    folder_name = folder_name,
                    n_bands = n_bands, # Number of bands between 0 and 1
                    cost_fp = cost_fp, # Cost of blocking a legitimate customer
                    cost_fn = cost_fn, # Cost of missing a fraud/credit risk customer
                    return_table = True # Set to True in order to return the table that produced the graph, otherwise set to False
                    )

            print("✅ Logistic regression report completed successfully.")

    except Exception as e:
        with output15:
            output15.clear_output()
            print(f"❌ Error Logistic regression report: {e}")


#################################################################################################
#################################################################################################
#################################################################################################

#################################################################################################
#################################################################################################
#################################################################################################
# Stylesheet injected into the notebook page. It defines two rules:
#   * `.widget-button`       - colours and rounded corners for buttons
#   * `.widget-hbox-with-bg` - background, padding and border for a container
# NOTE(review): in the part of the file visible here, the only uses of
# `widget-hbox-with-bg` are commented out - confirm whether the rule is still needed.
custom_css = """
<style>
/* Style for widget buttons */
.widget-button {
    background-color: lightgreen; 
    color: white; 
    border-radius: 10px;
    }
    
/* Style for HBox container */
.widget-hbox-with-bg {
    background-color: lightblue !important; /* Change this color */
    padding: 10px; /* Add padding inside the box */
    border-radius: 10px; /* Rounded corners */
    border: 2px solid black; /* Optional border */
}
</style>
"""

# Inject CSS into the notebook by rendering the <style> element as HTML output
display(HTML(custom_css))

#################################################################################################
text_input_restart_button = widgets.HTML(value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>Restart kernel and Clear output folder</b></p>")
restart_button = widgets.Button(description="Restart kernel")

# Output widget that shows the status of the restart step
output99 = widgets.Output()


def restart_button_clicked(b):
    """Run Voila on the widget notebook inside the conda environment.

    The command ``conda activate Supervised_Modeling_api && voila
    Supervised_Modeling_Process_LR_widget.ipynb`` is executed through the
    shell and the outcome is printed in ``output99``. The button argument
    ``b`` is not used.

    NOTE(review): ``subprocess.run`` waits for the command to finish, so this
    callback does not return while the launched process is still running -
    confirm that this is the intended behaviour.
    """
    import subprocess

    # With shell=True the command has to be ONE string: on POSIX a list would
    # run only its first item ("conda") and hand the rest to the shell as
    # positional arguments. On Windows this string is identical to what the
    # original list was joined into, so behaviour there is unchanged.
    command = "conda activate Supervised_Modeling_api && voila Supervised_Modeling_Process_LR_widget.ipynb"

    try:
        completed = subprocess.run(command, shell=True)
    except Exception as e:
        message = f"❌ Error Restart python kernel: {e}"
    else:
        if completed.returncode == 0:
            message = "✅ Restart python kernel completed successfully."
        else:
            # A non-zero exit code means the command failed; do not report success.
            message = f"❌ Error Restart python kernel: command exited with code {completed.returncode}"

    with output99:
        output99.clear_output()
        print(message)


restart_button.on_click(restart_button_clicked)


clear_folder_button = widgets.Button(description="Clear output folder")

# Output widget that shows the status of the clear-folder step
output98 = widgets.Output()


def clear_folder_clicked(b):
    """Delete every file, symlink and sub-folder inside ``<data_path>/output``.

    The data folder is re-derived from the current working directory (same
    rule as at the top of the file) and the result is printed in ``output98``:
    folder missing, folder already empty, folder emptied, or one line per
    entry that could not be deleted. The button argument ``b`` is not used.
    """
    import os
    import shutil
    import sys 

    if sys.version_info[:3] < (3,4):
        # Fixed typo: the original called the non-existent os.getcdw()
        code_dir = os.path.dirname(os.getcwd())
        data_path = os.path.join(code_dir, "data")
    else: 
        from pathlib import Path
        current_directory = os.path.dirname(Path.cwd())
        code_dir = os.path.dirname(os.path.dirname(current_directory))
        data_path = os.path.join(code_dir, "2_Supervised_Modeling\\Logistic_Regression\\data")

    # Specify the folder to empty
    folder_path = '{}/output'.format(data_path)

    if not os.path.exists(folder_path):
        message = f"❌ Folder {folder_path} does not exist"
    else:
        entries = os.listdir(folder_path)
        if not entries:
            message = f"✅ Folder {folder_path} is empty"
        else:
            # Collect failures instead of printing inside the loop: the original
            # cleared the output widget on every iteration, so an error for one
            # entry was wiped out as soon as a later entry was deleted.
            failures = []
            for filename in entries:
                file_path = os.path.join(folder_path, filename)
                try:
                    if os.path.isfile(file_path) or os.path.islink(file_path):
                        os.unlink(file_path)  # Delete file or symlink
                    elif os.path.isdir(file_path):
                        shutil.rmtree(file_path)  # Delete subdirectory
                except Exception as e:
                    failures.append(f"❌ Error failed to delete {file_path}: {e}")

            if failures:
                message = "\n".join(failures)
            else:
                message = "✅ Data output folder emptied successfully."

    with output98:
        output98.clear_output()
        print(message)


clear_folder_button.on_click(clear_folder_clicked)

box_restart_clear_button = widgets.HBox([restart_button, clear_folder_button], layout=widgets.Layout(justify_content="center", margin="0 25px"))

#################################################################################################

# ---- "Load the data" step: header, inputs, button and click handler ----
text_input_data_load = widgets.HTML(
    value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>Load the data</b></p>",
)

# Source format selector
Load_from_widget = widgets.RadioButtons(
    options=['csv', 'parq'],
    description="Load from:",
    tooltip="Specify how to load the data. Options: csv, parq.",
    layout=widgets.Layout(width='100px', margin="0 25px", background_color="lightblue"),
)

# Table name: label + text box
table_name_widget_label = widgets.Label(
    value="Table name:",
    layout=widgets.Layout(width="100px", justify_content='flex-end'),
)
table_name_widget = widgets.Text(
    value="",
    tooltip="String. Set the input data file.",
    layout=widgets.Layout(width='400px'),
)

# Sampling fraction: label + text box (kept as text; parsed by load_data_fun)
sample_widget_label = widgets.Label(
    value="Data sample:",
    layout=widgets.Layout(width="100px", justify_content='flex-end'),
)
sample_widget = widgets.Text(
    value="1.0",
    tooltip="Number between 0-1 determining what percent of data to subsample.",
    layout=widgets.Layout(width='100px'),
)

# Row layout for the inputs
box_table_name = widgets.HBox(
    children=[table_name_widget_label, table_name_widget],
    layout=widgets.Layout(margin="0 25px"),
)
box_sample = widgets.HBox(
    children=[sample_widget_label, sample_widget],
    layout=widgets.Layout(margin="0 25px"),
)
box_load_data = widgets.HBox(
    children=[Load_from_widget, box_table_name, box_sample],
    layout=widgets.Layout(
        justify_content="center",
        margin="0 25px",
        background_color="lightblue",
        border="3px solid lightblue",
    ),
)

# Execute button, centred in its own row
load_data_button = widgets.Button(description="Execute")
box_load_data_button = widgets.HBox(
    children=[load_data_button],
    layout=widgets.Layout(justify_content="center", margin="0 25px", background_color="lightblue"),
)


def load_data_clicked(b):
    """Pass the current widget values to ``load_data_fun``; ``b`` is unused."""
    load_data_fun(
        Load_from=Load_from_widget.value,
        table_name=table_name_widget.value,
        sam=sample_widget.value,
    )


load_data_button.on_click(load_data_clicked)

#################################################################################################

# ---- "Data preprocessing" step: header, inputs, button and click handler ----
text_input_data_preprocessing = widgets.HTML(
    value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>Data preprocessing</b></p>",
)

# Numeric candidate features
original_candidate_variables_numeric_widget_label = widgets.Label(
    value="Numeric features:",
    layout=widgets.Layout(width="100%", justify_content='flex-end'),
)
original_candidate_variables_numeric_widget = widgets.Textarea(
    value="",
    tooltip="List. Provide the feature names for the numeric variables that will be used for modeling.",
    layout=widgets.Layout(width='500px'),
)

# Character candidate features
original_candidate_variables_character_widget_label = widgets.Label(
    value="Character features:",
    layout=widgets.Layout(width="100%", justify_content='flex-end'),
)
original_candidate_variables_character_widget = widgets.Textarea(
    value="",
    tooltip="List. Provide the feature names for the character variables that will be used for modeling.",
    layout=widgets.Layout(width='500px'),
)

# Target variable
target_variable_name_name_widget_label = widgets.Label(
    value="Target variable:",
    layout=widgets.Layout(width="100px", justify_content='flex-end'),
)
target_variable_name_name_widget = widgets.Text(
    value="",
    tooltip="String. Set the target variable name in the original dataset.",
    layout=widgets.Layout(width='400px'),
)

# Weight variable
weight_variable_name_widget_label = widgets.Label(
    value="Weight variable:",
    layout=widgets.Layout(width="100px", justify_content='flex-end'),
)
weight_variable_name_widget = widgets.Text(
    value="",
    tooltip="String. Set the weight variable name in the original dataset. If not available, then provide None with quotes or leave it blank.",
    layout=widgets.Layout(width='400px'),
)

# Sample variable
sample_variable_name_widget_label = widgets.Label(
    value="Sample variable:",
    layout=widgets.Layout(width="100px", justify_content='flex-end'),
)
sample_variable_name_widget = widgets.Text(
    value="",
    tooltip="If this column does not exist, then provide None with quotes or leave it blank.",
    layout=widgets.Layout(width='400px'),
)

# Sample values
sample_values_widget_label = widgets.Label(
    value="Sample values:",
    layout=widgets.Layout(width="100px", justify_content='flex-end'),
)
sample_values_widget = widgets.Textarea(
    value="",
    tooltip="List of strings. Set the sub-sample values that are in the sample_variable_name field, e.f. for train/test data split and/or for different segments. If sample column does not exist, then provide '[None]' (without quotes) or leave it blank.",
    layout=widgets.Layout(width='500px'),
)

# Amount variable
amount_variable_name_widget_label = widgets.Label(
    value="Amount variable:",
    layout=widgets.Layout(width="150px", justify_content='flex-end'),
)
amount_variable_name_widget = widgets.Text(
    value="",
    tooltip="String. Set the monetary loss associated with a delinquent case, if available. If this information does not exist, then provide None with quotes or leave it blank.",
    layout=widgets.Layout(width='400px'),
)

# One label/input pair per box
box_original_candidate_variables_numeric = widgets.HBox(
    children=[original_candidate_variables_numeric_widget_label, original_candidate_variables_numeric_widget],
    layout=widgets.Layout(margin="0 25px"),
)
box_original_candidate_variables_character = widgets.HBox(
    children=[original_candidate_variables_character_widget_label, original_candidate_variables_character_widget],
    layout=widgets.Layout(margin="0 25px"),
)
box_target_variable_name = widgets.HBox(
    children=[target_variable_name_name_widget_label, target_variable_name_name_widget],
    layout=widgets.Layout(margin="0 25px"),
)
box_weight_variable = widgets.HBox(
    children=[weight_variable_name_widget_label, weight_variable_name_widget],
    layout=widgets.Layout(margin="0 25px"),
)
box_amount_variable = widgets.HBox(
    children=[amount_variable_name_widget_label, amount_variable_name_widget],
    layout=widgets.Layout(margin="0 25px"),
)
box_sample_variable = widgets.HBox(
    children=[sample_variable_name_widget_label, sample_variable_name_widget],
    layout=widgets.Layout(margin="0 25px"),
)
box_sample_values = widgets.HBox(
    children=[sample_values_widget_label, sample_values_widget],
    layout=widgets.Layout(margin="0 25px"),
)

# Three rows stacked inside one bordered column
box_data_preprocess1 = widgets.HBox(
    children=[box_original_candidate_variables_numeric, box_original_candidate_variables_character],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)
box_data_preprocess2 = widgets.HBox(
    children=[box_target_variable_name, box_weight_variable, box_amount_variable],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)
box_data_preprocess3 = widgets.HBox(
    children=[box_sample_variable, box_sample_values],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)
box_data_preprocess4 = widgets.VBox(
    children=[box_data_preprocess1, box_data_preprocess2, box_data_preprocess3],
    layout=widgets.Layout(border="3px solid lightblue"),
)

# Execute button, centred in its own row
data_preprocessing_button = widgets.Button(description="Execute")
box_data_preprocessing_button = widgets.HBox(
    children=[data_preprocessing_button],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)


def data_preprocessing_clicked(b):
    """Pass the current widget values to ``data_preprocessing``; ``b`` is unused."""
    data_preprocessing(
        original_candidate_variables_numeric_widget.value,
        original_candidate_variables_character_widget.value,
        target_variable_name_name_widget.value,
        weight_variable_name_widget.value,
        sample_variable_name_widget.value,
        sample_values_widget.value,
        amount_variable_name_widget.value,
    )


data_preprocessing_button.on_click(data_preprocessing_clicked)
										  
#################################################################################################

# ---- "Data quality" step: header, button and click handler (no inputs) ----
text_input_data_quality = widgets.HTML(
    value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>Data quality</b></p>",
)

data_quality_button = widgets.Button(description="Execute")
box_data_quality_button = widgets.HBox(
    children=[data_quality_button],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)


def data_quality_clicked(b):
    """Run ``data_quality``; the button argument ``b`` is unused."""
    data_quality()


data_quality_button.on_click(data_quality_clicked)

#################################################################################################

# ---- "Remove features with high missing values percentage" step ----
text_input_vr_missing = widgets.HTML(
    value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>Remove features with high missing values percentage</b></p>",
)

# Threshold: label + text box (value is passed on as text)
missing_variables_to_drop_threshold_widget_label = widgets.Label(
    value="Missing value threshold:",
    layout=widgets.Layout(width="200px", justify_content='flex-end'),
)
missing_variables_to_drop_threshold_widget = widgets.Text(
    value='0.5',
    tooltip="Features with missing values above this threshold will be dropped",
    layout=widgets.Layout(width='100px'),
)

box_missing_variables_to_drop_threshold = widgets.HBox(
    children=[
        missing_variables_to_drop_threshold_widget_label,
        missing_variables_to_drop_threshold_widget,
    ],
    layout=widgets.Layout(justify_content="center", margin="0 25px", border="3px solid lightblue"),
)

# Execute button, centred in its own row
vr_missing_button = widgets.Button(
    description="Execute",
    layout=widgets.Layout(justify_content="center"),
)
box_vr_missing_button = widgets.HBox(
    children=[vr_missing_button],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)


def vr_missing_clicked(b):
    """Pass the threshold text to ``vr_missing``; ``b`` is unused."""
    vr_missing(missing_variables_to_drop_threshold_widget.value)


vr_missing_button.on_click(vr_missing_clicked)

#################################################################################################

# ---- "Remove character features with many levels" step ----
text_input_vr_char_many_levels = widgets.HTML(
    value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>Remove character features with many levels</b></p>",
)

# Threshold: label + text box (value is passed on as text)
char_many_levels_threshold_widget_label = widgets.Label(
    value="Character level threshold:",
    layout=widgets.Layout(width="200px", justify_content='flex-end'),
)
char_many_levels_threshold_widget = widgets.Text(
    value='10',
    tooltip="Character features with levels above (not equal) this threshold will be dropped",
    layout=widgets.Layout(width='100px'),
)

box_vr_char_many_levels = widgets.HBox(
    children=[char_many_levels_threshold_widget_label, char_many_levels_threshold_widget],
    layout=widgets.Layout(justify_content="center", margin="0 25px", border="3px solid lightblue"),
)

# Execute button, centred in its own row
vr_char_many_levels_button = widgets.Button(description="Execute")
box_vr_char_many_levels_button = widgets.HBox(
    children=[vr_char_many_levels_button],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)


def vr_char_many_levels_clicked(b):
    """Pass the threshold text to ``vr_char_many_levels``; ``b`` is unused."""
    vr_char_many_levels(char_many_levels_threshold_widget.value)


vr_char_many_levels_button.on_click(vr_char_many_levels_clicked)

#################################################################################################

# ---- "Outlier replacement for numeric features" step ----
text_input_vr_outliers = widgets.HTML(
    value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>Outlier replacement for numeric features</b></p>",
)

# Coefficient: label + text box (value is passed on as text)
iqr_coef_widget_label = widgets.Label(
    value="Coefficient for Interquantile range:",
    layout=widgets.Layout(width="200px", justify_content='flex-end'),
)
iqr_coef_widget = widgets.Text(
    value='1.5',
    tooltip="The coefficient for Interquantile range can be used to adjust how many outliers to replace; the higher the value the less outliers are replaced.",
    layout=widgets.Layout(width='100px'),
)

box_vr_outliers = widgets.HBox(
    children=[iqr_coef_widget_label, iqr_coef_widget],
    layout=widgets.Layout(justify_content="center", margin="0 25px", border="3px solid lightblue"),
)

# Execute button, centred in its own row
vr_outliers_button = widgets.Button(description="Execute")
box_vr_outliers_button = widgets.HBox(
    children=[vr_outliers_button],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)


def vr_outliers_clicked(b):
    """Pass the coefficient text to ``vr_outliers``; ``b`` is unused."""
    vr_outliers(iqr_coef_widget.value)


vr_outliers_button.on_click(vr_outliers_clicked)

#################################################################################################

# ---- "Convert categorical features to binary variables" step ----
text_input_vr_binary = widgets.HTML(
    value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>Convert categorical features to binary variables</b></p>",
)

# Which category to drop in the encoding
drop_method_widget = widgets.RadioButtons(
    options=['last', 'first', None],
    description="Drop method:",
    tooltip="Specifies which value to drop from the one hot encoder. None will return binary variables for all categories. 'first' will drop the most populated category. 'last' will drop the least populated category.",
    layout=widgets.Layout(width='100px'),
)

# Protected class: label + text area
protected_class_widget_label = widgets.Label(
    value="Protected class:",
    layout=widgets.Layout(width="100px", justify_content='flex-end'),
)
protected_class_widget = widgets.Textarea(
    value="",
    tooltip="Specifies accepted values for the protected class. For non-protected class conversions leave it blank.",
    layout=widgets.Layout(width='500px'),
)

box_vr_binary = widgets.HBox(
    children=[drop_method_widget, protected_class_widget_label, protected_class_widget],
    layout=widgets.Layout(justify_content="center", margin="0 25px", border="3px solid lightblue"),
)

# Execute button, centred in its own row
vr_binary_button = widgets.Button(description="Execute")
box_vr_binary_button = widgets.HBox(
    children=[vr_binary_button],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)


def vr_binary_clicked(b):
    """Pass the drop method and protected-class text to ``vr_binary``; ``b`` is unused."""
    vr_binary(drop_method_widget.value, protected_class_widget.value)


vr_binary_button.on_click(vr_binary_clicked)
										  
#################################################################################################

# ---- "Impute missing values" step ----
text_input_vr_imputation = widgets.HTML(
    value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>Impute missing values</b></p>",
)

# Strategy: label + text box
impute_widget_label = widgets.Label(
    value="Imputation strategy:",
    layout=widgets.Layout(width="200px", justify_content='flex-end'),
)
impute_widget = widgets.Text(
    value="median",
    tooltip="String. Select the strategy to impute the missing values. Current options are 'median', 'mean', or a specific value without quotes, e.g. 0.",
    layout=widgets.Layout(width='200px'),
)

box_vr_impute = widgets.HBox(
    children=[impute_widget_label, impute_widget],
    layout=widgets.Layout(justify_content="center", margin="0 25px", border="3px solid lightblue"),
)

# Execute button, centred in its own row
vr_impute_button = widgets.Button(description="Execute")
box_vr_impute_button = widgets.HBox(
    children=[vr_impute_button],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)


def vr_impute_clicked(b):
    """Pass the strategy text to ``vr_impute``; ``b`` is unused."""
    vr_impute(impute_widget.value)


vr_impute_button.on_click(vr_impute_clicked)

#################################################################################################

# ---- "Drop numeric variables with only one value" step (no inputs) ----
text_input_vr_one_val = widgets.HTML(
    value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>Drop numeric variables with only one value</b></p>",
)

vr_one_val_button = widgets.Button(description="Execute")
box_vr_one_val_button = widgets.HBox(
    children=[vr_one_val_button],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)


def vr_one_val_clicked(b):
    """Run ``vr_one_val``; the button argument ``b`` is unused."""
    vr_one_val()


vr_one_val_button.on_click(vr_one_val_clicked)

#################################################################################################

# ---- "Drop variables based on low Gini" step ----
text_input_vr_gini = widgets.HTML(
    value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>Drop variables based on low Gini</b></p>",
)

# Threshold: label + text box (value is passed on as text)
gini_thresh_widget_label = widgets.Label(
    value="Gini threshold:",
    layout=widgets.Layout(width="100px", justify_content='flex-end'),
)
gini_thresh_widget = widgets.Text(
    value="0.001",
    tooltip="Float. Variables with Gini coefficient below this threshold will be dropped from the reamained of the analysis.",
    layout=widgets.Layout(width='100px'),
)

box_vr_gini = widgets.HBox(
    children=[gini_thresh_widget_label, gini_thresh_widget],
    layout=widgets.Layout(justify_content="center", margin="0 25px", border="3px solid lightblue"),
)

# Execute button, centred in its own row
vr_gini_button = widgets.Button(description="Execute")
box_vr_gini_button = widgets.HBox(
    children=[vr_gini_button],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)


def vr_gini_clicked(b):
    """Pass the threshold text to ``vr_gini``; ``b`` is unused."""
    vr_gini(gini_thresh_widget.value)


vr_gini_button.on_click(vr_gini_clicked)
										  
#################################################################################################

# ---- "Remove highly correlated features" step ----
text_input_vr_corr = widgets.HTML(
    value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>Remove highly correlated features</b></p>",
)

# Threshold: label + text box (value is passed on as text)
corr_thresh_widget_label = widgets.Label(
    value="Correlation threshold:",
    layout=widgets.Layout(width="200px", justify_content='flex-end'),
)
corr_thresh_widget = widgets.Text(
    value="0.9",
    tooltip="Float. Variables with correlation greater than this threshold will be dropped.",
    layout=widgets.Layout(width='100px'),
)

box_vr_corr = widgets.HBox(
    children=[corr_thresh_widget_label, corr_thresh_widget],
    layout=widgets.Layout(justify_content="center", margin="0 25px", border="3px solid lightblue"),
)

# Execute button, centred in its own row
vr_corr_button = widgets.Button(description="Execute")
box_vr_corr_button = widgets.HBox(
    children=[vr_corr_button],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)


def vr_corr_clicked(b):
    """Pass the threshold text to ``vr_corr``; ``b`` is unused."""
    vr_corr(corr_thresh_widget.value)


vr_corr_button.on_click(vr_corr_clicked)
										  
#################################################################################################

text_input_vr_vif = widgets.HTML(value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>VIF elimination</b></p>")

VIF_reduct_widget = widgets.RadioButtons(options=['False', 'True'], 
                                        description="VIF reduction:",
                                        tooltip="Used to determine whether VIF is run after the correlation feature elimination step. True allows the execution of this step, and False skips this step.", 
                                        layout=widgets.Layout(width='100px', margin="0 25px"))
first_vif_thresh_widget = widgets.Text(value="15", 
                                tooltip="Float. Variables with VIF greater than this threshold will be dropped. This paramater is only applicable if 'VIF reduction = True'.", 
                                layout=widgets.Layout(width='100px'))
first_vif_thresh_widget_label = widgets.Label("VIF reduction threshold:", 
                                          layout=widgets.Layout(width="200px",  justify_content='flex-end'))

box_vr_vif = widgets.HBox([VIF_reduct_widget, first_vif_thresh_widget_label, first_vif_thresh_widget], 
                          layout=widgets.Layout(justify_content="center", margin="0 25px", border="3px solid lightblue"))

vr_vif_button = widgets.Button(description="Execute")
box_vr_vif_button = widgets.HBox([vr_vif_button], layout=widgets.Layout(justify_content="center", margin="0 25px"))

def vr_vif_clicked(b):
    """Click handler for the VIF "Execute" button.

    Reads the current VIF switch and threshold inputs and forwards them, in
    that order, to ``vr_vif``. The callback argument ``b`` is not used.
    """
    run_vif = VIF_reduct_widget.value
    threshold = first_vif_thresh_widget.value
    vr_vif(run_vif, threshold)


# Register the handler on the "Execute" button of this section.
vr_vif_button.on_click(vr_vif_clicked)
										  
#################################################################################################

# Section heading for the Lasso feature-selection step.
text_input_fe_lasso = widgets.HTML(
    value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>Lasso Logistic Regression for feature selection</b></p>"
)

# Solver selection.
LogisticRegression_solver_widget = widgets.RadioButtons(
    options=["liblinear", "saga"],
    description="LogisticRegression solver:",
    tooltip=(
        "String. This is the solver argument in sklearn.LogisticRegression. "
        "Use 'saga' to reproduce the results, but there might be convergence warnings. "
        "Use 'liblinear' to avoid convergence warnings, but the results will not be reproduced."
    ),
    layout=widgets.Layout(width="200px"),
)

# Lower bound of the C grid (free-text input plus its label).
c_mn_widget = widgets.Text(
    value="0.0001",
    tooltip=(
        "Float. Minimum value of the C parameter in LogisticRegression "
        "(inverse of regularization strength) to perform grid search."
    ),
    layout=widgets.Layout(width="100px"),
)
c_mn_widget_label = widgets.Label(
    value="C min:",
    layout=widgets.Layout(width="100px", justify_content="flex-end"),
)

# Upper bound of the C grid.
c_mx_widget = widgets.Text(
    value="0.5",
    tooltip=(
        "Float. Maximum value of the C parameter in LogisticRegression "
        "(inverse of regularization strength) to perform grid search."
    ),
    layout=widgets.Layout(width="100px"),
)
c_mx_widget_label = widgets.Label(
    value="C max:",
    layout=widgets.Layout(width="100px", justify_content="flex-end"),
)

# Number of grid points between the two bounds.
n_widget = widgets.Text(
    value="10",
    tooltip="Int. Number of grid searches between C min and C max.",
    layout=widgets.Layout(width="100px"),
)
n_widget_label = widgets.Label(
    value="# of Lasso grid searches:",
    layout=widgets.Layout(width="200px", justify_content="flex-end"),
)

# VIF threshold used by the Lasso step (separate input from the earlier VIF step).
second_vif_thresh_widget = widgets.Text(
    value="5",
    tooltip="Float. Variables with VIF greater than this threshold will be dropped.",
    layout=widgets.Layout(width="100px"),
)
second_vif_thresh_widget_label = widgets.Label(
    value="Lasso VIF threshold:",
    layout=widgets.Layout(width="200px", justify_content="flex-end"),
)

# Selection criterion.
lasso_criterion_widget = widgets.RadioButtons(
    options=["BIC", "AIC"],
    description="Lasso selection criterion:",
    tooltip=(
        "String. User selects which criterion to optimize for feature selection. "
        "Options are: 'AIC', 'BIC'."
    ),
    layout=widgets.Layout(width="200px"),
)

# Early-stop switch; the options are the strings 'True'/'False', not booleans.
early_stop_widget = widgets.RadioButtons(
    options=["True", "False"],
    description="Lasso grid search early stop:",
    tooltip=(
        "If set to 'True', the grid search stops when the Lasso selection criterion is not improved "
        "5 iterations after the best value. "
        "Set to 'False' to execute all the grid searches."
    ),
    layout=widgets.Layout(width="200px", margin="0 25px"),
)

# Label + input pairs.
box_c_mn = widgets.HBox(
    children=[c_mn_widget_label, c_mn_widget],
    layout=widgets.Layout(margin="0 25px"),
)
box_c_mx = widgets.HBox(
    children=[c_mx_widget_label, c_mx_widget],
    layout=widgets.Layout(margin="0 25px"),
)
box_n = widgets.HBox(
    children=[n_widget_label, n_widget],
    layout=widgets.Layout(margin="0 25px"),
)
box_second_vif_thresh = widgets.HBox(
    children=[second_vif_thresh_widget_label, second_vif_thresh_widget],
    layout=widgets.Layout(margin="0 25px"),
)

# First row: solver, criterion, VIF threshold, early stop.
box_lasso1 = widgets.HBox(
    children=[
        LogisticRegression_solver_widget,
        lasso_criterion_widget,
        box_second_vif_thresh,
        early_stop_widget,
    ],
    layout=widgets.Layout(justify_content="center", margin="25px 25px"),
)
# Second row: the C grid settings.
box_lasso2 = widgets.HBox(
    children=[box_c_mn, box_c_mx, box_n],
    layout=widgets.Layout(justify_content="center", margin="25px 25px"),
)
# Both rows stacked inside one bordered container.
box_lasso3 = widgets.VBox(
    children=[box_lasso1, box_lasso2],
    layout=widgets.Layout(justify_content="center", margin="0 25px", border="3px solid lightblue"),
)

# "Execute" button for this step, placed alone in a centre-justified row.
fe_lasso_button = widgets.Button(description="Execute")
box_fe_lasso_button = widgets.HBox(
    children=[fe_lasso_button],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)

def fe_lasso_clicked(b):
    """Click handler for the Lasso "Execute" button.

    Collects the current value of every Lasso input widget and passes them
    positionally to ``fe_lasso``. The callback argument ``b`` is not used.
    """
    # Listed in the positional order used for the fe_lasso call.
    inputs = (
        LogisticRegression_solver_widget,
        c_mn_widget,
        c_mx_widget,
        n_widget,
        second_vif_thresh_widget,
        lasso_criterion_widget,
        early_stop_widget,
    )
    fe_lasso(*[widget.value for widget in inputs])


# Register the handler on the "Execute" button of this section.
fe_lasso_button.on_click(fe_lasso_clicked)

#################################################################################################

# Section heading for the model-training step.
text_input_logistic_reg = widgets.HTML(
    value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>Train Logistic Regression model</b></p>"
)

# Stepwise method; note that 'None' is offered as a string option.
method_widget = widgets.RadioButtons(
    options=["backward", "forward", "combined", "None"],
    description="Stepwise method:",
    tooltip=(
        "String. Possible values: 'backward', 'forward', 'combined'. "
        "Anything else will retain the features from Lasso selection."
    ),
    layout=widgets.Layout(width="100px"),
)

# Number of features, entered as free text (default is the text "None").
number_of_feat_widget = widgets.Text(
    value="None",
    tooltip=(
        "Set to None to allow for feature selection using the p-value. "
        "Otherwise, provide the number of features to be used in the final regression model."
    ),
    layout=widgets.Layout(width="100px"),
)
number_of_feat_widget_label = widgets.Label(
    value="# of features:",
    layout=widgets.Layout(width="100px", justify_content="flex-end"),
)

# Significance level, entered as free text.
significance_lvl_widget = widgets.Text(
    value="0.05",
    tooltip="Features with p-value greater than this threshold will not be included in the selected features.",
    layout=widgets.Layout(width="100px"),
)
significance_lvl_widget_label = widgets.Label(
    value="Significance level:",
    layout=widgets.Layout(width="100px", justify_content="flex-end"),
)

# Label + input pairs.
box_number_of_feat = widgets.HBox(
    children=[number_of_feat_widget_label, number_of_feat_widget],
    layout=widgets.Layout(margin="0 25px"),
)
box_significance_lvl = widgets.HBox(
    children=[significance_lvl_widget_label, significance_lvl_widget],
    layout=widgets.Layout(margin="0 25px"),
)

# Bordered, centre-justified row with all three controls.
box_logistic_regression = widgets.HBox(
    children=[method_widget, box_number_of_feat, box_significance_lvl],
    layout=widgets.Layout(justify_content="center", margin="0 25px", border="3px solid lightblue"),
)

# "Execute" button for this step, placed alone in a centre-justified row.
logistic_reg_button = widgets.Button(description="Execute")
box_logistic_reg_button = widgets.HBox(
    children=[logistic_reg_button],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)

def logistic_reg_clicked(b):
    """Click handler for the model-training "Execute" button.

    Forwards the selected stepwise method, the number-of-features input and
    the significance-level input, in that order, to ``logistic_reg``.
    The callback argument ``b`` is not used.
    """
    method = method_widget.value
    feature_count = number_of_feat_widget.value
    significance = significance_lvl_widget.value
    logistic_reg(method, feature_count, significance)


# Register the handler on the "Execute" button of this section.
logistic_reg_button.on_click(logistic_reg_clicked)

#################################################################################################

# Section heading for the reporting step.
text_input_logistic_reg_rep = widgets.HTML(
    value="<p style='color:#1E88E5; text-align:center; font-size:24px;'><b>Produce Logistic Regression reports</b></p>"
)

# Top-percent input (free text).
top_percent_widget = widgets.Text(
    value="100",
    tooltip=(
        "Int. Top percent of data ranked by score selected to produce the reports. "
        "Use 100 to select the full dataset. "
        "This is applied only to the performance metrics table."
    ),
    layout=widgets.Layout(width="100px"),
)
top_percent_widget_label = widgets.Label(
    value="Ranked data top %:",
    layout=widgets.Layout(width="200px", justify_content="flex-end"),
)

# Number of quantiles for the lift table (free text).
n_bands_widget = widgets.Text(
    value="10",
    tooltip="Int. Number of quantiles to divide the ranking data based on score.",
    layout=widgets.Layout(width="100px"),
)
n_bands_widget_label = widgets.Label(
    value="Lift table # of quantiles:",
    layout=widgets.Layout(width="200px", justify_content="flex-end"),
)

# Optional cost inputs; both default to the text "None".
cost_fp_widget = widgets.Text(
    value="None",
    tooltip=(
        "Cost of blocking a legitimate customer. "
        "Leave blank or 'None' without quotes if not available."
    ),
    layout=widgets.Layout(width="100px"),
)
cost_fp_widget_label = widgets.Label(
    value="False positive cost:",
    layout=widgets.Layout(width="200px", justify_content="flex-end"),
)
cost_fn_widget = widgets.Text(
    value="None",
    tooltip=(
        "Cost of missing a fraud/credit risk customer. "
        "Leave blank or 'None' without quotes if not available."
    ),
    layout=widgets.Layout(width="100px"),
)
cost_fn_widget_label = widgets.Label(
    value="False negative cost:",
    layout=widgets.Layout(width="200px", justify_content="flex-end"),
)

# Label + input pairs.
box_top_percent = widgets.HBox(
    children=[top_percent_widget_label, top_percent_widget],
    layout=widgets.Layout(margin="0 25px"),
)
box_n_bands = widgets.HBox(
    children=[n_bands_widget_label, n_bands_widget],
    layout=widgets.Layout(margin="0 25px"),
)
box_cost_fp = widgets.HBox(
    children=[cost_fp_widget_label, cost_fp_widget],
    layout=widgets.Layout(margin="0 25px"),
)
box_cost_fn = widgets.HBox(
    children=[cost_fn_widget_label, cost_fn_widget],
    layout=widgets.Layout(margin="0 25px"),
)

# First row: ranking settings. Second row: cost settings.
box_logistic_regression_rep1 = widgets.HBox(
    children=[box_top_percent, box_n_bands],
    layout=widgets.Layout(justify_content="center", margin="25px 25px"),
)
box_logistic_regression_rep2 = widgets.HBox(
    children=[box_cost_fp, box_cost_fn],
    layout=widgets.Layout(justify_content="center", margin="25px 25px"),
)
# Both rows stacked inside one bordered container.
box_logistic_regression_rep = widgets.VBox(
    children=[box_logistic_regression_rep1, box_logistic_regression_rep2],
    layout=widgets.Layout(justify_content="center", margin="0 25px", border="3px solid lightblue"),
)

# "Execute" button for this step, placed alone in a centre-justified row.
logistic_reg_rep_button = widgets.Button(description="Execute")
box_logistic_reg_rep_button = widgets.HBox(
    children=[logistic_reg_rep_button],
    layout=widgets.Layout(justify_content="center", margin="0 25px"),
)

def logistic_reg_rep_clicked(b):
    """Click handler for the reporting "Execute" button.

    Forwards the top-percent, number-of-quantiles, false-positive-cost and
    false-negative-cost inputs, in that order, to ``logistic_reg_report``.
    The callback argument ``b`` is not used.
    """
    inputs = (top_percent_widget, n_bands_widget, cost_fp_widget, cost_fn_widget)
    logistic_reg_report(*[widget.value for widget in inputs])


# Register the handler on the "Execute" button of this section.
logistic_reg_rep_button.on_click(logistic_reg_rep_clicked)

#################################################################################################

# Spacer widget shown between consecutive sections of the UI.
# NOTE(review): a Label whose value is only newlines may not add visible height -- confirm.
spacer = widgets.Label("\n\n\n\n\n\n")

# Disabled alternative for spacing widgets 10px apart:
#layout = widgets.Layout(margin="0 10px")

# Flexible layout with a 50px margin and 100px padding, children spread with space-around.
# Disabled options: width="200px", height="100px".
w_layout1 = widgets.Layout(margin="50px", padding="100px", flex="1 1 auto", justify_content="space-around")

# Flexible layout with a 500px margin, children centre-justified.
# Disabled options: width="200px", height="100px", padding="10px".
w_layout2 = widgets.Layout(margin="500px", flex="1 1 auto", justify_content="center")


# Render the whole UI. Each inner tuple is one section (heading, inputs, button
# row, output area); the shared spacer is appended after every section, so the
# widgets reach display() in exactly the same order as a flat argument list.
display(*[
    widget
    for section in (
        (text_input_restart_button, box_restart_clear_button, output99, output98),
        (text_input_data_load, box_load_data, box_load_data_button, output1),
        (text_input_data_preprocessing, box_data_preprocess4, box_data_preprocessing_button, output2),
        (text_input_data_quality, box_data_quality_button, output3),
        (text_input_vr_missing, box_missing_variables_to_drop_threshold, box_vr_missing_button, output4),
        (text_input_vr_char_many_levels, box_vr_char_many_levels, box_vr_char_many_levels_button, output5),
        (text_input_vr_outliers, box_vr_outliers, box_vr_outliers_button, output6),
        (text_input_vr_binary, box_vr_binary, box_vr_binary_button, output7),
        (text_input_vr_imputation, box_vr_impute, box_vr_impute_button, output8),
        (text_input_vr_one_val, box_vr_one_val_button, output9),
        (text_input_vr_gini, box_vr_gini, box_vr_gini_button, output10),
        (text_input_vr_corr, box_vr_corr, box_vr_corr_button, output11),
        (text_input_vr_vif, box_vr_vif, box_vr_vif_button, output12),
        (text_input_fe_lasso, box_lasso3, box_fe_lasso_button, output13),
        (text_input_logistic_reg, box_logistic_regression, box_logistic_reg_button, output14),
        (text_input_logistic_reg_rep, box_logistic_regression_rep, box_logistic_reg_rep_button, output15),
    )
    for widget in section + (spacer,)
])

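# Disabled alternative: arrange everything in a single VBox layout and display it.
# NOTE: the list below is out of date compared with the active display(...) call above --
# it uses the bare buttons instead of the box_*_button rows and omits the restart and
# report sections.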
#ui = widgets.VBox([text_input_data_load, box_load_data, load_data_button, output1, spacer,
#                 text_input_data_preprocessing, box_data_preprocess1, box_data_preprocess2, box_data_preprocess3, data_preprocessing_button, output2, spacer, 
#                 text_input_data_quality, data_quality_button, output3, spacer, 
#                 text_input_vr_missing, box_missing_variables_to_drop_threshold, vr_missing_button, output4, spacer, 
#                 text_input_vr_char_many_levels, box_vr_char_many_levels, vr_char_many_levels_button, output5, spacer, 
#                 text_input_vr_outliers, box_vr_outliers, vr_outliers_button, output6, spacer, 
#                 text_input_vr_binary, box_vr_binary, vr_binary_button, output7, spacer, 
#                 text_input_vr_imputation, box_vr_impute, vr_impute_button, output8, spacer,
#                 text_input_vr_one_val, vr_one_val_button, output9, spacer, 
#                 text_input_vr_gini, box_vr_gini, vr_gini_button, output10, spacer,
#                 text_input_vr_corr, box_vr_corr, vr_corr_button, output11, spacer,
#                 text_input_vr_vif, box_vr_vif, vr_vif_button, output12, spacer, 
#                 text_input_fe_lasso, box_lasso1, box_lasso2, fe_lasso_button, output13, spacer, 
#                 text_input_logistic_reg, logistic_reg_button, output14, spacer
#                  ])
#display(ui)