In [None]:
#*******************************************************************************************
 #
 #  File Name:  charity_hyperparameters_optimization.ipynb
 #
 #  File Description:
 #      This interactive Python notebook, charity_hyperparameters_optimization.ipynb,
 #      reads the csv file, charity_data.csv, from the resources folder and uses Python
 #      and neural network methods to determine the optimum neural network model parameters.
 #
 #
 #  Date            Description                             Programmer
 #  ----------      ------------------------------------    ------------------
 #  12/02/2023      Initial Development                     Nicholas J. George
 #
 #******************************************************************************************

import deep_learningx
import pandasx

import logx
logx.create_directory(logx.MODELS_DIRECTORY_PATH)

import charity_constants

import datetime
import io
import json
import time

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from silence_tensorflow import silence_tensorflow
silence_tensorflow()

import tensorflow as tf

In [None]:
# Name of this notebook file.
CONSTANT_LOCAL_FILE_NAME = 'charity_hyperparameters_optimization.ipynb'


# Both logx mode switches are passed False for this run.
# NOTE(review): presumably this disables log-file and image output -- confirm in logx.
logx.set_log_mode(False)

logx.set_image_mode(False)


# Announce the program name to logx before any other logging call is made.
logx.begin_program('charity_hyperparameters_optimization')

In [None]:
# Per-column value handed to deep_learningx.bin_series as its second argument
# in Section 2.3.
# NOTE(review): presumably a minimum-count cutoff below which categories are
# merged -- confirm against deep_learningx.bin_series.
BINNING_DICTIONARY = {
    'NAME': 1,
    'APPLICATION_TYPE': 156,
    'AFFILIATION': 33,
    'CLASSIFICATION': 6074,
    'USE_CASE': 146,
    'ORGANIZATION': 0,
    'INCOME_AMT': 728,
    'ASK_AMT': 53,
}

# <br> **Section 1: Extraction**

## **1.1: Import and Read Charity Data**

In [None]:
# Explicit dtype for every column of the input file, so pandas is told the
# types instead of inferring them.
data_type_dictionary = {
    'EIN': str,
    'NAME': str,
    'APPLICATION_TYPE': str,
    'AFFILIATION': str,
    'CLASSIFICATION': str,
    'USE_CASE': str,
    'ORGANIZATION': str,
    'STATUS': int,
    'INCOME_AMT': str,
    'SPECIAL_CONSIDERATIONS': str,
    'ASK_AMT': int,
    'IS_SUCCESSFUL': int,
}

# Load the raw charity data from the path configured in charity_constants.
charity_dataframe = pd.read_csv(
    charity_constants.INPUT_FILE_PATH,
    dtype = data_type_dictionary,
)

logx.log_write_object(charity_dataframe)

## **1.2: Display Charity Data**

In [None]:
# Last expression of the cell: the value returned by pandasx for the raw
# charity frame, captioned 'Table 1.2', becomes the cell output.
pandasx.return_formatted_table(charity_dataframe, 'Table 1.2: Charity Data Table')

# <br> **Section 2: Preprocessing**

## **2.1: Change ASK_AMT Integers to Categories**

In [None]:
# Work on a copy so the raw frame loaded in Section 1 is never modified and
# this cell can be re-run on its own.
updated_charity_dataframe = charity_dataframe.copy()

# Park the numeric amounts under TEMP; ASK_AMT is rebuilt below as a category.
updated_charity_dataframe = updated_charity_dataframe.rename(columns = {'ASK_AMT': 'TEMP'})


# Right-closed bin edges, one more edge than labels.
#   * The final edge is +infinity so the '50M+' label is genuinely open-ended;
#     with a finite last edge, any larger amount fell outside every bin and
#     silently became NaN.
#   * The 999999 edge matches the '500000-999999' label, so exactly 1,000,000
#     now lands in '1M-5M'.
ask_amt_bins_integer_list \
    = [0, 9999, 24999, 99999, 499999, 999999, 4999999, 9999999, 49999999, float('inf')]

ask_amt_labels_string_list \
    = ['0-9999', '10000-24999', '25000-99999', '100000-499999', '500000-999999', '1M-5M', '5M-10M', '10M-50M', '50M+']


# include_lowest = True keeps an amount of exactly 0 in the '0-9999' bin;
# pd.cut otherwise excludes the left-most edge and would return NaN for it.
updated_charity_dataframe['ASK_AMT'] \
    = pd.cut \
        (x = updated_charity_dataframe['TEMP'],
         bins = ask_amt_bins_integer_list,
         labels = ask_amt_labels_string_list,
         include_lowest = True)

# Restore the original column order, with the numeric TEMP column moved to the
# end (it is dropped in Section 2.2).
updated_charity_dataframe \
    = updated_charity_dataframe \
        [['EIN', 'NAME', 'APPLICATION_TYPE', 'AFFILIATION', 'CLASSIFICATION', 'USE_CASE',
          'ORGANIZATION', 'STATUS', 'INCOME_AMT', 'SPECIAL_CONSIDERATIONS', 'ASK_AMT',
          'IS_SUCCESSFUL', 'TEMP']]


logx.log_write_object(updated_charity_dataframe['ASK_AMT'])

## **2.2: Drop the Non-Beneficial ID Columns**

In [None]:
# Identifier / non-predictive columns, plus the TEMP helper column from 2.1.
drop_columns_string_list = ['EIN', 'STATUS', 'SPECIAL_CONSIDERATIONS', 'TEMP']

# errors = 'ignore' drops whichever of the listed columns are present, so the
# cell is safe to re-run and does not raise KeyError when only some of the
# columns remain. Rebinding (instead of inplace = True) leaves any previously
# displayed frame object unchanged.
updated_charity_dataframe \
    = updated_charity_dataframe.drop \
        (columns = drop_columns_string_list, errors = 'ignore')


logx.log_write_object(updated_charity_dataframe)

## **2.3: Bin Columns**

### **NAME**

In [None]:
# Bin the NAME column through deep_learningx.bin_series with the NAME entry of
# BINNING_DICTIONARY; 'OTHER' is passed as the third argument.
# NOTE(review): presumably the label given to binned values -- confirm.
updated_charity_dataframe['NAME'] = deep_learningx.bin_series(
    updated_charity_dataframe['NAME'],
    BINNING_DICTIONARY['NAME'],
    'OTHER',
)

# Log the resulting category frequencies, largest first.
logx.log_write_object(
    updated_charity_dataframe['NAME'].value_counts().sort_values(ascending = False)
)

### **APPLICATION_TYPE**

In [None]:
# Bin APPLICATION_TYPE through deep_learningx.bin_series; 'T20' is passed as
# the third argument.
# NOTE(review): presumably the label given to binned values -- confirm.
updated_charity_dataframe['APPLICATION_TYPE'] = deep_learningx.bin_series(
    updated_charity_dataframe['APPLICATION_TYPE'],
    BINNING_DICTIONARY['APPLICATION_TYPE'],
    'T20',
)

# Log the resulting category frequencies, largest first.
logx.log_write_object(
    updated_charity_dataframe['APPLICATION_TYPE'].value_counts().sort_values(ascending = False)
)

### **AFFILIATION**

In [None]:
# Bin AFFILIATION through deep_learningx.bin_series; 'OTHER' is passed as the
# third argument.
# NOTE(review): presumably the label given to binned values -- confirm.
updated_charity_dataframe['AFFILIATION'] = deep_learningx.bin_series(
    updated_charity_dataframe['AFFILIATION'],
    BINNING_DICTIONARY['AFFILIATION'],
    'OTHER',
)

# Log the resulting category frequencies, largest first.
logx.log_write_object(
    updated_charity_dataframe['AFFILIATION'].value_counts().sort_values(ascending = False)
)

### **CLASSIFICATION**

In [None]:
# Bin CLASSIFICATION through deep_learningx.bin_series; 'C2200' is passed as
# the third argument.
# NOTE(review): presumably the label given to binned values -- confirm.
updated_charity_dataframe['CLASSIFICATION'] = deep_learningx.bin_series(
    updated_charity_dataframe['CLASSIFICATION'],
    BINNING_DICTIONARY['CLASSIFICATION'],
    'C2200',
)

# Rewrite the short code 'C0' as 'C0000'.
updated_charity_dataframe['CLASSIFICATION'] = (
    updated_charity_dataframe['CLASSIFICATION'].replace('C0', 'C0000')
)

# Log the resulting category frequencies, largest first.
logx.log_write_object(
    updated_charity_dataframe['CLASSIFICATION'].value_counts().sort_values(ascending = False)
)

### **USE_CASE**

In [None]:
# Bin USE_CASE through deep_learningx.bin_series; 'OTHER' is passed as the
# third argument.
# NOTE(review): presumably the label given to binned values -- confirm.
updated_charity_dataframe['USE_CASE'] = deep_learningx.bin_series(
    updated_charity_dataframe['USE_CASE'],
    BINNING_DICTIONARY['USE_CASE'],
    'OTHER',
)

# Log the resulting category frequencies, largest first.
logx.log_write_object(
    updated_charity_dataframe['USE_CASE'].value_counts().sort_values(ascending = False)
)

### **ORGANIZATION**

In [None]:
# Bin ORGANIZATION through deep_learningx.bin_series with the ORGANIZATION
# entry of BINNING_DICTIONARY; 'CORPORATION' is passed as the third argument.
# NOTE(review): presumably the label given to binned values -- confirm.
updated_charity_dataframe['ORGANIZATION'] \
    = deep_learningx.bin_series \
        (updated_charity_dataframe['ORGANIZATION'], BINNING_DICTIONARY['ORGANIZATION'], 'CORPORATION')

# Log the ORGANIZATION frequencies (this cell previously logged AFFILIATION by
# mistake, so the ORGANIZATION binning result was never recorded).
logx.log_write_object \
    (updated_charity_dataframe['ORGANIZATION'].value_counts().sort_values(ascending = False))

### **INCOME_AMT**

In [None]:
# Bin INCOME_AMT through deep_learningx.bin_series; 'OTHER' is passed as the
# third argument.
# NOTE(review): presumably the label given to binned values -- confirm.
updated_charity_dataframe['INCOME_AMT'] = deep_learningx.bin_series(
    updated_charity_dataframe['INCOME_AMT'],
    BINNING_DICTIONARY['INCOME_AMT'],
    'OTHER',
)

# Log the resulting category frequencies, largest first.
logx.log_write_object(
    updated_charity_dataframe['INCOME_AMT'].value_counts().sort_values(ascending = False)
)

### **ASK_AMT**

In [None]:
# Bin the categorical ASK_AMT column (built in Section 2.1) through
# deep_learningx.bin_series; 'OTHER' is passed as the third argument.
# NOTE(review): presumably the label given to binned values -- confirm.
updated_charity_dataframe['ASK_AMT'] = deep_learningx.bin_series(
    updated_charity_dataframe['ASK_AMT'],
    BINNING_DICTIONARY['ASK_AMT'],
    'OTHER',
)

# Log the resulting category frequencies, largest first.
logx.log_write_object(
    updated_charity_dataframe['ASK_AMT'].value_counts().sort_values(ascending = False)
)

## **2.4: Convert All Features to Uppercase**

In [None]:
# Upper-case every string feature column, one column at a time, in the same
# order as the individual statements this loop replaces.
for feature_column_string in ('NAME',
                              'APPLICATION_TYPE',
                              'AFFILIATION',
                              'CLASSIFICATION',
                              'USE_CASE',
                              'ORGANIZATION',
                              'INCOME_AMT',
                              'ASK_AMT'):

    updated_charity_dataframe[feature_column_string] = (
        updated_charity_dataframe[feature_column_string].str.upper()
    )

logx.log_write_object(updated_charity_dataframe)

## **2.5: Display Updated DataFrame**

In [None]:
# Last expression of the cell: the value returned by pandasx for the
# preprocessed frame, captioned 'Table 2.5', becomes the cell output.
pandasx.return_formatted_table(updated_charity_dataframe, 'Table 2.5: Updated Charity Data Table')

# <br> **Section 3: Compile, Train, Evaluate, and Export the Model**

## **3.1: Convert and Split Data Set**

### **Convert Categorical Data to Numeric with `pd.get_dummies`**

In [None]:
# One-hot encode the categorical columns of the preprocessed frame.
dummies_dataframe = pd.get_dummies(data = updated_charity_dataframe)

logx.log_write_object(dummies_dataframe)

### **Split Data Set into Features and Target Arrays**

In [None]:
# Target array: the IS_SUCCESSFUL column as a NumPy array.
y_nparray = dummies_dataframe.loc[:, 'IS_SUCCESSFUL'].values

logx.log_write_object(y_nparray)

In [None]:
# Feature array: every column except the IS_SUCCESSFUL target.
x_nparray = dummies_dataframe.drop(columns = ['IS_SUCCESSFUL']).values

logx.log_write_object(x_nparray)

### **Split X-Y Arrays into Training and Testing Arrays**

In [None]:
# Split features and target into training and testing arrays; the test
# fraction and the random seed both come from charity_constants.
x_train_nparray, x_test_nparray, y_train_nparray, y_test_nparray = train_test_split(
    x_nparray,
    y_nparray,
    test_size = charity_constants.NN_TEST_SIZE,
    random_state = charity_constants.NN_RANDOM_STATE,
)

# Log the string form of each split, followed by a blank line, in the order
# x-train, x-test, y-train, y-test.
for split_nparray in (x_train_nparray, x_test_nparray, y_train_nparray, y_test_nparray):

    logx.log_write_object(str(split_nparray) + '\n\n')

## **3.2: Define and Set Hyperparameter Ranges**

In [None]:
# Search configuration handed to deep_learningx. The keys, values and key order
# are exactly those of the original literal; only the layout and comments
# differ. How each key is interpreted is defined inside deep_learningx.
hyperparameters_dictionary = {
    # --- tuner and training settings ---
    'tuner_type': 'hyperband',
    'best_model_count': 5,
    'hyperband_iterations': 2,
    'patience': 42,
    'batch_size': 64,
    'validation_split': 0.2,
    'verbose': 'auto',
    'max_epochs': 1000,
    'restore_best_weights': True,
    'activation_choice_list': ['selu'],
    # Number of feature columns, taken from the first training row.
    'input_features': len(x_train_nparray[0]),
    'objective': 'val_accuracy',
    'objective_direction': 'max',
    # --- first layer ---
    'first_layer_units_range': (57, 57),
    'first_units_step': 1,
    'first_dropout_range': (0.0, 0.01),
    'first_dropout_step': 0.000001,
    'first_dropout_sampling': 'linear',
    # --- hidden layers ---
    'hidden_layers': 1,
    'hidden_layer_units_range_list': [(20, 20), (0, 0), (0, 0), (0, 0), (0, 0)],
    'hidden_units_step': 1,
    # NOTE(review): the step below (0.01) is larger than the first range's
    # width (0.001) -- confirm this is intended.
    'hidden_dropout_range_list': [(0.0, 0.001), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0)],
    'hidden_dropout_step': 0.01,
    'hidden_dropout_sampling': 'linear',
    # --- learning rate ---
    'learning_rate_range': (0.0025815, 0.0025815),
    'learning_rate_step': 0.000001,
    'learning_sampling': 'linear',
    # --- output layer, loss, optimizer, metric ---
    'output_activation_choice_list': ['linear'],
    'output_layer_units': 1,
    'loss_choice_list': ['huber'],
    'optimizer_choice_list': ['adamax'],
    'metrics': 'accuracy',
}

# Register the configuration with deep_learningx before the search cell runs.
deep_learningx.set_sequential_model_hyperparameters_dictionary(hyperparameters_dictionary)

## **3.3: Find Optimal Model Hyperparameters**

In [None]:
# Run the deep_learningx search on the train/test arrays. The result is used
# below as a sequence of dictionaries with 'objective', 'loss' and
# 'hyperparameters' keys.
best_models_dictionary_list = deep_learningx.return_best_sequential_model_hyperparameters(
    x_train_nparray,
    x_test_nparray,
    y_train_nparray,
    y_test_nparray,
)

In [None]:
# Number of models returned by the search.
list_length_integer = len(best_models_dictionary_list)

# Print and log a short report per model: 1-based rank, objective and loss
# (each multiplied by 100 and shown to one decimal with a '%' suffix), then
# the hyperparameters.
for index, model in enumerate(best_models_dictionary_list):

    report_string = (
        f"MODEL #{index + 1}\n"
        f"objective: {model['objective'] * 100:.1f}%\n"
        f"loss: {model['loss'] * 100:.1f}%\n"
        f"hyperparameters: {model['hyperparameters']}\n\n"
    )

    logx.print_and_log_text(report_string)

In [None]:
# logx.end_program()