In [1]:
#*******************************************************************************************
 #
 #  File Name:  charity_hyperparameters_optimization.ipynb
 #
 #  File Description:
 #      This interactive Python notebook, charity_hyperparameters_optimization.ipynb,
 #      reads the CSV file charity_data.csv from the resources folder and uses
 #      neural network methods to determine the optimal model hyperparameters.
 #
 #
 #  Date            Description                             Programmer
 #  ----------      ------------------------------------    ------------------
 #  12/02/2023      Initial Development                     Nicholas J. George
 #
 #******************************************************************************************/

# Helper modules used throughout the notebook (presumably project-local — confirm).
import deep_learningx
import pandasx

# logx is imported on its own so the models directory call below can run
# right away, before the remaining imports.
import logx
logx.create_directory(logx.MODELS_DIRECTORY_PATH)

# Supplies INPUT_FILE_PATH, NN_TEST_SIZE and NN_RANDOM_STATE used in later cells.
import charity_constants

# NOTE(review): datetime, io, json and time are not referenced in the visible cells.
import datetime
import io
import json
import time

import pandas as pd

# NOTE(review): StandardScaler is imported but not referenced in the visible cells.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# silence_tensorflow() is called before `import tensorflow` below; the ordering
# appears intentional, so keep these statements in this sequence.
from silence_tensorflow import silence_tensorflow
silence_tensorflow()

import tensorflow as tf

2024-05-07 08:23:18.039589: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Name of this notebook file.
# NOTE(review): this constant is not referenced in the visible cells.
CONSTANT_LOCAL_FILE_NAME = 'charity_hyperparameters_optimization.ipynb'


# Both logx mode switches are set to False — presumably this disables
# log-file and image-file output; confirm against logx.
logx.set_log_mode(False)

logx.set_image_mode(False)


# NOTE(review): the program name is a separate string literal rather than
# being derived from CONSTANT_LOCAL_FILE_NAME.
logx.begin_program('charity_hyperparameters_optimization')

In [3]:
# Per-column integer thresholds passed as the second argument of
# deep_learningx.bin_series in Section 2.3 (one entry per binned column).
BINNING_DICTIONARY = dict(
    NAME=1,
    APPLICATION_TYPE=156,
    AFFILIATION=33,
    CLASSIFICATION=6074,
    USE_CASE=146,
    ORGANIZATION=0,
    INCOME_AMT=728,
    ASK_AMT=53,
)

# <br> **Section 1: Extraction**

## **1.1: Read the CSV data into a Pandas DataFrame**

In [4]:
# Explicit dtype for every CSV column: STATUS, ASK_AMT and IS_SUCCESSFUL are
# read as integers, every other column (including EIN) as a string.
data_type_dictionary = {
    column_name: (int if column_name in ('STATUS', 'ASK_AMT', 'IS_SUCCESSFUL') else str)
    for column_name in (
        'EIN', 'NAME', 'APPLICATION_TYPE', 'AFFILIATION', 'CLASSIFICATION', 'USE_CASE',
        'ORGANIZATION', 'STATUS', 'INCOME_AMT', 'SPECIAL_CONSIDERATIONS', 'ASK_AMT',
        'IS_SUCCESSFUL')
}

# Load the raw charity data with the dtypes above.
charity_dataframe = pd.read_csv(
    charity_constants.INPUT_FILE_PATH,
    dtype=data_type_dictionary,
)

logx.log_write_object(charity_dataframe)

## **1.2: Display Charity DataFrame**

In [5]:
# Last expression of the cell, so the helper's return value is what the
# notebook renders; the second argument is the table caption.
# NOTE(review): the captured output shows ten rows — presumably the helper
# truncates the frame; confirm in pandasx.
pandasx.return_formatted_table(charity_dataframe, 'Table 1.2: Charity Data Table')

EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1
10556855,MINORITY ORGAN & TISSUE TRANSPLANT & EDUCATION PROGRAM OF TENNESSEE,T3,Independent,C1200,Preservation,Trust,1,0,N,5000,1
10558440,FRIENDS OF ARTS COUNCIL OF GREATER DENHAM SPRINGS INC,T3,Independent,C1000,Preservation,Trust,1,100000-499999,N,31452,1
10566033,ISRAEL EMERGENCY ALLIANCE,T3,Independent,C2000,Preservation,Trust,1,10M-50M,N,7508025,1
10570430,ARAMCO BRATS INC,T7,Independent,C1000,ProductDev,Trust,1,1-9999,N,94389,1
10571689,INTERNATIONAL ASSOCIATION OF FIRE FIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0


# <br> **Section 2: Preprocessing**

## **2.1: Change ASK_AMT Integers to Categories**

In [6]:
# Work on a copy so the raw frame from Section 1 is never modified.  The
# original integer amounts are parked in TEMP, which section 2.2 drops.
updated_charity_dataframe = charity_dataframe.copy()

updated_charity_dataframe = updated_charity_dataframe.rename(columns = {'ASK_AMT': 'TEMP'})


# Lower edges of the ASK_AMT categories.  An open-ended upper edge is appended
# in the pd.cut call so that the final label, '50M+', really is unbounded.
ask_amt_bins_integer_list \
    = [0, 10000, 25000, 100000, 500000, 1000000, 5000000, 10000000, 50000000]

ask_amt_labels_string_list \
    = ['0-9999', '10000-24999', '25000-99999', '100000-499999', '500000-999999', '1M-5M', '5M-10M', '10M-50M', '50M+']


# right = False makes every interval left-closed, [low, high), which is what
# the labels describe ('0-9999' includes 0 and excludes 10000).  With the
# pd.cut default (right-closed) an amount of exactly 10000 was labelled
# '0-9999', an amount of 0 became NaN, and, because the last edge used to be
# 100000000, every amount at or above 100M became NaN as well.
# NOTE(review): category counts change slightly with this fix, so the ASK_AMT
# threshold in BINNING_DICTIONARY may need to be re-checked.
updated_charity_dataframe['ASK_AMT'] \
    = pd.cut \
        (x = updated_charity_dataframe['TEMP'],
         bins = ask_amt_bins_integer_list + [float('inf')],
         labels = ask_amt_labels_string_list,
         right = False)

# Put the categorical ASK_AMT back in its original position and keep TEMP last.
updated_charity_dataframe \
    = updated_charity_dataframe \
        [['EIN', 'NAME', 'APPLICATION_TYPE', 'AFFILIATION', 'CLASSIFICATION', 'USE_CASE',
          'ORGANIZATION', 'STATUS', 'INCOME_AMT', 'SPECIAL_CONSIDERATIONS', 'ASK_AMT',
          'IS_SUCCESSFUL', 'TEMP']]


logx.log_write_object(updated_charity_dataframe['ASK_AMT'])

## **2.2: Drop the Non-Beneficial Columns**

In [7]:
# Columns that are not used as model features.
drop_columns_string_list = ['EIN', 'STATUS', 'SPECIAL_CONSIDERATIONS', 'TEMP']

# errors = 'ignore' drops whichever of the listed columns are present.  The
# previous `.isin(...).any()` guard still raised KeyError when only some of
# them existed.  Reassigning instead of using inplace = True keeps the cell
# safe to re-run.
updated_charity_dataframe \
    = updated_charity_dataframe.drop(columns = drop_columns_string_list, errors = 'ignore')


logx.log_write_object(updated_charity_dataframe)

## **2.3: Bin Columns**

### **NAME**

In [8]:
# Bin NAME with deep_learningx.bin_series, using the NAME threshold from
# BINNING_DICTIONARY and 'OTHER' as the replacement label.
# (Presumably values whose frequency does not exceed the threshold are
# relabelled — confirm in deep_learningx.)
updated_charity_dataframe['NAME'] = deep_learningx.bin_series(
    updated_charity_dataframe['NAME'],
    BINNING_DICTIONARY['NAME'],
    'OTHER',
)

# Log the resulting category counts, largest first.
name_counts_series = updated_charity_dataframe['NAME'].value_counts()
logx.log_write_object(name_counts_series.sort_values(ascending=False))

### **APPLICATION_TYPE**

In [9]:
# Bin APPLICATION_TYPE with deep_learningx.bin_series, using its threshold
# from BINNING_DICTIONARY and 'T20' as the replacement label.
# (Presumably values whose frequency does not exceed the threshold are
# relabelled — confirm in deep_learningx.)
updated_charity_dataframe['APPLICATION_TYPE'] = deep_learningx.bin_series(
    updated_charity_dataframe['APPLICATION_TYPE'],
    BINNING_DICTIONARY['APPLICATION_TYPE'],
    'T20',
)

# Log the resulting category counts, largest first.
application_type_counts_series = updated_charity_dataframe['APPLICATION_TYPE'].value_counts()
logx.log_write_object(application_type_counts_series.sort_values(ascending=False))

### **AFFILIATION**

In [10]:
# Bin AFFILIATION with deep_learningx.bin_series, using its threshold from
# BINNING_DICTIONARY and 'OTHER' as the replacement label.
# (Presumably values whose frequency does not exceed the threshold are
# relabelled — confirm in deep_learningx.)
updated_charity_dataframe['AFFILIATION'] = deep_learningx.bin_series(
    updated_charity_dataframe['AFFILIATION'],
    BINNING_DICTIONARY['AFFILIATION'],
    'OTHER',
)

# Log the resulting category counts, largest first.
affiliation_counts_series = updated_charity_dataframe['AFFILIATION'].value_counts()
logx.log_write_object(affiliation_counts_series.sort_values(ascending=False))

### **CLASSIFICATION**

In [11]:
# Bin CLASSIFICATION with deep_learningx.bin_series, using its threshold from
# BINNING_DICTIONARY and 'C2200' as the replacement label.
# (Presumably values whose frequency does not exceed the threshold are
# relabelled — confirm in deep_learningx.)
binned_classification_series = deep_learningx.bin_series(
    updated_charity_dataframe['CLASSIFICATION'],
    BINNING_DICTIONARY['CLASSIFICATION'],
    'C2200',
)
updated_charity_dataframe['CLASSIFICATION'] = binned_classification_series

# Normalise the short code 'C0' to the five-character form 'C0000'
# (whole-value match only; other codes are left untouched).
updated_charity_dataframe['CLASSIFICATION'] = (
    updated_charity_dataframe['CLASSIFICATION'].replace('C0', 'C0000')
)

# Log the resulting category counts, largest first.
classification_counts_series = updated_charity_dataframe['CLASSIFICATION'].value_counts()
logx.log_write_object(classification_counts_series.sort_values(ascending=False))

### **USE_CASE**

In [12]:
# Bin USE_CASE with deep_learningx.bin_series, using its threshold from
# BINNING_DICTIONARY and 'OTHER' as the replacement label.
# (Presumably values whose frequency does not exceed the threshold are
# relabelled — confirm in deep_learningx.)
updated_charity_dataframe['USE_CASE'] = deep_learningx.bin_series(
    updated_charity_dataframe['USE_CASE'],
    BINNING_DICTIONARY['USE_CASE'],
    'OTHER',
)

# Log the resulting category counts, largest first.
use_case_counts_series = updated_charity_dataframe['USE_CASE'].value_counts()
logx.log_write_object(use_case_counts_series.sort_values(ascending=False))

### **ORGANIZATION**

In [13]:
# Bin ORGANIZATION with deep_learningx.bin_series, using its threshold from
# BINNING_DICTIONARY and 'CORPORATION' as the replacement label.
# (Presumably values whose frequency does not exceed the threshold are
# relabelled — confirm in deep_learningx.)
updated_charity_dataframe['ORGANIZATION'] \
    = deep_learningx.bin_series \
        (updated_charity_dataframe['ORGANIZATION'], BINNING_DICTIONARY['ORGANIZATION'], 'CORPORATION')

# Log the ORGANIZATION counts, largest first.  This cell previously logged the
# AFFILIATION column by mistake (copy-paste from the AFFILIATION cell).
logx.log_write_object \
    (updated_charity_dataframe['ORGANIZATION'].value_counts().sort_values(ascending = False))

### **INCOME_AMT**

In [14]:
# Bin INCOME_AMT with deep_learningx.bin_series, using its threshold from
# BINNING_DICTIONARY and 'OTHER' as the replacement label.
# (Presumably values whose frequency does not exceed the threshold are
# relabelled — confirm in deep_learningx.)
updated_charity_dataframe['INCOME_AMT'] = deep_learningx.bin_series(
    updated_charity_dataframe['INCOME_AMT'],
    BINNING_DICTIONARY['INCOME_AMT'],
    'OTHER',
)

# Log the resulting category counts, largest first.
income_amt_counts_series = updated_charity_dataframe['INCOME_AMT'].value_counts()
logx.log_write_object(income_amt_counts_series.sort_values(ascending=False))

### **ASK_AMT**

In [15]:
# Bin the categorical ASK_AMT column (built in section 2.1) with
# deep_learningx.bin_series, using its threshold from BINNING_DICTIONARY and
# 'OTHER' as the replacement label.
# (Presumably values whose frequency does not exceed the threshold are
# relabelled — confirm in deep_learningx.)
updated_charity_dataframe['ASK_AMT'] = deep_learningx.bin_series(
    updated_charity_dataframe['ASK_AMT'],
    BINNING_DICTIONARY['ASK_AMT'],
    'OTHER',
)

# Log the resulting category counts, largest first.
ask_amt_counts_series = updated_charity_dataframe['ASK_AMT'].value_counts()
logx.log_write_object(ask_amt_counts_series.sort_values(ascending=False))

## **2.4: Convert All Features to Uppercase**

In [16]:
# Upper-case every categorical feature column, in the same order as before,
# so that category labels are case-consistent ahead of one-hot encoding.
for feature_column_name in (
        'NAME', 'APPLICATION_TYPE', 'AFFILIATION', 'CLASSIFICATION',
        'USE_CASE', 'ORGANIZATION', 'INCOME_AMT', 'ASK_AMT'):

    updated_charity_dataframe[feature_column_name] \
        = updated_charity_dataframe[feature_column_name].str.upper()

logx.log_write_object(updated_charity_dataframe)

## **2.5: Display Updated DataFrame**

In [17]:
# Last expression of the cell, so the helper's return value is what the
# notebook renders; the second argument is the table caption.
# NOTE(review): the captured output shows ten rows — presumably the helper
# truncates the frame; confirm in pandasx.
pandasx.return_formatted_table(updated_charity_dataframe, 'Table 2.5: Updated Charity Data Table')

NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,INCOME_AMT,ASK_AMT,IS_SUCCESSFUL
BLUE KNIGHTS MOTORCYCLE CLUB,T10,INDEPENDENT,C1000,PRODUCTDEV,ASSOCIATION,0,0-9999,1
OTHER,T3,INDEPENDENT,C2200,PRESERVATION,CO-OPERATIVE,OTHER,100000-499999,1
OTHER,T5,COMPANYSPONSORED,C2200,PRODUCTDEV,ASSOCIATION,0,0-9999,0
OTHER,T3,COMPANYSPONSORED,C2200,PRESERVATION,TRUST,OTHER,0-9999,1
OTHER,T3,INDEPENDENT,C1000,OTHER,TRUST,100000-499999,100000-499999,1
OTHER,T3,INDEPENDENT,C2200,PRESERVATION,TRUST,0,0-9999,1
OTHER,T3,INDEPENDENT,C1000,PRESERVATION,TRUST,100000-499999,25000-99999,1
OTHER,T3,INDEPENDENT,C2200,PRESERVATION,TRUST,OTHER,5M-10M,1
OTHER,T7,INDEPENDENT,C1000,PRODUCTDEV,TRUST,OTHER,25000-99999,1
INTERNATIONAL ASSOCIATION OF FIRE FIGHTERS,T5,COMPANYSPONSORED,C2200,PRODUCTDEV,ASSOCIATION,0,0-9999,0


# <br> **Section 3: Compile, Train, Evaluate, and Export the Model**

## **3.1: Convert and Split Data Set**

### **Convert Categorical Data to Numeric with `pd.get_dummies`**

In [18]:
# One-hot encode the categorical feature columns.  IS_SUCCESSFUL was read as
# an integer column, so pd.get_dummies passes it through unchanged.
# NOTE(review): the dtype of the indicator columns depends on the installed
# pandas version — confirm it is what the model code expects.
dummies_dataframe = pd.get_dummies(updated_charity_dataframe)

logx.log_write_object(dummies_dataframe)

### **Split Data Set into Features and Target Arrays**

In [19]:
# Target vector: the IS_SUCCESSFUL column as a NumPy array.
y_nparray = dummies_dataframe['IS_SUCCESSFUL'].to_numpy()

logx.log_write_object(y_nparray)

In [20]:
# Feature matrix: every encoded column except the target, as a NumPy array.
features_dataframe = dummies_dataframe.drop(columns='IS_SUCCESSFUL')
x_nparray = features_dataframe.to_numpy()

logx.log_write_object(x_nparray)

### **Split X-Y Arrays into Training and Testing Arrays**

In [21]:
# Split features and target into training and testing partitions.  The test
# fraction and the random seed both come from charity_constants.
split_nparray_list = train_test_split(
    x_nparray,
    y_nparray,
    test_size=charity_constants.NN_TEST_SIZE,
    random_state=charity_constants.NN_RANDOM_STATE,
)

x_train_nparray, x_test_nparray, y_train_nparray, y_test_nparray = split_nparray_list

# Log the four arrays in the order x-train, x-test, y-train, y-test.
for split_nparray in split_nparray_list:

    logx.log_write_object(str(split_nparray) + '\n\n')

## **3.2: Define and Set Hyperparameter Ranges**

In [22]:
# Search configuration handed to deep_learningx below.  Every range whose two
# endpoints are equal pins that hyperparameter to a single value; in this
# configuration only the hidden-layer dropout spans a real interval
# (0.011151 to 0.011171 in steps of 0.000001).
# NOTE(review): the exact meaning of each key is defined by deep_learningx —
# the comments below describe only what the values themselves show.
hyperparameters_dictionary \
    = {'tuner_type': 'grid_search',
       'best_model_count': 5,
       'hyperband_iterations': 2,
       # Training-loop settings.
       'patience': 42,
       'batch_size': 64,
       'validation_split': 0.2,
       'verbose': 'auto',
       'max_epochs': 1000,
       'restore_best_weights': True,
       'activation_choice_list': ['selu'],
       # Number of columns in one training row, i.e. the encoded feature count.
       'input_features': len(x_train_nparray[0]),
       'objective': 'val_accuracy',
       'objective_direction': 'max',
       # First layer: units and dropout are both pinned (equal endpoints).
       'first_layer_units_range': (57, 57),
       'first_units_step':1,
       'first_dropout_range': (0.012928, 0.012928),
       'first_dropout_step': 0.000001,
       'first_dropout_sampling': 'linear',
       # One hidden layer: units pinned at 20, dropout is the swept parameter.
       'hidden_layers': 1,
       'hidden_layer_units_range_list': [(20, 20)],
       'hidden_units_step': 1,
       'hidden_dropout_range_list': [(0.011151, 0.011171)],
       'hidden_dropout_step': 0.000001,
       'hidden_dropout_sampling': 'linear',
       # Learning rate is pinned (equal endpoints).
       'learning_rate_range': (0.0025815, 0.0025815),
       'learning_rate_step': 0.000001,
       'learning_sampling': 'linear',
       # NOTE(review): a linear output with huber loss is unusual for a 0/1
       # target such as IS_SUCCESSFUL — confirm this is intentional.
       'output_activation_choice_list': ['linear'],
       'output_layer_units': 1,
       'loss_choice_list': ['huber'],
       'optimizer_choice_list': ['adamax'],
       'metrics': 'accuracy'}

# Register the configuration with deep_learningx before the search cell runs.
deep_learningx.set_sequential_model_hyperparameters_dictionary(hyperparameters_dictionary)

## **3.3: Find Optimal Model Hyperparameters**

In [None]:
# Run the hyperparameter search on the train/test arrays.  This is the
# long-running cell of the notebook: the captured output shows a single trial
# taking about 1 minute 17 seconds.
# The result is consumed by the reporting cell as a sequence of dictionaries
# with 'objective', 'loss' and 'hyperparameters' entries.
best_models_dictionary_list \
    = deep_learningx.return_best_sequential_model_hyperparameters \
        (x_train_nparray, x_test_nparray, y_train_nparray, y_test_nparray)

Trial 8 Complete [00h 01m 17s]
val_accuracy: 0.8123906850814819

Best val_accuracy So Far: 0.8141399621963501
Total elapsed time: 00h 07m 35s

Search: Running Trial #9

Value             |Best Value So Far |Hyperparameter
selu              |selu              |activation
57                |57                |units_1
0.012928          |0.012928          |first_dropout_rate
0.011159          |0.011151          |hidden_dropout_rate
linear            |linear            |output_activation
huber             |huber             |loss
0.0025815         |0.0025815         |learning_rate
adamax            |adamax            |optimizer

Epoch 1/1000
[1m429/429[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7296 - loss: 0.0940 - val_accuracy: 0.7978 - val_loss: 0.0739
Epoch 2/1000
[1m429/429[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7951 - loss: 0.0747 - val_accuracy: 0.7997 - val_loss: 0.0727
Epoch 3/1000
[1m429/429[0m [32m━━━━━━

In [None]:
# Print and log a short summary for every model returned by the search:
# its 1-based rank, objective and loss (each multiplied by 100 and shown to
# one decimal place with a percent sign) and the hyperparameters behind it.
# The unused list-length variable was removed, and the text is now built from
# f-strings only; the emitted text is unchanged.
for index, model in enumerate(best_models_dictionary_list):

    logx.print_and_log_text \
        (f'MODEL #{index + 1}\n'
         f"objective: {model['objective'] * 100:.1f}%\n"
         f"loss: {model['loss'] * 100:.1f}%\n"
         f"hyperparameters: {model['hyperparameters']}\n\n")

In [None]:
# logx.end_program()