In [1]:
#*******************************************************************************************
 #
 #  File Name:  charity_analysis.ipynb
 #
 #  File Description:
 #      This interactive Python notebook, charity_analysis.ipynb, reads a csv file,
 #      charity_data.csv, in the folder, resources, and uses Python and neural 
 #      networks methods to determine the optimum neural network model parameters.
 #
 #
 #  Date            Description                             Programmer
 #  ----------      ------------------------------------    ------------------
 #  12/02/2023      Initial Development                     Nicholas J. George
 #
 #******************************************************************************************/

import deep_learningx
import pandasx

import logx
logx.create_directory(logx.MODELS_DIRECTORY_PATH)

import charity_constants

import datetime
import io
import json
import pickle
import time

import pandas as pd

from IPython.display import clear_output

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from silence_tensorflow import silence_tensorflow
silence_tensorflow()

import tensorflow as tf

2024-05-07 08:40:06.941869: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
CONSTANT_LOCAL_FILE_NAME = 'charity_analysis.ipynb'


logx.set_log_mode(False)

logx.set_image_mode(False)


logx.begin_program('charity_analysis')

In [3]:
BINNING_DICTIONARY \
    = {'NAME': 1,
       'APPLICATION_TYPE': 156,
       'AFFILIATION': 33,
       'CLASSIFICATION': 6074,
       'USE_CASE': 146,
       'ORGANIZATION': 0,
       'INCOME_AMT': 728,
       'ASK_AMT': 53}

# <br> **Section 1: Extraction**

## **1.1: Read the CSV data into a Pandas DataFrame**

In [4]:
data_type_dictionary \
    = {'EIN': str,
       'NAME':	str,
       'APPLICATION_TYPE': str,
       'AFFILIATION': str,
       'CLASSIFICATION': str,
       'USE_CASE': str,
       'ORGANIZATION': str,
       'STATUS': int,
       'INCOME_AMT': str,
       'SPECIAL_CONSIDERATIONS': str,
       'ASK_AMT': int,
       'IS_SUCCESSFUL': int}

charity_dataframe \
    = pd.read_csv(charity_constants.INPUT_FILE_PATH, dtype = data_type_dictionary)

logx.log_write_object(charity_dataframe)

## **1.2: Display Charity DataFrame**

In [5]:
pandasx.return_formatted_table(charity_dataframe, 'Table 1.2: Charity Data Table')

EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1
10556855,MINORITY ORGAN & TISSUE TRANSPLANT & EDUCATION PROGRAM OF TENNESSEE,T3,Independent,C1200,Preservation,Trust,1,0,N,5000,1
10558440,FRIENDS OF ARTS COUNCIL OF GREATER DENHAM SPRINGS INC,T3,Independent,C1000,Preservation,Trust,1,100000-499999,N,31452,1
10566033,ISRAEL EMERGENCY ALLIANCE,T3,Independent,C2000,Preservation,Trust,1,10M-50M,N,7508025,1
10570430,ARAMCO BRATS INC,T7,Independent,C1000,ProductDev,Trust,1,1-9999,N,94389,1
10571689,INTERNATIONAL ASSOCIATION OF FIRE FIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0


# <br> **Section 2: Preprocessing**

## **2.1: Change ASK_AMT Integers to Categories**

In [6]:
updated_charity_dataframe = charity_dataframe.copy()

updated_charity_dataframe = updated_charity_dataframe.rename(columns = {'ASK_AMT': 'TEMP'})


ask_amt_bins_integer_list \
    = [0, 10000, 25000, 100000, 500000, 1000000, 5000000, 10000000, 50000000, 100000000]

ask_amt_labels_string_list \
    = ['0-9999', '10000-24999', '25000-99999', '100000-499999', '500000-999999', '1M-5M', '5M-10M', '10M-50M', '50M+']


updated_charity_dataframe['ASK_AMT'] \
    = pd.cut \
        (x = updated_charity_dataframe['TEMP'],
         bins = ask_amt_bins_integer_list,
         labels = ask_amt_labels_string_list)

updated_charity_dataframe \
    = updated_charity_dataframe \
        [['EIN', 'NAME', 'APPLICATION_TYPE', 'AFFILIATION', 'CLASSIFICATION', 'USE_CASE',
          'ORGANIZATION', 'STATUS', 'INCOME_AMT', 'SPECIAL_CONSIDERATIONS', 'ASK_AMT',
          'IS_SUCCESSFUL', 'TEMP']]


logx.log_write_object(updated_charity_dataframe['ASK_AMT'])

## **2.2: Drop the Non-Beneficial ID Columns**

In [7]:
drop_columns_string_list = ['EIN', 'STATUS', 'SPECIAL_CONSIDERATIONS', 'TEMP']

if updated_charity_dataframe.columns.isin(drop_columns_string_list).any():

    updated_charity_dataframe.drop(drop_columns_string_list, axis = 1, inplace = True)
    

logx.log_write_object(updated_charity_dataframe)

## **2.3: Bin Columns**

### **NAME**

In [8]:
updated_charity_dataframe['NAME'] \
    = deep_learningx.bin_series \
        (updated_charity_dataframe['NAME'], BINNING_DICTIONARY['NAME'], 'OTHER')

logx.log_write_object \
    (updated_charity_dataframe['NAME'].value_counts().sort_values(ascending = False))

### **APPLICATION_TYPE**

In [9]:
updated_charity_dataframe['APPLICATION_TYPE'] \
    = deep_learningx.bin_series \
        (updated_charity_dataframe['APPLICATION_TYPE'], BINNING_DICTIONARY['APPLICATION_TYPE'], 'T20')

logx.log_write_object \
    (updated_charity_dataframe['APPLICATION_TYPE'].value_counts().sort_values(ascending = False))

### **AFFILIATION**

In [10]:
updated_charity_dataframe['AFFILIATION'] \
    = deep_learningx.bin_series \
        (updated_charity_dataframe['AFFILIATION'], BINNING_DICTIONARY['AFFILIATION'], 'OTHER')

logx.log_write_object \
    (updated_charity_dataframe['AFFILIATION'].value_counts().sort_values(ascending = False))

### **CLASSIFICATION**

In [11]:
updated_charity_dataframe['CLASSIFICATION'] \
    = deep_learningx.bin_series \
        (updated_charity_dataframe['CLASSIFICATION'], BINNING_DICTIONARY['CLASSIFICATION'], 'C2200')

updated_charity_dataframe['CLASSIFICATION'] \
    = updated_charity_dataframe['CLASSIFICATION'].replace('C0', 'C0000')

logx.log_write_object \
    (updated_charity_dataframe['CLASSIFICATION'].value_counts().sort_values(ascending = False))

### **USE_CASE**

In [12]:
updated_charity_dataframe['USE_CASE'] \
    = deep_learningx.bin_series \
        (updated_charity_dataframe['USE_CASE'], BINNING_DICTIONARY['USE_CASE'], 'OTHER')

logx.log_write_object \
    (updated_charity_dataframe['USE_CASE'].value_counts().sort_values(ascending = False))

### **ORGANIZATION**

In [13]:
updated_charity_dataframe['ORGANIZATION'] \
    = deep_learningx.bin_series \
        (updated_charity_dataframe['ORGANIZATION'], BINNING_DICTIONARY['ORGANIZATION'], 'CORPORATION')

logx.log_write_object \
    (updated_charity_dataframe['AFFILIATION'].value_counts().sort_values(ascending = False))

### **INCOME_AMT**

In [14]:
updated_charity_dataframe['INCOME_AMT'] \
    = deep_learningx.bin_series \
        (updated_charity_dataframe['INCOME_AMT'], BINNING_DICTIONARY['INCOME_AMT'], 'OTHER')

logx.log_write_object \
    (updated_charity_dataframe['INCOME_AMT'].value_counts().sort_values(ascending = False))

### **ASK_AMT**

In [15]:
updated_charity_dataframe['ASK_AMT'] \
    = deep_learningx.bin_series \
        (updated_charity_dataframe['ASK_AMT'], BINNING_DICTIONARY['ASK_AMT'], 'OTHER')

logx.log_write_object \
    (updated_charity_dataframe['ASK_AMT'].value_counts().sort_values(ascending = False))

## **2.4: Convert All Features to Uppercase**

In [16]:
updated_charity_dataframe['NAME'] = updated_charity_dataframe['NAME'].str.upper()

updated_charity_dataframe['APPLICATION_TYPE'] = updated_charity_dataframe['APPLICATION_TYPE'].str.upper()

updated_charity_dataframe['AFFILIATION'] = updated_charity_dataframe['AFFILIATION'].str.upper()

updated_charity_dataframe['CLASSIFICATION'] = updated_charity_dataframe['CLASSIFICATION'].str.upper()

updated_charity_dataframe['USE_CASE'] = updated_charity_dataframe['USE_CASE'].str.upper()

updated_charity_dataframe['ORGANIZATION'] = updated_charity_dataframe['ORGANIZATION'].str.upper()

updated_charity_dataframe['INCOME_AMT'] = updated_charity_dataframe['INCOME_AMT'].str.upper()

updated_charity_dataframe['ASK_AMT'] = updated_charity_dataframe['ASK_AMT'].str.upper()

logx.log_write_object(updated_charity_dataframe)

## **2.5: Display Updated DataFrame**

In [17]:
pandasx.return_formatted_table(updated_charity_dataframe, 'Table 2.5: Updated Charity Data Table')

NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,INCOME_AMT,ASK_AMT,IS_SUCCESSFUL
BLUE KNIGHTS MOTORCYCLE CLUB,T10,INDEPENDENT,C1000,PRODUCTDEV,ASSOCIATION,0,0-9999,1
OTHER,T3,INDEPENDENT,C2200,PRESERVATION,CO-OPERATIVE,OTHER,100000-499999,1
OTHER,T5,COMPANYSPONSORED,C2200,PRODUCTDEV,ASSOCIATION,0,0-9999,0
OTHER,T3,COMPANYSPONSORED,C2200,PRESERVATION,TRUST,OTHER,0-9999,1
OTHER,T3,INDEPENDENT,C1000,OTHER,TRUST,100000-499999,100000-499999,1
OTHER,T3,INDEPENDENT,C2200,PRESERVATION,TRUST,0,0-9999,1
OTHER,T3,INDEPENDENT,C1000,PRESERVATION,TRUST,100000-499999,25000-99999,1
OTHER,T3,INDEPENDENT,C2200,PRESERVATION,TRUST,OTHER,5M-10M,1
OTHER,T7,INDEPENDENT,C1000,PRODUCTDEV,TRUST,OTHER,25000-99999,1
INTERNATIONAL ASSOCIATION OF FIRE FIGHTERS,T5,COMPANYSPONSORED,C2200,PRODUCTDEV,ASSOCIATION,0,0-9999,0


# <br> **Section 3: Compile, Train, Evaluate, and Export the Model**

## **3.1: Convert and Split Data Set**

### **Convert Categorical Data to Numeric with `pd.get_dummies`**

In [18]:
dummies_dataframe = pd.get_dummies(updated_charity_dataframe)

logx.log_write_object(dummies_dataframe)

### **Split Data Set into Features and Target Arrays**

In [19]:
y_nparray = dummies_dataframe['IS_SUCCESSFUL'].values

logx.log_write_object(y_nparray)

In [20]:
x_nparray = dummies_dataframe.drop(['IS_SUCCESSFUL'], axis = 1).values

logx.log_write_object(x_nparray)

### **Split X-Y Arrays into Training and Testing Arrays**

In [21]:
x_train_nparray, x_test_nparray, y_train_nparray, y_test_nparray \
    = train_test_split \
        (x_nparray, y_nparray, 
         test_size = charity_constants.NN_TEST_SIZE,
         random_state = charity_constants.NN_RANDOM_STATE)

logx.log_write_object(str(x_train_nparray) + '\n\n')

logx.log_write_object(str(x_test_nparray) + '\n\n')

logx.log_write_object(str(y_train_nparray) + '\n\n')

logx.log_write_object(str(y_test_nparray) + '\n\n')

## **3.2: Create and Compile Model**

### **Set Hyperparameters**

In [22]:
number_input_features_integer = len(x_train_nparray[0])

logx.print_and_log_text \
    ('\033[1m'
     + 'The number of inputs (features) in the model is {:,}.' \
         .format(number_input_features_integer)
     + '\033[0m')

[1mThe number of inputs (features) in the model is 830.[0m


In [23]:
first_layer_units_integer = 57

first_layer_activation_string = 'selu'

logx.print_and_log_text \
    ('\033[1m'
     + 'The number of nodes in the first layer is {:,}.' \
         .format(first_layer_units_integer)
     + '\033[0m')

[1mThe number of nodes in the first layer is 57.[0m


In [24]:
second_layer_units_integer = 20

second_layer_activation_string = 'selu'

logx.print_and_log_text \
    ('\033[1m'
     + 'The number of nodes in the second layer is {:,}.' \
         .format(second_layer_units_integer)
     + '\033[0m')

[1mThe number of nodes in the second layer is 20.[0m


In [25]:
output_layer_units_integer = 1

output_layer_activation_string = 'linear'

logx.print_and_log_text \
    ('\033[1m'
     + 'The number of nodes in the output layer is {:,}.' \
         .format(output_layer_units_integer)
     + '\033[0m')

[1mThe number of nodes in the output layer is 1.[0m


### **Instantiate the Model**

In [26]:
sequential_neural_network_model = tf.keras.models.Sequential()

In [27]:
sequential_neural_network_model.add \
    (tf.keras.layers.Dense \
         (units = first_layer_units_integer,
          activation = first_layer_activation_string,
          input_dim = number_input_features_integer))

sequential_neural_network_model.add(tf.keras.layers.Dropout(0.012928))

sequential_neural_network_model.add \
    (tf.keras.layers.Dense \
         (units = second_layer_units_integer,
          activation = second_layer_activation_string))

sequential_neural_network_model.add(tf.keras.layers.Dropout(0.011161))

sequential_neural_network_model.add \
    (tf.keras.layers.Dense \
         (units = output_layer_units_integer,
          activation = output_layer_activation_string))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### **Model Summary**

In [28]:
sequential_neural_network_model.summary()

### **Compile**

In [29]:
sequential_neural_network_model.compile \
    (loss = 'huber',
     optimizer = tf.keras.optimizers.Adamax(learning_rate = 0.0025815),
     metrics = ['accuracy'])

## **3.3: Fit and Train Model**

In [30]:
earlystopping_callback \
    = tf.keras.callbacks.EarlyStopping  \
        (monitor = 'val_accuracy', 
         mode = 'max', 
         patience = 100, 
         restore_best_weights = True)

sequential_neural_network_model \
    .fit \
        (x_train_nparray, y_train_nparray, 
         batch_size = charity_constants.NN_BATCH_SIZE,
         validation_split = charity_constants.NN_TEST_SIZE,
         epochs = charity_constants.NN_MAX_EPOCHS, 
         verbose = charity_constants.NN_VERBOSE,
         validation_data = (x_test_nparray, y_test_nparray),
         callbacks = [earlystopping_callback])

Epoch 1/1000
[1m429/429[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7372 - loss: 0.0928 - val_accuracy: 0.7915 - val_loss: 0.0733
Epoch 2/1000
[1m429/429[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 940us/step - accuracy: 0.7906 - loss: 0.0749 - val_accuracy: 0.7959 - val_loss: 0.0727
Epoch 3/1000
[1m429/429[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 945us/step - accuracy: 0.7922 - loss: 0.0739 - val_accuracy: 0.8001 - val_loss: 0.0704
Epoch 4/1000
[1m429/429[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7997 - loss: 0.0712 - val_accuracy: 0.7990 - val_loss: 0.0701
Epoch 5/1000
[1m429/429[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 951us/step - accuracy: 0.8007 - loss: 0.0710 - val_accuracy: 0.7999 - val_loss: 0.0747
Epoch 6/1000
[1m429/429[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 945us/step - accuracy: 0.7985 - loss: 0.0716 - val_accuracy: 0.7997 - val_loss: 0.0718
Epoch 7/

<keras.src.callbacks.history.History at 0x15e13ac90>

## **3.4: Evaluate the Model**

In [31]:
model_loss_float, model_accuracy_float \
    = sequential_neural_network_model.evaluate(x_test_nparray, y_test_nparray, verbose = 2)

logx.print_and_log_text \
    (f'\nModel Loss: {round(model_loss_float * 100, 2)}%, '
     + f'Model Accuracy: {round(model_accuracy_float * 100, 2)}%')

215/215 - 0s - 513us/step - accuracy: 0.8141 - loss: 0.0678

Model Loss: 6.78%, Model Accuracy: 81.41%


## **3.5: Save and Export Model**

In [32]:
sequential_neural_network_model.save(charity_constants.MODEL_FILE_PATH)

# <br> **Section 4: Predict Charity Funding Success**

## **4.1: Reload Model**

In [33]:
reloaded_sequential_neural_network_model \
    = tf.keras.models.load_model \
        (charity_constants.MODEL_FILE_PATH)

## **4.2: Predictions**

In [34]:
predictions_nparray \
    = reloaded_sequential_neural_network_model.predict \
        (x_test_nparray).round().astype('int32')

predictions_dataframe = pd.DataFrame(predictions_nparray).idxmax(axis = 1)

predictions_nparray = predictions_dataframe.values

[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 572us/step


In [35]:
y_test_dataframe = pd.DataFrame(y_test_nparray).idxmax(axis = 1)

y_test_nparray = y_test_dataframe.values

## **4.3: Compare Predictions and Actual Values**

In [36]:
comparison_dataframe \
    = pd.DataFrame \
        ({'predictions': predictions_nparray.ravel(), 
          'actual': y_test_nparray})

In [37]:
pandasx.return_formatted_table \
    (comparison_dataframe, 
     'Table 4.3: Model Predictions vs. Actual Values')

predictions,actual
0,0
0,0
0,0
0,0
0,0
0,0
0,0
0,0
0,0
0,0


In [38]:
# logx.end_program()