<a href="https://colab.research.google.com/github/njgeorge000158/Charity-Funding-Analysis-with-Neural-Networks-using-TensorFlow/blob/main/alphabet_soup_charity_optimization_search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#*******************************************************************************************
 #
 #  File Name:  alphabet_soup_charity_optimization_search.ipynb
 #
 #  File Description:
 #      This interactive Python notebook, alphabet_soup_charity_optimization_search.ipynb,
 #      reads a CSV file, charity_data.csv, in the resources folder, and uses Python and
 #      neural network methods to determine the optimum neural network model parameters.
 #
 #
 #  Date            Description                             Programmer
 #  ----------      ------------------------------------    ------------------
 #  12/02/2023      Initial Development                     Nicholas J. George
 #
 #******************************************************************************************/

# Mount Google Drive into the Colab runtime at /content/drive/ so that files
# stored on Drive can be reached through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive/')

# Put the project folder first on the module search path.
# NOTE(review): presumably this is where the local modules imported below
# (deep_learning_functions, log_subroutines, pandas_process_functions)
# live -- confirm against the Drive folder contents.
import sys
sys.path.insert(0,'/content/drive/My Drive/alphabet_soup_charity_analysis')

!pip install -q dataframe_image
!pip install hvplot
!pip install silence-tensorflow
!pip install keras-tuner

import deep_learning_functions
import log_subroutines
import pandas_process_functions

import datetime
import io
import json
import time

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from silence_tensorflow import silence_tensorflow
silence_tensorflow()

import tensorflow as tf

Mounted at /content/drive/
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m398.1/398.1 kB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting hvplot
  Downloading hvplot-0.9.2-py2.py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: hvplot
Successfully installed hvplot-0.9.2
Collecting silence-tensorflow
  Downloading silence_tensorflow-1.2.1.tar.gz (3.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting support_developer (from silence-tensorflow)
  Downloading support_developer-1.0.5.tar.gz (4.9 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: silence-tensorflow, support_developer
  Building wheel for silence-tensorflow (setup.py) ... [?25l[?25hdone
  Create

In [2]:
# File name of this notebook (same as the File Name in the header comment).
CONSTANT_LOCAL_FILE_NAME = (
    'alphabet_soup_charity_optimization_search.ipynb'
)

# Input: the charity data CSV stored on the mounted Google Drive.
CONSTANT_CHARITY_DATA_CSV_FILE_PATH = (
    '/content/drive/My Drive/alphabet_soup_charity_analysis/resources/charity_data.csv'
)

# Output: text file to which the search results are appended.
CONSTANT_OUTPUT_FILE_PATH = (
    '/content/drive/My Drive/alphabet_soup_charity_analysis/resources/charity_data_optimization_search.txt'
)

# <br> **Section 1: Extraction**

## **1.1: Import and Read Charity Data**

In [3]:
# Explicit dtype for every column of the charity CSV.  The identifier and
# category columns are read as text (INCOME_AMT holds range labels such as
# '1-9999'); only STATUS, ASK_AMT and IS_SUCCESSFUL are read as integers.
data_type_dictionary = {
    'EIN': str,
    'NAME': str,
    'APPLICATION_TYPE': str,
    'AFFILIATION': str,
    'CLASSIFICATION': str,
    'USE_CASE': str,
    'ORGANIZATION': str,
    'STATUS': int,
    'INCOME_AMT': str,
    'SPECIAL_CONSIDERATIONS': str,
    'ASK_AMT': int,
    'IS_SUCCESSFUL': int,
}

# Load the full data set with the dtypes declared above.
charity_dataframe = pd.read_csv(
    CONSTANT_CHARITY_DATA_CSV_FILE_PATH,
    dtype=data_type_dictionary,
)

# Hand the loaded frame to the project logger.
log_subroutines.log_write_object(charity_dataframe)

## **1.2: Display Charity Data**

In [4]:
# Show the raw charity data under its table caption.
# NOTE(review): presumably the helper returns a formatted table object that
# the notebook renders as this cell's output -- confirm in
# pandas_process_functions.
pandas_process_functions.return_formatted_table(
    charity_dataframe,
    'Table 1.2.1: Charity Data Table',
)

EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1
10556855,MINORITY ORGAN & TISSUE TRANSPLANT & EDUCATION PROGRAM OF TENNESSEE,T3,Independent,C1200,Preservation,Trust,1,0,N,5000,1
10558440,FRIENDS OF ARTS COUNCIL OF GREATER DENHAM SPRINGS INC,T3,Independent,C1000,Preservation,Trust,1,100000-499999,N,31452,1
10566033,ISRAEL EMERGENCY ALLIANCE,T3,Independent,C2000,Preservation,Trust,1,10M-50M,N,7508025,1
10570430,ARAMCO BRATS INC,T7,Independent,C1000,ProductDev,Trust,1,1-9999,N,94389,1
10571689,INTERNATIONAL ASSOCIATION OF FIRE FIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0


# <br> **Section 2: Preprocessing**

## **2.1: Drop the Non-Beneficial Columns**

In [5]:
# Remove the columns that are not used as model features.
#
# The frame is rebound instead of mutated with inplace=True, and
# errors='ignore' skips columns that are already gone, so re-running this
# cell leaves the frame unchanged instead of raising KeyError.
charity_dataframe = charity_dataframe.drop(
    columns=['EIN', 'STATUS', 'SPECIAL_CONSIDERATIONS'],
    errors='ignore',
)

# Hand the reduced frame to the project logger.
log_subroutines.log_write_object(charity_dataframe)

In [6]:
# Show the charity data again now that the unused columns have been dropped.
pandas_process_functions.return_formatted_table(
    charity_dataframe,
    'Table 1.2.2: Modified Charity Data Table',
)

NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,INCOME_AMT,ASK_AMT,IS_SUCCESSFUL
BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,0,5000,1
AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1-9999,108590,1
ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,0,5000,0
SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,10000-24999,6692,1
GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,100000-499999,142590,1
MINORITY ORGAN & TISSUE TRANSPLANT & EDUCATION PROGRAM OF TENNESSEE,T3,Independent,C1200,Preservation,Trust,0,5000,1
FRIENDS OF ARTS COUNCIL OF GREATER DENHAM SPRINGS INC,T3,Independent,C1000,Preservation,Trust,100000-499999,31452,1
ISRAEL EMERGENCY ALLIANCE,T3,Independent,C2000,Preservation,Trust,10M-50M,7508025,1
ARAMCO BRATS INC,T7,Independent,C1000,ProductDev,Trust,1-9999,94389,1
INTERNATIONAL ASSOCIATION OF FIRE FIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,0,5000,0


## **2.2: Determine the Number of Unique Values in Each Column**

In [7]:
# Number of distinct values in each remaining column; used to judge which
# columns are candidates for binning.
unique_value_count_each_column_series = (
    charity_dataframe.nunique()
)

# Hand the per-column counts to the project logger.
log_subroutines.log_write_object(
    unique_value_count_each_column_series
)

In [8]:
# Print (and log) the distinct-value count of every column as plain text.
log_subroutines.print_and_log_text(
    str(unique_value_count_each_column_series)
)

NAME                19568
APPLICATION_TYPE       17
AFFILIATION             6
CLASSIFICATION         71
USE_CASE                5
ORGANIZATION            4
INCOME_AMT              9
ASK_AMT              8747
IS_SUCCESSFUL           2
dtype: int64


## **2.3: Binning Setup**

### **Initialize Lists**

In [9]:
# Three parallel lists, filled one entry per feature column by the cells
# below: the column's series, its count list, and the column name.
# Running this cell again empties all three.
column_series_list, counts_integer_list_list, column_name_string_list \
    = [], [], []

### **NAME**

In [10]:
# Value-count series and count list for the NAME column.
name_series, name_counts_integer_list = (
    deep_learning_functions.return_feature_series_and_sorted_count_list(
        charity_dataframe, 'NAME'))

# Register NAME in the three parallel binning lists.  These are plain
# appends, so running this cell twice registers the column twice.
column_name_string_list.append('NAME')
column_series_list.append(name_series)
counts_integer_list_list.append(name_counts_integer_list)

# Print (and log) the NAME counts.
log_subroutines.print_and_log_text(str(name_series))

NAME
PARENT BOOSTER USA INC                                              1260
TOPS CLUB INC                                                        765
UNITED STATES BOWLING CONGRESS INC                                   700
WASHINGTON STATE UNIVERSITY                                          492
AMATEUR ATHLETIC UNION OF THE UNITED STATES INC                      408
                                                                    ... 
POM-POMS CASTLE                                                        1
FOUNDATION FOR INTERVENTIONAL RADIOLOGICAL RESEARCH SUPP & TEACH       1
DARE U TO CARE OUTREACH MINISTRY                                       1
GEFFEN PLAYHOUSE INC                                                   1
WATERHOUSE CHARITABLE TR                                               1
Name: NAME, Length: 19568, dtype: int64


### **APPLICATION_TYPE**

In [11]:
# Value-count series and count list for the APPLICATION_TYPE column.
application_type_series, application_type_counts_integer_list = (
    deep_learning_functions.return_feature_series_and_sorted_count_list(
        charity_dataframe, 'APPLICATION_TYPE'))

# Register APPLICATION_TYPE in the three parallel binning lists.  These are
# plain appends, so running this cell twice registers the column twice.
column_name_string_list.append('APPLICATION_TYPE')
column_series_list.append(application_type_series)
counts_integer_list_list.append(application_type_counts_integer_list)

# Print (and log) the APPLICATION_TYPE counts.
log_subroutines.print_and_log_text(str(application_type_series))

APPLICATION_TYPE
T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T25        3
T14        3
T29        2
T15        2
T17        1
Name: APPLICATION_TYPE, dtype: int64


### **AFFILIATION**

In [12]:
# Value-count series and count list for the AFFILIATION column.
affiliation_series, affiliation_counts_integer_list = (
    deep_learning_functions.return_feature_series_and_sorted_count_list(
        charity_dataframe, 'AFFILIATION'))

# Register AFFILIATION in the three parallel binning lists.  These are
# plain appends, so running this cell twice registers the column twice.
column_name_string_list.append('AFFILIATION')
column_series_list.append(affiliation_series)
counts_integer_list_list.append(affiliation_counts_integer_list)

# Print (and log) the AFFILIATION counts.
log_subroutines.print_and_log_text(str(affiliation_series))

AFFILIATION
Independent         18480
CompanySponsored    15705
Family/Parent          64
National               33
Regional               13
Other                   4
Name: AFFILIATION, dtype: int64


### **CLASSIFICATION**

In [13]:
# Value-count series and count list for the CLASSIFICATION column.
classification_series, classification_counts_integer_list = (
    deep_learning_functions.return_feature_series_and_sorted_count_list(
        charity_dataframe, 'CLASSIFICATION'))

# Register CLASSIFICATION in the three parallel binning lists.  These are
# plain appends, so running this cell twice registers the column twice.
column_name_string_list.append('CLASSIFICATION')
column_series_list.append(classification_series)
counts_integer_list_list.append(classification_counts_integer_list)

# Only the first 50 entries are printed (and logged) to keep the output short;
# the full series is what was appended above.
log_subroutines.print_and_log_text(str(classification_series.head(50)))

CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
C7000      777
C1700      287
C4000      194
C5000      116
C1270      114
C2700      104
C2800       95
C7100       75
C1300       58
C1280       50
C1230       36
C1400       34
C2300       32
C7200       32
C1240       30
C8000       20
C7120       18
C1500       16
C1800       15
C6000       15
C1250       14
C8200       11
C1278       10
C1238       10
C1235        9
C1237        9
C7210        7
C2400        6
C1720        6
C4100        6
C1600        5
C1257        5
C1260        3
C0           3
C2710        3
C3200        2
C1234        2
C1246        2
C1267        2
C1256        2
C1728        1
C1570        1
C1283        1
C2380        1
C1732        1
Name: CLASSIFICATION, dtype: int64


### **USE_CASE**

In [14]:
# Value-count series and count list for the USE_CASE column.
use_case_series, use_case_counts_integer_list = (
    deep_learning_functions.return_feature_series_and_sorted_count_list(
        charity_dataframe, 'USE_CASE'))

# Register USE_CASE in the three parallel binning lists.  These are plain
# appends, so running this cell twice registers the column twice.
column_name_string_list.append('USE_CASE')
column_series_list.append(use_case_series)
counts_integer_list_list.append(use_case_counts_integer_list)

# Print (and log) the USE_CASE counts.
log_subroutines.print_and_log_text(str(use_case_series))

USE_CASE
Preservation     28095
ProductDev        5671
CommunityServ      384
Heathcare          146
Other                3
Name: USE_CASE, dtype: int64


### **ORGANIZATION**

In [15]:
# Value-count series and count list for the ORGANIZATION column.
organization_series, organization_counts_integer_list = (
    deep_learning_functions.return_feature_series_and_sorted_count_list(
        charity_dataframe, 'ORGANIZATION'))

# Register ORGANIZATION in the three parallel binning lists.  These are
# plain appends, so running this cell twice registers the column twice.
column_name_string_list.append('ORGANIZATION')
column_series_list.append(organization_series)
counts_integer_list_list.append(organization_counts_integer_list)

# Print (and log) the ORGANIZATION counts.
log_subroutines.print_and_log_text(str(organization_series))

ORGANIZATION
Trust           23515
Association     10255
Co-operative      486
Corporation        43
Name: ORGANIZATION, dtype: int64


### **INCOME_AMT**

In [16]:
# Value-count series and count list for the INCOME_AMT column.
income_amt_series, income_amt_counts_integer_list = (
    deep_learning_functions.return_feature_series_and_sorted_count_list(
        charity_dataframe, 'INCOME_AMT'))

# Register INCOME_AMT in the three parallel binning lists.  These are plain
# appends, so running this cell twice registers the column twice.
column_name_string_list.append('INCOME_AMT')
column_series_list.append(income_amt_series)
counts_integer_list_list.append(income_amt_counts_integer_list)

# Print (and log) the INCOME_AMT counts.
log_subroutines.print_and_log_text(str(income_amt_series))

INCOME_AMT
0                24388
25000-99999       3747
100000-499999     3374
1M-5M              955
1-9999             728
10000-24999        543
10M-50M            240
5M-10M             185
50M+               139
Name: INCOME_AMT, dtype: int64


### **ASK_AMT**

In [17]:
# Value-count series and count list for the ASK_AMT column.
ask_amt_series, ask_amt_counts_integer_list = (
    deep_learning_functions.return_feature_series_and_sorted_count_list(
        charity_dataframe, 'ASK_AMT'))

# Register ASK_AMT in the three parallel binning lists.  These are plain
# appends, so running this cell twice registers the column twice.
column_name_string_list.append('ASK_AMT')
column_series_list.append(ask_amt_series)
counts_integer_list_list.append(ask_amt_counts_integer_list)

# Print (and log) the ASK_AMT counts.
log_subroutines.print_and_log_text(str(ask_amt_series))

ASK_AMT
5000        25398
15583           3
63981           3
6725            3
10478           3
            ...  
11336           1
1270766         1
121619          1
46915           1
36500179        1
Name: ASK_AMT, Length: 8747, dtype: int64


# <br> **Section 3: Optimization**

In [None]:
# Run one hyperparameter search for a single, hand-picked set of binning
# cutoffs, log the result, and append it to the output file.
#
# NOTE: best_model_dictionary is reset on every run of this cell, so the ">="
# test further down only ever compares against this 0.0 baseline.
best_model_dictionary = {'accuracy': 0.0}

# Wall-clock start of the search; not read again in the visible cells.
start_time_float = time.time()


# Binning cutoff chosen for each feature column.
# NOTE(review): how a cutoff (including 0) is interpreted is decided inside
# deep_learning_functions.return_binned_dataframe -- confirm there.
name_count_integer = 2

application_type_count_integer = 157

affiliation_count_integer = 65

classification_count_integer = 96

use_case_count_integer = 147

organization_count_integer = 0

income_amt_count_integer = 0

ask_amt_count_integer = 4


# Work on a copy so the binning does not touch charity_dataframe itself.
temp_charity_dataframe = charity_dataframe.copy()


# Cutoffs listed in the same column order in which the binning cells appended
# to column_name_string_list: NAME, APPLICATION_TYPE, AFFILIATION,
# CLASSIFICATION, USE_CASE, ORGANIZATION, INCOME_AMT, ASK_AMT.
count_integer_list = [
    name_count_integer,
    application_type_count_integer,
    affiliation_count_integer,
    classification_count_integer,
    use_case_count_integer,
    organization_count_integer,
    income_amt_count_integer,
    ask_amt_count_integer,
]

# Largest entry of each column's count list, in the same column order.
# Recorded with the result for reference only; not passed to the binning.
max_count_integer_list = [
    max(name_counts_integer_list),
    max(application_type_counts_integer_list),
    max(affiliation_counts_integer_list),
    max(classification_counts_integer_list),
    max(use_case_counts_integer_list),
    max(organization_counts_integer_list),
    max(income_amt_counts_integer_list),
    max(ask_amt_counts_integer_list),
]


# Bin the copied frame using the parallel lists built in the binning cells
# together with the cutoffs chosen above.
temp_charity_dataframe = deep_learning_functions.return_binned_dataframe(
    temp_charity_dataframe,
    column_series_list,
    counts_integer_list_list,
    column_name_string_list,
    count_integer_list,
)


# Derive the train/test arrays with IS_SUCCESSFUL as the target column.
# NOTE(review): the variable names suggest the x arrays come back scaled;
# the split and scaling happen inside the helper -- confirm there.
x_train_scaled_nparray, \
x_test_scaled_nparray, \
y_train_nparray, \
y_test_nparray \
    = deep_learning_functions.return_neural_network_xy_parameters(
        temp_charity_dataframe, 'IS_SUCCESSFUL')


# Tell the helper module how many input features one training row has.
deep_learning_functions.set_features_integer(len(x_train_scaled_nparray[0]))

# Run the tuner search with 'val_accuracy' as the objective.
# NOTE(review): 100 presumably is the maximum number of epochs (the cell
# output shows "Epoch 1/100"); the meaning of 2 and 3 is not visible here --
# confirm against return_best_model_dictionary.
temp_dictionary = deep_learning_functions.return_best_model_dictionary(
    x_train_scaled_nparray,
    x_test_scaled_nparray,
    y_train_nparray,
    y_test_nparray,
    'val_accuracy',
    100, 2, 3,
)


# Store the binning configuration alongside the search result.
temp_dictionary['count_list'] = count_integer_list

temp_dictionary['max_count_list'] = max_count_integer_list


# Pass text, as every other print_and_log_text call in this notebook does,
# rather than the raw dictionary object.
log_subroutines.print_and_log_text(str(temp_dictionary))


# Keep the result and append it to the output file when it is at least as
# accurate as the current best.
if temp_dictionary['accuracy'] >= best_model_dictionary['accuracy']:

    best_model_dictionary = temp_dictionary

    # Append mode: results from earlier runs stay in the file.
    with open(CONSTANT_OUTPUT_FILE_PATH, 'a') as output_file:

        output_file.write('\n\n')

        output_file.write(str(count_integer_list))

        output_file.write('\n\n')

        output_file.write(str(best_model_dictionary))

Trial 251 Complete [00h 06m 24s]
val_accuracy: 0.8065305948257446

Best val_accuracy So Far: 0.8111953139305115
Total elapsed time: 03h 02m 03s

Search: Running Trial #252

Value             |Best Value So Far |Hyperparameter
relu              |relu              |activation
29                |53                |first_units
5                 |1                 |num_layers
75                |20                |units_0
0.0046911         |0.00088933        |learning_rate
96                |60                |units_1
86                |44                |units_2
28                |47                |units_3
35                |91                |units_4
100               |34                |tuner/epochs
0                 |12                |tuner/initial_epoch
0                 |2                 |tuner/bracket
0                 |1                 |tuner/round

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/10

# <br> **Section 4: Export Best Model Parameters to File**

In [None]:
# Print (and log) the best model parameters found by the search.
log_subroutines.print_and_log_text(
    str(best_model_dictionary)
)

In [None]:
# Append the best model parameters to the output file as JSON, separated
# from any earlier content by a blank line.
best_model_json_string = json.dumps(best_model_dictionary)

with open(CONSTANT_OUTPUT_FILE_PATH, 'a') as output_file:
    output_file.write('\n\n' + best_model_json_string)