<a href="https://colab.research.google.com/github/njgeorge000158/deep-learning-challenge/blob/main/AlphabetSoupCharityOptimizationSearch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#*******************************************************************************************
 #
 #  File Name:  AlphabetSoupCharityOptimizationSearch.ipynb
 #
 #  File Description:
 #      This interactive Python notebook, AlphabetSoupCharityOptimizationSearch.ipynb,
 #      reads a csv file, CharityData.csv, in the Resources folder, and uses Python and
 #      neural network methods to determine the optimum neural network model parameters.
 #
 #
 #  Date            Description                             Programmer
 #  ----------      ------------------------------------    ------------------
 #  12/02/2023      Initial Development                     N. James George
 #
 #******************************************************************************************/

from google.colab import drive
drive.mount('/content/drive/')

import sys
sys.path.insert(0,'/content/drive/My Drive/deep-learning-challenge')

!pip install -q dataframe_image
!pip install aspose-words
!pip install hvplot
!pip install silence-tensorflow
!pip install keras-tuner

import PyConstants as constant
import PyFunctions as function
import PyLogConstants as log_constant
import PyLogFunctions as log_function
import PyLogSubRoutines as log_subroutine
import PySubRoutines as subroutine

import AlphabetSoupCharityOptimizationSearchFunctions as local_function

import datetime
import io
import json
import time

import pandas as pd

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
# Notebook-level constants.  The file name and program name identify THIS
# notebook (AlphabetSoupCharityOptimizationSearch); the previous values were
# those of the sibling AlphabetSoupCharity notebook.
CONSTANT_LOCAL_FILE_NAME \
    = 'AlphabetSoupCharityOptimizationSearch.ipynb'

CONSTANT_CHARITY_DATA_CSV_FILE_PATH \
    = '/content/drive/My Drive/deep-learning-challenge/Resources/CharityData.csv'

CONSTANT_OUTPUT_FILE_PATH \
    = '/content/drive/My Drive/deep-learning-challenge/Resources/CharityDataOptimizationSearch.txt'

# NOTE(review): the program name is presumably used by the logging module to
# label its output -- confirm nothing downstream relies on the old name.
log_subroutine \
    .BeginProgramExecution \
        ('AlphabetSoupCharityOptimizationSearch')

# <br> **Section 1: Data Acquisition**

## **1.1: Import and Read Charity Data**

In [3]:
# Read the charity CSV through the project helper, then pass the resulting
# frame to the debug logger.
charityDataFrame = function.ReturnCSVFileAsDataFrame(
    CONSTANT_CHARITY_DATA_CSV_FILE_PATH
)

log_function.DebugReturnObjectWriteObject(charityDataFrame)

## **1.2: Display Charity Data**

In [4]:
captionString = 'Table 1.2.1: Charity Data Table'

# Format the first 12 rows under the caption, then hand the styler to the
# PNG-saving helper (kept as the cell's last expression so its result renders).
currentStylerObject = function.ReturnStylerObjectStandardFormat(
    charityDataFrame.head(12),
    captionString,
)

log_function.ReturnStylerObjectSavePNGImage(currentStylerObject, captionString)

EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1
10556855,MINORITY ORGAN & TISSUE TRANSPLANT & EDUCATION PROGRAM OF TENNESSEE,T3,Independent,C1200,Preservation,Trust,1,0,N,5000,1
10558440,FRIENDS OF ARTS COUNCIL OF GREATER DENHAM SPRINGS INC,T3,Independent,C1000,Preservation,Trust,1,100000-499999,N,31452,1
10566033,ISRAEL EMERGENCY ALLIANCE,T3,Independent,C2000,Preservation,Trust,1,10M-50M,N,7508025,1
10570430,ARAMCO BRATS INC,T7,Independent,C1000,ProductDev,Trust,1,1-9999,N,94389,1
10571689,INTERNATIONAL ASSOCIATION OF FIRE FIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0


# <br> **Section 2: Preprocessing**

## **2.1: Drop the Non-Beneficial Columns**

In [5]:
# Remove the columns that are not used as model features.  Reassigning the
# result (instead of inplace=True) together with errors='ignore' keeps this
# cell safe to re-run: a second run no longer raises KeyError because the
# columns are already gone.
charityDataFrame = charityDataFrame.drop(
    columns=['EIN', 'STATUS', 'SPECIAL_CONSIDERATIONS'],
    errors='ignore',
)

log_function \
    .DebugReturnObjectWriteObject \
        (charityDataFrame)

In [6]:
captionString = 'Table 1.2.2: Modified Charity Data Table'

# Format the first 12 rows of the reduced frame under the caption, then hand
# the styler to the PNG-saving helper (last expression, so its result renders).
currentStylerObject = function.ReturnStylerObjectStandardFormat(
    charityDataFrame.head(12),
    captionString,
)

log_function.ReturnStylerObjectSavePNGImage(currentStylerObject, captionString)

NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,INCOME_AMT,ASK_AMT,IS_SUCCESSFUL
BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,0,5000,1
AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1-9999,108590,1
ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,0,5000,0
SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,10000-24999,6692,1
GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,100000-499999,142590,1
MINORITY ORGAN & TISSUE TRANSPLANT & EDUCATION PROGRAM OF TENNESSEE,T3,Independent,C1200,Preservation,Trust,0,5000,1
FRIENDS OF ARTS COUNCIL OF GREATER DENHAM SPRINGS INC,T3,Independent,C1000,Preservation,Trust,100000-499999,31452,1
ISRAEL EMERGENCY ALLIANCE,T3,Independent,C2000,Preservation,Trust,10M-50M,7508025,1
ARAMCO BRATS INC,T7,Independent,C1000,ProductDev,Trust,1-9999,94389,1
INTERNATIONAL ASSOCIATION OF FIRE FIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,0,5000,0


## **2.2: Determine the Number of Unique Values in Each Column**

In [7]:
# Count the distinct values in every column and pass the result to the
# debug logger.
numberOfUniqueValuesInEachColumnSeries = charityDataFrame.nunique()

log_function.DebugReturnObjectWriteObject(
    numberOfUniqueValuesInEachColumnSeries
)

In [8]:
# Print and log the text form of the per-column unique-value counts.
log_subroutine.PrintAndLogWriteText(
    str(numberOfUniqueValuesInEachColumnSeries)
)

NAME                19568
APPLICATION_TYPE       17
AFFILIATION             6
CLASSIFICATION         71
USE_CASE                5
ORGANIZATION            4
INCOME_AMT              9
ASK_AMT              8747
IS_SUCCESSFUL           2
dtype: int64


## **2.3: Binning Setup**

### **Initialize Lists**

In [9]:
# Three parallel, initially empty lists; the cells below append one entry per
# column to each of them (series, count list, column name).
columnSeriesList, countsIntegerListList, columnNameStringList = [], [], []

### **NAME**

In [10]:
# Ask the project helper for the NAME column's series and count list, then
# register both (plus the column name) in the parallel binning lists.
nameSeries, nameCountsIntegerList = (
    local_function.ReturnColumnSeriesAndSortedValueCountList(
        charityDataFrame, 'NAME'
    )
)

columnSeriesList.append(nameSeries)
countsIntegerListList.append(nameCountsIntegerList)
columnNameStringList.append('NAME')

log_subroutine.PrintAndLogWriteText(str(nameSeries.tolist()))

[1260, 765, 700, 492, 408, 368, 331, 313, 293, 287, 277, 266, 210, 206, 197, 166, 158, 154, 151, 144, 140, 136, 131, 127, 107, 107, 106, 105, 104, 103, 99, 97, 92, 91, 87, 84, 76, 73, 73, 69, 60, 58, 57, 57, 56, 55, 54, 52, 52, 50, 50, 49, 46, 46, 46, 44, 43, 42, 42, 42, 41, 41, 41, 40, 39, 39, 38, 38, 38, 38, 37, 37, 36, 35, 34, 34, 33, 33, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 28, 27, 26, 26, 26, 25, 25, 24, 24, 24, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,

### **APPLICATION_TYPE**

In [11]:
# Ask the project helper for the APPLICATION_TYPE column's series and count
# list, then register both (plus the column name) in the parallel binning lists.
applicationTypeSeries, applicationTypeCountsIntegerList = (
    local_function.ReturnColumnSeriesAndSortedValueCountList(
        charityDataFrame, 'APPLICATION_TYPE'
    )
)

columnSeriesList.append(applicationTypeSeries)
countsIntegerListList.append(applicationTypeCountsIntegerList)
columnNameStringList.append('APPLICATION_TYPE')

log_subroutine.PrintAndLogWriteText(str(applicationTypeSeries.tolist()))

[27037, 1542, 1216, 1173, 1065, 737, 725, 528, 156, 66, 27, 16, 3, 3, 2, 2, 1]


### **CLASSIFICATION**

In [12]:
# Ask the project helper for the CLASSIFICATION column's series and count
# list, then register both (plus the column name) in the parallel binning lists.
classificationSeries, classificationCountsIntegerList = (
    local_function.ReturnColumnSeriesAndSortedValueCountList(
        charityDataFrame, 'CLASSIFICATION'
    )
)

columnSeriesList.append(classificationSeries)
countsIntegerListList.append(classificationCountsIntegerList)
columnNameStringList.append('CLASSIFICATION')

log_subroutine.PrintAndLogWriteText(str(classificationSeries.tolist()))

[17326, 6074, 4837, 1918, 1883, 777, 287, 194, 116, 114, 104, 95, 75, 58, 50, 36, 34, 32, 32, 30, 20, 18, 16, 15, 15, 14, 11, 10, 10, 9, 9, 7, 6, 6, 6, 5, 5, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


### **USE_CASE**

In [13]:
# Ask the project helper for the USE_CASE column's series and count list,
# then register both (plus the column name) in the parallel binning lists.
useCaseSeries, useCaseCountsIntegerList = (
    local_function.ReturnColumnSeriesAndSortedValueCountList(
        charityDataFrame, 'USE_CASE'
    )
)

columnSeriesList.append(useCaseSeries)
countsIntegerListList.append(useCaseCountsIntegerList)
columnNameStringList.append('USE_CASE')

log_subroutine.PrintAndLogWriteText(str(useCaseSeries.tolist()))

[28095, 5671, 384, 146, 3]


### **ORGANIZATION**

In [14]:
# Ask the project helper for the ORGANIZATION column's series and count list,
# then register both (plus the column name) in the parallel binning lists.
organizationSeries, organizationCountsIntegerList = (
    local_function.ReturnColumnSeriesAndSortedValueCountList(
        charityDataFrame, 'ORGANIZATION'
    )
)

columnSeriesList.append(organizationSeries)
countsIntegerListList.append(organizationCountsIntegerList)
columnNameStringList.append('ORGANIZATION')

log_subroutine.PrintAndLogWriteText(str(organizationSeries.tolist()))

[23515, 10255, 486, 43]


### **INCOME_AMT**

In [15]:
# Ask the project helper for the INCOME_AMT column's series and count list,
# then register both (plus the column name) in the parallel binning lists.
incomeAmtSeries, incomeAmtCountsIntegerList = (
    local_function.ReturnColumnSeriesAndSortedValueCountList(
        charityDataFrame, 'INCOME_AMT'
    )
)

columnSeriesList.append(incomeAmtSeries)
countsIntegerListList.append(incomeAmtCountsIntegerList)
columnNameStringList.append('INCOME_AMT')

log_subroutine.PrintAndLogWriteText(str(incomeAmtSeries.tolist()))

[24388, 3747, 3374, 955, 728, 543, 240, 185, 139]


### **ASK_AMT**

In [16]:
# Ask the project helper for the ASK_AMT column's series and count list,
# then register both (plus the column name) in the parallel binning lists.
askAmtSeries, askAmtCountsIntegerList = (
    local_function.ReturnColumnSeriesAndSortedValueCountList(
        charityDataFrame, 'ASK_AMT'
    )
)

columnSeriesList.append(askAmtSeries)
countsIntegerListList.append(askAmtCountsIntegerList)
columnNameStringList.append('ASK_AMT')

log_subroutine.PrintAndLogWriteText(str(askAmtSeries.tolist()))

[25398, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

# <br> **Section 3: Optimization**

In [None]:
# One optimization pass: bin the registered columns using the thresholds
# below, build the train/test arrays, run the model search, log the result,
# and append it to the output file.

# NOTE(review): bestModelDictionary is reset on every run of this cell, so the
# comparison at the bottom passes for any non-negative accuracy -- each run is
# appended to the output file.  Move this initialisation to its own cell if
# the best result is meant to survive across runs.
bestModelDictionary = {'accuracy': 0.0}

# Start timestamp; it is not read again within this cell.
startTimeFloat = time.time()


# Per-column thresholds handed to ReturnBinnedDataFrame, listed in the same
# order in which the columns were appended to columnNameStringList.
# NOTE(review): the exact binning rule lives in ReturnBinnedDataFrame -- confirm there.
nameCountInteger = 2

applicationTypeCountInteger = 157

classificationCountInteger = 96

useCaseCountInteger = 4

organizationCountInteger = 0

incomeAmtCountInteger = 0

askAmtCountInteger = 4


countIntegerList = [
    nameCountInteger,
    applicationTypeCountInteger,
    classificationCountInteger,
    useCaseCountInteger,
    organizationCountInteger,
    incomeAmtCountInteger,
    askAmtCountInteger,
]

# Largest entry of each column's count list, recorded next to the thresholds.
maxCountIntegerList = [
    max(nameCountsIntegerList),
    max(applicationTypeCountsIntegerList),
    max(classificationCountsIntegerList),
    max(useCaseCountsIntegerList),
    max(organizationCountsIntegerList),
    max(incomeAmtCountsIntegerList),
    max(askAmtCountsIntegerList),
]


# Work on a copy so charityDataFrame itself is left as it was.
tempCharityDataFrame = local_function.ReturnBinnedDataFrame(
    charityDataFrame.copy(),
    columnSeriesList,
    countsIntegerListList,
    columnNameStringList,
    countIntegerList,
)


(
    XTrainScaledNumpyArray,
    XTestScaledNumpyArray,
    yTrainNumpyArray,
    yTestNumpyArray,
) = local_function.ReturnNeuralNetworkXYParameters(
    tempCharityDataFrame,
    'IS_SUCCESSFUL',
)


# The length of the first training row is passed on as the feature count.
local_function.SetFeaturesInteger(len(XTrainScaledNumpyArray[0]))

# NOTE(review): 100, 2, 3 are positional settings whose meaning is defined by
# ReturnBestModelDictionary -- TODO replace with named constants.
tempDictionary = local_function.ReturnBestModelDictionary(
    XTrainScaledNumpyArray,
    XTestScaledNumpyArray,
    yTrainNumpyArray,
    yTestNumpyArray,
    'val_accuracy',
    100, 2, 3,
)


tempDictionary['count_list'] = countIntegerList

tempDictionary['max_count_list'] = maxCountIntegerList


# Pass text, not the raw dict, consistent with every other
# PrintAndLogWriteText call in this notebook.
log_subroutine.PrintAndLogWriteText(str(tempDictionary))


if tempDictionary['accuracy'] >= bestModelDictionary['accuracy']:

    bestModelDictionary = tempDictionary

    # Append the thresholds and the model dictionary, each preceded by a
    # blank line, in a single write.
    with open(CONSTANT_OUTPUT_FILE_PATH, 'a') as outputFile:

        outputFile.write(
            '\n\n' + str(countIntegerList)
            + '\n\n' + str(bestModelDictionary)
        )

Trial 199 Complete [00h 00m 26s]
val_accuracy: 0.8080466389656067

Best val_accuracy So Far: 0.8083965182304382
Total elapsed time: 00h 43m 48s

Search: Running Trial #200

Value             |Best Value So Far |Hyperparameter
relu              |relu              |activation
16                |35                |first_units
1                 |4                 |num_layers
62                |54                |units_0
62                |10                |units_1
40                |91                |units_2
97                |39                |units_3
11                |4                 |units_4
12                |12                |tuner/epochs
4                 |4                 |tuner/initial_epoch
3                 |4                 |tuner/bracket
1                 |2                 |tuner/round
0181              |0099              |tuner/trial_id

Epoch 5/12
Epoch 6/12
Epoch 7/12

# <br> **Section 4: Export Best Model Parameters to File**

In [None]:
# Print and log the text form of the best model dictionary.
log_subroutine.PrintAndLogWriteText(
    str(bestModelDictionary)
)

In [None]:
def _ReturnJsonCompatibleObject(value):
    '''Fallback used by json.dumps for values it cannot serialise natively.

    Objects that expose tolist() (for example numpy scalars and arrays) are
    converted through it; anything else is written as its string form.
    '''
    if hasattr(value, 'tolist'):
        return value.tolist()

    return str(value)


# Serialise before opening the file so that a serialisation failure cannot
# leave a dangling blank-line separator in the output file.
bestModelJsonString = json.dumps(
    bestModelDictionary,
    default=_ReturnJsonCompatibleObject,
)

with open(CONSTANT_OUTPUT_FILE_PATH, 'a') as outputFile:

    outputFile.write('\n\n' + bestModelJsonString)

In [None]:
#log_subroutine \
#    .EndProgramExecution()