In [1]:
# Dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

# Read the charity dataset from disk into a DataFrame and preview the first rows
charity_csv_path = "Resources/charity_data.csv"
application_df = pd.read_csv(charity_csv_path)
application_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [2]:
# Remove the columns that are not used in the rest of the notebook
unused_columns = ['EIN', 'NAME']
application_df = application_df.drop(columns=unused_columns)

# Keep only the rows whose 'STATUS' is different from 0
status_is_nonzero = application_df['STATUS'] != 0
application_df = application_df[status_is_nonzero]
application_df.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [None]:
# Exploratory checks, intentionally left commented out: uncomment a single line
# to display the value counts of that column.
#application_df['APPLICATION_TYPE'].value_counts()
#application_df['CLASSIFICATION'].value_counts()
#application_df['INCOME_AMT'].value_counts()
#application_df['SPECIAL_CONSIDERATIONS'].value_counts()
#application_df['ASK_AMT'].value_counts()

In [None]:
# Application types to fold into a single bucket (chosen with the 500 cutoff)
application_types_to_replace = [
    'T9', 'T13', 'T12', 'T2', 'T25', 'T14', 'T29', 'T15', 'T17',
]

# Relabel every listed application type as 'Other' in a single replace call
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(
    application_types_to_replace, "Other"
)

# Viewing results
application_df['APPLICATION_TYPE'].value_counts()

In [None]:
# Classifications to fold into a single bucket (chosen with the 1800 cutoff)
classifications_to_replace = [
    'C7000', 'C1700', 'C4000', 'C5000', 'C1270', 'C2700', 'C2800', 'C7100',
    'C1300', 'C1280', 'C1230', 'C1400', 'C7200', 'C2300', 'C1240', 'C8000',
    'C7120', 'C1500', 'C1800', 'C6000', 'C1250', 'C8200', 'C1238', 'C1278',
    'C1235', 'C1237', 'C7210', 'C2400', 'C1720', 'C4100', 'C1257', 'C1600',
    'C1260', 'C2710', 'C0', 'C3200', 'C1234', 'C1246', 'C1267', 'C1256',
    'C2190', 'C4200', 'C2600', 'C5200', 'C1370', 'C1248', 'C6100', 'C1820',
    'C1900', 'C1236', 'C3700', 'C2570', 'C1580', 'C1245', 'C2500', 'C1570',
    'C1283', 'C2380', 'C1732', 'C1728', 'C2170', 'C4120', 'C8210', 'C2561',
    'C4500', 'C2150',
]

# Relabel every listed classification as 'Other' in a single replace call
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(
    classifications_to_replace, "Other"
)

# Viewing results
application_df['CLASSIFICATION'].value_counts()

In [None]:
# One-hot encode the categorical columns so the frame holds indicator columns
df = pd.get_dummies(data=application_df)
df.head()

In [None]:
# Creating function that creates a new sequential model with variable hyperparameters
def create_model(hp):
    """Build and compile a Sequential binary classifier from tuner hyperparameters.

    Args:
        hp: Hyperparameter object handed in by the tuner. Its ``Choice`` and
            ``Int`` methods are called to pick the activation function, the
            width of the first layer, the number of extra hidden layers and
            the width of each of those layers.

    Returns:
        The compiled ``tf.keras`` Sequential model, ending in a single
        sigmoid unit and using binary cross-entropy loss, the Adam optimizer
        and the accuracy metric.
    """
    nn_optimized = tf.keras.models.Sequential()

    # The hyperparameter name and its candidate values are two separate
    # arguments; the chosen activation is shared by every non-output layer.
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # First layer.
    # NOTE(review): input_dim=43 is hard-coded; presumably it must equal the
    # number of feature columns fed to the model -- confirm against the
    # encoded training data.
    first_units = hp.Int('first_units', min_value=1, max_value=100, step=2)
    nn_optimized.add(tf.keras.layers.Dense(
        units=first_units,
        activation=activation,
        input_dim=43))

    # Between 1 and 4 additional hidden layers, each with its own width.
    for i in range(hp.Int('num_layers', 1, 4)):
        # NOTE(review): min_value=1 / max_value=2 / step=2 appears to leave 1
        # as the only candidate width for these layers -- confirm whether a
        # larger max_value was intended.
        layer_units = hp.Int('units_' + str(i), min_value=1, max_value=2, step=2)
        nn_optimized.add(tf.keras.layers.Dense(
            units=layer_units,
            activation=activation))

    # Single sigmoid output unit for the binary target.
    nn_optimized.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

    nn_optimized.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return nn_optimized

In [None]:
# Importing keras tuner
import keras_tuner as kt

# Hyperband tuner that builds candidates with create_model and ranks them by
# validation accuracy
tuner = kt.Hyperband(
    hypermodel=create_model,
    objective='val_accuracy',
    max_epochs=10,
    hyperband_iterations=2,
)