<a href="https://colab.research.google.com/github/theidari/alphabet_soup/blob/main/src/AlphabetSoupCharity_Optimization_tuner.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**Alphabet Soup Charity Optimization**

#**Step 3: Optimize the Model**
---

In [1]:
# Delete the existing directory
!rm -rf alphabet_soup

# Clone the repository to a new directory
!git clone https://github.com/theidari/alphabet_soup.git

# Dependencies and setup
from alphabet_soup.src.package.constants import * # constants
from alphabet_soup.src.package.helpers import * # liberaries and functions

Cloning into 'alphabet_soup'...
remote: Enumerating objects: 223, done.[K
remote: Counting objects:   0% (1/223)[Kremote: Counting objects:   1% (3/223)[Kremote: Counting objects:   2% (5/223)[Kremote: Counting objects:   3% (7/223)[Kremote: Counting objects:   4% (9/223)[Kremote: Counting objects:   5% (12/223)[Kremote: Counting objects:   6% (14/223)[Kremote: Counting objects:   7% (16/223)[Kremote: Counting objects:   8% (18/223)[Kremote: Counting objects:   9% (21/223)[Kremote: Counting objects:  10% (23/223)[Kremote: Counting objects:  11% (25/223)[Kremote: Counting objects:  12% (27/223)[Kremote: Counting objects:  13% (29/223)[Kremote: Counting objects:  14% (32/223)[Kremote: Counting objects:  15% (34/223)[Kremote: Counting objects:  16% (36/223)[Kremote: Counting objects:  17% (38/223)[Kremote: Counting objects:  18% (41/223)[Kremote: Counting objects:  19% (43/223)[Kremote: Counting objects:  20% (45/223)[Kremote: Counting objects:  

In [2]:
# Loading the data into a Pandas DataFrame
application_df = pd.read_csv(DATA_URL)

In [3]:
# Drop the 'EIN' column.
application_df = application_df.drop(columns=["EIN"], axis=1)

In [4]:
binning (application_df,'CLASSIFICATION',800)


--------------------------------------------------------------------------------
 Value Count before binning:
--------------------------------------------------------------------------------
C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
         ...  
C4120        1
C8210        1
C2561        1
C4500        1
C2150        1
Name: CLASSIFICATION, Length: 71, dtype: int64
--------------------------------------------------------------------------------
Value Count after binning:
--------------------------------------------------------------------------------
C1000    17326
C2000     6074
C1200     4837
Other     2261
C3000     1918
C2100     1883
Name: CLASSIFICATION, dtype: int64


In [5]:
binning (application_df,'APPLICATION_TYPE',500)


--------------------------------------------------------------------------------
 Value Count before binning:
--------------------------------------------------------------------------------
T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T25        3
T14        3
T29        2
T15        2
T17        1
Name: APPLICATION_TYPE, dtype: int64
--------------------------------------------------------------------------------
Value Count after binning:
--------------------------------------------------------------------------------
T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
Other      276
Name: APPLICATION_TYPE, dtype: int64


In [6]:
binning (application_df,'NAME',100)


--------------------------------------------------------------------------------
 Value Count before binning:
--------------------------------------------------------------------------------
PARENT BOOSTER USA INC                                                  1260
TOPS CLUB INC                                                            765
UNITED STATES BOWLING CONGRESS INC                                       700
WASHINGTON STATE UNIVERSITY                                              492
AMATEUR ATHLETIC UNION OF THE UNITED STATES INC                          408
                                                                        ... 
ST LOUIS SLAM WOMENS FOOTBALL                                              1
AIESEC ALUMNI IBEROAMERICA CORP                                            1
WEALLBLEEDRED ORG INC                                                      1
AMERICAN SOCIETY FOR STANDARDS IN MEDIUMSHIP & PSYCHICAL INVESTIGATI       1
WATERHOUSE CHARITABLE TR              

In [7]:
# Convert categorical data to numeric with `pd.get_dummies`
application_numeric = pd.get_dummies(application_df)

In [8]:
# Split our preprocessed data into our features and target arrays
X = application_numeric.drop(['IS_SUCCESSFUL'], axis=1)
y = application_numeric['IS_SUCCESSFUL']

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

input_features=len(X_train_scaled[1])

In [9]:
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh',"sigmoid"])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=300,
        step=5), activation=activation, input_dim=input_features))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 8)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=160,
            step=5),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [10]:
!pip install -q -U keras-tuner
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=35,
    hyperband_iterations=2)

In [11]:
tuner.search(X_train_scaled,y_train,epochs=35,validation_data=(X_test_scaled,y_test)) 

Trial 180 Complete [00h 02m 24s]
val_accuracy: 0.7514868974685669

Best val_accuracy So Far: 0.7551020383834839
Total elapsed time: 01h 26m 15s


In [12]:
top_hyper = tuner.get_best_hyperparameters(3)
for param in top_hyper:
    print(param.values)

{'activation': 'relu', 'first_units': 241, 'num_layers': 5, 'units_0': 26, 'units_1': 101, 'units_2': 146, 'units_3': 61, 'units_4': 16, 'units_5': 156, 'units_6': 16, 'tuner/epochs': 12, 'tuner/initial_epoch': 4, 'tuner/bracket': 3, 'tuner/round': 2, 'tuner/trial_id': '0044', 'units_7': 156}
{'activation': 'relu', 'first_units': 51, 'num_layers': 3, 'units_0': 66, 'units_1': 111, 'units_2': 141, 'units_3': 136, 'units_4': 126, 'units_5': 51, 'units_6': 41, 'units_7': 116, 'tuner/epochs': 35, 'tuner/initial_epoch': 12, 'tuner/bracket': 1, 'tuner/round': 1, 'tuner/trial_id': '0080'}
{'activation': 'relu', 'first_units': 96, 'num_layers': 6, 'units_0': 101, 'units_1': 36, 'units_2': 26, 'units_3': 31, 'units_4': 156, 'units_5': 21, 'units_6': 136, 'units_7': 21, 'tuner/epochs': 12, 'tuner/initial_epoch': 4, 'tuner/bracket': 3, 'tuner/round': 2, 'tuner/trial_id': '0125'}


In [13]:
top_model = tuner.get_best_models(3)
for model in top_model:
    model_loss, model_accuracy = model.evaluate(X_test_scaled,y_test,verbose=2)
    print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 1s - loss: 0.4974 - accuracy: 0.7551 - 639ms/epoch - 2ms/step
Loss: 0.4973735809326172, Accuracy: 0.7551020383834839
268/268 - 1s - loss: 0.5038 - accuracy: 0.7551 - 574ms/epoch - 2ms/step
Loss: 0.5038114190101624, Accuracy: 0.7551020383834839
268/268 - 1s - loss: 0.5026 - accuracy: 0.7548 - 604ms/epoch - 2ms/step
Loss: 0.5026103258132935, Accuracy: 0.7547521591186523


In [31]:
models = tuner.get_best_models(num_models=3)
best_model = models[1]

best_model.build()
best_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 51)                3825      
                                                                 
 dense_1 (Dense)             (None, 66)                3432      
                                                                 
 dense_2 (Dense)             (None, 111)               7437      
                                                                 
 dense_3 (Dense)             (None, 141)               15792     
                                                                 
 dense_4 (Dense)             (None, 1)                 142       
                                                                 
Total params: 30,628
Trainable params: 30,628
Non-trainable params: 0
_________________________________________________________________


