## Preprocessing

In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import keras_tuner as kt

# Load charity_data.csv from the hosted course location and preview the first rows.
# pandas is already imported at the top of this cell, so it is not imported again here.
application_df = pd.read_csv(
    "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
)
application_df.head()

In [None]:
# List the column names to identify the target column and the candidate features.
application_df.columns

The target variable of the model is:\
"IS_SUCCESSFUL"

The feature variables are:\
'NAME', 'APPLICATION_TYPE', 'AFFILIATION', 'CLASSIFICATION',\
'USE_CASE', 'ORGANIZATION', 'STATUS', 'INCOME_AMT',\
'SPECIAL_CONSIDERATIONS', 'ASK_AMT'


The goal is to create a binary classification model that can predict if an Alphabet Soup-funded organization will be successful based on the features in the dataset.

In [None]:
# Drop only the non-beneficial 'EIN' identifier column.
# 'NAME' is deliberately kept as a feature: after conferring with a classmate,
# leaving it in place appeared to have a significant effect on model accuracy.
# Reassigning (instead of dropping in place) with errors='ignore' keeps this cell
# idempotent, so re-running it does not raise a KeyError for the missing column.
application_df = application_df.drop(columns=['EIN'], errors='ignore')

In [None]:
# Determine the number of unique values in each column.
# Columns with many distinct values are the candidates for binning in the cells below.
nunique_column = application_df.nunique()

print(nunique_column)

In [None]:
# Look at APPLICATION_TYPE value counts for binning.
# The per-category row counts are reused by the binning cell to pick out rare types.
application_counts = application_df['APPLICATION_TYPE'].value_counts()

print(application_counts)

In [None]:
# Application types that occur fewer times than this cutoff are considered rare.
application_cutoff_value = 250
application_types_to_replace = (
    application_counts.loc[application_counts < application_cutoff_value]
    .index
    .tolist()
)

# Collapse every rare application type into a single "Other" bucket in one pass.
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(
    application_types_to_replace, "Other"
)

# Display the new category counts to confirm the binning worked.
application_df['APPLICATION_TYPE'].value_counts()

In [None]:
# Look at CLASSIFICATION value counts for binning.
# The per-category row counts are reused by the binning cell to pick out rare classes.
classification_counts = application_df['CLASSIFICATION'].value_counts()
print(classification_counts)

In [None]:
# Show only the CLASSIFICATION values that occur more than once,
# hiding the singletons to make the distribution easier to read.
print(classification_counts[classification_counts > 1])

In [None]:
# Classifications that occur fewer times than this cutoff are considered rare.
classification_cutoff_value = 1750
classifications_to_replace = (
    classification_counts.loc[classification_counts < classification_cutoff_value]
    .index
    .tolist()
)

# Collapse every rare classification into a single "Other" bucket in one pass.
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(
    classifications_to_replace, "Other"
)

# Display the new category counts to confirm the binning worked.
application_df['CLASSIFICATION'].value_counts()

In [None]:
# Preview the first ten rows after binning, before the categorical columns are encoded.
application_df.head(10)

In [None]:
# Convert categorical data to numeric with `pd.get_dummies`
# NOTE(review): 'NAME' was left in the frame, so it is one-hot encoded here as well;
# presumably it has many distinct values, which would make this matrix very wide - confirm.
application_df_dummies = pd.get_dummies(application_df)

In [None]:
# Features are every encoded column except the label; the target is the IS_SUCCESSFUL label.
X = application_df_dummies.drop(columns=['IS_SUCCESSFUL'])
y = application_df_dummies['IS_SUCCESSFUL']

# Hold out a test set; the fixed random_state keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

In [None]:
# Create a StandardScaler instance and fit it on the training split only,
# so no information from the test split enters the scaling statistics.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both splits.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

## Compile, Train and Evaluate the Model

In [None]:
# Define the model - a deep neural net. The input width is the number of feature columns.
input_dim = X.shape[1]

nn_model = tf.keras.models.Sequential()

# Declare the input shape with an explicit Input object instead of the `input_dim`
# keyword on the first Dense layer, which newer Keras versions warn against.
nn_model.add(tf.keras.Input(shape=(input_dim,)))

# First hidden layer
# NOTE(review): softmax is an unusual hidden-layer activation, and the two "linear"
# layers below compose into a single affine map, so they add parameters without
# adding non-linearity. The activations are kept as originally chosen; consider
# trying "relu" for the hidden layers.
nn_model.add(tf.keras.layers.Dense(units=125, activation="softmax"))

# Second hidden layer
nn_model.add(tf.keras.layers.Dense(units=125, activation="linear"))

# Third hidden layer
nn_model.add(tf.keras.layers.Dense(units=125, activation="linear"))

# Output layer: a single sigmoid unit for the binary IS_SUCCESSFUL prediction
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn_model.summary()

In [None]:
# Compile the model with binary cross-entropy loss, the Adam optimizer,
# and accuracy as the reported metric.
nn_model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train the model on the scaled training data for five epochs and keep fit()'s return value.
fit_model = nn_model.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=5,
)

In [None]:
# Score the trained network on the scaled test split and report loss and accuracy.
model_loss, model_accuracy = nn_model.evaluate(
    X_test_scaled,
    y_test,
    verbose=2,
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Build function for KerasTuner: creates a new Sequential model whose depth, layer
# widths and hidden-layer activation are drawn from the hyperparameter search space.

def create_model(hp):
    """Build and compile a binary classifier from a KerasTuner hyperparameter set.

    Args:
        hp: Hyperparameter container supplied by the tuner. The search space
            registered on it is:
            - 'activation': one of 'relu', 'linear', 'softmax', shared by every
              hidden layer
            - 'first_units': width of the first hidden layer (1 to 64)
            - 'num_layers': number of additional hidden layers (1 to 10)
            - 'units_<i>': width of additional hidden layer i (1 to 64)

    Returns:
        A compiled Sequential model ending in a single sigmoid unit, using
        binary cross-entropy loss, the Adam optimizer and an accuracy metric.

    Note:
        Reads the notebook-level `input_dim` (number of input features), so the
        cell defining it must have been run first.
    """
    nn_model2 = tf.keras.models.Sequential()

    # Declare the input shape with an explicit Input object instead of the
    # `input_dim` keyword on the first Dense layer, which newer Keras versions
    # warn against.
    nn_model2.add(tf.keras.Input(shape=(input_dim,)))

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation', ['relu', 'linear', 'softmax'])

    # Allow kerastuner to decide number of neurons in first layer
    nn_model2.add(tf.keras.layers.Dense(
        units=hp.Int('first_units', min_value=1, max_value=64, step=1),
        activation=activation))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 10)):
        nn_model2.add(tf.keras.layers.Dense(
            units=hp.Int('units_' + str(i), min_value=1, max_value=64, step=1),
            activation=activation))

    # Output layer: a single sigmoid unit for the binary prediction
    nn_model2.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model2.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    return nn_model2

In [None]:
# Configure a Hyperband search over `create_model`, ranking trials by validation accuracy.
tuner = kt.Hyperband(
    hypermodel=create_model,
    objective="val_accuracy",
    max_epochs=50,
    hyperband_iterations=5,
)

In [None]:
# Run the hyperparameter search.
# - No `epochs` argument is passed: Hyperband assigns each trial its own epoch
#   budget (bounded by the tuner's `max_epochs`), so a fixed value here would be
#   overridden and only mislead the reader.
# - Validation uses a slice held back from the training data rather than the test
#   split, so the test set stays unseen for the final evaluation of the best model.
# - The labels are passed as a NumPy array so Keras can slice them for validation_split.
tuner.search(X_train_scaled, y_train.to_numpy(), validation_split=0.2)

In [None]:
# Fetch the single best hyperparameter set found by the search and print its values.
top_hyper = tuner.get_best_hyperparameters(num_trials=1)
for param in top_hyper:
    print(param.values)

In [None]:
# Fetch the best model found by the tuner and score it against the test dataset.
top_model = tuner.get_best_models(num_models=1)
for model in top_model:
    model_loss, model_accuracy = model.evaluate(
        X_test_scaled,
        y_test,
        verbose=2,
    )
    print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
import numpy as np
from sklearn.decomposition import PCA

# Create a PCA object. The component count is capped by the data's dimensions so the
# cell still runs when the encoded data has fewer than 44 rows or columns.
n_components = min(44, *X_train_scaled.shape)
pca = PCA(n_components=n_components)

# Fit on the standardized training features rather than on `application_df`:
# that frame still contains string columns, which PCA cannot process, and PCA is
# scale-sensitive, so unscaled columns with large values would dominate the components.
application_df_pca = pca.fit_transform(X_train_scaled)

# Get the loadings (coefficients) for each feature
loadings = pca.components_

# Determine the absolute magnitude of the loadings
absolute_loadings = np.abs(loadings)

# Find the most important feature for each component
most_important_features = np.argmax(absolute_loadings, axis=1)

# Print the most important features for each component.
# The loadings are indexed by the encoded feature columns, so names are looked up
# in `X.columns` (same column order as the scaled arrays), not in the raw frame.
for component, feature_idx in enumerate(most_important_features):
    feature_name = X.columns[feature_idx]
    print(f"Component {component+1}: Most important feature is {feature_name}")

In [None]:
# Export our model to HDF5 file
# This saves the manually defined network; to export the tuned network instead,
# save `top_model[0]` from the tuner evaluation cell.
nn_model.save("AlphabetSoupCharity_Optimization.h5")

Step 4: Write a Report on the Neural Network Model
For this part of the assignment, you’ll write a report on the performance of the deep learning model you created for Alphabet Soup.

The report should contain the following:

Overview of the analysis: Explain the purpose of this analysis.

Results: Using bulleted lists and images to support your answers, address the following questions:

Data Preprocessing

What variable(s) are the target(s) for your model?
What variable(s) are the features for your model?
What variable(s) should be removed from the input data because they are neither targets nor features?
Compiling, Training, and Evaluating the Model

How many neurons, layers, and activation functions did you select for your neural network model, and why?
Were you able to achieve the target model performance?
What steps did you take in your attempts to increase model performance?
Summary: Summarize the overall results of the deep learning model. Include a recommendation for how a different model could solve this classification problem, and then explain your recommendation.