## 1. Initial Setup and Import Dependencies:

#### Importing necessary libraries and dependencies

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import log_loss
import numpy as np
import pandas as pd
import tensorflow as tf

2023-08-07 11:05:25.777230: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## 2. Data Cleaning and Preprocessing:

#### The purpose of preprocessing is to clean, transform, and prepare the data to become suitable for further analysis or machine learning tasks.

### Data Loading & Exploration
- Loading and inspecting the dataset
- Exploring data through initial statistics and visualization

In [2]:
#  Import and read in the data
df_application = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")
df_application.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


### Data Cleaning
- Identifying and correcting typos in categorical columns
- Dropping non-beneficial ID columns

In [3]:
# Identify and count the number of occurrences of observed typo
df_application["USE_CASE"].value_counts()

Preservation     28095
ProductDev        5671
CommunityServ      384
Heathcare          146
Other                3
Name: USE_CASE, dtype: int64

In [4]:
# Correct typo
df_application["USE_CASE"] = df_application["USE_CASE"].replace("Heathcare", "Healthcare")

In [5]:
# Drop the non-beneficial ID columns, 'EIN' and 'NAME'.
df_application_features = df_application.drop(columns = ['EIN', 'NAME'])

# View the new features dataframe
df_application_features.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,T3,Independent,C1000,Healthcare,Trust,1,100000-499999,N,142590,1


### Data Exploration

In [6]:
# Determine the number of unique values in each column.
print(df_application_features.nunique())

APPLICATION_TYPE            17
AFFILIATION                  6
CLASSIFICATION              71
USE_CASE                     5
ORGANIZATION                 4
STATUS                       2
INCOME_AMT                   9
SPECIAL_CONSIDERATIONS       2
ASK_AMT                   8747
IS_SUCCESSFUL                2
dtype: int64


### Feature Engineering & Transformation (Part 1)
- Binning values in the "APPLICATION_TYPE" column
- Replacing infrequent application types with "Other"

In [7]:
print(df_application_features['APPLICATION_TYPE'].value_counts())

T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T25        3
T14        3
T29        2
T15        2
T17        1
Name: APPLICATION_TYPE, dtype: int64


In [8]:
# Choose a cutoff value and create a list of application types to be replaced
cutoff_value = 500

# use the variable name `application_types_to_replace`
application_types_to_replace = list(
    df_application_features['APPLICATION_TYPE'].value_counts()[df_application['APPLICATION_TYPE'].value_counts() < cutoff_value].index
    )

# Replace in dataframe
for app in application_types_to_replace:
    df_application_features['APPLICATION_TYPE'] = df_application_features['APPLICATION_TYPE'].replace(app,"Other")

### Data Exploration (Part 1)
- Analyzing value counts and distributions of "APPLICATION_TYPE" column

In [9]:
# Check to make sure binning was successful
df_application_features['APPLICATION_TYPE'].value_counts()

T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
Other      276
Name: APPLICATION_TYPE, dtype: int64

### Feature Engineering & Transformation (Part 2)
- Binning values in the "CLASSIFICATION" column
- Replacing infrequent classifications with "Other"

In [10]:
# Look at CLASSIFICATION value counts for binning
print(df_application_features['CLASSIFICATION'].value_counts())

C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
         ...  
C4120        1
C8210        1
C2561        1
C4500        1
C2150        1
Name: CLASSIFICATION, Length: 71, dtype: int64


In [11]:
# You may find it helpful to look at CLASSIFICATION value counts > 1
classification_counts = df_application_features['CLASSIFICATION'].value_counts()
classification_counts_filtered = classification_counts[classification_counts > 1]
print(classification_counts_filtered)

C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
C7000      777
C1700      287
C4000      194
C5000      116
C1270      114
C2700      104
C2800       95
C7100       75
C1300       58
C1280       50
C1230       36
C1400       34
C7200       32
C2300       32
C1240       30
C8000       20
C7120       18
C1500       16
C1800       15
C6000       15
C1250       14
C8200       11
C1238       10
C1278       10
C1235        9
C1237        9
C7210        7
C2400        6
C1720        6
C4100        6
C1257        5
C1600        5
C1260        3
C2710        3
C0           3
C3200        2
C1234        2
C1246        2
C1267        2
C1256        2
Name: CLASSIFICATION, dtype: int64


In [12]:
# Choose a cutoff value and create a list of classifications to be replaced
cutoff_value = 1000

# use the variable name `classifications_to_replace`
classifications_to_replace = list(
    df_application_features['CLASSIFICATION'].value_counts()[df_application_features['CLASSIFICATION'].value_counts() < cutoff_value].index
    )

# Replace in dataframe
for cls in classifications_to_replace:
    df_application_features['CLASSIFICATION'] = df_application_features['CLASSIFICATION'].replace(cls,"Other")

### Data Exploration (Part 2)
- Analyzing value counts and distributions of "CLASSIFICATION" column

In [13]:
# Check to make sure binning was successful
df_application_features['CLASSIFICATION'].value_counts()

C1000    17326
C2000     6074
C1200     4837
Other     2261
C3000     1918
C2100     1883
Name: CLASSIFICATION, dtype: int64

### Feature Engineering & Data Transformation (Part 3)
One-hot encoding is used to convert categorical variables into a format suitable for machine learning algorithms.

In [14]:
# Convert categorical data to numeric with `pd.get_dummies`
df_application_features = pd.get_dummies(df_application_features)

### Data Splitting
- Splitting data into features and target arrays
- Splitting data into training and testing datasets

In [15]:
# Split our preprocessed data into our features and target arrays
X = df_application_features.drop(columns=['IS_SUCCESSFUL'])
y = df_application_features['IS_SUCCESSFUL']

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

### Data Scaling & Preprocessing
- Creating and fitting a StandardScaler instance
- Scaling and transforming training and testing data

In [16]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## 3. Modeling (FeedForward Neural Network)

### Defining the Model
- Defining a relatively shallow neural network model with specified layers

In [17]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 30

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation='relu'))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='relu'))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

### Model Exploration & Understanding
- Printing the summary of the neural network model structure

In [18]:
# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 80)                3520      
                                                                 
 dense_1 (Dense)             (None, 30)                2430      
                                                                 
 dense_2 (Dense)             (None, 1)                 31        
                                                                 
Total params: 5981 (23.36 KB)
Trainable params: 5981 (23.36 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


###  Model Compilation & Training
- Compiling the model with loss function and optimizer
- Training the neural network model with training data

In [19]:
# Compile the model
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [20]:
# Train the model
nn.fit(X_train_scaled, y_train, epochs=100, verbose=1)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

<keras.src.callbacks.History at 0x7fe783f98e80>

### Evaluate the Model
- Evaluating the trained model using testing data
- Printing loss and accuracy metrics

In [21]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - loss: 0.5572 - accuracy: 0.7268 - 491ms/epoch - 2ms/step
Loss: 0.5572280287742615, Accuracy: 0.7267638444900513


---
## Results (FeedForward Nueral Network)
- Accurately preicts the testing data `72.68%` of the time, that a venture will be successful or not if funded by Alphabet Soup.
- Given the model architecture and the number of epochs (100), a binary cross-entropy loss value of approximately `0.5572` at the end of the training process indicates that the model has achieved a moderate level of performance in the binary classification task.
    - The binary cross-entropy loss measures the dissimilarity between the true labels and the predicted probabilities generated by the model. In this case, `the loss value of 0.5572 means that, on average, the model's predictions are somewhat far from the true labels. However, it is not too high, indicating that the model has learned to make reasonable predictions`.

### Export Model
- Exporting the trained model to an HDF5 file

In [22]:
# Export our model to HDF5 file
nn.save("trained_charity.h5")

  saving_api.save_model(


## Optimizing the Model

### Repeated Training & Evaluation
-  Used to understand how the model's performance varies due to factors such as random initialization, data shuffling, or other sources of variation.

In [23]:
# Define lists to store metric scores
loss_scores = []
accuracy_scores = []

# Number of times to repeat the process
num_runs = 10  # adjust as desired

for _ in range(num_runs):
    # Train the model
    nn.fit(X_train_scaled, y_train, epochs=100, verbose=0)  # Set verbose to 0 to suppress training output
    
    # Evaluate the model using the test data
    model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=0)
    
    # Append metric scores to the lists
    loss_scores.append(model_loss)
    accuracy_scores.append(model_accuracy)

# Convert lists to numpy arrays for easier calculation
loss_scores = np.array(loss_scores)
accuracy_scores = np.array(accuracy_scores)

# Calculate and print average and standard deviation
average_loss = np.mean(loss_scores)
average_accuracy = np.mean(accuracy_scores)
std_dev_loss = np.std(loss_scores)
std_dev_accuracy = np.std(accuracy_scores)

print(f"Average Loss: {average_loss:.4f}, Standard Deviation Loss: {std_dev_loss:.4f}")
print(f"Average Accuracy: {average_accuracy:.4f}, Standard Deviation Accuracy: {std_dev_accuracy:.4f}")


Average Loss: 0.6021, Standard Deviation Loss: 0.0252
Average Accuracy: 0.7277, Standard Deviation Accuracy: 0.0009


---
## Results (Repeated FeedForward Neural Network)
- After 10 repetitions of the model, testing data is accurately predicted an average of `72.77%` of the time with a standard deviation of `0.09%`; Determines whether a funded venture will be successful or not.
- The average loss score is `0.6021` with a standard deviation of `0.0252`

### Optimizing with the MLPClassifier Model

In [24]:
# Create the neural network classifier
nn = MLPClassifier(max_iter=100)

# Define the parameter grid for hyperparameter search
param_grid = {
    'hidden_layer_sizes': [(80,), (100, 50), (50, 30, 10)],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.0001, 0.001, 0.01],
}

# Create the GridSearchCV object
grid_search = GridSearchCV(nn, param_grid, cv=3)

# Perform the hyperparameter search
grid_search.fit(X_train_scaled, y_train)

# Get the best hyperparameters and the corresponding model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Get the best accuracy
best_accuracy = grid_search.best_score_

# Evaluate the best model on the test set
X_test_scaled = scaler.transform(X_test)
accuracy = best_model.score(X_test_scaled, y_test)

# Get the log loss on the test set
y_predicted_probabilities = best_model.predict_proba(X_test_scaled)
logloss = log_loss(y_test, y_predicted_probabilities)

print("Best hyperparameters:", best_params)
print("Best accuracy:", best_accuracy)
print("Test set accuracy:", accuracy)
print("Test set log loss:", logloss)



Best hyperparameters: {'alpha': 0.0001, 'hidden_layer_sizes': (50, 30, 10), 'learning_rate_init': 0.01}
Best accuracy: 0.7303299577020318
Test set accuracy: 0.7283965014577259
Test set log loss: 0.5524035475509221


---
## Results (MLPClassifier Neural Network)
- Accurately predicts the test data `72.84%` of the time, that a venture will be successful or not if funded by Alphabet Soup.
- The loss score is `0.5524`

### Optimizing with the MLPClassifier and SVC Classifier Models

In [25]:
# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Define the classifiers you want to test
classifiers = [
    ('MLP', MLPClassifier()),
    ('SVC', SVC())
]

# Create separate parameter grids for each classifier
param_grids = {
    'MLP': {
        'hidden_layer_sizes': [50],
        'alpha': [0.001],
        'max_iter': [200],
        'solver': ['adam'],
        'learning_rate': ['constant'],
        'validation_fraction': [0.1],
        'n_iter_no_change': [10]
    },
    'SVC': {
        'kernel': ['rbf'],
        'C': [1.0],
        'gamma': ['scale']
    },
}

# Create an empty dictionary to store the best models for each classifier
best_models = {}

# Loop through each classifier and perform the hyperparameter search
for name, clf in classifiers:
    # Create the pipeline with the current classifier
    pipeline = Pipeline(
        [
            ('scaler', scaler),
            ('classifier', clf)
        ]
        )

    # Get the corresponding parameter grid for the current classifier
    param_grid = {'classifier__' + key: value for key, value in param_grids[name].items()}

    # Create the GridSearchCV object
    grid_search = GridSearchCV(pipeline, param_grid, cv=2)

    # Fit the GridSearchCV object
    grid_search.fit(X_train_scaled, y_train)
    
    # Get the best estimator and store it in the dictionary
    best_models[name] = grid_search.best_estimator_

# Evaluate the best models on the test set
for name, model in best_models.items():
    accuracy = model.score(X_test_scaled, y_test)
    print(f"Best accuracy for {name}:", accuracy)
            
    # Print "epoch complete" along with epoch number for MLPClassifier
    if name == 'MLP':
        print("one epoch is complete")

Best accuracy for MLP: 0.7266472303206997
one epoch is complete
Best accuracy for SVC: 0.7286297376093295


---
## Results (MLPClassifier Neural Network) & (SVC Classifier Neural Network)
- __MLPClassifier__ accurately predicts, `72.66%` of the time, that a funded vventure will be successful or not.
- __SVC Classifier__ accurately predicts, `72.86%` of the time, that a funded venture will be successful or not.

# Conclusion:
- Based on the results obtained from various neural network models and classifiers, this project demonstrates a consistent and commendable performance in predicting the success of funding applicants for Alphabet Soup, a nonprofit foundation.
-  The `FeedForward Neural Network`, both individually and in repeated runs, along with the `MLPClassifier` and `SVC Classifier Neural Networks`, achieve accuracies around 72-73%, showcasing the ability to effectively identify organizations with a promising likelihood of success when funded.
- While these models provide reliable predictions, further exploration and refinement could potentially enhance predictive accuracy and decision-making for Alphabet Soup's funding initiatives.