In [3]:
# Dependency importing
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import ModelCheckpoint

# Step 1: Load the charity dataset straight from the hosted CSV
url = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
df = pd.read_csv(url)

# The label to predict, plus a snapshot of every other non-EIN/NAME column.
# `features` is taken from the raw frame (before binning and encoding).
target = "IS_SUCCESSFUL"
features = df.drop(columns=["EIN", "NAME", target])

# Step 2: Remove the EIN and NAME columns from the working frame
df = df.drop(columns=["EIN", "NAME"])

# Step 3: Count the distinct values held by each remaining column
unique_counts = df.nunique()

# Step 4: For every column with more than 10 distinct values, show how many
# rows fall under each value
for column, n_unique in unique_counts.items():
    if n_unique <= 10:
        continue
    print(f"Number of unique values for {column}: {n_unique}")
    print(df[column].value_counts())
    print()

# Step 5: Binning "rare" categorical variables
# Any category represented by fewer than `cutoff_point` rows is folded into
# a single "Other" bucket so it does not become its own one-hot column later.
cutoff_point = 100

# Only object-dtype columns with more than 10 distinct values are binned.
# The cheap dtype test runs first so nunique() is skipped for other columns.
categorical_columns = [
    column
    for column in df.columns
    if df[column].dtype == 'object' and df[column].nunique() > 10
]

# Bin the rare values of each selected column
for column in categorical_columns:
    # Row count for each distinct value in this column
    value_counts = df[column].value_counts()

    # Values that occur fewer than `cutoff_point` times
    rare_values = value_counts[value_counts < cutoff_point].index.tolist()

    # Vectorized replacement: keep entries that are NOT rare, overwrite the
    # rest with "Other". This avoids a Python-level list scan for every row.
    df[column] = df[column].where(~df[column].isin(rare_values), "Other")

# Verifying if binning was successful
for column in categorical_columns:
    print(f"Unique values for {column} after binning:")
    print(df[column].value_counts())
    print()

# Step 6: One-hot encode the categorical columns
df = pd.get_dummies(df)

# Step 7: Separate the target column from the features, then carve out
# training and testing subsets (fixed seed for a repeatable split)
y = df[target]
X = df.drop(columns=[target])

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Step 8: Standardize the features. The scaler is fitted on the training
# subset only and then applied unchanged to both subsets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 9: Number of feature columns seen by the network's input layer
_, input_features = X_train_scaled.shape

Number of unique values for APPLICATION_TYPE: 17
T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T25        3
T14        3
T29        2
T15        2
T17        1
Name: APPLICATION_TYPE, dtype: int64

Number of unique values for CLASSIFICATION: 71
C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
         ...  
C4120        1
C8210        1
C2561        1
C4500        1
C2150        1
Name: CLASSIFICATION, Length: 71, dtype: int64

Number of unique values for ASK_AMT: 8747
5000        25398
10478           3
15583           3
63981           3
6725            3
            ...  
5371754         1
30060           1
43091152        1
18683           1
36500179        1
Name: ASK_AMT, Length: 8747, dtype: int64

Unique values for APPLICATION_TYPE after binning:
T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7       



Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Loss: 0.5506112575531006
Accuracy: 0.7309620976448059


  saving_api.save_model(


In [None]:
# Steps 1-3: Build the network in one go: a 64-unit ReLU hidden layer that
# takes `input_features` inputs, followed by a single sigmoid output unit
model = Sequential([
    Dense(units=64, activation='relu', input_dim=input_features),
    Dense(units=1, activation='sigmoid'),
])

# Step 4: Print the layer-by-layer structure of the model
model.summary()

# Step 5: Compiling the model (Adam optimizer, binary cross-entropy loss,
# accuracy reported as the metric)
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Step 6: Creating a callback to save the model's weights every five epochs
# The `period` argument of ModelCheckpoint is deprecated, so the interval is
# expressed through `save_freq` as a number of batches instead: five epochs'
# worth of batches. `batch_size` is passed explicitly to fit() below so the
# batch count computed here always matches what training actually runs.
batch_size = 32  # same value fit() falls back to when batch_size is omitted
steps_per_epoch = -(-len(X_train_scaled) // batch_size)  # ceiling division
checkpoint_path = "model_checkpoint/checkpoint"
# NOTE(review): Keras 3 requires weights-only checkpoint paths to end in
# ".weights.h5"; the path is left unchanged here. Confirm against the
# installed Keras version.
checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path,
                                      save_weights_only=True,
                                      save_freq=5 * steps_per_epoch)

# Step 7: Training the model for 25 epochs, reporting metrics on the test
# split after each epoch
history = model.fit(X_train_scaled, y_train,
                    epochs=25,
                    batch_size=batch_size,
                    callbacks=[checkpoint_callback],
                    validation_data=(X_test_scaled, y_test))

# Step 8: Score the trained model on the scaled test split and report the
# resulting loss and accuracy
test_metrics = model.evaluate(X_test_scaled, y_test)
loss, accuracy = test_metrics
for label, value in (("Loss:", loss), ("Accuracy:", accuracy)):
    print(label, value)

# Step 9: Write the full model out as an HDF5 file
model.save("AlphabetSoupCharity.h5")

# Optimization attempts are in the AlphabetSoupCharity_Optimization.py file.