In [None]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Load the dataset.
# A failed read is reported and then re-raised: swallowing it would leave `df`
# undefined (or, on a re-run, silently pointing at a stale frame from an
# earlier execution), and the failure would only surface later as a confusing
# NameError or as results computed from the wrong data.
try:
    df = pd.read_csv('charity_data.csv')
except Exception as e:
    print('Error loading dataset:', e)
    raise
else:
    print('Dataset loaded successfully.')

# Display the first few rows as a quick sanity check of the parsed columns
print('First few rows of the dataset:')
print(df.head())

In [None]:
# Drop unnecessary identifier columns (EIN and NAME).
# Each column is checked independently so that a frame containing only one of
# them still has that one removed; requiring both to be present would leave a
# lone identifier column in the feature set.
id_columns_present = [name for name in ('EIN', 'NAME') if name in df.columns]
if id_columns_present:
    df = df.drop(columns=id_columns_present)
    print('Dropped columns: ' + ', '.join(id_columns_present))
else:
    print('Columns EIN or NAME not found, proceeding without dropping.')

# Print the number of distinct values in each remaining column
print('\nUnique values in each column:')
for col in df.columns:
    print(col + ': ' + str(df[col].nunique()))

In [None]:
# Rare-category grouping: values that occur fewer than `min_count` times in a
# column are collapsed into a single catch-all label.
# Note: this helper does not check how many unique values the column has; the
# ">10 unique values" gate is applied by the code that calls it.
def replace_rare_categories(series, min_count=5, other_label='Other'):
    """Return a copy of ``series`` with infrequent values replaced by one label.

    Parameters
    ----------
    series : pandas.Series
        Column whose values are to be grouped. It is not modified.
    min_count : int, default 5
        Values whose frequency (per ``Series.value_counts``) is strictly less
        than this are treated as rare.
    other_label : default 'Other'
        Replacement written in place of every rare value.

    Returns
    -------
    pandas.Series
        New series with the same index; non-rare values are unchanged.
        Missing values are left as-is because ``value_counts`` does not count
        them, so they are never classified as rare.
    """
    counts = series.value_counts()
    rare_values = counts[counts < min_count].index
    # Boolean-mask replacement: keep entries that are NOT rare, overwrite the rest.
    return series.where(~series.isin(rare_values), other_label)

# Walk over the object-dtype columns and group rare values, but only in
# columns that have more than 10 distinct values.
for col in df.select_dtypes(include=['object']).columns:
    if df[col].nunique() <= 10:
        # Low-cardinality column: leave untouched
        continue
    df[col] = replace_rare_categories(df[col])
    print('Replaced rare categories in column: ' + col)

In [None]:
# Split the DataFrame into features X and target y
# Assume the target variable is named 'target'. Modify if the target has a different name.
if 'target' in df.columns:
    y = df['target']
    X = df.drop(columns=['target'])
    print('Separated target from features.')
else:
    # If no column 'target', set target as the last column
    y = df.iloc[:, -1]
    X = df.iloc[:, :-1]
    print('No column named "target"; assumed the last column is the target.')

# One-hot encode categorical feature columns.
# The scaler and the network downstream need purely numeric input; object
# columns would make StandardScaler fail when converting strings to floats.
# Encoding is done before the split so that the training and testing frames
# share exactly the same set of indicator columns. Frames that are already
# fully numeric pass through unchanged.
X = pd.get_dummies(X, dtype=float)
print('Encoded categorical features; feature count: ' + str(X.shape[1]))

# Split data into training and testing datasets (80/20, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print('Data split into training and testing sets.')

In [None]:
# Standardize the features with StandardScaler.
# The scaler is fitted on the training split only; the fitted scaler is then
# applied to both splits so the test data never influences the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
print('Data scaling complete.')

In [None]:
# Build the neural network model as a stack of three dense layers
model = Sequential([
    # First hidden layer; also declares the input width (one unit per scaled feature)
    Dense(16, input_dim=X_train_scaled.shape[1], activation='relu'),
    # Second hidden layer
    Dense(8, activation='relu'),
    # Output layer: a single sigmoid unit for binary classification
    Dense(1, activation='sigmoid'),
])

print('Neural network model built:')
model.summary()

In [None]:
# Compile the model: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
print('Compiled the model.')

# Fit on the scaled training data; 20% of it is held out for validation.
# Adjust epochs and batch_size as needed.
history = model.fit(
    X_train_scaled,
    y_train,
    validation_split=0.2,
    batch_size=16,
    epochs=50,
    verbose=1,
)
print('Model training complete.')

In [None]:
# Score the trained model on the held-out test split (progress output suppressed)
loss, accuracy = model.evaluate(x=X_test_scaled, y=y_test, verbose=0)
print('Testing Loss:', loss)
print('Testing Accuracy:', accuracy)

# Persist the trained model to disk in HDF5 format
model.save('AlphabetSoupCharity.h5')
print('Model saved as AlphabetSoupCharity.h5')