## Preprocessing

In [1]:
import os
# Find the latest version of spark 3.x  from http://www.apache.org/dist/spark/ and enter as the spark version
# For example:
# spark_version = 'spark-3.4.3'
spark_version = 'spark-3.4.3'
os.environ['SPARK_VERSION']=spark_version

In [2]:
# Install Spark and Java
!apt-get update
!apt-get install openjdk-11-jdk-headless -qq > /dev/null
!wget -q http://www.apache.org/dist/spark/$SPARK_VERSION/$SPARK_VERSION-bin-hadoop3.tgz
!tar xf $SPARK_VERSION-bin-hadoop3.tgz
!pip install -q findspark

Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:3 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [119 kB]
Get:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Get:5 http://security.ubuntu.com/ubuntu jammy-security InRelease [110 kB]
Hit:6 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:7 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Get:10 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [871 kB]
Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:12 http://archive.ubuntu.com/ubuntu jammy-updates/restricted amd64 Packages [2,468 kB]
Get:13 http://archive.ubuntu.com/ub

In [3]:
# Set Environment Variables
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-11-openjdk-amd64"
os.environ["SPARK_HOME"] = f"/content/{spark_version}-bin-hadoop3"

In [4]:
# Start a SparkSession
import findspark
findspark.init()

In [5]:
# Start Spark session
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("Charity Data").getOrCreate()

In [6]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import pandas as pd
import tensorflow as tf

#  Import and read the charity_data.csv.
import pandas as pd
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")
application_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [7]:
# Drop the non-beneficial ID columns, 'EIN' and 'NAME'.
#  YOUR CODE GOES HERE
application_df.drop(columns=['EIN', 'NAME'], inplace=True)


In [8]:
# Determine the number of unique values in each column.
unique_value_counts = application_df.nunique()

# Print the number of unique values in each column
print(unique_value_counts)

APPLICATION_TYPE            17
AFFILIATION                  6
CLASSIFICATION              71
USE_CASE                     5
ORGANIZATION                 4
STATUS                       2
INCOME_AMT                   9
SPECIAL_CONSIDERATIONS       2
ASK_AMT                   8747
IS_SUCCESSFUL                2
dtype: int64


In [9]:
# Look at APPLICATION_TYPE value counts to identify and replace with "Other"
app_type_value_counts = application_df['APPLICATION_TYPE'].value_counts()
print(app_type_value_counts)

APPLICATION_TYPE
T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T25        3
T14        3
T29        2
T15        2
T17        1
Name: count, dtype: int64


In [10]:
# Choose a cutoff value and create a list of application types to be replaced
# use the variable name `application_types_to_replace`
# Identify values to replace with 'Other' (e.g., values that occur less frequently)
#Identify values to replace with 'Other' in "APPLICATION_TYPE" column
replace_values = app_type_value_counts[app_type_value_counts < 500].index

# Replace identified values with 'Other'
#Set inplace=True to have replacement done within application_df
application_df['APPLICATION_TYPE'].replace(replace_values, 'Other', inplace=True)

# Loop through dataframe to replace specific values that may not have been captured by .replace statement
for app in replace_values:
    application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(app,"Other")

# Check to make sure replacement was successful
application_df['APPLICATION_TYPE'].value_counts()

APPLICATION_TYPE
T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
Other      276
Name: count, dtype: int64

In [11]:
# Look at CLASSIFICATION value counts to identify and replace with "Other"
classification_value_counts = application_df['CLASSIFICATION'].value_counts()
print(classification_value_counts)

CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
         ...  
C4120        1
C8210        1
C2561        1
C4500        1
C2150        1
Name: count, Length: 71, dtype: int64


In [12]:
# You may find it helpful to look at CLASSIFICATION value counts >1
#  YOUR CODE GOES HERE
bottom_classification = classification_value_counts[(classification_value_counts > 1) & (classification_value_counts < 1000)]
print(bottom_classification)


CLASSIFICATION
C7000    777
C1700    287
C4000    194
C5000    116
C1270    114
C2700    104
C2800     95
C7100     75
C1300     58
C1280     50
C1230     36
C1400     34
C7200     32
C2300     32
C1240     30
C8000     20
C7120     18
C1500     16
C1800     15
C6000     15
C1250     14
C8200     11
C1238     10
C1278     10
C1235      9
C1237      9
C7210      7
C2400      6
C1720      6
C4100      6
C1257      5
C1600      5
C1260      3
C2710      3
C0         3
C3200      2
C1234      2
C1246      2
C1267      2
C1256      2
Name: count, dtype: int64


In [13]:
# Choose a cutoff value and create a list of classifications to be replaced

# use the variable name `classifications_to_replace`
classifications_to_replace = classification_value_counts[classification_value_counts < 777].index

# Replace in dataframe
for cls in classifications_to_replace:
    application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(cls,"Other")

# Check to make sure replacement was successful
application_df['CLASSIFICATION'].value_counts()

CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
Other     1484
C7000      777
Name: count, dtype: int64

In [14]:
column_list = application_df.columns
print(column_list)

Index(['APPLICATION_TYPE', 'AFFILIATION', 'CLASSIFICATION', 'USE_CASE',
       'ORGANIZATION', 'STATUS', 'INCOME_AMT', 'SPECIAL_CONSIDERATIONS',
       'ASK_AMT', 'IS_SUCCESSFUL'],
      dtype='object')


In [None]:
application_df.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [15]:
#Field names: APPLICATION_TYPE	AFFILIATION	CLASSIFICATION	USE_CASE

application_type_value_counts = application_df['APPLICATION_TYPE'].value_counts()
print(application_type_value_counts)

affiliation_value_counts = application_df['AFFILIATION'].value_counts()
print(affiliation_value_counts)

use_case_value_counts = application_df['USE_CASE'].value_counts()
print(use_case_value_counts)

APPLICATION_TYPE
T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
Other      276
Name: count, dtype: int64
AFFILIATION
Independent         18480
CompanySponsored    15705
Family/Parent          64
National               33
Regional               13
Other                   4
Name: count, dtype: int64
USE_CASE
Preservation     28095
ProductDev        5671
CommunityServ      384
Heathcare          146
Other                3
Name: count, dtype: int64


In [16]:
#Field names: ORGANIZATION	STATUS INCOME_AMT

organization_value_counts = application_df['ORGANIZATION'].value_counts()
print(organization_value_counts)

status_value_counts = application_df['STATUS'].value_counts()
print(status_value_counts)

income_amount_value_counts = application_df['INCOME_AMT'].value_counts()
print(income_amount_value_counts)

ORGANIZATION
Trust           23515
Association     10255
Co-operative      486
Corporation        43
Name: count, dtype: int64
STATUS
1    34294
0        5
Name: count, dtype: int64
INCOME_AMT
0                24388
25000-99999       3747
100000-499999     3374
1M-5M              955
1-9999             728
10000-24999        543
10M-50M            240
5M-10M             185
50M+               139
Name: count, dtype: int64


In [17]:
#Field names:	SPECIAL_CONSIDERATIONS	IS_SUCCESSFUL
consideration_value_counts = application_df['SPECIAL_CONSIDERATIONS'].value_counts()
print(consideration_value_counts)

success_value_counts = application_df['IS_SUCCESSFUL'].value_counts()
print(success_value_counts)

SPECIAL_CONSIDERATIONS
N    34272
Y       27
Name: count, dtype: int64
IS_SUCCESSFUL
1    18261
0    16038
Name: count, dtype: int64


In [18]:
# Make copy of original DataFrame
application_df_copy = application_df.copy()

#Field names: APPLICATION_TYPE	AFFILIATION	CLASSIFICATION	USE_CASE	ORGANIZATION INCOME_AMT	SPECIAL_CONSIDERATIONS
#Set'columns_to_encode' as list of columns containing categorical data
columns_to_encode = ['APPLICATION_TYPE','AFFILIATION','CLASSIFICATION', 'USE_CASE', 'ORGANIZATION', 'INCOME_AMT','SPECIAL_CONSIDERATIONS']  # Specify the columns to encode

# Apply pd.get_dummies to encode categorical columns
application_df_encoded = pd.get_dummies(application_df, columns=columns_to_encode)

# can customize column names of generated dummy variables
# For example, adding a prefix to the column names
application_df_encoded = pd.get_dummies(application_df, columns=columns_to_encode, prefix=columns_to_encode)

# Display DataFrame with encoded categorical data
print(application_df_encoded.head())

   STATUS  ASK_AMT  IS_SUCCESSFUL  APPLICATION_TYPE_Other  \
0       1     5000              1                   False   
1       1   108590              1                   False   
2       1     5000              0                   False   
3       1     6692              1                   False   
4       1   142590              1                   False   

   APPLICATION_TYPE_T10  APPLICATION_TYPE_T19  APPLICATION_TYPE_T3  \
0                  True                 False                False   
1                 False                 False                 True   
2                 False                 False                False   
3                 False                 False                 True   
4                 False                 False                 True   

   APPLICATION_TYPE_T4  APPLICATION_TYPE_T5  APPLICATION_TYPE_T6  ...  \
0                False                False                False  ...   
1                False                False                False  

In [19]:
# Skip this block. Already done above.
# Convert categorical data to numeric with `pd.get_dummies`
# Assuming 'df' is your DataFrame and 'column_to_encode' is the column containing categorical data
#column_to_encode = 'Category'  # Specify the column to encode

# Apply pd.get_dummies to encode the categorical column
#df_encoded = pd.get_dummies(application_df, columns=[column_to_encode])

# Optionally, you can customize the column names of the generated dummy variables
# For example, adding a prefix to the column names
#df_encoded = pd.get_dummies(df, columns=[column_to_encode], prefix=column_to_encode)

# Display the DataFrame with the encoded categorical data
#print(df_encoded.head())

In [20]:
column_list = application_df_encoded.columns
print(column_list)

Index(['STATUS', 'ASK_AMT', 'IS_SUCCESSFUL', 'APPLICATION_TYPE_Other',
       'APPLICATION_TYPE_T10', 'APPLICATION_TYPE_T19', 'APPLICATION_TYPE_T3',
       'APPLICATION_TYPE_T4', 'APPLICATION_TYPE_T5', 'APPLICATION_TYPE_T6',
       'APPLICATION_TYPE_T7', 'APPLICATION_TYPE_T8',
       'AFFILIATION_CompanySponsored', 'AFFILIATION_Family/Parent',
       'AFFILIATION_Independent', 'AFFILIATION_National', 'AFFILIATION_Other',
       'AFFILIATION_Regional', 'CLASSIFICATION_C1000', 'CLASSIFICATION_C1200',
       'CLASSIFICATION_C2000', 'CLASSIFICATION_C2100', 'CLASSIFICATION_C3000',
       'CLASSIFICATION_C7000', 'CLASSIFICATION_Other',
       'USE_CASE_CommunityServ', 'USE_CASE_Heathcare', 'USE_CASE_Other',
       'USE_CASE_Preservation', 'USE_CASE_ProductDev',
       'ORGANIZATION_Association', 'ORGANIZATION_Co-operative',
       'ORGANIZATION_Corporation', 'ORGANIZATION_Trust', 'INCOME_AMT_0',
       'INCOME_AMT_1-9999', 'INCOME_AMT_10000-24999',
       'INCOME_AMT_100000-499999', 'INCOME_

In [21]:
# Convert True and False to 1 and 0
# Specify columns to convert

columns_to_convert = ['APPLICATION_TYPE_Other', 'APPLICATION_TYPE_T10',
       'APPLICATION_TYPE_T19', 'APPLICATION_TYPE_T3', 'APPLICATION_TYPE_T4',
       'APPLICATION_TYPE_T5', 'APPLICATION_TYPE_T6', 'APPLICATION_TYPE_T7',
       'APPLICATION_TYPE_T8', 'AFFILIATION_CompanySponsored',
       'AFFILIATION_Family/Parent', 'AFFILIATION_Independent',
       'AFFILIATION_National', 'AFFILIATION_Other', 'AFFILIATION_Regional',
       'CLASSIFICATION_C1000', 'CLASSIFICATION_C1200', 'CLASSIFICATION_C2000',
       'CLASSIFICATION_C2100', 'CLASSIFICATION_C3000', 'CLASSIFICATION_C7000',
       'CLASSIFICATION_Other', 'USE_CASE_CommunityServ', 'USE_CASE_Heathcare',
       'USE_CASE_Other', 'USE_CASE_Preservation', 'USE_CASE_ProductDev',
       'ORGANIZATION_Association', 'ORGANIZATION_Co-operative',
       'ORGANIZATION_Corporation', 'ORGANIZATION_Trust', 'INCOME_AMT_0',
       'INCOME_AMT_1-9999', 'INCOME_AMT_10000-24999',
       'INCOME_AMT_100000-499999', 'INCOME_AMT_10M-50M', 'INCOME_AMT_1M-5M',
       'INCOME_AMT_25000-99999', 'INCOME_AMT_50M+', 'INCOME_AMT_5M-10M',
       'SPECIAL_CONSIDERATIONS_N', 'SPECIAL_CONSIDERATIONS_Y']

# Convert specified columns from boolean to integer
application_df_encoded[columns_to_convert] = application_df_encoded[columns_to_convert].astype(int)

print(application_df_encoded)

       STATUS   ASK_AMT  IS_SUCCESSFUL  APPLICATION_TYPE_Other  \
0           1      5000              1                       0   
1           1    108590              1                       0   
2           1      5000              0                       0   
3           1      6692              1                       0   
4           1    142590              1                       0   
...       ...       ...            ...                     ...   
34294       1      5000              0                       0   
34295       1      5000              0                       0   
34296       1      5000              0                       0   
34297       1      5000              1                       0   
34298       1  36500179              0                       0   

       APPLICATION_TYPE_T10  APPLICATION_TYPE_T19  APPLICATION_TYPE_T3  \
0                         1                     0                    0   
1                         0                     0          

In [22]:
column_types = application_df_encoded.dtypes
print(column_types)

STATUS                          int64
ASK_AMT                         int64
IS_SUCCESSFUL                   int64
APPLICATION_TYPE_Other          int64
APPLICATION_TYPE_T10            int64
APPLICATION_TYPE_T19            int64
APPLICATION_TYPE_T3             int64
APPLICATION_TYPE_T4             int64
APPLICATION_TYPE_T5             int64
APPLICATION_TYPE_T6             int64
APPLICATION_TYPE_T7             int64
APPLICATION_TYPE_T8             int64
AFFILIATION_CompanySponsored    int64
AFFILIATION_Family/Parent       int64
AFFILIATION_Independent         int64
AFFILIATION_National            int64
AFFILIATION_Other               int64
AFFILIATION_Regional            int64
CLASSIFICATION_C1000            int64
CLASSIFICATION_C1200            int64
CLASSIFICATION_C2000            int64
CLASSIFICATION_C2100            int64
CLASSIFICATION_C3000            int64
CLASSIFICATION_C7000            int64
CLASSIFICATION_Other            int64
USE_CASE_CommunityServ          int64
USE_CASE_Hea

In [23]:
# Split our preprocessed data into our features and target arrays
# Identify target variable
#target_variable = 'IS_SUCCESSFUL'

# Create target array (y)
#y = application_df_encoded[target_variable].values

# Create features array (X) by dropping target variable column
#X = application_df_encoded.drop(columns=[target_variable]).values

# Split the preprocessed data into a training and testing dataset
#X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [24]:
#Scale only columns with continuous values

# Identify columns to scale
columns_to_scale = ['ASK_AMT']

# Extract values of columns to scale
data_to_scale = application_df_encoded[columns_to_scale].values

In [25]:
#Scale extracted data, then replace original values with scaled values in DataFrame

# Initialize StandardScaler
scaler = StandardScaler()

# Fit and transform selected columns
scaled_data = scaler.fit_transform(data_to_scale)

# Replace original values with scaled values in application_df_encoded DataFrame
application_df_encoded[columns_to_scale] = scaled_data

In [26]:
# Check data types of all columns in DataFrames
column_types = application_df_encoded.dtypes
print(column_types)

STATUS                            int64
ASK_AMT                         float64
IS_SUCCESSFUL                     int64
APPLICATION_TYPE_Other            int64
APPLICATION_TYPE_T10              int64
APPLICATION_TYPE_T19              int64
APPLICATION_TYPE_T3               int64
APPLICATION_TYPE_T4               int64
APPLICATION_TYPE_T5               int64
APPLICATION_TYPE_T6               int64
APPLICATION_TYPE_T7               int64
APPLICATION_TYPE_T8               int64
AFFILIATION_CompanySponsored      int64
AFFILIATION_Family/Parent         int64
AFFILIATION_Independent           int64
AFFILIATION_National              int64
AFFILIATION_Other                 int64
AFFILIATION_Regional              int64
CLASSIFICATION_C1000              int64
CLASSIFICATION_C1200              int64
CLASSIFICATION_C2000              int64
CLASSIFICATION_C2100              int64
CLASSIFICATION_C3000              int64
CLASSIFICATION_C7000              int64
CLASSIFICATION_Other              int64


In [27]:
# Split preprocessed data into features and target arrays
# Identify target variable
target_variable = 'IS_SUCCESSFUL'

# Create target array (y)
y = application_df_encoded[target_variable].values

# Create features array (X) by dropping target variable column
X = application_df_encoded.drop(columns=[target_variable]).values

In [28]:
# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [29]:
#Create scaler instance, fit scaler, scale data
# Colab keeps giving NameError: name 'sklearn' is not defined and 'skl' is not defined,
#so import again
#from sklearn.model_selection import train_test_split
#from sklearn.preprocessing import StandardScaler

# Create scaler instance, fit scaler, scale data
X_scaler = StandardScaler()
X_scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Compile, Train and Evaluate the Model

In [37]:
#Compile, fit, and train model with callback to save model's weights every five epochs

import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np

# Create Keras Sequential model
nn_model = tf.keras.models.Sequential()

# Define model
#Original model
#nn_model.add(tf.keras.layers.Dense(units=6, activation="relu", input_dim=44))
#nn_model.add(tf.keras.layers.Dense(units=6, activation="relu"))
#nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

#Optimization Attempt 1: Add more neurons to a hidden layer
#nn_model.add(tf.keras.layers.Dense(units=12, activation="relu", input_dim=44))
#nn_model.add(tf.keras.layers.Dense(units=12, activation="relu"))
#nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

#Optimization Attempt 2: Add more hidden layers
nn_model.add(tf.keras.layers.Dense(units=12, activation="relu", input_dim=44))
nn_model.add(tf.keras.layers.Dense(units=12, activation="relu"))
nn_model.add(tf.keras.layers.Dense(units=12, activation="relu"))
nn_model.add(tf.keras.layers.Dense(units=12, activation="relu"))
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

#Optimization Attempt 3: Add number of epochs to training regimen
#Scroll to bottom of cell

# Check structure of Sequential model
nn_model.summary()

# Compile model with accuracy metric
nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Define callback to save model's weights every five epochs
checkpoint_path = "model_weights.h5"
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    save_freq=5 * len(X_train),)  # save every 5 epochs (converted to number of samples)

#Optimization attempt 3: Add number of epochs to training regimen
# Define early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)

# Add the early stopping callback to the callbacks list
callbacks_list = [checkpoint_callback, early_stopping]

# Train model
history = nn_model.fit(
    X_train, y_train,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=callbacks_list)

# Access and print details from callbacks after training
print("Model weights saved at:", checkpoint_callback.filepath)
print("Early stopping was triggered:", early_stopping.stopped_epoch)

# Access callbacks from model history (note: not best practice)
# demonstrates that callbacks are referenced in model's history
print("Early stopping was triggered at epoch:", early_stopping.stopped_epoch)


Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_18 (Dense)            (None, 12)                540       
                                                                 
 dense_19 (Dense)            (None, 12)                156       
                                                                 
 dense_20 (Dense)            (None, 12)                156       
                                                                 
 dense_21 (Dense)            (None, 12)                156       
                                                                 
 dense_22 (Dense)            (None, 1)                 13        
                                                                 
Total params: 1021 (3.99 KB)
Trainable params: 1021 (3.99 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/100
Epoch 2/10

In [36]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 1s - loss: 1.5297 - accuracy: 0.6106 - 964ms/epoch - 4ms/step
Loss: 1.529732346534729, Accuracy: 0.6106122732162476


In [None]:
# Export our model to HDF5 file
#import h5py

# Specify filename
#filename = "results.h5"

# Extract values from history object
#history_dict = {
#    'loss': history.history['loss'],
#    'accuracy': history.history['accuracy'],
#    'val_loss': history.history['val_loss'],
#    'val_accuracy': history.history['val_accuracy']}

# Create new HDF5 file
#with h5py.File(filename, 'w') as f:
    # Save each dataset in the history dictionary
#    for key, value in history_dict.items():
#        f.create_dataset(key, data=value)

In [None]:
#print("HDF5 file exported successfully:", filename)

HDF5 file exported successfully: results.h5
Mounted at /content/drive
