# Neural Network Model


In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf
import pandas as pd

# Load the b1 (non-encoded) CSV; the file's first column becomes the index.
df = pd.read_csv(
    filepath_or_buffer="../ML_Data_&_Preprocessing/b1_df_nonEncoded.csv",
    index_col=[0],
)
# Preview the first five rows.
df.head(5)


Unnamed: 0,state,spend,Impressions,leads,link_clicks,reach,Agency Tiers
0,TN,8.96,492,0,0,483,
1,GA,15.92,797,0,3,773,
2,MO,11.16,836,1,2,798,
3,RI,8.8,759,0,2,729,
4,CA,5.83,500,0,0,476,


In [2]:
# Class balance of the target column: number of rows per distinct `leads` value.
df.loc[:, 'leads'].value_counts()

0    57153
1    45676
Name: leads, dtype: int64

In [3]:
# Row count per state, used to pick which state to test the model on.
df.loc[:, 'state'].value_counts()

TX    12200
CA     9216
OH     7480
TN     5689
FL     4675
CO     4518
IL     4425
PA     4360
MO     4318
NJ     3974
NC     3823
MI     3791
AZ     3660
GA     2751
MN     2409
WA     2303
WI     2004
NY     1978
OR     1800
MA     1737
LA     1603
CT     1491
IN     1310
KY     1242
VA     1148
KS     1098
IA      927
RI      908
MS      873
ME      747
NM      707
OK      613
SC      565
UT      451
ID      392
ND      387
AR      368
NV      338
AL      290
NE      260
Name: state, dtype: int64

In [84]:
# Keep only the rows whose state is 'LA' (boolean-mask selection).
df2 = df[df['state'].eq('LA')]
df2

Unnamed: 0,state,spend,Impressions,leads,link_clicks,reach,Agency Tiers
73,LA,0.00,0,0,0,0,
81,LA,18.14,1013,0,7,931,
127,LA,12.10,1225,1,5,1123,
199,LA,12.13,543,0,7,511,
200,LA,8.08,706,0,3,664,
...,...,...,...,...,...,...,...
41989,LA,7.81,695,0,1,656,
41997,LA,7.18,564,0,4,545,
41998,LA,6.90,533,0,1,513,
42008,LA,3.29,284,1,1,281,


In [85]:
# Print a summary of the state subset: index range, per-column non-null
# counts, dtypes and memory usage.
df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1603 entries, 73 to 42009
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   state         1603 non-null   object 
 1   spend         1603 non-null   float64
 2   Impressions   1603 non-null   int64  
 3   leads         1603 non-null   int64  
 4   link_clicks   1603 non-null   int64  
 5   reach         1603 non-null   int64  
 6   Agency Tiers  0 non-null      float64
dtypes: float64(2), int64(4), object(1)
memory usage: 100.2+ KB


In [86]:
# Drop the columns the model does not use, selecting them by name rather than
# by position so the cell keeps working if the CSV's column order changes:
#   - 'state'        : constant within this single-state subset
#   - 'Agency Tiers' : holds no non-null values in this subset
df3 = df2.drop(columns=['state', 'Agency Tiers'])

In [87]:
# Confirm that only the numeric columns remain after the drop.
df3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1603 entries, 73 to 42009
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   spend        1603 non-null   float64
 1   Impressions  1603 non-null   int64  
 2   leads        1603 non-null   int64  
 3   link_clicks  1603 non-null   int64  
 4   reach        1603 non-null   int64  
dtypes: float64(1), int64(4)
memory usage: 75.1 KB


# Split into train and test features

In [88]:
# Separate the preprocessed data into the target array (leads) and the
# feature array (every remaining column).
y = df3['leads'].values
# Use the `columns=` keyword: passing the axis positionally (`drop([...], 1)`)
# is deprecated and is rejected by pandas 2.x.
X = df3.drop(columns=['leads']).values

# Split into training and testing sets; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

  This is separate from the ipykernel package so we can avoid doing imports until


In [89]:
# Create the StandardScaler instance.
scaler = StandardScaler()

# Fit on the training features only; `fit` returns the fitted scaler itself.
X_scaler = scaler.fit(X_train)

# Apply the training-set scaling to both the training and the testing features.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

# Compile, Train, and Evaluate Model 1 (Neural Network)

In [90]:
# Define the model: a feed-forward network (multi-layer perceptron) with
# three hidden Dense layers followed by a single-unit output layer.

# Seed TensorFlow's global random generator so that repeated runs start from
# the same random state (same value as the train/test split's random_state).
# NOTE(review): bit-for-bit reproducibility may additionally depend on the
# hardware / op determinism settings.
tf.random.set_seed(78)

# Number of input features = number of columns in one training row.
numInputFeatures = len(X_train[0])

# Units in the first hidden layer (this is NOT the input layer; the input
# width is given by numInputFeatures).
layer1 = 30
# Units in the second hidden layer.
layer2 = 15
# Units in the third hidden layer.
layer3= 2

nn = tf.keras.models.Sequential()

# First hidden layer (also declares the input width)
nn.add(
    tf.keras.layers.Dense(units=layer1, 
                          input_dim=numInputFeatures, 
                          activation="hard_sigmoid")
)
# Second hidden layer
nn.add(tf.keras.layers.Dense(units=layer2, 
                             activation="elu"))

# Third hidden layer, added in an attempt to increase accuracy
nn.add(tf.keras.layers.Dense(units=layer3, 
                             activation='elu'))


# Output layer: one unit for the binary target.
# NOTE(review): hard_sigmoid is a piecewise-linear approximation of sigmoid;
# confirm it is preferred here over the plain "sigmoid" activation.
nn.add(tf.keras.layers.Dense(units=1, activation="hard_sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_28 (Dense)            (None, 30)                150       
                                                                 
 dense_29 (Dense)            (None, 15)                465       
                                                                 
 dense_30 (Dense)            (None, 2)                 32        
                                                                 
 dense_31 (Dense)            (None, 1)                 3         
                                                                 
Total params: 650
Trainable params: 650
Non-trainable params: 0
_________________________________________________________________


In [91]:
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Configure training: binary cross-entropy loss, Adam optimizer, and accuracy
# reported as the metric.
nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Make sure the checkpoint directory exists, then define the per-epoch
# file-name pattern (the epoch number is zero-padded to two digits).
os.makedirs("checkpoints/", exist_ok=True)
checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

# Callback that writes the model's weights at the end of every epoch.
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,   # where each checkpoint is written
    save_freq='epoch',          # save once per epoch
    save_weights_only=True,     # store the weights only, not the full model
    verbose=1,                  # print a message each time a checkpoint is saved
)

In [92]:
# Fit the network on the scaled training data for 100 epochs; the checkpoint
# callback runs during training.
fit_model = nn.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=100,
    callbacks=[cp_callback],
)

Epoch 1/100
 1/38 [..............................] - ETA: 10s - loss: 0.7717 - accuracy: 0.4375
Epoch 1: saving model to checkpoints\weights.01.hdf5
Epoch 2/100
 1/38 [..............................] - ETA: 0s - loss: 0.6623 - accuracy: 0.5625
Epoch 2: saving model to checkpoints\weights.02.hdf5
Epoch 3/100
 1/38 [..............................] - ETA: 0s - loss: 0.7046 - accuracy: 0.5312
Epoch 3: saving model to checkpoints\weights.03.hdf5
Epoch 4/100
 1/38 [..............................] - ETA: 0s - loss: 0.4753 - accuracy: 0.8750
Epoch 4: saving model to checkpoints\weights.04.hdf5
Epoch 5/100
 1/38 [..............................] - ETA: 0s - loss: 0.4929 - accuracy: 0.7500
Epoch 5: saving model to checkpoints\weights.05.hdf5
Epoch 6/100
 1/38 [..............................] - ETA: 0s - loss: 0.5132 - accuracy: 0.6562
Epoch 6: saving model to checkpoints\weights.06.hdf5
Epoch 7/100
 1/38 [..............................] - ETA: 0s - loss: 0.3902 - accuracy: 0.7812
Epoch 7: saving 

Epoch 36/100
 1/38 [..............................] - ETA: 0s - loss: 0.4703 - accuracy: 0.6250
Epoch 36: saving model to checkpoints\weights.36.hdf5
Epoch 37/100
 1/38 [..............................] - ETA: 0s - loss: 0.4491 - accuracy: 0.7812
Epoch 37: saving model to checkpoints\weights.37.hdf5
Epoch 38/100
 1/38 [..............................] - ETA: 0s - loss: 0.4593 - accuracy: 0.7188
Epoch 38: saving model to checkpoints\weights.38.hdf5
Epoch 39/100
 1/38 [..............................] - ETA: 0s - loss: 0.4461 - accuracy: 0.7188
Epoch 39: saving model to checkpoints\weights.39.hdf5
Epoch 40/100
 1/38 [..............................] - ETA: 0s - loss: 0.3520 - accuracy: 0.8125
Epoch 40: saving model to checkpoints\weights.40.hdf5
Epoch 41/100
 1/38 [..............................] - ETA: 0s - loss: 0.2973 - accuracy: 0.8750
Epoch 41: saving model to checkpoints\weights.41.hdf5
Epoch 42/100
 1/38 [..............................] - ETA: 0s - loss: 0.4443 - accuracy: 0.7500
Epoc

Epoch 70/100
 1/38 [..............................] - ETA: 0s - loss: 0.3954 - accuracy: 0.8125
Epoch 70: saving model to checkpoints\weights.70.hdf5
Epoch 71/100
 1/38 [..............................] - ETA: 0s - loss: 0.4092 - accuracy: 0.7500
Epoch 71: saving model to checkpoints\weights.71.hdf5
Epoch 72/100
 1/38 [..............................] - ETA: 0s - loss: 0.3888 - accuracy: 0.7812
Epoch 72: saving model to checkpoints\weights.72.hdf5
Epoch 73/100
 1/38 [..............................] - ETA: 0s - loss: 0.3617 - accuracy: 0.8438
Epoch 73: saving model to checkpoints\weights.73.hdf5
Epoch 74/100
 1/38 [..............................] - ETA: 0s - loss: 0.4151 - accuracy: 0.8438
Epoch 74: saving model to checkpoints\weights.74.hdf5
Epoch 75/100
 1/38 [..............................] - ETA: 0s - loss: 0.5970 - accuracy: 0.6250
Epoch 75: saving model to checkpoints\weights.75.hdf5
Epoch 76/100
 1/38 [..............................] - ETA: 0s - loss: 0.3666 - accuracy: 0.8438
Epoc

In [93]:
# Score the trained network on the scaled test data and report loss/accuracy.
model_loss, model_accuracy = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

# Accuracy noted from earlier runs:
#   all states : brand 1 = 36%, brand 2 = 36%, brand 3 = 36%
#   CA         : B1 = 78%, B2 = 80%, B3 = 77%
#   B3 only    : PA = 100%, TX = 76%



13/13 - 0s - loss: 0.4908 - accuracy: 0.7157 - 108ms/epoch - 8ms/step
Loss: 0.49083954095840454, Accuracy: 0.7157106995582581


### B1 Results

In [94]:
# B1 results, written one record per state as (state, predictive accuracy, tier)
# and then transposed into a column-oriented dict of lists.
data_results_b1 = {
    column: list(values)
    for column, values in zip(
        ('State', 'Predictive Accuracy', 'Tier'),
        zip(
            ('CA', 0.78, 'T1'),
            ('TX', 0.74, 'T1'),
            ('NE', 0.89, 'T4'),
            ('AL', 0.84, 'T4'),
            ('NJ', 0.74, 'T3'),
            ('NC', 0.74, 'T3'),
            ('OR', 0.87, 'T2'),
            ('LA', 0.71, 'T2'),
        ),
    )
}
data_results_b1

{'State': ['CA', 'TX', 'NE', 'AL', 'NJ', 'NC', 'OR', 'LA'],
 'Predictive Accuracy': [0.78, 0.74, 0.89, 0.84, 0.74, 0.74, 0.87, 0.71],
 'Tier': ['T1', 'T1', 'T4', 'T4', 'T3', 'T3', 'T2', 'T2']}

In [95]:
# create the new df to display brand, state, and model accuracy 
df_results_b1 = pd.DataFrame(data_results_b1)
df_results_b1

Unnamed: 0,State,Predictive Accuracy,Tier
0,CA,0.78,T1
1,TX,0.74,T1
2,NE,0.89,T4
3,AL,0.84,T4
4,NJ,0.74,T3
5,NC,0.74,T3
6,OR,0.87,T2
7,LA,0.71,T2


In [None]:
# Model export is currently disabled (the lines below are commented out).
# NOTE(review): the file name below does not appear to relate to this
# notebook's data — confirm the intended name before re-enabling.
# # Export the model to HDF5 file
# nn.save("AlphabetSoupCharity_optimization.h5")