# Neural Network Model


In [3]:
# Import our dependencies
import os

import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from tensorflow.keras.callbacks import ModelCheckpoint

# Load the source CSV; its first column is used as the row index.
df = pd.read_csv(
    "../ML_Data_&_Preprocessing/b3_df_nonEncoded.csv",
    index_col=[0],
)
df.head()



Unnamed: 0,state,spend,Impressions,leads,link_clicks,reach,Agency Tiers
0,IL,47.33,1780,0,7,1689,4.0
1,NY,63.65,1857,1,10,1737,4.0
2,OK,32.53,1718,0,7,1527,4.0
3,SC,32.31,1725,1,14,1645,4.0
4,CA,101.13,3745,1,28,3513,4.0


In [4]:
# Class balance of the target column.
df["leads"].value_counts()

1    21698
0    12272
Name: leads, dtype: int64

In [5]:
# Number of rows available per state, to see which states can be tested.
df["state"].value_counts()

CA    7685
TX    4254
VA    3239
CO    2386
FL    1758
MI    1655
GA    1620
NY    1486
NC    1050
TN     970
WA     960
NJ     919
CT     893
OH     842
MO     777
MN     628
IL     605
MA     454
SC     365
OK     365
MD     357
DC     250
SD     246
NE     173
PA      33
Name: state, dtype: int64

In [6]:
# Keep only the rows for the state being modelled in this run ('MN').
df2 = df[df["state"].eq("MN")]
df2

Unnamed: 0,state,spend,Impressions,leads,link_clicks,reach,Agency Tiers
15298,MN,34.31,1297,1,10,1222,
15303,MN,0.00,0,0,0,0,
15305,MN,11.52,332,0,1,311,
15352,MN,8.68,416,0,7,402,
15359,MN,0.00,0,0,0,0,
...,...,...,...,...,...,...,...
32933,MN,34.18,1399,1,16,1220,
32978,MN,35.09,1734,1,19,1569,
33009,MN,34.70,1489,1,13,1402,
33062,MN,35.18,1867,1,24,1702,


In [7]:
# Column dtypes and non-null counts for the single-state subset.
df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 628 entries, 15298 to 33072
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   state         628 non-null    object 
 1   spend         628 non-null    float64
 2   Impressions   628 non-null    int64  
 3   leads         628 non-null    int64  
 4   link_clicks   628 non-null    int64  
 5   reach         628 non-null    int64  
 6   Agency Tiers  0 non-null      float64
dtypes: float64(2), int64(4), object(1)
memory usage: 39.2+ KB


In [8]:
# Drop the non-feature columns by name rather than by position, so the cell
# keeps dropping the right columns even if the CSV's column order changes.
# 'state' is constant after filtering, and 'Agency Tiers' has no non-null
# values in the subset inspected above.
df3 = df2.drop(columns=["state", "Agency Tiers"])

In [9]:
# Confirm that only the numeric feature/target columns remain.
df3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 628 entries, 15298 to 33072
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   spend        628 non-null    float64
 1   Impressions  628 non-null    int64  
 2   leads        628 non-null    int64  
 3   link_clicks  628 non-null    int64  
 4   reach        628 non-null    int64  
dtypes: float64(1), int64(4)
memory usage: 29.4 KB


# Split into train and test features

In [10]:
# Separate the target ('leads') from the feature columns.
y = df3["leads"].values
# Name the columns to drop by keyword: passing `axis` positionally
# (`drop(['leads'], 1)`) is deprecated and is rejected by pandas >= 2.0.
X = df3.drop(columns=["leads"]).values

# Split into training and testing sets (default split size); the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

  This is separate from the ipykernel package so we can avoid doing imports until


In [11]:
# Standardise the features: the scaler is fitted on the training split only,
# then the same fitted transformation is applied to both splits.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

# Compile, Train, Evaluate our Model 1 - NN

In [12]:
# Define the model: a small fully connected network (multi-layer perceptron)
# with three hidden layers followed by a single-unit output layer.

# Seed Python, NumPy and TensorFlow before any layer is created so that weight
# initialisation and batch shuffling -- and therefore the reported accuracy --
# are repeatable from run to run. The value matches the train/test split seed.
tf.keras.utils.set_random_seed(78)

# Number of input features = number of columns in the training matrix.
numInputFeatures = X_train.shape[1]

# Number of units in each hidden layer.
layer1 = 30
layer2 = 15
layer3 = 2

nn = tf.keras.models.Sequential()

# First hidden layer; it also declares how many input features to expect.
nn.add(
    tf.keras.layers.Dense(units=layer1,
                          input_dim=numInputFeatures,
                          activation="hard_sigmoid")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=layer2,
                             activation="elu"))

# Third hidden layer, added in an attempt to increase accuracy
nn.add(tf.keras.layers.Dense(units=layer3,
                             activation='elu'))

# Output layer: one unit producing the predicted probability of a lead.
# NOTE(review): "hard_sigmoid" can saturate at exactly 0 or 1; plain "sigmoid"
# is the usual pairing with binary cross-entropy -- confirm this is intentional.
nn.add(tf.keras.layers.Dense(units=1, activation="hard_sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 30)                150       
                                                                 
 dense_1 (Dense)             (None, 15)                465       
                                                                 
 dense_2 (Dense)             (None, 2)                 32        
                                                                 
 dense_3 (Dense)             (None, 1)                 3         
                                                                 
Total params: 650
Trainable params: 650
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Compile the model for binary classification.
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Checkpoint directory and filename pattern; the epoch number is substituted
# into the filename. `os` and `ModelCheckpoint` come from the notebook's
# import cell at the top.
os.makedirs("checkpoints/",exist_ok=True)
checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

# Callback that saves the model's weights at the end of every epoch.
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,               # print a message each time a checkpoint is written
    save_weights_only=True,  # store weights only, not the full model
    save_freq='epoch',
)

In [14]:
# Fit on the scaled training split for 100 epochs, checkpointing after each one.
fit_model = nn.fit(
    X_train_scaled,
    y_train,
    epochs=100,
    callbacks=[cp_callback],
)

Epoch 1/100
 1/15 [=>............................] - ETA: 5s - loss: 0.6932 - accuracy: 0.5625
Epoch 1: saving model to checkpoints\weights.01.hdf5
Epoch 2/100
 1/15 [=>............................] - ETA: 0s - loss: 0.6388 - accuracy: 0.7500
Epoch 2: saving model to checkpoints\weights.02.hdf5
Epoch 3/100
 1/15 [=>............................] - ETA: 0s - loss: 0.6908 - accuracy: 0.5000
Epoch 3: saving model to checkpoints\weights.03.hdf5
Epoch 4/100
 1/15 [=>............................] - ETA: 0s - loss: 0.6609 - accuracy: 0.6250
Epoch 4: saving model to checkpoints\weights.04.hdf5
Epoch 5/100
 1/15 [=>............................] - ETA: 0s - loss: 0.6739 - accuracy: 0.5312
Epoch 5: saving model to checkpoints\weights.05.hdf5
Epoch 6/100
 1/15 [=>............................] - ETA: 0s - loss: 0.6558 - accuracy: 0.6250
Epoch 6: saving model to checkpoints\weights.06.hdf5
Epoch 7/100
 1/15 [=>............................] - ETA: 0s - loss: 0.6548 - accuracy: 0.6562
Epoch 7: saving m

Epoch 36/100
 1/15 [=>............................] - ETA: 0s - loss: 0.5653 - accuracy: 0.6562
Epoch 36: saving model to checkpoints\weights.36.hdf5
Epoch 37/100
 1/15 [=>............................] - ETA: 0s - loss: 0.5314 - accuracy: 0.7500
Epoch 37: saving model to checkpoints\weights.37.hdf5
Epoch 38/100
 1/15 [=>............................] - ETA: 0s - loss: 0.4770 - accuracy: 0.7500
Epoch 38: saving model to checkpoints\weights.38.hdf5
Epoch 39/100
 1/15 [=>............................] - ETA: 0s - loss: 0.4497 - accuracy: 0.8125
Epoch 39: saving model to checkpoints\weights.39.hdf5
Epoch 40/100
 1/15 [=>............................] - ETA: 0s - loss: 0.5622 - accuracy: 0.6562
Epoch 40: saving model to checkpoints\weights.40.hdf5
Epoch 41/100
 1/15 [=>............................] - ETA: 0s - loss: 0.5115 - accuracy: 0.6875
Epoch 41: saving model to checkpoints\weights.41.hdf5
Epoch 42/100
 1/15 [=>............................] - ETA: 0s - loss: 0.4699 - accuracy: 0.8125
Epoc

Epoch 71/100
 1/15 [=>............................] - ETA: 0s - loss: 0.4714 - accuracy: 0.7500
Epoch 71: saving model to checkpoints\weights.71.hdf5
Epoch 72/100
 1/15 [=>............................] - ETA: 0s - loss: 0.6149 - accuracy: 0.5938
Epoch 72: saving model to checkpoints\weights.72.hdf5
Epoch 73/100
 1/15 [=>............................] - ETA: 0s - loss: 0.5456 - accuracy: 0.6250
Epoch 73: saving model to checkpoints\weights.73.hdf5
Epoch 74/100
 1/15 [=>............................] - ETA: 0s - loss: 0.6567 - accuracy: 0.5312
Epoch 74: saving model to checkpoints\weights.74.hdf5
Epoch 75/100
 1/15 [=>............................] - ETA: 0s - loss: 0.4460 - accuracy: 0.7500
Epoch 75: saving model to checkpoints\weights.75.hdf5
Epoch 76/100
 1/15 [=>............................] - ETA: 0s - loss: 0.4476 - accuracy: 0.6875
Epoch 76: saving model to checkpoints\weights.76.hdf5
Epoch 77/100
 1/15 [=>............................] - ETA: 0s - loss: 0.4962 - accuracy: 0.6875
Epoc

In [15]:
# Score the trained network on the scaled test split and report the metrics.
model_loss, model_accuracy = nn.evaluate(
    X_test_scaled,
    y_test,
    verbose=2,
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

# Accuracy noted from earlier runs, kept for reference:
#   all states -> brand 1 = 36%, brand 2 = 36%, brand 3 = 36%
#   CA         -> B1 = 78%, B2 = 80%, B3 = 77%
#   B3         -> PA = 100%, TX = 76%



5/5 - 0s - loss: 0.4598 - accuracy: 0.7707 - 115ms/epoch - 23ms/step
Loss: 0.45979663729667664, Accuracy: 0.7707006335258484


### B3 Results

In [16]:
# Per-state results, written one record per state and then transposed into
# the column-oriented dict (column name -> list of values).
data_results_b3 = {
    column: list(values)
    for column, values in zip(
        ("State", "Predictive Accuracy", "Tier"),
        zip(
            # (state, predictive accuracy, tier)
            ("PA", 1.0, "T1"),
            ("CA", 0.77, "T4"),
            ("TX", 0.76, "T4"),
            ("VA", 0.79, "T4"),
            ("NY", 0.84, "T3"),
            ("NC", 0.83, "T3"),
            ("SD", 0.95, "T1"),
            ("IL", 0.81, "T2"),
            ("DC", 0.84, "T1"),
            ("MA", 0.85, "T2"),
            ("TN", 0.75, "T3"),
            ("MN", 0.78, "T2"),
        ),
    )
}
data_results_b3

{'State': ['PA',
  'CA',
  'TX',
  'VA',
  'NY',
  'NC',
  'SD',
  'IL',
  'DC',
  'MA',
  'TN',
  'MN'],
 'Predictive Accuracy': [1.0,
  0.77,
  0.76,
  0.79,
  0.84,
  0.83,
  0.95,
  0.81,
  0.84,
  0.85,
  0.75,
  0.78],
 'Tier': ['T1',
  'T4',
  'T4',
  'T4',
  'T3',
  'T3',
  'T1',
  'T2',
  'T1',
  'T2',
  'T3',
  'T2']}

In [17]:
# Tabulate the results: one row per state with its predictive accuracy and tier.
df_results_b3 = pd.DataFrame(data=data_results_b3)
df_results_b3

Unnamed: 0,State,Predictive Accuracy,Tier
0,PA,1.0,T1
1,CA,0.77,T4
2,TX,0.76,T4
3,VA,0.79,T4
4,NY,0.84,T3
5,NC,0.83,T3
6,SD,0.95,T1
7,IL,0.81,T2
8,DC,0.84,T1
9,MA,0.85,T2


In [18]:
# Write the results table to disk as CSV. Note the output file name has no
# ".csv" extension.
df_results_b3.to_csv(path_or_buf="b3_results")

In [None]:
# # Export the model to HDF5 file
# nn.save("AlphabetSoupCharity_optimization.h5")