# Neural Network Model


In [3]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf
import pandas as pd

# Load the "b2" non-encoded dataset; the first CSV column is used as the row index.
B2_CSV_PATH = "../ML_Data_&_Preprocessing/b2_df_nonEncoded.csv"
df = pd.read_csv(B2_CSV_PATH, index_col=[0])

# Preview the first rows to confirm the columns loaded as expected.
df.head()

Unnamed: 0,state,spend,Impressions,leads,link_clicks,reach,Agency Tiers
0,IL,47.33,1780,0,7,1689,4.0
1,NY,63.65,1857,1,10,1737,4.0
2,OK,32.53,1718,0,7,1527,4.0
3,SC,32.31,1725,1,14,1645,4.0
4,CA,101.13,3745,1,28,3513,4.0


In [4]:
# Class balance of the target: number of rows for each distinct `leads` value.
df['leads'].value_counts()

1    21698
0    12272
Name: leads, dtype: int64

In [5]:
# Number of rows available per state, used to pick which states to test the model on.
df['state'].value_counts()

CA    7685
TX    4254
VA    3239
CO    2386
FL    1758
MI    1655
GA    1620
NY    1486
NC    1050
TN     970
WA     960
NJ     919
CT     893
OH     842
MO     777
MN     628
IL     605
MA     454
SC     365
OK     365
MD     357
DC     250
SD     246
NE     173
PA      33
Name: state, dtype: int64

In [104]:
# Restrict the data to a single state ('SD' for this run) and display the subset.
is_target_state = df['state'].eq('SD')
df2 = df[is_target_state]
df2

Unnamed: 0,state,spend,Impressions,leads,link_clicks,reach,Agency Tiers
27429,SD,0.00,0,0,0,0,
27430,SD,47.75,3774,1,18,3507,
27431,SD,0.00,0,0,0,0,
27451,SD,0.00,0,0,0,0,
27452,SD,47.22,3483,1,29,3222,
...,...,...,...,...,...,...,...
32957,SD,26.24,1624,1,36,1519,
33007,SD,14.72,974,1,29,934,
33008,SD,36.69,2453,1,75,2159,
33043,SD,53.53,3144,1,83,2824,


In [105]:
# Summarise the single-state subset: row count, dtypes and non-null counts per column.
df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 246 entries, 27429 to 33087
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   state         246 non-null    object 
 1   spend         246 non-null    float64
 2   Impressions   246 non-null    int64  
 3   leads         246 non-null    int64  
 4   link_clicks   246 non-null    int64  
 5   reach         246 non-null    int64  
 6   Agency Tiers  0 non-null      float64
dtypes: float64(2), int64(4), object(1)
memory usage: 15.4+ KB


In [106]:
# Remove the two columns that are not used as model inputs: the `state` label
# (constant after filtering to one state) and `Agency Tiers`.
# They are dropped by name rather than by position (previously columns[[0, 6]])
# so a change in the CSV column order cannot silently drop the wrong columns.
df3 = df2.drop(columns=['state', 'Agency Tiers'])

In [107]:
# Confirm which columns remain after the drop, with their dtypes and non-null counts.
df3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 246 entries, 27429 to 33087
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   spend        246 non-null    float64
 1   Impressions  246 non-null    int64  
 2   leads        246 non-null    int64  
 3   link_clicks  246 non-null    int64  
 4   reach        246 non-null    int64  
dtypes: float64(1), int64(4)
memory usage: 11.5 KB


# Split into features/target and train/test sets

In [108]:
# Separate the target (`leads`) from the remaining feature columns.
y = df3['leads'].values
# Use the `columns=` keyword: passing the axis positionally (`drop([...], 1)`)
# is deprecated since pandas 1.3 and raises a TypeError from pandas 2.0 onwards.
X = df3.drop(columns=['leads']).values

# Hold out a test set (scikit-learn's default split size) with a fixed
# random_state so the same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

  This is separate from the ipykernel package so we can avoid doing imports until


In [109]:
# Standardise the features. The scaler is fitted on the training split only,
# so no statistics from the test split leak into training.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the same fitted transform to both splits.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

# Compile, Train, Evaluate our Model 1 - NN

In [110]:
# Define the model: a feed-forward (multi-layer perceptron) binary classifier
# with three hidden Dense layers followed by a single-unit output layer.

# Width of the input: one value per feature column in the training data.
numInputFeatures = len(X_train[0])

# Number of units in each hidden layer.
layer1 = 30
layer2 = 15
layer3 = 2

nn = tf.keras.models.Sequential([
    # First hidden layer; it also declares the input width.
    tf.keras.layers.Dense(layer1,
                          input_dim=numInputFeatures,
                          activation="hard_sigmoid"),
    # Second hidden layer.
    tf.keras.layers.Dense(layer2, activation="elu"),
    # Third hidden layer, added in an attempt to increase accuracy.
    tf.keras.layers.Dense(layer3, activation='elu'),
    # Output layer: a single unit for the binary `leads` target.
    tf.keras.layers.Dense(1, activation="hard_sigmoid"),
])

# Print the layer-by-layer structure and parameter counts.
nn.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_36 (Dense)            (None, 30)                150       
                                                                 
 dense_37 (Dense)            (None, 15)                465       
                                                                 
 dense_38 (Dense)            (None, 2)                 32        
                                                                 
 dense_39 (Dense)            (None, 1)                 3         
                                                                 
Total params: 650
Trainable params: 650
Non-trainable params: 0
_________________________________________________________________


In [111]:
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Make sure the checkpoint directory exists; each file name embeds the
# zero-padded epoch number.
os.makedirs("checkpoints/", exist_ok=True)
checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

# Callback that writes the model's weights (not the full model) at the end of
# every epoch and prints a message each time a checkpoint is saved.
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    save_freq='epoch',
    verbose=1,
)

# Compile the model for binary classification, tracking accuracy during training.
nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [112]:
# Train on the scaled training split for 100 epochs; the checkpoint callback
# saves the weights after each epoch.
fit_model = nn.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=100,
    callbacks=[cp_callback],
)

Epoch 1/100
1/6 [====>.........................] - ETA: 1s - loss: 0.7368 - accuracy: 0.3125
Epoch 1: saving model to checkpoints\weights.01.hdf5
Epoch 2/100
1/6 [====>.........................] - ETA: 0s - loss: 0.6696 - accuracy: 0.6562
Epoch 2: saving model to checkpoints\weights.02.hdf5
Epoch 3/100
1/6 [====>.........................] - ETA: 0s - loss: 0.6257 - accuracy: 0.9062
Epoch 3: saving model to checkpoints\weights.03.hdf5
Epoch 4/100
1/6 [====>.........................] - ETA: 0s - loss: 0.6077 - accuracy: 0.7188
Epoch 4: saving model to checkpoints\weights.04.hdf5
Epoch 5/100
1/6 [====>.........................] - ETA: 0s - loss: 0.6128 - accuracy: 0.6250
Epoch 5: saving model to checkpoints\weights.05.hdf5
Epoch 6/100
1/6 [====>.........................] - ETA: 0s - loss: 0.5996 - accuracy: 0.8750
Epoch 6: saving model to checkpoints\weights.06.hdf5
Epoch 7/100
1/6 [====>.........................] - ETA: 0s - loss: 0.5724 - accuracy: 0.8750
Epoch 7: saving model to checkp

1/6 [====>.........................] - ETA: 0s - loss: 0.3971 - accuracy: 0.8125
Epoch 36: saving model to checkpoints\weights.36.hdf5
Epoch 37/100
1/6 [====>.........................] - ETA: 0s - loss: 0.2938 - accuracy: 0.9062
Epoch 37: saving model to checkpoints\weights.37.hdf5
Epoch 38/100
1/6 [====>.........................] - ETA: 0s - loss: 0.2422 - accuracy: 0.9375
Epoch 38: saving model to checkpoints\weights.38.hdf5
Epoch 39/100
1/6 [====>.........................] - ETA: 0s - loss: 0.2819 - accuracy: 0.9062
Epoch 39: saving model to checkpoints\weights.39.hdf5
Epoch 40/100
1/6 [====>.........................] - ETA: 0s - loss: 0.3020 - accuracy: 0.8750
Epoch 40: saving model to checkpoints\weights.40.hdf5
Epoch 41/100
1/6 [====>.........................] - ETA: 0s - loss: 0.4601 - accuracy: 0.7500
Epoch 41: saving model to checkpoints\weights.41.hdf5
Epoch 42/100
1/6 [====>.........................] - ETA: 0s - loss: 0.4381 - accuracy: 0.7812
Epoch 42: saving model to check

1/6 [====>.........................] - ETA: 0s - loss: 0.2661 - accuracy: 0.9062
Epoch 71: saving model to checkpoints\weights.71.hdf5
Epoch 72/100
1/6 [====>.........................] - ETA: 0s - loss: 0.3544 - accuracy: 0.8438
Epoch 72: saving model to checkpoints\weights.72.hdf5
Epoch 73/100
1/6 [====>.........................] - ETA: 0s - loss: 0.3843 - accuracy: 0.7812
Epoch 73: saving model to checkpoints\weights.73.hdf5
Epoch 74/100
1/6 [====>.........................] - ETA: 0s - loss: 0.3817 - accuracy: 0.8750
Epoch 74: saving model to checkpoints\weights.74.hdf5
Epoch 75/100
1/6 [====>.........................] - ETA: 0s - loss: 0.4020 - accuracy: 0.8438
Epoch 75: saving model to checkpoints\weights.75.hdf5
Epoch 76/100
1/6 [====>.........................] - ETA: 0s - loss: 0.2442 - accuracy: 0.9375
Epoch 76: saving model to checkpoints\weights.76.hdf5
Epoch 77/100
1/6 [====>.........................] - ETA: 0s - loss: 0.4259 - accuracy: 0.7812
Epoch 77: saving model to check

In [113]:
# Score the trained network on the held-out, scaled test split.
model_loss, model_accuracy = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

# Notes recorded from earlier runs:
#   brand 1, all states -> accuracy = 36%
#   brand 2, all states -> accuracy = 36%
#   brand 3, all states -> accuracy = 36%
#   FL only             -> accuracy = 0.84, i.e. 84% (the original note read ".84%";
#                          0.84 is the FL value in the B2 results table)


2/2 - 0s - loss: 0.2452 - accuracy: 0.9355 - 86ms/epoch - 43ms/step
Loss: 0.24518010020256042, Accuracy: 0.9354838728904724


### B2 Results

#### B2

In [114]:
# Per-state results for B2, one (state, test accuracy, tier) row per state.
# The accuracies were noted by hand from individual single-state runs.
_b2_rows = [
    ('CA', .80, 'T1'),
    ('FL', .84, 'T1'),
    ('VA', .78, 'T1'),
    ('GA', .85, 'T2'),
    ('NY', .83, 'T2'),
    ('NC', .84, 'T2'),
    ('WA', .78, 'T3'),
    ('NJ', .79, 'T3'),
    ('CT', .81, 'T3'),
    ('PA', 1.0, 'T4'),
    ('NE', .86, 'T4'),
    ('SD', .95, 'T4'),
]

# Transpose the rows into the column-oriented dict used to build the results table.
_states, _accuracies, _tiers = (list(column) for column in zip(*_b2_rows))
data_results_b2 = {
    'State': _states,
    'Predictive Accuracy': _accuracies,
    'Tier': _tiers,
}
data_results_b2

{'State': ['CA',
  'FL',
  'VA',
  'GA',
  'NY',
  'NC',
  'WA',
  'NJ',
  'CT',
  'PA',
  'NE',
  'SD'],
 'Predictive Accuracy': [0.8,
  0.84,
  0.78,
  0.85,
  0.83,
  0.84,
  0.78,
  0.79,
  0.81,
  1.0,
  0.86,
  0.95],
 'Tier': ['T1',
  'T1',
  'T1',
  'T2',
  'T2',
  'T2',
  'T3',
  'T3',
  'T3',
  'T4',
  'T4',
  'T4']}

In [115]:
# Build the results table: one row per state with its predictive accuracy and tier.
df_results_b2 = pd.DataFrame.from_dict(data_results_b2)
df_results_b2

Unnamed: 0,State,Predictive Accuracy,Tier
0,CA,0.8,T1
1,FL,0.84,T1
2,VA,0.78,T1
3,GA,0.85,T2
4,NY,0.83,T2
5,NC,0.84,T2
6,WA,0.78,T3
7,NJ,0.79,T3
8,CT,0.81,T3
9,PA,1.0,T4


In [116]:
# Write the results table to disk as CSV.
# NOTE(review): the file name has no ".csv" extension and the row index is
# written as the first column - confirm both are intended by downstream readers.
df_results_b2.to_csv(path_or_buf='b2_results')

In [None]:
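# Export the trained model to an HDF5 file (currently disabled).
# NOTE: the file name below appears to be left over from another project; rename it before enabling.
# nn.save("AlphabetSoupCharity_optimization.h5")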