In [1]:
# Dependencies
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import warnings
warnings.filterwarnings("ignore")

# Location of the raw customer-churn CSV (relative to the notebook's working directory)
customer_data_to_load = Path("Resources/customer_churn.csv")

# Load the CSV into a pandas DataFrame
churn_df = pd.read_csv(filepath_or_buffer=customer_data_to_load)

# Preview the first five rows
churn_df.head(n=5)

Unnamed: 0,Names,Age,Total_Purchase,Account_Manager,Years,Num_Sites,Onboard_date,Location,Company,Churn
0,Cameron Williams,42,11066.8,0,7.22,8,8/30/2013 7:00,"10265 Elizabeth Mission Barkerburgh, AK 89518",Harvey LLC,1
1,Kevin Mueller,41,11916.22,0,6.5,11,8/13/2013 0:38,"6157 Frank Gardens Suite 019 Carloshaven, RI 1...",Wilson PLC,1
2,Eric Lozano,38,12884.75,0,6.67,12,6/29/2016 6:20,"1331 Keith Court Alyssahaven, DE 90114","Miller, Johnson and Wallace",1
3,Phillip White,42,8010.76,0,6.71,10,4/22/2014 12:43,"13120 Daniel Mount Angelabury, WY 30645-4695",Smith Inc,1
4,Cynthia Norton,37,9191.58,0,5.56,9,1/19/2016 15:31,"765 Tricia Row Karenshire, MH 71730",Love-Jones,1


In [2]:
# Replace the underscore-style headers with human-readable labels
churn_df = churn_df.rename(
    columns={
        'Total_Purchase': 'Total Purchase',
        'Account_Manager': 'Account Manager',
        'Num_Sites': 'Number of Websites Used',
        'Onboard_date': 'Onboard Date',
    }
)

# Confirm the new headers on the first five rows
churn_df.head(n=5)

Unnamed: 0,Names,Age,Total Purchase,Account Manager,Years,Number of Websites Used,Onboard Date,Location,Company,Churn
0,Cameron Williams,42,11066.8,0,7.22,8,8/30/2013 7:00,"10265 Elizabeth Mission Barkerburgh, AK 89518",Harvey LLC,1
1,Kevin Mueller,41,11916.22,0,6.5,11,8/13/2013 0:38,"6157 Frank Gardens Suite 019 Carloshaven, RI 1...",Wilson PLC,1
2,Eric Lozano,38,12884.75,0,6.67,12,6/29/2016 6:20,"1331 Keith Court Alyssahaven, DE 90114","Miller, Johnson and Wallace",1
3,Phillip White,42,8010.76,0,6.71,10,4/22/2014 12:43,"13120 Daniel Mount Angelabury, WY 30645-4695",Smith Inc,1
4,Cynthia Norton,37,9191.58,0,5.56,9,1/19/2016 15:31,"765 Tricia Row Karenshire, MH 71730",Love-Jones,1


In [3]:
# Summarize the data: per-column non-null counts and dtypes, plus overall memory usage.
# Used here to confirm there are no missing values and to spot the non-numeric (object) columns.
churn_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Names                    900 non-null    object 
 1   Age                      900 non-null    int64  
 2   Total Purchase           900 non-null    float64
 3   Account Manager          900 non-null    int64  
 4   Years                    900 non-null    float64
 5   Number of Websites Used  900 non-null    int64  
 6   Onboard Date             900 non-null    object 
 7   Location                 900 non-null    object 
 8   Company                  900 non-null    object 
 9   Churn                    900 non-null    int64  
dtypes: float64(2), int64(4), object(4)
memory usage: 70.4+ KB


In [4]:
# Drop the identifier / free-text columns (customer name, onboarding timestamp,
# address and company) so that only numeric columns remain.
# `columns=` is used instead of a positional axis argument: `drop(labels, 1)`
# was deprecated in pandas 1.3 and raises a TypeError in pandas 2.x.
churn_df = churn_df.drop(columns=["Names", "Onboard Date", "Location", "Company"])
churn_df.head()

Unnamed: 0,Age,Total Purchase,Account Manager,Years,Number of Websites Used,Churn
0,42,11066.8,0,7.22,8,1
1,41,11916.22,0,6.5,11,1
2,38,12884.75,0,6.67,12,1
3,42,8010.76,0,6.71,10,1
4,37,9191.58,0,5.56,9,1


In [5]:
# Determine the number of unique values in each column
# (a count of 2 marks a binary column, e.g. the Churn target).
churn_df.nunique()

Age                         36
Total Purchase             900
Account Manager              2
Years                      418
Number of Websites Used     12
Churn                        2
dtype: int64

In [6]:
# Target array: the Churn label
y = churn_df['Churn'].values

# Feature array: every column except the target
x = churn_df.drop(columns='Churn').values

# Split the features and target into training and testing sets.
# train_test_split is already imported in the notebook's first cell, so it is not
# re-imported here; random_state pins the shuffle so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)

In [7]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit on the training split only, so the test split does not influence the scaling
x_scaler = scaler.fit(x_train)

# Apply the fitted scaler to the training split, then to the testing split
x_train_scaled, x_test_scaled = (
    x_scaler.transform(split) for split in (x_train, x_test)
)

In [8]:
# Network dimensions: one input per feature column and the width of each hidden layer
number_input_features = len(x_train[0])
hidden_nodes_layer1 = 500
hidden_nodes_layer2 = 300

# Build the deep neural net in a single expression:
# two sigmoid hidden layers followed by a single sigmoid output unit
nn = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(
        units=hidden_nodes_layer1,
        input_dim=number_input_features,
        activation="sigmoid",
    ),
    # Second hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="sigmoid"),
    # Output layer
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Print the layer-by-layer structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 500)               3000      
                                                                 
 dense_1 (Dense)             (None, 300)               150300    
                                                                 
 dense_2 (Dense)             (None, 1)                 301       
                                                                 
Total params: 153601 (600.00 KB)
Trainable params: 153601 (600.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [9]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy as the reported metric
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [10]:
# Fit the network on the scaled training split for 100 epochs and keep what fit() returns
fit_model = nn.fit(
    x=x_train_scaled,
    y=y_train,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [11]:
# Score the trained network on the held-out, scaled test split and report both metrics
model_loss, model_accuracy = nn.evaluate(
    x=x_test_scaled,
    y=y_test,
    verbose=2,
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

8/8 - 0s - loss: 0.3094 - accuracy: 0.8711 - 212ms/epoch - 26ms/step
Loss: 0.3094363212585449, Accuracy: 0.8711110949516296


In [12]:
# Export the trained model to an HDF5 file.
# The filename must end in `.h5` for Keras to select the HDF5 format; with the
# earlier `.h3` suffix (a typo) Keras wrote a SavedModel directory instead of
# a single HDF5 file.
nn.save('Churn_Optimization_3.h5')

INFO:tensorflow:Assets written to: Churn_Optimization_3.h3\assets


INFO:tensorflow:Assets written to: Churn_Optimization_3.h3\assets
