## DNN - Keras and Tensorflow

In [8]:
# Import libraries
import os
import time
import datetime

# data science libraries
import numpy as np
import scipy as sp
import pandas as pd

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns

# Plots to appear in the Notebook
%matplotlib inline

# SciKit Learn libraries 
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler 
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

from sklearn.manifold import TSNE

from sklearn.decomposition import PCA
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import FastICA

from sklearn.random_projection import GaussianRandomProjection
from sklearn.random_projection import SparseRandomProjection

# lime
import lime
import lime.lime_tabular

In [9]:
#setting the data directory
os.chdir('/home/mike/Documents/mkp_code/Institute of Data Course/telco-customer-churn-project/data/processed')

In [10]:
customer_data = pd.read_csv('Telco-Customer-Churn-Processed.csv')

In [11]:
train_df, test_df = train_test_split(customer_data, test_size=0.20, random_state=1)
print(train_df.shape)

(5634, 21)


* Helper Functions

In [12]:
def get_keras_dataset(df):
    X = {str(col) : np.array(df[col]) for col in df.columns}
    return X

In [13]:
# Plot the results of the training
def plot_history(history):
    fig = plt.figure(figsize=(15,8))
    ax = plt.subplot(211)
    
    plt.xlabel('Epoch')
    plt.ylabel('loss, acc')
    
    # losses
    ax.plot(history.epoch, history.history['loss'], label='Train LOSS')
    ax.plot(history.epoch, history.history['val_loss'], label='Val LOSS')
    ax.plot(history.epoch, history.history['acc'], label ='Train Accuracy')
    ax.plot(history.epoch, history.history['val_acc'], label='Val Accuracy')
    plt.legend()
    
    # Plot the learning_rate
    if 'lr' in history.history:
        ax = plt.subplot(212)
        plt.ylabel('Learning rate')
        ax.plot(history.epoch, history.history['lr'], label='learning_rate')
        plt.legend()
    plt.show()

In [14]:
# Add the tenure group to the dataset:
customer_data["tenure_group"] = customer_data.apply(lambda customer_data:tenure_lab(customer_data), axis=1)

In [15]:
# Divide the numeric columns from the non-numeric 
numeric_cols = ['MonthlyCharges', 'TotalCharges', 'tenure']
target_col = ['Churn']

# Select categorical 
categorical_cols = customer_data.select_dtypes(include='object').columns
categorical_cols = [col for col in categorical_cols if col not in target_col]

In [16]:
# Use LabelEncoder instead of dummy categories
for col in categorical_cols:
    customer_data[col] = LabelEncoder().fit_transform(customer_data[col])

In [17]:
# Standardise the numeric data before fitting to the data to a model
customer_data[numeric_cols] = StandardScaler().fit_transform(customer_data[numeric_cols])

In [18]:
# Initialise the models
K.clear_session()

NameError: name 'K' is not defined

In [19]:
# Define constants
FEATURE_COLS = numeric_cols + categorical_cols
TARGET_COL = 'Churn'
EPOCHS = 50
BATCH_SIZE = 4
CLASS_WEIGHTS = {0 : 1., 1 : 2.5}

In [20]:
# model outputs
cat_inputs = []
num_inputs = []
embeddings = []
embedding_layer_names = []
emb_n = 10

In [21]:
# Embedding for categorical features
for col in categorical_cols:
    _input = layers.Input(shape=[1], name=col)
    _embed = layers.Embedding(customer_data[col].max() + 1, emb_n, name=col+'_emb')(_input)
    cat_inputs.append(_input)
    embeddings.append(_embed)
    embedding_layer_names.append(col+'_emb')
    

NameError: name 'layers' is not defined

In [22]:
# Simple inputs for the numeric features
for col in numeric_cols:
    numeric_input = layers.Input(shape=(1,), name=col)
    num_inputs.append(numeric_input)

NameError: name 'layers' is not defined

In [23]:
# Merge the numeric inputs
merged_num_inputs = layers.concatenate(num_inputs)
#numeric_dense = layers.Dense(20, activation='relu')(merged_num_inputs)


NameError: name 'layers' is not defined

In [24]:
# MLP for classification
x = layers.Dropout(0.2)(layers.Dense(100, activation='relu')(all_features))
x = layers.Dropout(0.2)(layers.Dense(50, activation='relu')(x))
x = layers.Dropout(0.2)(layers.Dense(25, activation='relu')(x))
x = layers.Dropout(0.2)(layers.Dense(15, activation='relu')(x))

NameError: name 'layers' is not defined

In [25]:
# Final model
output = layers.Dense(1, activation='sigmoid')(x)
model = models.Model(inputs=cat_inputs + num_inputs, outputs=output)

NameError: name 'layers' is not defined

In [26]:
def dice_coef(y_true, y_pred, smooth=1): intersection = K.sum(K.abs(y_true y_pred), axis=-1) return (2. intersection + smooth) / (K.sum(K.square(y_true),-1) + K.sum(K.square(y_pred),-1) + smooth)

def dice_coef_loss(y_true, y_pred): return 1-dice_coef(y_true, y_pred)

SyntaxError: invalid syntax (<ipython-input-26-6ab0a19527a5>, line 1)

In [27]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

NameError: name 'model' is not defined

In [28]:
# Export the transformed data to verify 
customer_data.to_csv('/home/mike/Documents/mkp_code/Institute of Data Course/telco-customer-churn-project/data/processed/Telco-Customer-Churn-Keras.csv')