# Task for Today  

***

## Marketing Effectiveness Prediction  

Given *data about subjects' responses to a bank's marketing campaign*, let's try to predict whether a given subject will **place a deposit** or not.

We will use an artificial neural network (ANN) built with TensorFlow to make our predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf

from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Seed TensorFlow's global random generator so repeated runs start from the same state.
tf.random.set_seed(100)

In [None]:
# The file is semicolon-separated rather than comma-separated.
DATA_PATH = '../input/bank-marketing-campaigns-dataset/bank-additional-full.csv'
data = pd.read_csv(DATA_PATH, sep=';')

In [None]:
# Render the freshly loaded frame as the cell output for a first look at the columns.
data

In [None]:
# Print each column's dtype and non-null count.
data.info()

# Encoding Labels

In [None]:
# Target becomes 1 for 'yes' and 0 for every other value.
data['y'] = (data['y'] == 'yes').astype('int64')

# Encoding Categorical Features

In [None]:
# Show only the object-dtype columns, i.e. the ones that are not numeric yet.
data.select_dtypes('object')

In [None]:
# Number of distinct values per object column (a missing value counts as one value).
{name: data[name].nunique(dropna=False) for name in data.select_dtypes('object')}

In [None]:
# The distinct values themselves, per object column, in order of first appearance.
{name: values.unique().tolist() for name, values in data.select_dtypes('object').items()}

In [None]:
# Treat the 'unknown' placeholder as a genuine missing value.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
data = data.replace('unknown', np.nan)

In [None]:
# Count the missing entries in each column.
data.isna().sum()

In [None]:
def onehot_encode(df, columns, prefixes):
    """Replace each listed column with its one-hot indicator columns.

    Args:
        df: Source DataFrame; a copy is made, so the input is left untouched.
        columns: Names of the columns to expand.
        prefixes: Prefix for the generated indicator columns, one per entry
            of ``columns`` and in the same order.

    Returns:
        A new DataFrame where every listed column has been removed and its
        indicator columns appended on the right.
    """
    encoded = df.copy()
    for name, prefix in zip(columns, prefixes):
        indicators = pd.get_dummies(encoded[name], prefix=prefix)
        # Drop the source column and append its indicators in a single step.
        encoded = pd.concat([encoded.drop(name, axis=1), indicators], axis=1)
    return encoded
        
def ordinal_encode(df, columns, orderings):
    """Replace each listed column's values with their position in an ordering.

    Args:
        df: Source DataFrame; a copy is made, so the input is left untouched.
        columns: Names of the columns to encode.
        orderings: One list per column (same order) giving the values from
            lowest to highest rank.

    Returns:
        A new DataFrame in which every listed column holds integer ranks.

    Raises:
        ValueError: If a column contains a value absent from its ordering.
    """
    encoded = df.copy()
    for name, levels in zip(columns, orderings):
        # list.index gives the rank directly and raises on unknown values.
        encoded[name] = encoded[name].map(levels.index)
    return encoded

def binary_encode(df, columns, positive_values):
    """Encode two-valued columns as 1/0 while leaving missing entries missing.

    Args:
        df: Source DataFrame; it is copied, never modified in place.
        columns: Names of the columns to encode.
        positive_values: For each column (same order), the value that maps
            to 1.

    Returns:
        A copy of ``df`` in which each listed column holds 1 where the entry
        equals its positive value, 0 for any other non-missing entry, and the
        original missing marker (e.g. NaN) where the entry was missing.
    """
    df = df.copy()
    for column, positive_value in zip(columns, positive_values):
        # Encode in a single pass. A separate second pass that maps "anything
        # that is not NaN" to 0 would also overwrite the 1s written by the
        # first pass and wipe out the positive class entirely.
        df[column] = df[column].apply(
            lambda x: x if pd.isna(x) else int(x == positive_value)
        )
    return df

In [None]:
# Unordered categories: expanded into one-hot indicator columns.
nominal_features = ['job', 'marital', 'education', 'day_of_week', 'poutcome']

# Categories with a natural order: replaced by an integer rank.
ordinal_features = ['month']

# Two-valued categories: collapsed into a single 0/1 flag.
binary_features = ['default', 'housing', 'loan', 'contact']

In [None]:
# One indicator-column prefix per nominal feature, in the same order.
prefixes = ['J', 'M', 'E', 'D', 'P']

# Calendar order, so 'jan' ranks 0 and 'dec' ranks 11.
month_order = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
orderings = [month_order]

# The value encoded as 1 for each binary feature, in the same order.
positive_values = ['yes', 'yes', 'yes', 'cellular']

In [None]:
# Run the three encoders back to back; each one returns a new frame.
data = (
    data
    .pipe(onehot_encode, columns=nominal_features, prefixes=prefixes)
    .pipe(ordinal_encode, columns=ordinal_features, orderings=orderings)
    .pipe(binary_encode, columns=binary_features, positive_values=positive_values)
)

In [None]:
# Render the frame again to inspect the result of the encoding step.
data

# Filling Missing Values

In [None]:
# Replace missing entries in these columns with the column's own mean.
for name in ('default', 'housing', 'loan'):
    column_mean = data[name].mean()
    data[name] = data[name].fillna(column_mean)

In [None]:
# Sum the per-column missing counts into one grand total.
missing_total = data.isna().sum().sum()
print("Remaining missing values:", missing_total)

In [None]:
# Any object-dtype column left at this point has not been encoded.
object_columns = data.select_dtypes('object').columns
print("Remaining non-numeric columns:", len(object_columns))

# Splitting/Scaling

In [None]:
# Render the frame once more before separating features from the target.
data

In [None]:
# Features are every column except the target; both are independent copies.
X = data.drop(columns='y').copy()
y = data['y'].copy()

In [None]:
scaler = StandardScaler()

X = scaler.fit_transform(X)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=100)

# Modeling/Training

In [None]:
# y holds 0/1 labels, so its sum is the positive count and its mean the positive rate.
n_positive = y.sum()
n_negative = len(y) - n_positive
positive_rate = y.mean()

print("Positive examples: {}".format(n_positive))
print("Negative examples: {}".format(n_negative))

print("\nClass Distribution: {:.1f}% / {:.1f}%".format(positive_rate * 100, (1 - positive_rate) * 100))

In [None]:
# Functional-API network: ReLU hidden layers feeding a single sigmoid unit.
hidden_units = (64, 64)

inputs = tf.keras.Input(shape=(X.shape[1],))
x = inputs
for units in hidden_units:
    x = tf.keras.layers.Dense(units, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs, outputs)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC(name='auc')],
)

batch_size = 32
epochs = 100

# Halt when validation loss has gone 3 epochs without improving, and roll the
# model back to the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[early_stopping],
)

# Results

In [None]:
# Score the trained model on the test split using the loss and metrics set in compile().
model.evaluate(X_test, y_test)

In [None]:
y_true = np.array(y_test)
# Threshold the predicted probabilities at 0.5 to get hard 0/1 labels.
# Use the builtin int: the np.int alias (which was just builtin int) was
# deprecated in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
y_pred = np.squeeze(np.array(model.predict(X_test) >= 0.5, dtype=int))

In [None]:
# Rows are true classes, columns are predicted classes.
matrix = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:\n", matrix)

In [None]:
# Per-class precision, recall and F1 as a formatted text table.
report = classification_report(y_true, y_pred)
print("Classification Report:\n\n", report)

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/w1exIx0GDeQ