# Bank Marketing - Predicting Term Deposit Subscriptions

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import seaborn as sns
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Switch to Kaggle's writable working directory.
# os.chdir is plain Python, so the cell no longer depends on IPython's
# automagic `cd` and still runs if the notebook is exported as a script.
import os

os.chdir('/kaggle/working')

In [None]:
import os

# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# Database pre-processing

In [None]:
# Load the bank marketing CSV into the DataFrame used by the rest of the notebook.
dataset_path = '/kaggle/input/bank-marketing-dataset/bank.csv'
base = pd.read_csv(dataset_path)

# Quick size check: (rows, columns).
print(base.shape)

# Checking for duplicate data in the database

In [None]:
# Number of rows that are exact repeats of an earlier row (0 means no duplicates).
base.duplicated().sum()

# Checking for null data in the database

In [None]:
# First sum() counts nulls per column, the second adds them up: total missing cells.
base.isnull().sum().sum()

# Database overview

In [None]:
# Show the first 10 rows as a quick sanity check of the loaded data.
base.head(10)

# DataFrame Info

In [None]:
# Print column names, dtypes and non-null counts.
base.info()

# Viewing Data Description

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) for the DataFrame's columns.
base.describe()

# Analyze with graphics using Matplotlib + Seaborn

In [None]:
# Use seaborn's dark grid theme for every figure that follows.
sns.set(style="darkgrid")

# Custom five-colour palette reused by the pie charts below.
cols = [
    '#1A5B6D',
    '#D8C99B',
    '#D8973C',
    '#BD632F',
    '#273E47',
]

In [None]:
# Render the custom palette as a strip of colour swatches.
sns.palplot(cols)

In [None]:
# Two pie charts comparing the education mix of clients who subscribed
# (left) with those who did not (right).
fig = plt.figure(figsize=(12,8))
plt.suptitle("Comparision of Education with deposit", family='Serif', size=17, ha='center', weight='bold')
plt.figtext(0.5,0.93,"comparing the education impact on deposit", family='Serif', size=12, ha='center')
gs = GridSpec(nrows=1, ncols=2, width_ratios=[5,2])

# value_counts() orders categories by frequency, whereas unique() orders them
# by first appearance. Labels are therefore taken from the value_counts()
# index so that every wedge carries the name of the category it measures.
edu_yes = base[base['deposit']=='yes']['education'].value_counts()
ax1 = plt.subplot(gs[0,0])
ax1.pie(edu_yes, labels=edu_yes.index, autopct='%2d', colors=cols)

edu_no = base[base['deposit']=='no']['education'].value_counts()
ax2 = plt.subplot(gs[0,1])
ax2.pie(edu_no, labels=edu_no.index, autopct='%2d', colors=cols)

In [None]:
# Balance distribution per marital status, restricted to clients who subscribed.
subscribed = base[base['deposit']=='yes']

plt.figure(figsize=(12,9))
sns.boxplot(data=subscribed, x="marital", y="balance", palette="Blues")
plt.show()

# Attribute

In [None]:
# Keep only the "marital" and "previous" columns for a focused look.
df_marital=base[["marital","previous"]]
# Summary statistics of that two-column subset.
df_marital.describe()

In [None]:
# Bar chart of how many rows fall into each marital status.
marital_fig, marital_ax = plt.subplots(figsize=(12,6))
sns.countplot(data=base, x="marital", palette="Blues", ax=marital_ax)
plt.show()

In [None]:
# Bar chart of how many rows share each value of "campaign".
campaign_fig, campaign_ax = plt.subplots(figsize=(12,6))
sns.countplot(data=base, x="campaign", palette="dark", ax=campaign_ax)
plt.show()

# Pearson correlation, which measures the strength of a linear relationship
In this case, we are checking the pairwise correlation between the attributes.

In [None]:
# Heatmap of pairwise Pearson correlations.
plt.figure(figsize=(12, 6))
# Pearson correlation is only defined for numeric data. Selecting the numeric
# columns explicitly avoids the error newer pandas raises when corr() meets
# string columns, and matches what older pandas did implicitly.
numeric_cols = base.select_dtypes(include="number")
sns.heatmap(numeric_cols.corr(method='pearson'), cmap="Greys", annot=True)

# Creating machine learning model with sklearn

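We should discard the duration attribute and must convert categorical variables into numeric ones. Note: the cells below keep all 16 predictor columns, so the duration attribute is not actually dropped yet.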


In [None]:
# Display the DataFrame as it stands before modelling.
base

# Separating the prediction attributes and the database class

In [None]:
# Prediction attributes: the first 16 columns (positions 0-15) as a NumPy array.
previsores = base.iloc[:, 0:16].values

# Class (target): the column at position 16 as a NumPy array.
classe = base.iloc[:, 16].values

In [None]:
# Inspect the raw target values before they are encoded.
classe

# Converting the Class to Numeric Attributes

In [None]:
# Encode the target as integers: 1 for "yes", 0 for anything else.
# The result is a fresh array built from `base`, so the DataFrame column is
# never written to in place (the array returned by `.values` may alias it or
# be read-only) and re-running this cell always yields the same labels.
classe = (base.iloc[:, 16].to_numpy() == "yes").astype(int)

In [None]:
# Show one raw predictor row (index 1) before encoding.
print(previsores[1])

In [None]:
# Confirm the container type of the predictors.
type(previsores)

In [None]:
# Confirm the container type of the target.
type(classe)

In [None]:
# Spot-check the first 100 encoded target values.
print(classe[0: 100])

# Applying OneHotEncoder to the Categorical Predictor Attributes

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [None]:
# Positions of the columns to one-hot encode; every other column is passed
# through unchanged (remainder='passthrough').
categorical_idx = [1, 2, 3, 4, 6, 7, 8, 10, 15]
onehotencoder = ColumnTransformer(
    transformers=[("OneHot", OneHotEncoder(), categorical_idx)],
    remainder='passthrough',
)

# Fit the encoder on the predictors and replace them with the encoded matrix.
previsores = onehotencoder.fit_transform(previsores)

# Check the dtype of one encoded row.
previsores[1].dtype

In [None]:
# Check what container type the ColumnTransformer returned.
type(previsores)

# Applying the Normalization of Predictive Attributes

In [None]:
# Alias kept under its own name because the same function is reused later
# to scale the single prediction example.
# NOTE(review): per the scikit-learn docs, normalize() rescales each row
# (sample) to unit norm by default rather than scaling each feature — confirm
# this is the intended kind of normalization.
normalizar = preprocessing.normalize
previsores = normalizar(previsores)

In [None]:
# (rows, columns) of the encoded and normalized predictors.
previsores.shape

In [None]:
# Show the same row (index 1) after encoding and normalization.
print(previsores[1])

# Creating the Training and Testing databases

In [None]:
# Hold out 25% of the rows for testing; random_state=2 fixes the shuffle so the split is reproducible.
X_train,X_test,Y_train,Y_test = train_test_split(previsores,classe,test_size=0.25, random_state=2)

In [None]:
# Confirm the container type of the training predictors.
type(X_train)

In [None]:
# Inspect one training row (index 1).
X_train[1]

In [None]:
# Inspect a few training labels (indices 2 to 9).
Y_train[2: 10]

In [None]:
# Cast the features to float32 for the Keras model.
X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)

# Cast the labels to integers and reshape them into column vectors (n, 1).
# The builtin `int` replaces `np.int`, which was only an alias for it and has
# been removed from recent NumPy releases.
Y_train = np.asarray(Y_train).astype(int).reshape((-1, 1))
Y_test = np.asarray(Y_test).astype(int).reshape((-1, 1))

In [None]:
# Confirm the container types of all four splits after casting.
print(type(X_train), type(Y_train))
print(type(X_test), type(Y_test))

In [None]:
# Confirm that features and labels have matching row counts in each split.
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

# Creating a Machine Learning Model with Tensorflow and Keras

In [None]:
# Define a Sequential binary classifier: five hidden ReLU layers
# (250, 150, 150, 50, 50 units), each followed by dropout, and a single
# sigmoid output unit.
# The input width is read from the training data instead of being hard-coded,
# so the model still builds if the encoded feature count changes.
n_features = X_train.shape[1]

model = keras.Sequential()
model.add(layers.Dense(units = 250, activation="relu", input_dim=n_features))
model.add(layers.Dropout(0.3))

model.add(layers.Dense(units = 150, activation="relu"))
model.add(layers.Dropout(0.3))

model.add(layers.Dense(units = 150, activation="relu"))
model.add(layers.Dropout(0.2))

model.add(layers.Dense(units = 50, activation="relu"))
model.add(layers.Dropout(0.2))

model.add(layers.Dense(units = 50, activation="relu"))
model.add(layers.Dropout(0.2))

# Sigmoid output for the binary target.
model.add(layers.Dense(units = 1, activation="sigmoid"))

model.summary()


In [None]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked as the reported metric.
model.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"])

In [None]:
# Train for 300 epochs in batches of 300 rows.
# Note: validation_data is the test split, so the per-epoch validation metrics
# are computed on the same data later used for the final evaluation.
model.fit(X_train, Y_train, batch_size = 300, epochs=300, validation_data=(X_test, Y_test), verbose = 2)

In [None]:
# Model outputs for the test rows (sigmoid values, thresholded in a later cell).
previsoes = model.predict(X_test)

In [None]:
# Inspect the raw model outputs.
previsoes

In [None]:
# Turn the model outputs into hard labels, in place:
# values strictly above 0.75 become 1, everything else becomes 0.
previsoes[:] = previsoes > 0.75

In [None]:
# Inspect the predictions after thresholding.
previsoes

In [None]:
# True labels of the test rows, for comparison with the predictions above.
Y_test

In [None]:
# Confusion matrix of true labels versus thresholded predictions.
# The function is called through the `metrics` module because this cell
# rebinds the name `confusion_matrix` to the resulting array; calling the bare
# name would fail on a second run of the cell.
confusion_matrix = metrics.confusion_matrix(Y_test, previsoes)
confusion_matrix

# Plot Confusion Matrix of Machine Learning Model

In [None]:
import seaborn as sn
import matplotlib.pyplot as plt

In [None]:
# Draw the confusion matrix as an annotated heatmap.
sn.set(font_scale=1.4) # for label size
plt.figure(figsize=(10,7))
# fmt="d" prints the cell counts as plain integers; the default annotation
# format would abbreviate large counts into scientific notation.
sn.heatmap(confusion_matrix, annot=True, fmt="d", annot_kws={"size": 14}) # font size

plt.show()

In [None]:
# Score the trained network on the test split.
# Presumably returns [loss, accuracy], given the compile settings — verify against the output.
results = model.evaluate(X_test, Y_test, batch_size=128)

# Accuracy: 0.8348

In [None]:
# Display the evaluation output.
results

# Creating Model with DecisionTreeClassifier() Algorithm

In [None]:

# Create the decision tree classifier object
clf = DecisionTreeClassifier()

# Train the decision tree classifier
clf = clf.fit(X_train,Y_train)

# Predict the response for the test dataset
y_pred = clf.predict(X_test)

# Accuracy: 0.7628

In [None]:
# Share of test rows whose label in y_pred matches Y_test.
print("Accuracy:",metrics.accuracy_score(Y_test, y_pred))

# Creating Model with RandomForestClassifier() Algorithm

In [None]:
# Create the Random Forest classifier object
rfc = RandomForestClassifier()

# Train the Random Forest itself. Fitting through `clf` would retrain the
# decision tree and bind that tree to `rfc`, leaving the forest untrained.
# np.ravel flattens the (n, 1) label column into the 1-D shape scikit-learn expects.
rfc.fit(X_train, np.ravel(Y_train))

# Predict the response for the test dataset
y_pred = rfc.predict(X_test)

# Accuracy: 0.7671

In [None]:
# Share of test rows whose label in y_pred matches Y_test.
print("Accuracy:",metrics.accuracy_score(Y_test, y_pred))

# Predicting an Example
## Actual Answer: "yes"

In [None]:
# Example for testing algorithms
# Example for testing algorithms: one client row holding the 16 predictor
# values in column order, shaped (1, 16).
# dtype=object keeps the numbers numeric and the categories as strings, like
# the training predictors; without it NumPy would coerce every value to a string.
exemplo = np.array(
    [20, "student", "single", "secondary", "no", 502, "no", "no",
     "cellular", 30, "apr", 261, 1, -1, 0, "unknown"],
    dtype=object,
).reshape(1, -1)

In [None]:
# Applying HotEncoder to Categorical Variables
exemplo = onehotencoder.transform(exemplo)
exemplo = normalizar(exemplo)

# Testing with DecisionTreeClassifier()

In [None]:
# Predict the example with the decision tree (`clf`), as this section's heading states.
clf.predict(exemplo)

# Testing with the RandomForestClassifier() model

In [None]:
# Predict the example with the random forest (`rfc`), as this section's heading states.
rfc.predict(exemplo)

The second model is also right

# Testing with the Machine Learning Model

In [None]:
# Output of the Keras model for the single example (not thresholded here).
ex_model = model.predict(exemplo)

In [None]:
# Display the network's output for the example.
ex_model