### What You're Aiming For

This ➡️ dataset contains banking marketing campaign data and we can use it to optimize marketing campaigns to attract more customers to term deposit subscription.

What is a Term Deposit?

A Term deposit is a deposit that a bank or a financial institution offers with a fixed rate (often better than just opening deposit account) in which your money will be returned back at a specific maturity time.


### Instructions

In this assignment you will have to take the following steps:

    - Import your dataset and perform initial analysis and visualization
    - Clean the data
    - Build a  deep learning network to predict the marketing campaign outcome ("deposit" either Yes or No)
    - You have to submit a jupyter notebook file. 

- Link : Get started with Google Colab 

Please make sure it's a public shareable link.

In [None]:
import numpy as np
import pandas as pd

#import standard visualization
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()

In [21]:
df = pd.read_csv("bank (1).csv")

In [22]:
df

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,admin.,married,secondary,no,2343,yes,no,unknown,5,may,1042,1,-1,0,unknown,yes
1,56,admin.,married,secondary,no,45,no,no,unknown,5,may,1467,1,-1,0,unknown,yes
2,41,technician,married,secondary,no,1270,yes,no,unknown,5,may,1389,1,-1,0,unknown,yes
3,55,services,married,secondary,no,2476,yes,no,unknown,5,may,579,1,-1,0,unknown,yes
4,54,admin.,married,tertiary,no,184,no,no,unknown,5,may,673,2,-1,0,unknown,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11157,33,blue-collar,single,primary,no,1,yes,no,cellular,20,apr,257,1,-1,0,unknown,no
11158,39,services,married,secondary,no,733,no,no,unknown,16,jun,83,4,-1,0,unknown,no
11159,32,technician,single,secondary,no,29,no,no,cellular,19,aug,156,2,-1,0,unknown,no
11160,43,technician,married,secondary,no,0,no,yes,cellular,8,may,9,2,172,5,failure,no


In [51]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11162 entries, 0 to 11161
Data columns (total 28 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   age                  11162 non-null  float64
 1   balance              11162 non-null  float64
 2   duration             11162 non-null  float64
 3   campaign             11162 non-null  float64
 4   deposit              11162 non-null  float64
 5   job_admin.           11162 non-null  float64
 6   job_blue-collar      11162 non-null  float64
 7   job_entrepreneur     11162 non-null  float64
 8   job_housemaid        11162 non-null  float64
 9   job_management       11162 non-null  float64
 10  job_retired          11162 non-null  float64
 11  job_self-employed    11162 non-null  float64
 12  job_services         11162 non-null  float64
 13  job_student          11162 non-null  float64
 14  job_technician       11162 non-null  float64
 15  job_unemployed       11162 non-null 

In [15]:
df.describe(include = "object").T

Unnamed: 0,count,unique,top,freq
job,11162,12,management,2566
marital,11162,3,married,6351
education,11162,4,secondary,5476
housing,11162,2,no,5881
loan,11162,2,no,9702
deposit,11162,2,no,5873


In [None]:
df.describe().T

In [23]:
df = df.drop("default", axis = 1)
df = df.drop("contact", axis = 1)
df = df.drop("month", axis = 1)
df = df.drop("poutcome", axis = 1)
df = df.drop("pdays", axis = 1)
df = df.drop("day", axis = 1)
df = df.drop("previous", axis = 1)

In [16]:
df = df.drop_duplicates().reset_index(drop=True)

In [24]:
df["deposit"] = label_encoder.fit_transform(df["deposit"])

In [25]:
df = pd.get_dummies(df, drop_first = False)

In [26]:
df

Unnamed: 0,age,balance,duration,campaign,deposit,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,...,marital_married,marital_single,education_primary,education_secondary,education_tertiary,education_unknown,housing_no,housing_yes,loan_no,loan_yes
0,59,2343,1042,1,1,True,False,False,False,False,...,True,False,False,True,False,False,False,True,True,False
1,56,45,1467,1,1,True,False,False,False,False,...,True,False,False,True,False,False,True,False,True,False
2,41,1270,1389,1,1,False,False,False,False,False,...,True,False,False,True,False,False,False,True,True,False
3,55,2476,579,1,1,False,False,False,False,False,...,True,False,False,True,False,False,False,True,True,False
4,54,184,673,2,1,True,False,False,False,False,...,True,False,False,False,True,False,True,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11157,33,1,257,1,0,False,True,False,False,False,...,False,True,True,False,False,False,False,True,True,False
11158,39,733,83,4,0,False,False,False,False,False,...,True,False,False,True,False,False,True,False,True,False
11159,32,29,156,2,0,False,False,False,False,False,...,False,True,False,True,False,False,True,False,True,False
11160,43,0,9,2,0,False,False,False,False,False,...,True,False,False,True,False,False,True,False,False,True


In [40]:
bool_cols = df.select_dtypes(include='bool').columns  # Get names of all boolean columns
df[bool_cols] = df[bool_cols].astype(int)

In [47]:
# Get names of all integer columns
int_cols = df.select_dtypes(include='int').columns  

# Convert all integer columns to float
df[int_cols] = df[int_cols].astype(float)  


In [61]:
import tensorflow as tf  # TensorFlow library for building neural networks
from tensorflow.keras.models import Sequential  # Sequential model for building a feedforward neural network
from tensorflow.keras.layers import Dense  # Dense layer for fully connected layers
from sklearn.datasets import fetch_california_housing  # California Housing dataset
from sklearn.model_selection import train_test_split  # Function to split data into training and test sets
from sklearn.preprocessing import StandardScaler  

In [50]:
# Convert the entire DataFrame to float32
# Convert all columns except the target column to a NumPy array
X = df.drop(columns=['deposit']).astype(np.float32).to_numpy()  # Exclude 'deposit' column
y = df['deposit'].astype(int).to_numpy()  # Target variable

In [62]:
# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data (normalize features) so they have a mean of 0 and variance of 1.
# This is important for better convergence in neural networks.
scaler = StandardScaler()  # Create an instance of the StandardScaler
X_train = scaler.fit_transform(X_train)  # Fit on training data and apply the transformation
X_test = scaler.transform(X_test)  # Apply the same transformation to test data (without fitting again)

In [31]:
from keras.models import Sequential  # Sequential model to define a feedforward neural network
from keras.layers import Dense

In [29]:
len(X.columns)

27

In [55]:
from keras.models import Sequential
from keras.layers import Dense, Input

# Create a Sequential model
model = Sequential()

# Use Input layer to define input shape
model.add(Input(shape=(27,)))  # Define input shape explicitly

# Add Dense layers
model.add(Dense(64, activation='relu'))  # First hidden layer
model.add(Dense(32, activation='relu'))  # Second hidden layer
model.add(Dense(1, activation='sigmoid'))  # For binary classification

In [56]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [63]:
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)

Epoch 1/100
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.5543 - loss: 27.1078 - val_accuracy: 0.6131 - val_loss: 2.3333
Epoch 2/100
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5876 - loss: 2.4369 - val_accuracy: 0.6495 - val_loss: 1.3436
Epoch 3/100
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6260 - loss: 1.2784 - val_accuracy: 0.6517 - val_loss: 0.9037
Epoch 4/100
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6429 - loss: 0.9462 - val_accuracy: 0.6428 - val_loss: 0.8080
Epoch 5/100
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6523 - loss: 0.7895 - val_accuracy: 0.6753 - val_loss: 0.7214
Epoch 6/100
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6620 - loss: 0.7198 - val_accuracy: 0.6825 - val_loss: 0.6832
Epoch 7/100
[1m224/2

<keras.src.callbacks.history.History at 0x1c2cf998e90>

In [58]:
predictions = model.predict(X)
print(predictions)

[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[[8.9470476e-01]
 [9.1576976e-01]
 [9.3911213e-01]
 ...
 [3.9752894e-09]
 [0.0000000e+00]
 [6.6831553e-01]]


In [59]:
print(y)

[1 1 1 ... 0 0 0]


In [64]:
mse = model.evaluate(X_test, y_test)
print(f'Mean Squared Error on Test Set: {mse}')

[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7593 - loss: 0.5544
Mean Squared Error on Test Set: [0.5332458019256592, 0.7675772309303284]
