<a href="https://colab.research.google.com/github/sahug/tensorflow-colab/blob/master/Tensorflow_2_0_Bank_Customer_Satisfaction_Using_CNN_and_Feature_Selection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Bank Customer Satisfaction Using CNN and Feature Selection**

In [None]:
# Install the pinned TensorFlow 2.0.0 GPU build into the runtime before anything imports it.
!pip install tensorflow-gpu==2.0.0

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv1D, MaxPool1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

In [None]:
# Show which TensorFlow version is actually loaded in this kernel.
print(tf.__version__)

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold

In [None]:
# Dataset Link - https://github.com/laxmimerit/Data-Files-for-Feature-Selection
# Clone the repository into the current working directory; the CSV is read from it below.
!git clone https://github.com/laxmimerit/Data-Files-for-Feature-Selection.git

In [None]:
# Read Data
data = pd.read_csv("/content/Data-Files-for-Feature-Selection/santander-train.csv")
data.head()

# TARGET is the label column. Per the Kaggle Santander data description, 1 marks an
# unsatisfied customer and 0 a satisfied one (NOTE(review): confirm against the dataset page).

In [None]:
# (rows, columns) of the raw frame, still including the ID and TARGET columns.
data.shape

In [None]:
# Feature matrix: every column except the row identifier and the label.
x = data.drop(columns=["ID", "TARGET"])

In [None]:
# Two columns fewer than `data` (ID and TARGET were dropped).
x.shape

In [None]:
y = data["TARGET"] # This is what we are predicting

In [None]:
# Hold out 20% of the rows for testing. stratify=y keeps the TARGET class ratio the same
# in both splits, and random_state=0 makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0, stratify=y)

In [None]:
# Sanity check: feature and label splits must have matching row counts.
x_train.shape, x_test.shape, y_train.shape, y_test.shape

#### **Remove Constant, Quasi-Constant and Duplicate Features**

In [None]:
# Drop constant and quasi-constant features: VarianceThreshold(0.01) removes every
# column whose variance on the training split does not exceed 0.01.
# The selector is fitted on the training data only and then applied unchanged to the
# test data, so both splits keep the same columns.
# Named `variance_filter` so that the built-in `filter` is not shadowed.
variance_filter = VarianceThreshold(0.01)
x_train = variance_filter.fit_transform(x_train)
x_test = variance_filter.transform(x_test)

In [None]:
# Column count after the variance filter; it must be identical for both splits.
x_train.shape, x_test.shape

In [None]:
# Removing duplicates.
# Transpose rows and columns so that each feature becomes a row; duplicated
# features can then be detected with DataFrame.duplicated(), which compares rows.
x_train_t = x_train.T
x_test_t = x_test.T

In [None]:
# Wrap the transposed arrays in DataFrames so that .duplicated() is available.
x_train_t = pd.DataFrame(x_train_t)
x_test_t = pd.DataFrame(x_test_t)

In [None]:
# After transposing, rows are features and columns are samples.
x_train_t.shape, x_test_t.shape 

In [None]:
x_train_t.duplicated().sum() # Number of duplicated features that need to be removed.

In [None]:
# Boolean Series with one entry per row of the transposed frame, i.e. per feature.
duplicated_features = x_train_t.duplicated()
duplicated_features

# True marks a feature that duplicates an earlier one; False marks a feature to keep.

In [None]:
# Keep only the non-duplicated features: invert the mask element-wise
# (True -> False, False -> True) and materialise it as a plain list of bools.
features_to_keep = (~duplicated_features).tolist()
features_to_keep

In [None]:
# Select the rows (features) flagged for keeping, then transpose back so that
# rows are samples and columns are features again.
x_train = x_train_t.loc[features_to_keep].T
x_train.shape

In [None]:
# Apply the mask computed on the training split to the test split, so both keep
# exactly the same features, then transpose back to samples-by-features.
x_test = x_test_t.loc[features_to_keep].T
x_test.shape

#### Standardizing data: bringing all features to a common scale.

In [None]:
x_train, x_test # Here we can see a lot of zeroes and large differences in magnitude between values.

In [None]:
# Bring the features to a comparable scale.
# The scaler is fitted on the training split only and reused for the test split,
# so no statistics from the test data leak into preprocessing.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
x_train, x_test # After scaling, the values are on a comparable scale.

In [None]:
# Scaling does not change the shape: still (samples, features).
x_train.shape, x_test.shape

In [None]:
# Conv1D expects input shaped (samples, steps, channels), so add a trailing channel
# axis of size 1. The sample and feature counts are read from the arrays themselves
# instead of being hard-coded, so this cell keeps working if the split size or the
# number of surviving features changes. Re-running it on already-3D arrays is a no-op.
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

In [None]:
# Both arrays should now be 3-D with a trailing axis of size 1.
x_train.shape, x_test.shape

In [None]:
# Convert the label Series to plain NumPy arrays for Keras.
# np.asarray returns an existing ndarray unchanged, so re-running this cell is safe;
# calling .to_numpy() a second time would fail because ndarrays have no such method.
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

## Build CNN

In [None]:
# 1-D CNN for binary classification.
# Three Conv1D -> BatchNormalization -> MaxPool1D -> Dropout stages with 32, 64 and 128
# filters, followed by a dense head ending in a single sigmoid unit.
# The input shape is taken from the reshaped training data (steps, channels) rather
# than hard-coded, so the model follows the number of features that survived selection.
model = Sequential()
model.add(Conv1D(filters=32, kernel_size=3, activation="relu", input_shape=x_train.shape[1:]))
model.add(BatchNormalization())
model.add(MaxPool1D(2))
model.add(Dropout(0.3))

model.add(Conv1D(filters=64, kernel_size=3, activation="relu"))
model.add(BatchNormalization())
model.add(MaxPool1D(2))
model.add(Dropout(0.5))

model.add(Conv1D(filters=128, kernel_size=3, activation="relu"))
model.add(BatchNormalization())
model.add(MaxPool1D(2))
model.add(Dropout(0.3))

# Classification head: flatten the feature maps, one hidden dense layer, sigmoid output.
model.add(Flatten())
model.add(Dense(256, activation="relu"))
model.add(Dropout(0.5))

model.add(Dense(1, activation="sigmoid"))

In [None]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit.
# `learning_rate` is the documented keyword of the Adam optimizer; `lr` is only a
# deprecated alias that newer Keras releases no longer accept.
model.compile(optimizer=Adam(learning_rate=0.00005), loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
# Train for 10 epochs and keep the per-epoch metrics in `history`.
# NOTE(review): the held-out test split is also used as validation data here, so the
# val_* metrics are not an independent estimate if they are used to tune the model.
history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), verbose=1)

In [None]:
# Per-epoch metric lists recorded by fit(); the plots below read the
# "accuracy", "val_accuracy", "loss" and "val_loss" entries.
history.history

In [None]:
# X-axis for the plots: one point per epoch actually recorded in `history`,
# so it stays in sync if the `epochs` argument of fit() is changed.
epoch_range = range(1, len(history.history["loss"]) + 1)

In [None]:
# Plot training and validation accuracy per epoch
# ("accuracy" vs "val_accuracy" from the training history).
fig, ax = plt.subplots()
ax.plot(epoch_range, history.history["accuracy"])
ax.plot(epoch_range, history.history["val_accuracy"])
ax.set_title("Model Accuracy")
ax.set_ylabel("Accuracy")
ax.set_xlabel("Epoch")
ax.legend(["Train", "Val"], loc="upper left")
plt.show()

In [None]:
# Plot training and validation loss per epoch
# ("loss" vs "val_loss" from the training history).
fig, ax = plt.subplots()
ax.plot(epoch_range, history.history["loss"])
ax.plot(epoch_range, history.history["val_loss"])
ax.set_title("Model Loss")
ax.set_ylabel("Loss")
ax.set_xlabel("Epoch")
ax.legend(["Train", "Val"], loc="upper left")
plt.show()