In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from  tensorflow.keras.optimizers.schedules import InverseTimeDecay

#   JOB-A-THON - May 2021

In [None]:
# Load the three competition files from the Kaggle input directory.
# NOTE: despite their names, `labels` holds the raw test split and `tmp` the
# sample submission; both names are rebound to other objects in later cells.
features = pd.read_csv("../input/jobathon-may-2021-credit-card-lead-prediction/train.csv")
labels = pd.read_csv("../input/jobathon-may-2021-credit-card-lead-prediction/test.csv")
tmp = pd.read_csv("../input/jobathon-may-2021-credit-card-lead-prediction/sample_submission.csv")

# Working frame: the training split without the row identifier and without
# any row that contains a missing value. `features` itself is left untouched.
data = features.drop(columns=['ID']).dropna()


Credit Card Lead Prediction

Happy Customer Bank is a mid-sized private bank that deals in all kinds of banking products, like Savings accounts, Current accounts, investment products, credit products, among other offerings.

The bank also cross-sells products to its existing customers and to do so they use different kinds of communication like telecasting, e-mails, recommendations on net banking, mobile banking, etc.

In this case, the Happy Customer Bank wants to cross-sell its credit cards to its existing customers. The bank has identified a set of customers that are eligible for taking these credit cards.

Now, the bank is looking for your help in identifying customers that could show higher intent towards a recommended credit card, given:

This dataset was part of the May 2021 Job-A-Thon conducted by Analytics Vidhya. For more information, see: https://datahack.analyticsvidhya.com/contest/job-a-thon-2/

### Dataset Explanation
- ID : Unique Identifier for a row
- Gender: Gender of the Customer

- Age : Age of the Customer (in Years)

- Region_Code : Code of the Region for the customers

- Occupation : Occupation Type for the customer

- Channel_Code : Acquisition Channel Code for the Customer (Encoded)

- Vintage : Vintage for the Customer (in months), i.e. how long the customer has been associated with the company

- Credit_Product : If the Customer has any active credit product (Home loan, Personal loan, Credit Card etc.)

- Avg_Account_Balance : Average Account Balance for the Customer in last 12 Months

- Is_Active : If the Customer is Active in last 3 Months

- Is_Lead(Target) : If the Customer is interested in the Credit Card, 0 / 1: Customer is not interested / interested

###  Dataset Preparation
 I am concatenating both datasets to get a more complete picture of the data

In [None]:
# Split the columns into numerical and categorical groups.
# Numeric and boolean dtypes count as numerical (the same set the private
# `_get_numeric_data()` helper selected); every other column is categorical.
# Both results are plain Python lists because later cells use them as lists.
num_cols = data.select_dtypes(include=["number", "bool"]).columns.tolist()
cat_cols = [col for col in data.columns if col not in num_cols]

##   Data Visualisation

In [None]:
# Display the working frame (ID column dropped, rows with missing values removed).
data

### Barplot for Categorical Data


In [None]:
# Count plot for every categorical feature, with the bars split by the target.
# The frame and the column names are passed by keyword: recent seaborn releases
# accept only `data` positionally, so a positional Series would be read as `data`.
for col in cat_cols:
    ax = sns.countplot(data=data, x=col, hue="Is_Lead", color='#fc0356')
    plt.xticks(rotation=90)  # category labels can be long; rotate to keep them legible
    ax.set_title(label=col, fontsize=20)
    plt.show()

# Distribution of the target on its own.
ax = sns.countplot(data=data, x="Is_Lead", color='#fc0356')
ax.set_title(label="Is_Lead", fontsize=20)
plt.show()

### Boxplot for Numerical Data

In [None]:
# Replace every categorical column in the working frame with its ordinal code.
# The fitted encoder is kept in `ord_Encoder` so the mapping stays available.
ord_Encoder = OrdinalEncoder()
data[cat_cols] = ord_Encoder.fit_transform(data[cat_cols])

In [None]:
# Box plot for every numerical feature, one box per target class.
# The target goes on the x axis and the feature on the y axis so that the
# per-class grouping is explicit; `hue` without both axes is ignored by older
# seaborn releases, and positional Series arguments are rejected by newer ones.
for col in num_cols:
    if col == "Is_Lead":
        # The target may still be listed among the numeric columns here;
        # a box plot of the target grouped by itself carries no information.
        continue
    ax = sns.boxplot(data=data, x="Is_Lead", y=col, color='#fc0356')
    ax.set_title(label=col, fontsize=20)
    plt.show()

### Description for all of the Data

In [None]:
# Summary statistics of the working frame; the categorical columns were
# ordinal-encoded in an earlier cell, so they are summarised as numbers too.
data.describe()

###   Correlation Heatmap

In [None]:
# Pairwise correlation of all columns (features and target), drawn as an
# annotated heatmap; the trailing semicolon hides the Axes repr.
corr_matrix = data.corr()
sns.heatmap(corr_matrix, annot=True);


In [None]:
# (rows, columns) of the working frame.
data.shape

In [None]:
# Separate the target column from the model inputs.
labels = data["Is_Lead"]
features = data.drop(columns=["Is_Lead"])

# Drop the target from the numeric column list by name rather than by position:
# this does not depend on the target being the last numeric column, and
# re-running the cell cannot accidentally remove a real feature.
num_cols = [col for col in num_cols if col != "Is_Lead"]

In [None]:
# Rescale the numeric feature columns to the [0, 1] range.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics influence the scaling — consider fitting on the
# training rows only.
num_Encoder = MinMaxScaler()
features[num_cols] = num_Encoder.fit_transform(features[num_cols])

In [None]:
# Display the feature frame (target column removed).
features

In [None]:
# (rows, columns) of the feature frame; the column count is the model's input width.
features.shape

In [None]:
# Display the target series (the Is_Lead column).
labels

In [None]:
# Hold out a third of the rows for evaluation. The fixed random_state makes the
# shuffled split (and therefore every downstream metric) reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    features, labels, test_size=0.33, shuffle=True, random_state=42
)

### Creating the Model

In [None]:
# Fully connected binary classifier.
# Hidden layers use ReLU: softmax normalises each layer's activations to sum
# to 1 across its units, which is meant for multi-class outputs and cripples
# a hidden layer. The single sigmoid unit outputs the lead probability.
# The input width is taken from the training frame instead of being hard-coded.
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=[X_train.shape[1]]),
    layers.BatchNormalization(),
    layers.Dropout(0.6),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.6),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1, activation="sigmoid")
])

###  Compiling the Model

In [None]:
# Adam optimiser with binary cross-entropy loss; binary accuracy is tracked
# and appears in the training history under the key 'binary_accuracy'.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])

###  Fitting the Model

In [None]:
# Train on the training split and keep the per-epoch metrics in `history`.
# The `workers` argument was dropped: it only applies to generator / Sequence
# inputs (not in-memory frames) and is no longer accepted by Keras 3.
history = model.fit(
    x=X_train,
    y=Y_train,
    epochs=20,
    verbose=1,
    batch_size=264,
)

In [None]:
# Collect the metrics recorded by fit() into a DataFrame for inspection and plotting.
history_frame = pd.DataFrame(history.history)

In [None]:

# Display the recorded training metrics.
history_frame

In [None]:

# Training loss curve.
history_frame['loss'].plot()


In [None]:
# Training binary-accuracy curve.
history_frame['binary_accuracy'].plot()

In [None]:

# Loss and binary accuracy on the held-out test split.
model.evaluate(X_test, Y_test)