In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's 'whitegrid' theme to every figure created below.
sns.set(style='whitegrid')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
from matplotlib import rcParams
# Global patch styling for bars/histograms: always draw an edge around patches
# and use blue as the default fill colour.
rcParams.update({
    'patch.force_edgecolor': True,
    'patch.facecolor': 'b',
})

In [None]:
# Load the 'Data' sheet of the workbook; the path is relative to the notebook's working directory.
df = pd.read_excel(
    './Bank_Personal_Loan_Modelling.xlsx',
    sheet_name='Data',
)

# 1. Overview

In [None]:
# Column names, dtypes and non-null counts of the loaded frame.
df.info()

In [None]:
# Preview the first rows of the raw data.
df.head()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
df.describe()

In [None]:
# Number of distinct values per column; the same counts are used further down
# to split the columns into categorical and continuous variables.
df.nunique()

In [None]:
# Remove fully identical rows, keeping the first occurrence.
# NOTE(review): 'ID' is still a regular column at this point (it only becomes the
# index in a later cell), so rows that differ only by ID are not treated as
# duplicates -- confirm this is intended.
df = df.drop_duplicates()

In [None]:
# (rows, columns) after duplicate removal.
df.shape

In [None]:
# Move 'ID' into the row index so it is not picked up as a variable when the
# columns are split into categorical/continuous lists below.
# Guarded so that re-running this cell is a no-op instead of raising KeyError
# once 'ID' is already the index.
if 'ID' in df.columns:
    df = df.set_index('ID')

In [None]:
# Split the columns by cardinality: at most 5 distinct values -> categorical,
# more than 5 -> continuous. Column order is preserved in both lists.
unique_counts = df.nunique()
cate_var = unique_counts[unique_counts <= 5].index.tolist()
# 'Personal Loan' is the variable the later plots compare against, so it is
# kept out of the list of categorical variables.
cate_var.remove('Personal Loan')
cont_var = unique_counts[unique_counts > 5].index.tolist()
print('Categorical variables:', cate_var)
print('Continuous variables:', cont_var)

# 2. Plotting

## 2.1. Continuous and Categorical variables

In [None]:
# Distribution (histogram + KDE) of each continuous variable, one panel per
# variable; the 2x3 grid has room for six panels.
# NOTE: sns.distplot is deprecated since seaborn 0.11 (histplot/displot are the
# replacements); it is kept so the notebook still runs on the seaborn version
# it was written for.
fig_1 = plt.figure(figsize=(25, 9))
for panel, col in enumerate(cont_var, start=1):
    ax = fig_1.add_subplot(2, 3, panel)
    sns.distplot(df[col], color='y', ax=ax)

In [None]:
# Frequency of each level of every categorical variable, one panel per
# variable; the 2x3 grid has room for six panels.
fig_2 = plt.figure(figsize=(25, 9))
for i, col in enumerate(cate_var):
    ax = fig_2.add_subplot(2, 3, i + 1)
    # Pass the column by keyword: from seaborn 0.12 on the first positional
    # argument of countplot is `data`, not `x`, so the positional form no
    # longer plots the column. Keywords work on old and new versions alike and
    # match the countplot call used for the hue plots further down.
    sns.countplot(x=col, data=df, palette='RdBu_r', ax=ax)

## 2.2. Personal Loan

In [None]:
# Box plot of each continuous variable, grouped by the 'Personal Loan' value;
# one panel per variable on a 2x3 grid.
fig_3 = plt.figure(figsize=(25, 9))
for panel, col in enumerate(cont_var, start=1):
    ax = fig_3.add_subplot(2, 3, panel)
    sns.boxplot(x='Personal Loan', y=col, data=df, palette='RdBu_r', ax=ax)

In [None]:
# Overlaid KDE curves of each continuous variable for the two 'Personal Loan'
# groups (0 in red, 1 in blue); one panel per variable on a 2x3 grid.
# NOTE: sns.distplot is deprecated since seaborn 0.11 (kdeplot is the
# replacement for hist=False); kept for the seaborn version this was written for.
fig_4 = plt.figure(figsize=(25, 9))
no_loan = df['Personal Loan'] == 0
has_loan = df['Personal Loan'] == 1
for panel, col in enumerate(cont_var, start=1):
    ax = fig_4.add_subplot(2, 3, panel)
    sns.distplot(df.loc[no_loan, col], hist=False, label='No Personal Loan', color='r', ax=ax)
    sns.distplot(df.loc[has_loan, col], hist=False, label='Is Personal Loan', color='b', ax=ax)

In [None]:
# Mean of 'Personal Loan' for each level of every categorical variable
# (no confidence-interval bars); one panel per variable on a 2x3 grid.
fig_5 = plt.figure(figsize=(25, 9))
for panel, col in enumerate(cate_var, start=1):
    ax = fig_5.add_subplot(2, 3, panel)
    sns.barplot(x=col, y='Personal Loan', data=df, ci=None, palette='RdBu_r', ax=ax)

In [None]:
# Row counts per level of every categorical variable, split by 'Personal Loan';
# one panel per variable on a 2x3 grid.
fig_6 = plt.figure(figsize=(25, 9))
for panel, col in enumerate(cate_var, start=1):
    ax = fig_6.add_subplot(2, 3, panel)
    sns.countplot(x=col, hue='Personal Loan', data=df, palette='RdBu_r', ax=ax)

## 2.3. Income

In [None]:
# Continuous variables to plot against Income: a new list holding everything in
# cont_var except 'Income' itself (cont_var is left untouched).
# list.index raises ValueError if 'Income' is not among the continuous variables.
income_pos = cont_var.index('Income')
cont_var_temp = cont_var[:income_pos] + cont_var[income_pos + 1:]

In [None]:
# Income against each remaining continuous variable, coloured by
# 'Personal Loan'; one panel per variable on a 2x3 grid.
fig_7 = plt.figure(figsize=(25, 9))
for i, col in enumerate(cont_var_temp):
    ax = fig_7.add_subplot(2, 3, i + 1)
    # x must be passed by keyword: from seaborn 0.12 on the first positional
    # argument of scatterplot is `data`, so a positional 'Income' collides with
    # data=df and raises TypeError. Keywords work on old and new versions.
    sns.scatterplot(x='Income', y=col, hue='Personal Loan', data=df, palette='RdBu_r', ax=ax)

In [None]:
# Income against each categorical variable, coloured by 'Personal Loan';
# one panel per variable on a 2x3 grid.
fig_8 = plt.figure(figsize=(25, 9))
for i, col in enumerate(cate_var):
    ax = fig_8.add_subplot(2, 3, i + 1)
    # x must be passed by keyword: from seaborn 0.12 on the first positional
    # argument of scatterplot is `data`, so a positional 'Income' collides with
    # data=df and raises TypeError. Keywords work on old and new versions.
    sns.scatterplot(x='Income', y=col, hue='Personal Loan', data=df, palette='RdBu_r', ax=ax)

## 2.4. CCAvg

In [None]:
# Continuous variables to plot against CCAvg: everything in cont_var except
# 'Income' (already removed for the Income plots above) and 'CCAvg' itself.
# Rebuilt from cont_var rather than calling .remove() on the existing list, so
# re-running this cell gives the same result instead of raising ValueError on
# the second run.
cont_var_temp = [col for col in cont_var if col not in ('Income', 'CCAvg')]

In [None]:
# CCAvg against each remaining continuous variable, coloured by
# 'Personal Loan'; the 2x2 grid has room for four panels.
fig_9 = plt.figure(figsize=(25, 9))
for i, col in enumerate(cont_var_temp):
    ax = fig_9.add_subplot(2, 2, i + 1)
    # x and y must be passed by keyword: from seaborn 0.12 on scatterplot
    # accepts only `data` positionally, so two positional arguments raise
    # TypeError. Keywords work on old and new versions.
    sns.scatterplot(x='CCAvg', y=col, hue='Personal Loan', data=df, palette='RdBu_r', ax=ax)

In [None]:
# CCAvg against each categorical variable, coloured by 'Personal Loan';
# one panel per variable on a 2x3 grid.
fig_10 = plt.figure(figsize=(25, 9))
for i, col in enumerate(cate_var):
    ax = fig_10.add_subplot(2, 3, i + 1)
    # x and y must be passed by keyword: from seaborn 0.12 on scatterplot
    # accepts only `data` positionally, so two positional arguments raise
    # TypeError. Keywords work on old and new versions.
    sns.scatterplot(x='CCAvg', y=col, hue='Personal Loan', data=df, palette='RdBu_r', ax=ax)