### Importing the packages

In [1]:
import pandas as pd
import numpy as np
from sklearn import tree
from sklearn import preprocessing

### Loading the dataset

In [2]:
data = pd.read_excel('Bank_Personal_Loan_Modelling.xlsx',sheet_name=1)
data.head()

Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,2,45,19,34,90089,3,1.5,1,0,0,1,0,0,0
2,3,39,15,11,94720,1,1.0,1,0,0,0,0,0,0
3,4,35,9,100,94112,1,2.7,2,0,0,0,0,0,0
4,5,35,8,45,91330,4,1.0,2,0,0,0,0,0,1


### Checking for null values

In [3]:
data.isna().sum()

ID                    0
Age                   0
Experience            0
Income                0
ZIP Code              0
Family                0
CCAvg                 0
Education             0
Mortgage              0
Personal Loan         0
Securities Account    0
CD Account            0
Online                0
CreditCard            0
dtype: int64

In [6]:
data.columns

Index(['ID', 'Age', 'Experience', 'Income', 'ZIP Code', 'Family', 'CCAvg',
       'Education', 'Mortgage', 'Personal Loan', 'Securities Account',
       'CD Account', 'Online', 'CreditCard'],
      dtype='object')

### Random Forest

In [4]:
from sklearn.ensemble import RandomForestClassifier

In [7]:
features = ['Age', 'Experience', 'Income','Family', 'CCAvg',
       'Education', 'Mortgage','Securities Account',
       'CD Account', 'Online', 'CreditCard']

In [8]:
rf_model=RandomForestClassifier(n_estimators=1000,max_features=2,oob_score=True)
rf_model.fit(X=data[features],y=data['Personal Loan'])
rf_model.oob_score_

0.987

In [9]:
for features,imp in zip(features,rf_model.feature_importances_):
    print(features,imp)

Age 0.04358172290818363
Experience 0.04335993575446825
Income 0.35349530503828713
Family 0.09671840917680596
CCAvg 0.17617179443122807
Education 0.16774130720813824
Mortgage 0.043270145275605604
Securities Account 0.005419010270199526
CD Account 0.05235887864918948
Online 0.008140631148736739
CreditCard 0.009742860139157468


#### Inference : This shows that the important features are Income,CCAvg and Education

### Decision Tree

In [14]:
tree_model = tree.DecisionTreeClassifier(max_depth=6)
predictors = pd.DataFrame([data['Income'],data['CCAvg'],data['Education']]).T
tree_model.fit(X=predictors,y=data['Personal Loan'])

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=6,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

In [15]:
tree_model.score(X=predictors,y=data['Personal Loan'])

0.9738

In [16]:
with open('Dtree3.dot','w') as f:
    f = tree.export_graphviz(tree_model,feature_names=['Income','CCAvg','Education'],out_file=f)