# Decision Tree
## Bank_Personal_Loan_Modelling

---

In [1]:
import numpy as np
import pandas as pd 
from sklearn import tree
from sklearn import preprocessing

In [2]:
dataset = pd.read_excel('Bank_Personal_Loan_Modelling.xlsx',sheet_name=1)
dataset.head()

Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,2,45,19,34,90089,3,1.5,1,0,0,1,0,0,0
2,3,39,15,11,94720,1,1.0,1,0,0,0,0,0,0
3,4,35,9,100,94112,1,2.7,2,0,0,0,0,0,0
4,5,35,8,45,91330,4,1.0,2,0,0,0,0,0,1


### Checking Null values

In [3]:
dataset.isna().sum().sum()

0

### Drop Unnecessary records

In [4]:
dataset.drop(['ID','ZIP Code'],axis=1,inplace=True)

In [5]:
dataset.head()

Unnamed: 0,Age,Experience,Income,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,25,1,49,4,1.6,1,0,0,1,0,0,0
1,45,19,34,3,1.5,1,0,0,1,0,0,0
2,39,15,11,1,1.0,1,0,0,0,0,0,0
3,35,9,100,1,2.7,2,0,0,0,0,0,0
4,35,8,45,4,1.0,2,0,0,0,0,0,1


In [6]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Age                 5000 non-null   int64  
 1   Experience          5000 non-null   int64  
 2   Income              5000 non-null   int64  
 3   Family              5000 non-null   int64  
 4   CCAvg               5000 non-null   float64
 5   Education           5000 non-null   int64  
 6   Mortgage            5000 non-null   int64  
 7   Personal Loan       5000 non-null   int64  
 8   Securities Account  5000 non-null   int64  
 9   CD Account          5000 non-null   int64  
 10  Online              5000 non-null   int64  
 11  CreditCard          5000 non-null   int64  
dtypes: float64(1), int64(11)
memory usage: 468.9 KB


---

## Random Forest 

In [7]:
from sklearn.ensemble import RandomForestClassifier

In [8]:
dataset.columns

Index(['Age', 'Experience', 'Income', 'Family', 'CCAvg', 'Education',
       'Mortgage', 'Personal Loan', 'Securities Account', 'CD Account',
       'Online', 'CreditCard'],
      dtype='object')

### - define random forest

In [9]:
rf_model = RandomForestClassifier(n_estimators=1000, max_features=2, oob_score=True)

In [10]:
features = ['Age','Experience','Income','Family','CCAvg','Education','Mortgage','Securities Account','CD Account','Online','CreditCard']

In [11]:
rf_model.fit( X =dataset[features], y=dataset['Personal Loan'])

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features=2,
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=True, random_state=None,
                       verbose=0, warm_start=False)

### - Accuracy

In [12]:
print('Accuracy of 10 IDV and 1 DV ')
print('OOB Accuracy :',rf_model.oob_score_)

Accuracy of 10 IDV and 1 DV 
OOB Accuracy : 0.9876


### - Imp varaible

In [13]:
for features,imp in zip (features,rf_model.feature_importances_) :
    print(features,imp)

Age 0.04473006251736327
Experience 0.045071524370042124
Income 0.3481239506631919
Family 0.09453342516083349
CCAvg 0.18421884765054197
Education 0.15981548681106658
Mortgage 0.04407004931112012
Securities Account 0.005402728488668064
CD Account 0.05558866699881775
Online 0.008363735540908619
CreditCard 0.010081522487446015


---

# Decision Tree 1

In [14]:
predictors = pd.DataFrame([dataset['Income'],dataset['Education'],dataset['CCAvg']]).T

In [15]:
tree_model = tree.DecisionTreeClassifier(max_depth=6)

In [16]:
tree_model.fit(X=predictors, y=dataset['Personal Loan'])

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=6, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

### - Graphviz

In [18]:
with open('Dtree1.dot','w') as f :
    f = tree.export_graphviz(tree_model, feature_names=['Income','Education','CCAvg'], out_file=f);

### - Accuracy

In [20]:
Accuracy = tree_model.score(X=predictors, y=dataset['Personal Loan'])

In [21]:
print('Accuracy of 3 IDV and 1 DV ')
print('Accuracy :',Accuracy)

Accuracy of 3 IDV and 1 DV 
Accuracy : 0.9738


In [22]:
print('Accuracy of 3 IDV and 1 DV ')
print('Accuracy :',Accuracy*100,'%')

Accuracy of 3 IDV and 1 DV 
Accuracy : 97.38 %


---

# Decision Tree 2

In [23]:
tree_model2 = tree.DecisionTreeClassifier()

In [24]:
predictors2 = pd.DataFrame([dataset['Income'],dataset['Family'],dataset['Education'],dataset['CCAvg']]).T

In [25]:
tree_model2 = tree.DecisionTreeClassifier(max_depth=8)

In [26]:
tree_model2.fit(X=predictors2, y=dataset['Personal Loan'])

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=8, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

### - Graphviz

In [27]:
with open('Dtree2.dot','w') as f :
    f2 = tree.export_graphviz(tree_model2, feature_names=['Income','Family','Education','CCAvg'], out_file=f);

### - Accuracy

In [28]:
Accuracy2 = tree_model2.score(X=predictors2, y=dataset['Personal Loan'])

In [29]:
print('Accuracy of 4 IDV and 1 DV ')
print('Accuracy :',Accuracy2)

Accuracy of 4 IDV and 1 DV 
Accuracy : 0.9956


In [30]:
print('Accuracy of 4 IDV and 1 DV ')
print('Accuracy :',Accuracy2*100,'%')

Accuracy of 4 IDV and 1 DV 
Accuracy : 99.56 %
