# Import Packages

In [68]:
import pandas as pd
import numpy as np
from sklearn import tree
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier

# Loading the dataset

In [69]:
bank=pd.read_excel("C:/Python/Day-24/Dataset/Bank_Personal_Loan_Modelling.xlsx",sheet_name='Data')
bank.head()

Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,2,45,19,34,90089,3,1.5,1,0,0,1,0,0,0
2,3,39,15,11,94720,1,1.0,1,0,0,0,0,0,0
3,4,35,9,100,94112,1,2.7,2,0,0,0,0,0,0
4,5,35,8,45,91330,4,1.0,2,0,0,0,0,0,1


# Checking Null Values

In [70]:
bank.isna().sum()

ID                    0
Age                   0
Experience            0
Income                0
ZIP Code              0
Family                0
CCAvg                 0
Education             0
Mortgage              0
Personal Loan         0
Securities Account    0
CD Account            0
Online                0
CreditCard            0
dtype: int64

In [71]:
bank.columns

Index(['ID', 'Age', 'Experience', 'Income', 'ZIP Code', 'Family', 'CCAvg',
       'Education', 'Mortgage', 'Personal Loan', 'Securities Account',
       'CD Account', 'Online', 'CreditCard'],
      dtype='object')

#  Dropping Unnecessary data

In [72]:
bank2=bank.drop(['ID','ZIP Code'],axis=1,inplace=True)

In [73]:
bank2=bank.dropna()

In [74]:
bank3=bank2.drop_duplicates()

### Now we are going to use Tree Model so we are using Random Forest here

In [75]:
from sklearn.ensemble import RandomForestClassifier

In [76]:
bank3['CCAvg']=np.round(bank3['CCAvg'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


# Defining Attributes Random Forest

In [77]:
rf_model=RandomForestClassifier(n_estimators=100,max_features=2,oob_score=True)

In [78]:
features=[ 'Age', 'Experience', 'Income',  'Family', 'CCAvg',
       'Education', 'Mortgage', 'Securities Account',
       'CD Account', 'Online', 'CreditCard']

In [79]:
rf_model.fit(X=bank3[features],y=bank3['Personal Loan'])

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features=2,
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=True, random_state=None,
                       verbose=0, warm_start=False)

# OOB Accuracy

In [80]:
print("OOB Accuracy :-",rf_model.oob_score_)

OOB Accuracy :- 0.9859635051132946


# Impure Variable

In [81]:
for features,imp in zip(features,rf_model.feature_importances_):
    print(features,imp)

Age 0.0495517096864813
Experience 0.04847258371385971
Income 0.3645722543744029
Family 0.10512784497247324
CCAvg 0.14227660807502862
Education 0.16953682610099471
Mortgage 0.04549708665054393
Securities Account 0.00586955576993773
CD Account 0.05065622260620472
Online 0.008810702164940199
CreditCard 0.009628605885133002


# Decision Tree

In [82]:
tree_model=tree.DecisionTreeClassifier(max_depth=6,max_leaf_nodes=10)

In [83]:
predictors=pd.DataFrame([bank3['Education'],bank3['CCAvg'],bank3['Income']]).T

In [84]:
tree_model.fit(X=predictors, y=bank3['Personal Loan'])

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=6, max_features=None, max_leaf_nodes=10,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

# Graphiz

In [85]:
with open('Bank Loan Modeling.dot','w') as f:
    f=tree.export_graphviz(tree_model,feature_names=['Education','CCAvg','Income'],out_file=f);


# Accuracy

In [86]:
Accuracy = tree_model.score(X=predictors, y=bank3['Personal Loan'])
Accuracy

0.970723882093443

#  This model is 97.5 % is accurate

In [87]:
from sklearn.tree import export_text
feature=['Education','CCAvg','Income']
r=export_text(tree_model,feature_names=feature)
print(r)

|--- Income <= 113.50
|   |--- CCAvg <= 3.50
|   |   |--- Income <= 101.50
|   |   |   |--- class: 0
|   |   |--- Income >  101.50
|   |   |   |--- class: 0
|   |--- CCAvg >  3.50
|   |   |--- Income <= 88.50
|   |   |   |--- class: 0
|   |   |--- Income >  88.50
|   |   |   |--- Education <= 1.50
|   |   |   |   |--- class: 0
|   |   |   |--- Education >  1.50
|   |   |   |   |--- Education <= 2.50
|   |   |   |   |   |--- class: 1
|   |   |   |   |--- Education >  2.50
|   |   |   |   |   |--- Income <= 92.50
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- Income >  92.50
|   |   |   |   |   |   |--- class: 1
|--- Income >  113.50
|   |--- Education <= 1.50
|   |   |--- class: 0
|   |--- Education >  1.50
|   |   |--- Income <= 116.50
|   |   |   |--- class: 0
|   |   |--- Income >  116.50
|   |   |   |--- class: 1

