In [None]:
# Importing required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import sklearn.metrics as metric

## Reading data

In [None]:
# Load the bank personal-loan dataset from the CSV file in the working directory
data = pd.read_csv(filepath_or_buffer="Bank_Personal_Loan_Modelling.csv")

## Data Insight

In [None]:
# Report the dataset size: rows are instances, columns are counted as features
n_rows, n_cols = data.shape
print('Number of instances = {}\nNumber of features = {}'.format(n_rows, n_cols))

In [None]:
# Sneak peek: display the first five rows of the raw data
data.head(n=5)

In [None]:
# Column dtypes and non-null counts (data.info() prints its summary itself)
print("Data DataType")
data.info()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns
print("Data Description")
data.describe()

In [None]:
# One histogram per numeric column, to inspect the distributions visually
data.hist(figsize=(20, 15), bins=50)
plt.show()

In [None]:
# Frequency table of the distinct values in every column
col_names = data.columns.tolist()
for features in col_names:
    header = "Unique Elements in Column : {}".format(features)
    counts = data[features].value_counts()
    print(header)
    print(counts)
    print('\n\n')

In [None]:
# Keep only the rows where Mortgage is 0 and report how many there are
zero_mortage = data.loc[data['Mortgage'].eq(0)]
print('People with zero Mortgage : {}'.format(len(zero_mortage)))

In [None]:
# Keep only the rows where the average credit card spending (CCAvg) is 0
# and report how many there are
zero_credit = data.loc[data['CCAvg'].eq(0)]
print('People with zero credit card spending per month : {}'.format(len(zero_credit)))

In [None]:
# Columns treated as categorical in this analysis
cat_col = [
    'CD Account',
    'CreditCard',
    'Education',
    'Family',
    'Online',
    'Personal Loan',
    'Securities Account',
]

# Frequency table of the distinct values in each categorical column
for features in cat_col:
    header = "Unique Elements in Column : {}".format(features)
    counts = data[features].value_counts()
    print(header)
    print(counts)
    print('\n\n')

## Getting the data model-ready

In [None]:
# Target variable: the 'Personal Loan' column
label = data.loc[:, 'Personal Loan']

#### Normalizing the data, i.e. $z = \dfrac{x - \min(x)}{\max(x) - \min(x)}$, where $x$ is a column of the dataframe

In [None]:
# Min-max scale the continuous columns into [0, 1]: z = (x - min) / (max - min).
# Series.min()/.max() are used instead of the Python builtins: they are
# vectorised and skip NaN, whereas builtin min()/max() iterate element by
# element and give order-dependent results when a NaN is present.
# NOTE(review): min/max are computed on the full dataset, before the train/test
# split, so test-set statistics influence the scaling — consider deriving them
# from the training rows only.
for column in ['Age', 'CCAvg', 'Experience', 'Income', 'Mortgage']:
    col_min = data[column].min()
    col_max = data[column].max()
    data[column] = (data[column] - col_min) / (col_max - col_min)

In [None]:
# Remove the identifier columns and the target column, leaving only the
# model input features
data = data.drop(columns=['ID', 'ZIP Code', 'Personal Loan'])

## Splitting Data into train and test

In [None]:
# Split the data in the ratio 70:30 (train:test).
# stratify=label keeps the class proportions of the target the same in both splits.
# random_state pins the shuffle so that re-running the notebook reproduces the
# same split and therefore the same metrics.
X_train, X_test, Y_train, Y_test = train_test_split(
    data,
    label,
    stratify=label,
    train_size=0.70,
    test_size=0.30,
    random_state=42,
)

In [None]:
# Show the (rows, columns) shape of the train and test feature sets
(X_train.shape, X_test.shape)

## Logistic Regression

In [None]:
# Create a LogisticRegression classifier with scikit-learn's default settings
log_reg = LogisticRegression()

In [None]:
# Fit the classifier on the training features and labels
log_reg.fit(X=X_train, y=Y_train)

In [None]:
# Predict the class labels of the test set with the fitted classifier
Y_pred = log_reg.predict(X=X_test)

## Metric Score

In [None]:
# Compare the test-set predictions against the true labels

# Accuracy
accuracy = metric.accuracy_score(Y_test, Y_pred)
print('ACCURACY : {}\n'.format(accuracy))

# Confusion matrix
conf_matrix = metric.confusion_matrix(Y_test, Y_pred)
print('Confusion Matrix :\n {}\n'.format(conf_matrix))

# Per-class precision, recall, F1 score and support
class_report = metric.classification_report(Y_test, Y_pred)
print('Classification Report :\n {}\n'.format(class_report))