# Logistic Regression loan data Classification

In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import roc_curve
import warnings
warnings.filterwarnings("ignore")

## Problem statement

## Data Gathering

In [56]:
df=pd.read_csv("loan_data.csv")
df.head()

Unnamed: 0,credit_policy,purpose,int_rate,installment,log_annual_inc,dti,fico,days_with_cr_line,revol_bal,revol_util,inq_last_6mths,delinq_2yrs,pub_rec,not_fully_paid
0,1,debt_consolidation,0.1189,829.1,11.350407,19.48,737,5639.958333,28854,52.1,0,0,0,0
1,1,credit_card,0.1071,228.22,11.082143,14.29,707,2760.0,33623,76.7,0,0,0,0
2,1,debt_consolidation,0.1357,366.86,10.373491,11.63,682,4710.0,3511,25.6,1,0,0,0
3,1,debt_consolidation,0.1008,162.34,11.350407,8.1,712,2699.958333,33667,73.2,1,0,0,0
4,1,credit_card,0.1426,102.92,11.299732,14.97,667,4066.0,4740,39.5,0,1,0,0


## EDA

In [57]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9578 entries, 0 to 9577
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   credit_policy      9578 non-null   int64  
 1   purpose            9578 non-null   object 
 2   int_rate           9578 non-null   float64
 3   installment        9578 non-null   float64
 4   log_annual_inc     9578 non-null   float64
 5   dti                9578 non-null   float64
 6   fico               9578 non-null   int64  
 7   days_with_cr_line  9578 non-null   float64
 8   revol_bal          9578 non-null   int64  
 9   revol_util         9578 non-null   float64
 10  inq_last_6mths     9578 non-null   int64  
 11  delinq_2yrs        9578 non-null   int64  
 12  pub_rec            9578 non-null   int64  
 13  not_fully_paid     9578 non-null   int64  
dtypes: float64(6), int64(7), object(1)
memory usage: 1.0+ MB


In [58]:
df.describe()

Unnamed: 0,credit_policy,int_rate,installment,log_annual_inc,dti,fico,days_with_cr_line,revol_bal,revol_util,inq_last_6mths,delinq_2yrs,pub_rec,not_fully_paid
count,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0,9578.0
mean,0.80497,0.12264,319.089413,10.932117,12.606679,710.846314,4560.767197,16913.96,46.799236,1.577469,0.163708,0.062122,0.160054
std,0.396245,0.026847,207.071301,0.614813,6.88397,37.970537,2496.930377,33756.19,29.014417,2.200245,0.546215,0.262126,0.366676
min,0.0,0.06,15.67,7.547502,0.0,612.0,178.958333,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,0.1039,163.77,10.558414,7.2125,682.0,2820.0,3187.0,22.6,0.0,0.0,0.0,0.0
50%,1.0,0.1221,268.95,10.928884,12.665,707.0,4139.958333,8596.0,46.3,1.0,0.0,0.0,0.0
75%,1.0,0.1407,432.7625,11.291293,17.95,737.0,5730.0,18249.5,70.9,2.0,0.0,0.0,0.0
max,1.0,0.2164,940.14,14.528354,29.96,827.0,17639.95833,1207359.0,119.0,33.0,13.0,5.0,1.0


In [59]:
df["purpose"].unique()

array(['debt_consolidation', 'credit_card', 'all_other',
       'home_improvement', 'small_business', 'major_purchase',
       'educational'], dtype=object)

In [78]:
df['purpose'].replace({"all_other":0,"small_business":1,"educational":2,"major_purchase":3,"debt_consolidation":4,
                      "home_improvement":5,"credit_card":6},inplace=True)

In [79]:
purpose={"all_other":0,"small_business":1,"educational":2,"major_purchase":3,"debt_consolidation":4,
                      "home_improvement":5,"credit_card":6}
purpose

{'all_other': 0,
 'small_business': 1,
 'educational': 2,
 'major_purchase': 3,
 'debt_consolidation': 4,
 'home_improvement': 5,
 'credit_card': 6}

## Model Training

In [62]:
x=df.drop("not_fully_paid",axis=1)
y=df["not_fully_paid"]

In [63]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.30,random_state=2,stratify=y)


In [64]:
## Create instance
model=LogisticRegression()

## Fit the Model
model.fit(x_train,y_train)



## Model Evaluation

In [65]:
## Testing data
y_pred = model.predict(x_test)
y_pred

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [66]:
## For Testing
y_pred = model.predict(x_test)

cnf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n",cnf_matrix)

print("-"*65)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Score:",accuracy)
print("-"*65)

clf_report = classification_report(y_test, y_pred)
print("Classification report:\n",clf_report)

Confusion Matrix:
 [[2414    0]
 [ 460    0]]
-----------------------------------------------------------------
Accuracy Score: 0.8399443284620738
-----------------------------------------------------------------
Classification report:
               precision    recall  f1-score   support

           0       0.84      1.00      0.91      2414
           1       0.00      0.00      0.00       460

    accuracy                           0.84      2874
   macro avg       0.42      0.50      0.46      2874
weighted avg       0.71      0.84      0.77      2874



In [67]:
## For Training
y_pred_train = model.predict(x_train)

cnf_matrix = confusion_matrix(y_train, y_pred_train)
print("Confusion Matrix:\n",cnf_matrix)

print("-"*65)
accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy Score:",accuracy)
print("-"*65)

clf_report = classification_report(y_train, y_pred_train)
print("Classification report:\n",clf_report)

Confusion Matrix:
 [[5630    1]
 [1072    1]]
-----------------------------------------------------------------
Accuracy Score: 0.8399463007159904
-----------------------------------------------------------------
Classification report:
               precision    recall  f1-score   support

           0       0.84      1.00      0.91      5631
           1       0.50      0.00      0.00      1073

    accuracy                           0.84      6704
   macro avg       0.67      0.50      0.46      6704
weighted avg       0.79      0.84      0.77      6704



## Test single row

In [68]:
column=x.columns
column

Index(['credit_policy', 'purpose', 'int_rate', 'installment', 'log_annual_inc',
       'dti', 'fico', 'days_with_cr_line', 'revol_bal', 'revol_util',
       'inq_last_6mths', 'delinq_2yrs', 'pub_rec'],
      dtype='object')

In [69]:
x_test.head(1).T

Unnamed: 0,298
credit_policy,1.0
purpose,0.0
int_rate,0.0933
installment,87.88
log_annual_inc,11.626254
dti,13.46
fico,702.0
days_with_cr_line,6120.0
revol_bal,27786.0
revol_util,85.5


In [70]:
credit_policy = 1.000000
purpose = 0.000000
int_rate = 0.093300
installment = 87.880000
log_annual_inc = 11.626254
dti = 13.460000
fico = 702.000000
days_with_cr_line= 6120.000000
revol_bal = 27786.000000
revol_util = 85.500000
inq_last_6mths = 2.000000
delinq_2yrs = 0.000000
pub_rec = 1.000000

In [71]:
array=np.zeros(len(column),dtype=int)
array

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [72]:
array[0] = credit_policy
array[1] = purpose
array[2] = int_rate
array[3] = installment
array[4] = log_annual_inc
array[5] = dti
array[6] = fico
array[7] = days_with_cr_line
array[8] = revol_bal
array[9] = revol_util
array[10] = inq_last_6mths
array[11] = delinq_2yrs
array[12] = pub_rec

In [73]:
array

array([    1,     0,     0,    87,    11,    13,   702,  6120, 27786,
          85,     2,     0,     1])

In [75]:
prediction=model.predict([array])[0]
prediction
if prediction == 0:
    print("Sorry , We are unble to proceed with your Request.. Please try again next time")
else:
    print("Congratulations !!!, You are eligible to get Loan from our Organisation.")

Sorry , We are unble to proceed with your Request.. Please try again next time


In [80]:

project_data={"purpose":purpose,"column":list(column)}
project_data

{'purpose': {'all_other': 0,
  'small_business': 1,
  'educational': 2,
  'major_purchase': 3,
  'debt_consolidation': 4,
  'home_improvement': 5,
  'credit_card': 6},
 'column': ['credit_policy',
  'purpose',
  'int_rate',
  'installment',
  'log_annual_inc',
  'dti',
  'fico',
  'days_with_cr_line',
  'revol_bal',
  'revol_util',
  'inq_last_6mths',
  'delinq_2yrs',
  'pub_rec']}

In [81]:
import json
with open("Loan_data.json","w") as f:
    json.dump(project_data,f)

In [82]:
import pickle
with open("Loan_data.pkl","wb") as f:
    pickle.dump(model,f)

In [84]:
import json
with open("Loan_data.json","r") as f:
    dicti=json.load(f)

In [85]:
dicti

{'purpose': {'all_other': 0,
  'small_business': 1,
  'educational': 2,
  'major_purchase': 3,
  'debt_consolidation': 4,
  'home_improvement': 5,
  'credit_card': 6},
 'column': ['credit_policy',
  'purpose',
  'int_rate',
  'installment',
  'log_annual_inc',
  'dti',
  'fico',
  'days_with_cr_line',
  'revol_bal',
  'revol_util',
  'inq_last_6mths',
  'delinq_2yrs',
  'pub_rec']}

In [86]:
list1=list(dicti['purpose'].keys())

In [87]:
for i in list1:
    print(f'<option value = "{i}">{i}</option>')

<option value = "all_other">all_other</option>
<option value = "small_business">small_business</option>
<option value = "educational">educational</option>
<option value = "major_purchase">major_purchase</option>
<option value = "debt_consolidation">debt_consolidation</option>
<option value = "home_improvement">home_improvement</option>
<option value = "credit_card">credit_card</option>
