In [145]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler,OneHotEncoder,OrdinalEncoder
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
import seaborn as sns
import pandas as pd

In [146]:
df = pd.read_csv('loan_approval_dataset.csv')

In [147]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Age                1000 non-null   int64 
 1   Salary             1000 non-null   int64 
 2   Credit_Score       1000 non-null   int64 
 3   Loan_Amount        1000 non-null   int64 
 4   Loan_Term          1000 non-null   object
 5   Employment_Status  1000 non-null   object
 6   Residence_Type     1000 non-null   object
 7   Previous_Default   1000 non-null   object
 8   Loan_Approved      1000 non-null   object
dtypes: int64(4), object(5)
memory usage: 70.4+ KB


In [148]:
df = df.dropna()

In [149]:
df.head()

Unnamed: 0,Age,Salary,Credit_Score,Loan_Amount,Loan_Term,Employment_Status,Residence_Type,Previous_Default,Loan_Approved
0,56,136748,584,38209,36 months,Employed,Owned,Yes,Yes
1,46,25287,815,27424,24 months,Self-Employed,Rented,No,Yes
2,32,146593,398,42396,12 months,Unemployed,Rented,Yes,Yes
3,60,54387,696,11370,24 months,Unemployed,Owned,No,No
4,25,28512,788,14528,12 months,Employed,Owned,No,No


In [150]:
X = df.drop('Loan_Approved',axis=1)
y = df.Loan_Approved

In [151]:
xtrain,xtest,ytrain,ytest = train_test_split(X,y,train_size=0.8,random_state=42)

In [152]:
cat_cols = X.select_dtypes(include='object').columns

In [153]:
encode = OrdinalEncoder()

In [154]:
xtrain[cat_cols] = encode.fit_transform(xtrain[cat_cols])

In [155]:
xtrain.head()

Unnamed: 0,Age,Salary,Credit_Score,Loan_Amount,Loan_Term,Employment_Status,Residence_Type,Previous_Default
29,44,85441,507,17109,1.0,1.0,2.0,0.0
535,38,87298,489,10776,2.0,0.0,0.0,0.0
695,45,49629,762,39441,2.0,0.0,0.0,0.0
557,47,75337,809,40596,0.0,0.0,0.0,1.0
836,46,101121,504,31956,2.0,2.0,0.0,0.0


In [156]:
grid_search_cv = GridSearchCV(
    estimator  = LogisticRegression(max_iter=1000),
    param_grid = {
                  'C':[0.01,0.1,1.0,10],
                  'penalty':['l1','l2'],
                  'solver':['liblinear'],
                  'class_weight': ['balanced',None]
                  },
    cv=10,
    n_jobs=1,
    verbose = 1,
    scoring='f1_macro'
)

In [157]:
# xtrain

In [158]:
grid_search_cv.fit(xtrain,ytrain)

Fitting 10 folds for each of 16 candidates, totalling 160 fits


0,1,2
,estimator,LogisticRegre...max_iter=1000)
,param_grid,"{'C': [0.01, 0.1, ...], 'class_weight': ['balanced', None], 'penalty': ['l1', 'l2'], 'solver': ['liblinear']}"
,scoring,'f1_macro'
,n_jobs,1
,refit,True
,cv,10
,verbose,1
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,penalty,'l1'
,dual,False
,tol,0.0001
,C,10
,fit_intercept,True
,intercept_scaling,1
,class_weight,'balanced'
,random_state,
,solver,'liblinear'
,max_iter,1000


In [159]:
xtest[cat_cols]= encode.transform(xtest[cat_cols])

In [160]:
grid_search_cv.score(xtrain,ytrain)

0.5462322746982304

In [161]:
grid_search_cv.predict(xtest)

array(['Yes', 'No', 'No', 'Yes', 'No', 'No', 'Yes', 'No', 'No', 'No',
       'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'No', 'Yes', 'No',
       'No', 'No', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes',
       'Yes', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No',
       'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes',
       'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes',
       'No', 'Yes', 'No', 'No', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No',
       'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes',
       'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'No',
       'Yes', 'No', 'No', 'Yes', 'No', 'Yes', 'No', 'No', 'Yes', 'No',
       'No', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'No',
       'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes',
       'No', 'No', 'Yes', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'No',
       'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Y

In [162]:
grid_search_cv.best_params_

{'C': 10, 'class_weight': 'balanced', 'penalty': 'l1', 'solver': 'liblinear'}

In [163]:
grid_search_cv.best_score_

np.float64(0.5042733205077621)

In [164]:
model = grid_search_cv.best_estimator_

In [165]:
model.fit(xtrain,ytrain)

0,1,2
,penalty,'l1'
,dual,False
,tol,0.0001
,C,10
,fit_intercept,True
,intercept_scaling,1
,class_weight,'balanced'
,random_state,
,solver,'liblinear'
,max_iter,1000


In [166]:
model.score(xtrain,ytrain)

0.54625

In [171]:
cv_result = pd.DataFrame(grid_search_cv.cv_results_)

In [174]:
cv_result.sort_values(by='rank_test_score')

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_class_weight,param_penalty,param_solver,params,split0_test_score,...,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
12,0.013291,0.002329,0.006351,0.001637,10.0,balanced,l1,liblinear,"{'C': 10, 'class_weight': 'balanced', 'penalty...",0.536849,...,0.52381,0.574734,0.537428,0.449656,0.503106,0.425,0.393939,0.504273,0.057656,1
14,0.008546,0.001344,0.00499,0.001291,10.0,,l1,liblinear,"{'C': 10, 'class_weight': None, 'penalty': 'l1...",0.537428,...,0.508739,0.548872,0.562432,0.437412,0.469697,0.399625,0.404969,0.497557,0.061578,2
0,0.012149,0.00577,0.009194,0.003041,0.01,balanced,l1,liblinear,"{'C': 0.01, 'class_weight': 'balanced', 'penal...",0.423559,...,0.399625,0.59596,0.499687,0.545455,0.531571,0.483546,0.45561,0.497151,0.057443,3
10,0.0124,0.003033,0.006706,0.001504,1.0,,l1,liblinear,"{'C': 1.0, 'class_weight': None, 'penalty': 'l...",0.512424,...,0.48549,0.561884,0.549719,0.461743,0.506251,0.398496,0.390476,0.4961,0.059901,4
1,0.008464,0.00211,0.005912,0.001354,0.01,balanced,l2,liblinear,"{'C': 0.01, 'class_weight': 'balanced', 'penal...",0.475,...,0.399625,0.584317,0.473684,0.560784,0.559125,0.499687,0.374609,0.49511,0.06534,5
7,0.007422,0.001598,0.006407,0.001536,0.1,,l2,liblinear,"{'C': 0.1, 'class_weight': None, 'penalty': 'l...",0.448622,...,0.436708,0.637443,0.510588,0.587436,0.539642,0.410196,0.371464,0.493802,0.076861,6
8,0.012069,0.001888,0.00639,0.00178,1.0,balanced,l1,liblinear,"{'C': 1.0, 'class_weight': 'balanced', 'penalt...",0.486778,...,0.473684,0.574734,0.548872,0.45,0.528587,0.386637,0.385098,0.492153,0.064888,7
11,0.008558,0.00205,0.007152,0.00174,1.0,,l2,liblinear,"{'C': 1.0, 'class_weight': None, 'penalty': 'l...",0.460392,...,0.474672,0.59975,0.47203,0.574734,0.539642,0.44127,0.357581,0.491599,0.065678,8
2,0.005254,0.000798,0.004824,0.001041,0.01,,l1,liblinear,"{'C': 0.01, 'class_weight': None, 'penalty': '...",0.446889,...,0.398496,0.587436,0.449656,0.536849,0.548872,0.477624,0.458353,0.486434,0.053194,9
3,0.008132,0.002028,0.006032,0.000799,0.01,,l2,liblinear,"{'C': 0.01, 'class_weight': None, 'penalty': '...",0.48742,...,0.412408,0.59975,0.460392,0.561884,0.492063,0.44127,0.371464,0.483441,0.063711,10
