Dataset link - https://www.kaggle.com/datasets/ninzaami/loan-predication/data


In [63]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [64]:
# Load the loan dataset into a DataFrame.
# NOTE(review): this is an absolute path ('/content/...' looks like a Colab
# working directory) — confirm/adjust it when running the notebook elsewhere.
data = pd.read_csv('/content/train.csv')

In [65]:
# Preview the first rows to check that the CSV parsed into the expected columns
data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [66]:
# (rows, columns) of the freshly loaded frame, before any cleaning
data.shape

(614, 13)

In [67]:
# Summary statistics for the numeric columns; a `count` below the row total
# means that column contains missing values
data.describe()

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History
count,614.0,614.0,592.0,600.0,564.0
mean,5403.459283,1621.245798,146.412162,342.0,0.842199
std,6109.041673,2926.248369,85.587325,65.12041,0.364878
min,150.0,0.0,9.0,12.0,0.0
25%,2877.5,0.0,100.0,360.0,1.0
50%,3812.5,1188.5,128.0,360.0,1.0
75%,5795.0,2297.25,168.0,360.0,1.0
max,81000.0,41667.0,700.0,480.0,1.0


In [68]:
# Number of missing entries in each column of the raw frame
data.isna().sum()

Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [69]:
# Keep only fully populated rows: a row with at least one missing value is discarded
data = data.dropna(axis=0, how='any')

In [70]:
# Re-count missing entries; every column should now report zero
data.isna().sum()

Loan_ID              0
Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

In [71]:
# (rows, columns) after dropping incomplete rows — shows how many rows were lost
data.shape

(480, 13)

In [72]:
# Class balance of the target column (how many 'Y' vs 'N' rows remain)
data['Loan_Status'].value_counts()

Y    332
N    148
Name: Loan_Status, dtype: int64

In [73]:
# Distinct Dependents categories; note the non-numeric '3+' bucket that has to
# be handled when encoding
data['Dependents'].value_counts()

0     274
2      85
1      80
3+     41
Name: Dependents, dtype: int64

In [74]:
# Distinct Gender labels and their frequencies (needed to build the encoding map)
data['Gender'].value_counts()

Male      394
Female     86
Name: Gender, dtype: int64

In [75]:
# Distinct Married labels and their frequencies (needed to build the encoding map)
data['Married'].value_counts()

Yes    311
No     169
Name: Married, dtype: int64

In [76]:
# Distinct Education labels and their frequencies (needed to build the encoding map)
data['Education'].value_counts()

Graduate        383
Not Graduate     97
Name: Education, dtype: int64

In [77]:
# Distinct Self_Employed labels and their frequencies (needed to build the encoding map)
data['Self_Employed'].value_counts()

No     414
Yes     66
Name: Self_Employed, dtype: int64

In [78]:
# Distinct Property_Area labels and their frequencies (needed to build the encoding map)
data['Property_Area'].value_counts()

Semiurban    191
Urban        150
Rural        139
Name: Property_Area, dtype: int64

In [79]:
# Label encoding
# Every categorical column is mapped to an integer code. For Dependents only
# '3+' needs an explicit entry (coded as 4, as before); its remaining values
# are the digit strings '0', '1' and '2', which the integer cast below converts.
label_codes = {
    'Loan_Status': {'N': 0, 'Y': 1},
    'Dependents': {'3+': 4},
    'Gender': {'Male': 1, 'Female': 0},
    'Married': {'Yes': 1, 'No': 0},
    'Education': {'Graduate': 1, 'Not Graduate': 0},
    'Self_Employed': {'Yes': 1, 'No': 0},
    'Property_Area': {'Rural': 0, 'Semiurban': 1, 'Urban': 2},
}
# replace() on its own leaves Dependents as an object column mixing str and
# int, and depends on pandas implicitly downcasting the other columns. Casting
# explicitly guarantees that every encoded column is a true integer column and
# fails loudly if an unmapped label is still present.
data = data.replace(label_codes).astype({column: int for column in label_codes})

In [80]:
# Confirm that the categorical columns now show their integer codes
data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,LP001003,1,1,1,1,0,4583,1508.0,128.0,360.0,1.0,0,0
2,LP001005,1,1,0,1,1,3000,0.0,66.0,360.0,1.0,2,1
3,LP001006,1,1,0,0,0,2583,2358.0,120.0,360.0,1.0,2,1
4,LP001008,1,0,0,1,0,6000,0.0,141.0,360.0,1.0,2,1
5,LP001011,1,1,2,1,1,5417,4196.0,267.0,360.0,1.0,2,1


In [81]:
# Target vector: the encoded loan decision
Y = data['Loan_Status']
# Feature matrix: everything except the row identifier and the target itself
X = data.drop(['Loan_ID', 'Loan_Status'], axis='columns')

In [82]:
# Peek at the first five feature rows and the matching target values
print(X.head(5))
print(Y.head(5))

   Gender  Married Dependents  Education  Self_Employed  ApplicantIncome  \
1       1        1          1          1              0             4583   
2       1        1          0          1              1             3000   
3       1        1          0          0              0             2583   
4       1        0          0          1              0             6000   
5       1        1          2          1              1             5417   

   CoapplicantIncome  LoanAmount  Loan_Amount_Term  Credit_History  \
1             1508.0       128.0             360.0             1.0   
2                0.0        66.0             360.0             1.0   
3             2358.0       120.0             360.0             1.0   
4                0.0       141.0             360.0             1.0   
5             4196.0       267.0             360.0             1.0   

   Property_Area  
1              0  
2              2  
3              2  
4              2  
5              2  
1    0
2

In [86]:
# Hold out 10% of the rows for testing. Stratifying on Y keeps the class ratio
# the same in both parts, and the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    stratify=Y,
    random_state=2,
)

In [87]:
# Shapes of the full, training and testing feature matrices, in that order
print(*(part.shape for part in (X, X_train, X_test)))

(480, 11) (432, 11) (48, 11)


In [90]:
# Support-vector classifier with a linear kernel; every other SVC setting is
# left at its default.
# NOTE(review): no feature-scaling step appears before this cell, so the income
# and loan-amount columns reach the SVM on their raw scale — confirm that is intended.
model = svm.SVC(kernel='linear')

In [91]:
# Fit the classifier on the training portion only; the held-out rows are
# reserved for evaluation
model.fit(X_train, Y_train)

In [92]:
def accuracy_precision_recall_f1(true_labels,pred_labels):
  """Print accuracy, precision, recall and F1 as percentages and return the raw scores.

  Parameters
  ----------
  true_labels : ground-truth class labels.
  pred_labels : predicted class labels, aligned element-wise with ``true_labels``.

  Returns
  -------
  dict
      Keys ``'accuracy'``, ``'precision'``, ``'recall'`` and ``'f1'`` mapped to
      the unrounded values returned by the matching ``sklearn.metrics``
      function, so callers can reuse the numbers instead of re-parsing the
      printed report.
  """
  # (printed heading, result key, scoring function) — one row per metric
  scorers = (
    ('Accuracy', 'accuracy', accuracy_score),
    ('Precision', 'precision', precision_score),
    ('Recall', 'recall', recall_score),
    ('F1', 'f1', f1_score),
  )
  # Compute every metric before printing, so a failing metric never leaves a
  # partial report behind.
  scores = {key: scorer(true_labels,pred_labels) for _, key, scorer in scorers}
  for heading, key, _ in scorers:
    print(heading + ' score =',round(scores[key]*100,2),'%')
  return scores

In [93]:
# Predicted labels for both partitions (kept under their original names)
X_train_pred = model.predict(X_train)
X_test_pred = model.predict(X_test)

# Same report for each partition: heading, the four metrics, then a separator
for heading, actual, predicted in (
    ('For training Data:', Y_train, X_train_pred),
    ('For testing Data:', Y_test, X_test_pred),
):
  print(heading)
  accuracy_precision_recall_f1(actual, predicted)
  print('---------------------------------------')

For training Data:
Accuracy score = 79.86 %
Precision score = 78.04 %
Recall score = 98.66 %
F1 score = 87.15 %
---------------------------------------
For testing Data:
Accuracy score = 83.33 %
Precision score = 83.78 %
Recall score = 93.94 %
F1 score = 88.57 %
---------------------------------------
