In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
df=pd.read_csv("training_set.csv")
df['Loan_ID'] = df['Loan_ID'].str.replace("LP", "", regex=True).astype(int)
# Create one-hot encoded columns for property_Area while keeping the original column
df_encoded = pd.get_dummies(df['property_Area'], prefix='property_Area')
df = pd.concat([df, df_encoded], axis=1)
df.rename(columns={'property_Area_Urban': 'Urban', 'property_Area_Rural': 'Rural', 'property_Area_Semiurban': 'Semiurban'}, inplace=True)

df_encoded_gender = pd.get_dummies(df['Gender'], prefix='Gender')
df = pd.concat([df, df_encoded_gender], axis=1)
df.rename(columns={'Gender_Male': 'Male', 'Gender_Female': 'Female'}, inplace=True)
categorical_columns = ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed',
                       'property_Area', 'Loan_Status','Rural','Urban','Semiurban','Female','Male']

# Convert categorical values to numeric using label encoding
for col in categorical_columns:
    df[col] = df[col].astype('category').cat.codes  # Converts categories to numbers
df.replace(-1, np.nan, inplace=True)
df


for col in categorical_columns:
    df[col].fillna(df[col].mode()[0], inplace=True)

num_cols = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term']
for col in num_cols:
    df[col].fillna(df[col].median(), inplace=True)

df['Credit_History'].fillna(df['Credit_History'].mode()[0], inplace=True)

df['Total_Income'] = df['ApplicantIncome'] + df['CoapplicantIncome']
df['Loan_Income_Ratio'] = df['LoanAmount'] / df['Total_Income']
df['EMI'] = df['LoanAmount'] / df['Loan_Amount_Term']
df['Balance_Income'] = df['Total_Income'] - df['EMI']



The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting valu

In [2]:
from sklearn.model_selection import train_test_split
df_train,df_val=train_test_split(df,train_size=0.75,random_state=42)
df_val

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,...,Loan_Status,Rural,Semiurban,Urban,Female,Male,Total_Income,Loan_Income_Ratio,EMI,Balance_Income
350,2139,1.0,1.0,0.0,0.0,0.0,9083.0,0.0,228.0,360.0,...,1,0,1,0,0,1,9083.0,0.025102,0.633333,9082.366667
377,2223,1.0,1.0,0.0,0.0,0.0,4310.0,0.0,130.0,360.0,...,1,0,1,0,0,1,4310.0,0.030162,0.361111,4309.638889
163,1570,1.0,1.0,2.0,0.0,0.0,4167.0,1447.0,158.0,360.0,...,1,1,0,0,0,1,5614.0,0.028144,0.438889,5613.561111
609,2978,0.0,0.0,0.0,0.0,0.0,2900.0,0.0,71.0,360.0,...,1,1,0,0,1,0,2900.0,0.024483,0.197222,2899.802778
132,1478,1.0,0.0,0.0,0.0,0.0,2718.0,0.0,70.0,360.0,...,1,0,1,0,0,1,2718.0,0.025754,0.194444,2717.805556
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535,2732,1.0,0.0,0.0,1.0,0.0,2550.0,2042.0,126.0,360.0,...,1,1,0,0,0,1,4592.0,0.027439,0.350000,4591.650000
468,2502,0.0,1.0,2.0,1.0,0.0,210.0,2917.0,98.0,360.0,...,1,0,1,0,1,0,3127.0,0.031340,0.272222,3126.727778
582,2894,0.0,1.0,0.0,0.0,0.0,3166.0,0.0,36.0,360.0,...,1,0,1,0,1,0,3166.0,0.011371,0.100000,3165.900000
291,1938,1.0,1.0,2.0,0.0,0.0,4400.0,0.0,127.0,360.0,...,0,0,1,0,0,1,4400.0,0.028864,0.352778,4399.647222


In [3]:
y_train=df_train["Loan_Status"]
x_train=df_train.drop(columns="Loan_Status")
y_val=df_val["Loan_Status"]
x_val=df_val.drop(columns="Loan_Status")

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import SVC
poly_svm = SVC(kernel='poly', degree=3, C=1.0, gamma='scale')
poly_svm.fit(x_train, y_train)
y_pred_val = poly_svm.predict(x_val)

In [6]:
cm = confusion_matrix(y_val, y_pred_val)
acc = accuracy_score(y_val, y_pred_val)
precision = precision_score(y_val, y_pred_val)
recall = recall_score(y_val, y_pred_val)
f1 = f1_score(y_val, y_pred_val)

print("Confusion Matrix:")
print(cm)
print(f"Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}\n")

Confusion Matrix:
[[  1  53]
 [  0 100]]
Accuracy: 0.6558, Precision: 0.6536, Recall: 1.0000, F1-score: 0.7905

