In [45]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

In [48]:
# Load the loan-approval dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("LoanApproval.csv")

In [None]:
# Preview the first rows of the freshly loaded data.
df.head()

In [None]:
# Summarise column dtypes and non-null counts.
df.info()

In [None]:
# Count missing values per column.
df.isna().sum()

In [52]:
# Remove the Loan_ID column so it does not end up in the feature matrix.
# errors="ignore" keeps this cell re-runnable: on a second execution the column
# is already gone, and the original call raised KeyError.
df = df.drop(columns="Loan_ID", errors="ignore")

In [53]:
# Discard every row that has at least one missing value.
df = df.dropna(axis="index", how="any")

In [54]:
def label_encoder(column):
    """Integer-encode one categorical column and report the class mapping.

    A fresh ``LabelEncoder`` is fitted on ``column``; the column's name and the
    learned classes are printed so the reader can see which label maps to which
    integer (the position of a label in the printed classes is its code).

    Parameters
    ----------
    column : object with a ``name`` attribute (called with DataFrame columns here)
        The values to encode.

    Returns
    -------
    The result of ``LabelEncoder.transform(column)``.

    Note: the fitted encoder is not returned or stored, so the mapping is only
    available from the printed output.
    """
    encoder = LabelEncoder()
    encoder.fit(column)
    print(column.name, encoder.classes_)
    encoded = encoder.transform(column)
    return encoded

In [55]:
# Names of the categorical columns that are integer-encoded before modelling.
columns = [
    "Gender",
    "Married",
    "Dependents",
    "Education",
    "Self_Employed",
    "Property_Area",
]

In [56]:
# Replace each categorical column with its integer codes; label_encoder prints
# the label -> code mapping for every column as it goes.
for col_name in columns:
    encoded_values = label_encoder(df[col_name])
    df[col_name] = encoded_values

Gender ['Female' 'Male']
Married ['No' 'Yes']
Dependents ['0' '1' '2' '3+']
Education ['Graduate' 'Not Graduate']
Self_Employed ['No' 'Yes']
Property_Area ['Rural' 'Semiurban' 'Urban']


In [None]:
# List the columns that remain after dropping Loan_ID.
df.columns

In [None]:
# Preview the frame again now that the categorical columns have been encoded.
df.head()

In [59]:
# Target: the Loan_Status column. Features: every other remaining column.
y = df["Loan_Status"]
X = df.drop(columns=["Loan_Status"])

In [60]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=4242,
)

In [61]:
# Shapes of the train and test feature matrices.
tuple(part.shape for part in (X_train, X_test))

((336, 11), (144, 11))

In [62]:
# Shapes of the train and test target vectors.
tuple(part.shape for part in (y_train, y_test))

((336,), (144,))

In [63]:
# Candidate classifiers, keyed by the display name used when reporting results.
# The random forest is seeded so its accuracy is reproducible across runs;
# unseeded, every Restart & Run All yields a different forest and a different score.
models = {
    "Logistic Regression": LogisticRegression(),
    "SVC": SVC(),
    "KNN": KNeighborsClassifier(),
    "Random Forest": RandomForestClassifier(random_state=4242),
}

In [64]:
# Fit every candidate on the training split and report its accuracy on the
# held-out test split. The loop unpacks the estimator directly instead of
# looking it up again by key, and no longer binds an unused variable.
for model_name, model in models.items():
    print("*****" + model_name + "*****")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("Accuracy: ", accuracy_score(y_test, y_pred))
    print()

*****Logistic Regression*****
Accuracy:  0.8263888888888888

*****SVC*****
Accuracy:  0.7152777777777778

*****KNN*****
Accuracy:  0.6388888888888888

*****Random Forest*****
Accuracy:  0.8125



In [65]:
# Train the random forest that gets persisted below. It is seeded so the saved
# artifact is the same on every run; an unseeded forest differs each time it is
# refit, so the pickled model would not be reproducible.
rf_model = RandomForestClassifier(random_state=4242)
rf_model.fit(X_train, y_train)

In [66]:
# Persist the fitted model. The context manager guarantees the file is flushed
# and closed even if pickling fails; the original left the handle open.
with open("rf.pkl", "wb") as model_file:
    pickle.dump(rf_model, model_file)