In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

data = pd.read_csv("loan_data.csv")

data.head()

data.drop('Loan_ID', axis=1, inplace=True)

label_encoder = LabelEncoder()
data['Gender'] = label_encoder.fit_transform(data['Gender'])
data['Married'] = label_encoder.fit_transform(data['Married'])
data['Education'] = label_encoder.fit_transform(data['Education'])
data['Self_Employed'] = label_encoder.fit_transform(data['Self_Employed'])
data['Credit_History'] = label_encoder.fit_transform(data['Credit_History'])
data['Property_Area'] = label_encoder.fit_transform(data['Property_Area'])
data['Loan_Status'] = label_encoder.fit_transform(data['Loan_Status'])

# Replace '3+' with '4'
data['Dependents'].replace('3+', '4', inplace=True)
data['Dependents'] = pd.to_numeric(data['Dependents'])

imputer = SimpleImputer(strategy='mean')
imputed_data = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)

X = imputed_data.drop('Loan_Status', axis=1)
y = imputed_data['Loan_Status']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

classifier = DecisionTreeClassifier()

classifier.fit(X_train, y_train)
print(X_test)

y_pred = classifier.predict(X_test)

print(y_pred)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)

     Gender  Married  Dependents  Education  Self_Employed  ApplicantIncome  \
266     1.0      1.0         0.0        0.0            0.0           5829.0   
192     1.0      1.0         1.0        0.0            0.0           1625.0   
46      1.0      1.0         4.0        0.0            0.0           3029.0   
55      1.0      1.0         1.0        0.0            1.0           1000.0   
57      0.0      0.0         0.0        0.0            0.0           3846.0   
..      ...      ...         ...        ...            ...              ...   
165     1.0      1.0         2.0        0.0            0.0           4708.0   
63      1.0      1.0         2.0        1.0            0.0           3273.0   
249     1.0      1.0         2.0        0.0            0.0           3276.0   
280     0.0      1.0         0.0        1.0            1.0           7142.0   
357     0.0      0.0         0.0        0.0            0.0           2378.0   

     CoapplicantIncome  LoanAmount  Loan_Amount_Ter

In [2]:
from joblib import dump
dump(classifier,'./../saved_models/decision.joblib')

['./../saved_models/decision.joblib']