In [40]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder


In [42]:
# Load the raw loan dataset from the notebook's working directory.
try:
    data = pd.read_csv('loan prediction.csv')
except FileNotFoundError:
    print("File not found. Please make sure the path is correct.")
    # Re-raise instead of swallowing the error: every later cell reads `data`,
    # so continuing would either fail with an unrelated NameError or silently
    # reuse a stale `data` left in the kernel by a previous run.
    raise

In [44]:
# Impute missing values in the numeric columns with each column's mean.
# Columns selected with include=['number'] already have a numeric dtype, so
# no pd.to_numeric conversion (or fallback for a failed conversion) is needed.
numeric_cols = data.select_dtypes(include=['number']).columns
data[numeric_cols] = data[numeric_cols].fillna(data[numeric_cols].mean())

# Registry of fitted encoders, keyed by column name, filled by the encoding
# step that follows.
label_encoders = {}

# Object-dtype columns are the ones that get label-encoded. Missing values in
# these columns are not imputed here.
categorical_cols = data.select_dtypes(include=['object']).columns

In [56]:
# Replace every object-dtype column with integer codes, keeping the fitted
# encoder for each column so codes can be mapped back to labels later.
for col in categorical_cols:
    le = label_encoders[col] = LabelEncoder()
    data[col] = le.fit_transform(data[col])

# Target is the encoded Loan_Status; features are all remaining columns
# except the Loan_ID column.
y = data['Loan_Status']
X = data.drop(columns=['Loan_Status', 'Loan_ID'])


In [58]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training can chain.
rf_classifier = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred = rf_classifier.predict(X_test)

# Score the held-out predictions. precision/recall use scikit-learn's default
# positive label (1).
accuracy = accuracy_score(y_test, y_pred)
precision, recall = (
    precision_score(y_test, y_pred),
    recall_score(y_test, y_pred),
)

In [60]:
# Report the three headline metrics, one per line.
print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    sep="\n",
)

Accuracy: 0.7804878048780488
Precision: 0.7623762376237624
Recall: 0.9625


In [62]:
# F1 on the held-out predictions.
f1 = f1_score(y_true=y_test, y_pred=y_pred)
print(f"F1 Score: {f1}")

F1 Score: 0.850828729281768


In [64]:
# Confusion matrix of the held-out predictions: rows are true labels,
# columns are predicted labels.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:", conf_matrix, sep="\n")

Confusion Matrix:
[[19 24]
 [ 3 77]]
