In [5]:
# -------------------------------
# model_build.py
# -------------------------------

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pickle
import warnings
warnings.filterwarnings("ignore")

# Load dataset and normalise the column names (trim whitespace, lower-case).
df = pd.read_csv("loan_approval_dataset.csv")
df.columns = df.columns.str.strip().str.lower()

target = "loan_status"
X = df.drop(target, axis=1)
y = df[target]

# Train-test split (stratified on the target).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Work on explicit copies: the columns of both splits are overwritten below,
# and assigning into a frame derived from X can trigger SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Identify categorical + numerical columns from the training split.
cat_cols = X_train.select_dtypes(include='object').columns.tolist()
num_cols = X_train.select_dtypes(exclude='object').columns.tolist()

# Imputation values are taken from the RAW training split, before any
# encoding/scaling. Computing them later (after X_train has been transformed)
# would yield encoded integers / scaled medians, which are wrong fill values
# for the still-raw test split.
cat_fill = {col: X_train[col].mode()[0] for col in cat_cols}
num_fill = X_train[num_cols].median()

label_encoders = {}

# Encode categorical columns: one LabelEncoder per column, fitted on train only.
for col in cat_cols:
    le = LabelEncoder()
    X_train[col] = le.fit_transform(X_train[col].fillna(cat_fill[col]))
    label_encoders[col] = le

# Scale numerical columns: scaler is fitted on train only.
scaler = MinMaxScaler()
X_train[num_cols] = scaler.fit_transform(X_train[num_cols].fillna(num_fill))

# Save feature order so that inference code can rebuild rows in the same
# column order the model was trained on.
feature_order = list(X_train.columns)
with open("feature_order.pkl", "wb") as f:
    pickle.dump(feature_order, f)

# Train model
model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train, y_train)

# Process test data with the statistics and transformers learned on train.
for col in cat_cols:
    X_test[col] = label_encoders[col].transform(X_test[col].fillna(cat_fill[col]))

X_test[num_cols] = scaler.transform(X_test[num_cols].fillna(num_fill))

# Predictions
acc = accuracy_score(y_test, model.predict(X_test))
print(f"Model Accuracy: {acc*100:.2f}%")




Model Accuracy: 88.52%


In [6]:
# Save artifacts: the fitted model plus every preprocessing object and column
# list used during training.
artifacts = {
    "model.pkl": model,
    "scaler.pkl": scaler,
    "label_encoders.pkl": label_encoders,
    "cat_cols.pkl": cat_cols,
    "num_cols.pkl": num_cols,
}

for filename, obj in artifacts.items():
    # `with` guarantees each file is flushed and closed before moving on;
    # passing a bare open(...) to pickle.dump leaves the handle open.
    with open(filename, "wb") as f:
        pickle.dump(obj, f)

print("Training Completed — All Files Saved!")

Training Completed — All Files Saved!
