In [8]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, roc_auc_score, confusion_matrix,
    precision_score, recall_score
)
import mlflow
import mlflow.sklearn
import numpy as np
from sklearn.metrics import classification_report
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import GridSearchCV

In [2]:
df1 = pd.read_csv("C:/ML_AI/loan/loan_acceptance_using_multiple-algorithm/data/ordinal_encode/train_processed_data.csv")

df2= pd.read_csv("C:/ML_AI/loan/loan_acceptance_using_multiple-algorithm/data/ordinal_encode/test_processed_data.csv")

In [3]:
xtrain=df1.drop(labels=["loan_status"],axis=1)
ytrain=df1[["loan_status"]]

In [4]:
xtest=df2.drop(labels=["loan_status"],axis=1)
ytest=df2[["loan_status"]]

In [5]:
rus = RandomUnderSampler(sampling_strategy=1.0, random_state=42)  
X_res, y_res = rus.fit_resample(xtrain, ytrain)

In [6]:

log_reg = LogisticRegression(random_state=42)


param_grid = {
    'penalty': ['l1', 'l2', 'elasticnet', 'none'],
    'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
    'solver': ['liblinear', 'saga', 'lbfgs'],
    'max_iter': [100, 200, 500],
    'l1_ratio': [0, 0.5, 1]  
}


grid_search = GridSearchCV(
    estimator=log_reg,
    param_grid=param_grid,
    scoring='f1',       
    cv=5,               
    n_jobs=-1,          
    verbose=2
)

# Fit the grid search
grid_search.fit(X_res, y_res)

print("Best Parameters:",grid_search.best_params_)
print("Best Score:",grid_search.best_score_)

Fitting 5 folds for each of 756 candidates, totalling 3780 fits
Best Parameters: {'C': 0.01, 'l1_ratio': 0, 'max_iter': 100, 'penalty': 'l1', 'solver': 'liblinear'}
Best Score: 0.8092573663831327


1890 fits failed out of a total of 3780.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
315 fits failed with the following error:
Traceback (most recent call last):
  File "c:\ETLhive\python\venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\ETLhive\python\venv\Lib\site-packages\sklearn\base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\ETLhive\python\venv\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1218, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In [None]:
Best_Parameters={'C': 0.01, 'l1_ratio': 0, 'max_iter': 100, 'penalty': 'l1', 'solver': 'liblinear'}
mlflow.set_tracking_uri("file:///C:/ML_AI/loan/loan_acceptance_using_multiple-algorithm/mlruns")
mlflow.set_experiment("loan_acceptance_logreg")

with mlflow.start_run(run_name=f"Ordinal_encoding_logistic_regression"):


    mlflow.log_param("C", Best_Parameters["C"])
    mlflow.log_param("penalty", Best_Parameters["penalty"])
    mlflow.log_param("solver", Best_Parameters["solver"])
    mlflow.log_param("max_iter", Best_Parameters["max_iter"])
    mlflow.log_param("random_state", 42)

    model = LogisticRegression(
        C=Best_Parameters["C"],
        penalty=Best_Parameters["penalty"],
        solver=Best_Parameters["solver"],
        max_iter=Best_Parameters["max_iter"],
        random_state=42
    )
    model.fit(xtrain, ytrain)

    y_pred_train = model.predict(xtrain)
    y_prob_train = model.predict_proba(xtrain)[:, 1]

    acc_train = accuracy_score(ytrain, y_pred_train)
    roc_train = roc_auc_score(ytrain, y_prob_train)
    prec_train = precision_score(ytrain, y_pred_train)
    rec_train = recall_score(ytrain, y_pred_train)

    y_pred_test = model.predict(xtest)
    y_prob_test = model.predict_proba(xtest)[:, 1]

    acc_test = accuracy_score(ytest, y_pred_test)
    roc_test = roc_auc_score(ytest, y_prob_test)
    prec_test = precision_score(ytest, y_pred_test)
    rec_test = recall_score(ytest, y_pred_test)

    mlflow.log_metric("train_accuracy", acc_train)
    mlflow.log_metric("train_roc_auc", roc_train)
    mlflow.log_metric("train_precision", prec_train)
    mlflow.log_metric("train_recall", rec_train)

    mlflow.log_metric("test_accuracy", acc_test)
    mlflow.log_metric("test_roc_auc", roc_test)
    mlflow.log_metric("test_precision", prec_test)
    mlflow.log_metric("test_recall", rec_test)

    import os
    os.makedirs("metrics", exist_ok=True)

    cm_train = confusion_matrix(ytrain, y_pred_train)
    cm_test = confusion_matrix(ytest, y_pred_test)

    np.save("metrics/confusion_matrix_train.npy", cm_train)
    np.save("metrics/confusion_matrix_test.npy", cm_test)

    mlflow.log_artifact("metrics/confusion_matrix_train.npy", artifact_path="metrics")
    mlflow.log_artifact("metrics/confusion_matrix_test.npy", artifact_path="metrics")

    mlflow.sklearn.log_model(model, artifact_path="model")

    print(f"Run logged: C={Best_Parameters['C']}, penalty={Best_Parameters['penalty']}, solver={Best_Parameters['solver']}")
    print(f"Train: Acc={acc_train:.3f}, ROC={roc_train:.3f}, Prec={prec_train:.3f}, Rec={rec_train:.3f}")
    print(f"Test : Acc={acc_test:.3f}, ROC={roc_test:.3f}, Prec={prec_test:.3f}, Rec={rec_test:.3f}")


  y = column_or_1d(y, warn=True)


Run logged: C=0.01, penalty=l1, solver=liblinear
Train: Acc=0.894, ROC=0.881, Prec=0.740, Rec=0.397
Test : Acc=0.898, ROC=0.880, Prec=0.751, Rec=0.408
