**ML Model**

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import accuracy_score, classification_report, mean_absolute_error

merged_df = pd.read_csv("merged.csv")
# -------------------------------
# Prepare Features and Targets
# -------------------------------
# Assume merged_df is your final DataFrame from previous steps.
# Drop irrelevant columns and target columns from features.
# Targets: "approved" (classification), "credit_limit" and "interest_rate" (regression)
features = merged_df.drop(columns=["applicant_id", "approved", "credit_limit", "interest_rate"])

# Convert categorical features to dummy variables
features = pd.get_dummies(features, drop_first=True)

# Define target variables
y_class = merged_df["approved"]  # Binary target
y_reg = merged_df[["credit_limit", "interest_rate"]]  # Regression targets

# -------------------------------
# Train-Test Split (70/30)
# -------------------------------
X_train, X_test, y_class_train, y_class_test, y_reg_train, y_reg_test = train_test_split(
    features, y_class, y_reg, test_size=0.3, random_state=42
)


In [4]:

# =============================================================================
# Variation 1: Logistic Regression (for approval) and Linear Regression (for credit limit & interest rate)
# =============================================================================
# Classification: Logistic Regression
clf_logistic = LogisticRegression(max_iter=10000, random_state=42)
clf_logistic.fit(X_train, y_class_train)
y_class_pred_logistic = clf_logistic.predict(X_test)

# Regression: Linear Regression for multiple outputs
reg_linear = MultiOutputRegressor(LinearRegression())
reg_linear.fit(X_train, y_reg_train)
y_reg_pred_linear = reg_linear.predict(X_test)

print("Variation 1: Logistic Regression + Linear Regression")
print("Approval Classification Report:")
print(classification_report(y_class_test, y_class_pred_logistic))
print("Approval Accuracy:", accuracy_score(y_class_test, y_class_pred_logistic))
print("\nRegression Performance (MAE):")
print("Credit Limit MAE:", mean_absolute_error(y_reg_test["credit_limit"], y_reg_pred_linear[:, 0]))
print("Interest Rate MAE:", mean_absolute_error(y_reg_test["interest_rate"], y_reg_pred_linear[:, 1]))


Variation 1: Logistic Regression + Linear Regression
Approval Classification Report:
              precision    recall  f1-score   support

         0.0       0.84      0.61      0.71       584
         1.0       0.78      0.92      0.85       890

    accuracy                           0.80      1474
   macro avg       0.81      0.77      0.78      1474
weighted avg       0.81      0.80      0.79      1474

Approval Accuracy: 0.8005427408412483

Regression Performance (MAE):
Credit Limit MAE: 4144.834896632041
Interest Rate MAE: 0.6231534959206302


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [5]:
# =============================================================================
# Variation 2: Decision Tree
# =============================================================================
# Classification: Decision Tree Classifier
clf_tree = DecisionTreeClassifier(random_state=42)
clf_tree.fit(X_train, y_class_train)
y_class_pred_tree = clf_tree.predict(X_test)

# Regression: Decision Tree Regressor (multi-output)
reg_tree = MultiOutputRegressor(DecisionTreeRegressor(random_state=42))
reg_tree.fit(X_train, y_reg_train)
y_reg_pred_tree = reg_tree.predict(X_test)

print("\nVariation 2: Decision Tree")
print("Approval Classification Report:")
print(classification_report(y_class_test, y_class_pred_tree))
print("Approval Accuracy:", accuracy_score(y_class_test, y_class_pred_tree))
print("\nRegression Performance (MAE):")
print("Credit Limit MAE:", mean_absolute_error(y_reg_test["credit_limit"], y_reg_pred_tree[:, 0]))
print("Interest Rate MAE:", mean_absolute_error(y_reg_test["interest_rate"], y_reg_pred_tree[:, 1]))



Variation 2: Decision Tree
Approval Classification Report:
              precision    recall  f1-score   support

         0.0       0.95      0.93      0.94       584
         1.0       0.95      0.97      0.96       890

    accuracy                           0.95      1474
   macro avg       0.95      0.95      0.95      1474
weighted avg       0.95      0.95      0.95      1474

Approval Accuracy: 0.9525101763907734

Regression Performance (MAE):
Credit Limit MAE: 1079.9056229513662
Interest Rate MAE: 0.19402985074626866


In [6]:
# =============================================================================
# Variation 3: Random Forest
# =============================================================================
# Classification: Random Forest Classifier
clf_rf = RandomForestClassifier(n_estimators=100, random_state=42)
clf_rf.fit(X_train, y_class_train)
y_class_pred_rf = clf_rf.predict(X_test)

# Regression: Random Forest Regressor (multi-output)
reg_rf = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))
reg_rf.fit(X_train, y_reg_train)
y_reg_pred_rf = reg_rf.predict(X_test)

print("\nVariation 3: Random Forest")
print("Approval Classification Report:")
print(classification_report(y_class_test, y_class_pred_rf))
print("Approval Accuracy:", accuracy_score(y_class_test, y_class_pred_rf))
print("\nRegression Performance (MAE):")
print("Credit Limit MAE:", mean_absolute_error(y_reg_test["credit_limit"], y_reg_pred_rf[:, 0]))
print("Interest Rate MAE:", mean_absolute_error(y_reg_test["interest_rate"], y_reg_pred_rf[:, 1]))


Variation 3: Random Forest
Approval Classification Report:
              precision    recall  f1-score   support

         0.0       0.97      0.98      0.97       584
         1.0       0.99      0.98      0.98       890

    accuracy                           0.98      1474
   macro avg       0.98      0.98      0.98      1474
weighted avg       0.98      0.98      0.98      1474

Approval Accuracy: 0.9796472184531886

Regression Performance (MAE):
Credit Limit MAE: 998.3514554901355
Interest Rate MAE: 0.1906241519674355
