In [6]:
import pandas as pd
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# 1. Load data
df = pd.read_csv(r"c:\Users\subha\Desktop\risk_credit_final\risk_scored_applicants_updated.csv")

# 2. Select the four components of the formula
features = pd.DataFrame({
    "PIS": df["Payment_Irregularity_Score"],
    "one_minus_CoLI": 1 - df["CoLI_ridge"],
    "one_minus_BRI": 1 - df["BRI_ridge"],
    "one_minus_FRI": 1 - df["Financial_Resilience_Index"]
})

# 3. Choose the target you want to approximate (default prob)
y = df["Default_Prob_Final"]      # or df["Default_Prob"]

# 4. Fit Ridge regression without an intercept (formula has no constant term)
alphas = (0.001, 0.01, 0.1, 1, 10, 100)   # search space for regularisation strength
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)

ridge = RidgeCV(alphas=alphas, fit_intercept=False)
ridge.fit(X_scaled, y)
w_std = ridge.coef_
w_orig = w_std / scaler.scale_

print("Weights (standardized):", w_std)
print("Weights (original scale):", w_orig)


# 5. Inspect learned weights
w1, w2, w3, w4 = ridge.coef_
print("Weights:")
print(f"  w1 (PIS)        = {w1:.6f}")
print(f"  w2 (1 - CoLI)   = {w2:.6f}")
print(f"  w3 (1 - BRI)    = {w3:.6f}")
print(f"  w4 (1 - FRI)    = {w4:.6f}")
print(f"Chosen alpha      = {ridge.alpha_}")

# 6. Optional: evaluate fit quality
y_pred = ridge.predict(X_scaled)
rmse = mean_squared_error(y, y_pred) ** 0.5
print("RMSE :", rmse)
print("R^2  :", r2_score(y, y_pred))

Weights (standardized): [0.01019808 0.01173606 0.03441564 0.05057033]
Weights (original scale): [0.42291579 0.76224303 0.71664486 0.15960954]
Weights:
  w1 (PIS)        = 0.010198
  w2 (1 - CoLI)   = 0.011736
  w3 (1 - BRI)    = 0.034416
  w4 (1 - FRI)    = 0.050570
Chosen alpha      = 100.0
RMSE : 0.1234357072628156
R^2  : 0.31216086151165845
