In [2]:
# 📚 Credit Risk Model Training Script
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
import joblib

# 1️⃣ Load Data
df = pd.read_csv("/Users/sangeetha/Downloads/credit_risk_data.csv")  # Replace with your real dataset

# 2️⃣ Preprocessing
features = ['loan_amount', 'credit_score', 'ltv_ratio', 'credit_history', 'dti_ratio']
X = df[features]
y = df['default_status']

# 3️⃣ Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4️⃣ Train Model
model = LogisticRegression()
model.fit(X_train, y_train)

# 5️⃣ Evaluate Model
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)[:, 1]

print(classification_report(y_test, y_pred))
print("AUC-ROC:", roc_auc_score(y_test, y_pred_proba))

# 6️⃣ Save Model
joblib.dump(model, "credit_risk_model.pkl")
print("✅ Model saved as credit_risk_model.pkl")


              precision    recall  f1-score   support

           0       0.69      1.00      0.82        69
           1       0.00      0.00      0.00        31

    accuracy                           0.69       100
   macro avg       0.34      0.50      0.41       100
weighted avg       0.48      0.69      0.56       100

AUC-ROC: 0.49602618045815805
✅ Model saved as credit_risk_model.pkl


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
import joblib
import pandas as pd

# Load data
df = pd.read_csv("/Users/sangeetha/Downloads/credit_risk_data.csv")
X = df[['loan_amount', 'credit_score', 'ltv_ratio', 'credit_history', 'dti_ratio']]
y = df['default_status']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42, test_size=0.2)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train logistic regression with class balancing
model = LogisticRegression(class_weight='balanced', max_iter=500)
model.fit(X_train_scaled, y_train)

# Predictions
y_pred = model.predict(X_test_scaled)
y_proba = model.predict_proba(X_test_scaled)[:, 1]

# Metrics
print(classification_report(y_test, y_pred))
print("AUC-ROC:", roc_auc_score(y_test, y_proba))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Save model and scaler
joblib.dump(model, "credit_risk_model.pkl")
joblib.dump(scaler, "credit_risk_scaler.pkl")


              precision    recall  f1-score   support

           0       0.78      0.63      0.70        71
           1       0.38      0.55      0.45        29

    accuracy                           0.61       100
   macro avg       0.58      0.59      0.57       100
weighted avg       0.66      0.61      0.63       100

AUC-ROC: 0.569208353569694
Confusion Matrix:
 [[45 26]
 [13 16]]


['credit_risk_scaler.pkl']

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
import joblib
import pandas as pd

# Load data
df = pd.read_csv("/Users/sangeetha/Downloads/credit_risk_data.csv")
X = df[['loan_amount', 'credit_score', 'ltv_ratio', 'credit_history', 'dti_ratio']]
y = df['default_status']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42, test_size=0.2)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train logistic regression with class balancing
model = LogisticRegression(class_weight='balanced', max_iter=500)
model.fit(X_train_scaled, y_train)

# Predictions
y_pred = model.predict(X_test_scaled)
y_proba = model.predict_proba(X_test_scaled)[:, 1]

# Metrics
print(classification_report(y_test, y_pred))
print("AUC-ROC:", roc_auc_score(y_test, y_proba))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Save model and scaler
joblib.dump(model, "credit_risk_model.pkl")
joblib.dump(scaler, "credit_risk_scaler.pkl")
