In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [2]:
np.random.seed(42)
data = pd.DataFrame({
    'income': np.random.randint(20000, 100000, 1000),
    'debt': np.random.randint(1000, 30000, 1000),
    'payment_history': np.random.choice([0, 1], size=1000),  # 0 = poor, 1 = good
    'creditworthy': np.random.choice([0, 1], size=1000)  # 0 = not creditworthy, 1 = creditworthy
})

In [3]:
X = data[['income', 'debt', 'payment_history']]
y = data['creditworthy']

In [4]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [6]:
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier()
}

In [7]:
for name, model in models.items():
    print(f"\n🧠 Model: {name}")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else y_pred


🧠 Model: Logistic Regression

🧠 Model: Decision Tree

🧠 Model: Random Forest


In [8]:
    print("Classification Report:\n", classification_report(y_test, y_pred))
    print("ROC-AUC Score:", roc_auc_score(y_test, y_prob))

Classification Report:
               precision    recall  f1-score   support

           0       0.49      0.42      0.45       106
           1       0.44      0.50      0.47        94

    accuracy                           0.46       200
   macro avg       0.46      0.46      0.46       200
weighted avg       0.46      0.46      0.46       200

ROC-AUC Score: 0.46141107988759533


In [9]:
accuracy = model.score(X_test, y_test)
print("Accuracy:", accuracy)


Accuracy: 0.46


In [10]:

print("\n🔍 Enter user details to predict creditworthiness:")
try:
    income = float(input("Enter Income: "))
    debt = float(input("Enter Debt: "))
    payment_history = int(input("Enter Payment History (1 = Good, 0 = Poor): "))

    # Fix warning: use DataFrame with column names
    user_input_df = pd.DataFrame(
        [[income, debt, payment_history]],
        columns=['income', 'debt', 'payment_history']
    )
    user_input = scaler.transform(user_input_df)

    # Prediction using Random Forest (best model)
    best_model = models["Random Forest"]
    prediction = best_model.predict(user_input)[0]
    probability = best_model.predict_proba(user_input)[0][1]

    print("\n🧾 Prediction Result:")
    print("Creditworthy ✅" if prediction == 1 else "Not Creditworthy ❌")
    print(f"Confidence Score: {round(probability * 100, 2)}%")

except Exception as e:
    print("❌ Invalid input:", e)



🔍 Enter user details to predict creditworthiness:


Enter Income:  60000
Enter Debt:  15000
Enter Payment History (1 = Good, 0 = Poor):  1



🧾 Prediction Result:
Creditworthy ✅
Confidence Score: 68.0%
