In [None]:
import pandas as pd
import numpy as np
import joblib
import os

# === Load the saved model and threshold ===
# Paths are built with os.path.join so the script also runs on non-Windows
# hosts; a literal "..\\models\\file" is a single (non-existent) filename on
# POSIX systems. On Windows the joined paths are identical to the old ones.
# NOTE: joblib.load unpickles arbitrary Python objects -- only load artifact
# files that come from a trusted source.
MODELS_DIR = os.path.join("..", "models")

# Fitted classifier; used further down through predict_proba().
model = joblib.load(os.path.join(MODELS_DIR, "Random_Forest_best_model.pkl"))

# Decision threshold stored as plain text (a single float).
with open(os.path.join(MODELS_DIR, "best_threshold.txt"), "r") as f:
    threshold = float(f.read().strip())

# Feature columns (names and order) used to align the encoded user input.
train_columns = joblib.load(os.path.join(MODELS_DIR, "train_columns.pkl"))

# === Helper functions for conversions ===
def age_to_days_birth(age_years):
    """Return the DAYS_BIRTH value for an age expressed in years.

    The age is scaled by 365.25 days per year (so leap years average out),
    truncated toward zero to a whole number of days, and then negated.
    """
    elapsed_days = int(age_years * 365.25)
    return -elapsed_days

def employment_years_to_days_employed(employment_years):
    """Return the DAYS_EMPLOYED value for an employment duration in years.

    Zero years (unemployed) maps to 0. Any other duration is scaled by
    365.25 days per year, truncated toward zero, and negated.
    """
    return 0 if employment_years == 0 else -int(employment_years * 365.25)

# === Collect real-time input from user ===
def _ask_number(prompt, cast=float, minimum=0):
    """Prompt until the reply parses with *cast*, is finite and >= *minimum*.

    A mistyped value re-prompts instead of raising ValueError and throwing
    away every answer entered so far.
    """
    while True:
        reply = input(prompt).strip()
        try:
            value = cast(reply)
        except ValueError:
            print(f"  '{reply}' is not a valid number, please try again.")
            continue
        # float() happily parses "nan" and "inf"; neither is a usable feature.
        if value != value or abs(value) == float("inf") or value < minimum:
            print(f"  Please enter a number of at least {minimum}.")
            continue
        return value


def _ask_choice(prompt, choices):
    """Prompt until the stripped, upper-cased reply is one of *choices*.

    Rejecting anything else keeps an off-list code from being one-hot encoded
    into a column the model does not know.
    """
    while True:
        reply = input(prompt).strip().upper()
        if reply in choices:
            return reply
        print(f"  Please answer with one of: {', '.join(choices)}.")


print("\n📥 Please enter the following details:")

# Get age and convert to days internally
age_years = _ask_number("Age (in years): ")
days_birth = age_to_days_birth(age_years)

# Get employment duration and convert to days internally
print("Employment Duration:")
print("  - Enter 0 if unemployed")
print("  - Enter years of employment if employed")
employment_years = _ask_number("Years of Employment: ")
days_employed = employment_years_to_days_employed(employment_years)

# Prompts run in dict order; keys become the DataFrame column names.
# The free-text NAME_* answers are only stripped: they must match the category
# spelling used in training exactly to be picked up by the one-hot encoding.
# NOTE(review): 'Age' and 'Years of Working' receive negative day counts, not
# years -- confirm the training pipeline encoded these columns the same way.
user_data = {
    'CODE_GENDER': _ask_choice("Gender (M/F): ", ("M", "F")),
    'FLAG_OWN_CAR': _ask_choice("Owns a Car? (Y/N): ", ("Y", "N")),
    'FLAG_OWN_REALTY': _ask_choice("Owns Realty? (Y/N): ", ("Y", "N")),
    'CNT_CHILDREN': _ask_number("Number of Children: ", cast=int),
    'AMT_INCOME_TOTAL': _ask_number("Annual Income (in numbers): "),
    'NAME_INCOME_TYPE': input("Income Type (Working/Pensioner/Commercial associate/State servant/Student): ").strip(),
    'NAME_EDUCATION_TYPE': input("Education Type (Secondary / Higher / Incomplete / Academic): ").strip(),
    'NAME_FAMILY_STATUS': input("Family Status (Single/Married/Divorced/Separated/Widow): ").strip(),
    'NAME_HOUSING_TYPE': input("Housing Type (House / With parents / Rented / Co-op / Office / Municipal): ").strip(),
    'Age': days_birth,  # Converted from age
    'Years of Working': days_employed,  # Converted from employment years
    'FLAG_WORK_PHONE': int(_ask_choice("Has Work Phone? (1 = Yes, 0 = No): ", ("1", "0"))),
    'FLAG_PHONE': int(_ask_choice("Has Phone? (1 = Yes, 0 = No): ", ("1", "0"))),
    'FLAG_EMAIL': int(_ask_choice("Has Email? (1 = Yes, 0 = No): ", ("1", "0"))),
    'CNT_FAM_MEMBERS': _ask_number("Number of Family Members: "),
}

# Convert to DataFrame (a single row holding the user's answers)
user_df = pd.DataFrame([user_data])

# === Preprocess (one-hot encoding + align columns) ===
user_df_encoded = pd.get_dummies(user_df)

# The alignment below drops every encoded column that is not a training
# column. That is how a mistyped category (e.g. an unknown income type) would
# vanish without a trace, so report such columns before they are discarded.
known_columns = set(train_columns)
unseen_columns = [col for col in user_df_encoded.columns if col not in known_columns]
if unseen_columns:
    print(
        "Warning: these encoded inputs are not among the training columns "
        f"and will be ignored: {unseen_columns}. "
        "Check the spelling of the categorical answers."
    )

# One step does both jobs: training columns missing from the input are added
# filled with 0, and the result contains exactly the training columns, in
# training order.
user_df_encoded = user_df_encoded.reindex(columns=train_columns, fill_value=0)

# === Predict ===
# Column 1 of predict_proba is read as the probability of bad credit;
# [0] selects the single input row.
proba = model.predict_proba(user_df_encoded)[:, 1][0]
# 1 when the probability reaches the stored threshold, else 0.
prediction = int(proba >= threshold)

'''How the decision threshold can be chosen:

1. ROC Curve Analysis

Pick the threshold that maximizes the True Positive Rate while minimizing the False Positive Rate.
This is usually the point closest to the top-left corner of the ROC curve.

2. Precision-Recall Curve

Balances precision (avoiding false rejections of good customers) and recall (catching actual bad credits).
Important for imbalanced datasets (more good credits than bad credits).

3. Business Cost Optimization

Cost of False Positive: Rejecting a good customer (lost business opportunity)
Cost of False Negative: Approving a bad customer (potential loan default)
Threshold chosen to minimize total business cost'''

# === Result ===
# Build the separator and the verdict line first, then emit the report.
separator = "=" * 50
if prediction == 1:
    verdict = " Prediction: BAD CREDIT RISK — Not Eligible for Credit Card"
else:
    verdict = " Prediction: GOOD CREDIT RISK — Eligible for Credit Card"

print("\n CREDIT RISK ASSESSMENT RESULTS")
print(separator)
print(f"Probability of Bad Credit: {proba:.4f} ({proba*100:.2f}%)")
print(f"Threshold Used: {threshold:.4f}")

print(verdict)

print(separator)


📥 Please enter the following details:
Employment Duration:
  - Enter 0 if unemployed
  - Enter years of employment if employed

📈 CREDIT RISK ASSESSMENT RESULTS
Probability of Bad Credit: 0.1462 (14.62%)
Threshold Used: 0.3400
✅ Prediction: GOOD CREDIT RISK — Eligible for Credit Card
