# In [1]:
import numpy as np
import pandas as pd


# In [6]:
import numpy as np
import pandas as pd

# Synthetic credit-scoring dataset generator.
#
# Draws N applicant records from fixed distributions, derives a noisy linear
# risk score, labels the lowest-risk 70% as creditworthy, and writes the
# resulting table to a CSV file in the current working directory.

# Fixed seed so every run reproduces the same dataset. The values drawn depend
# on the order of the np.random calls below, so do not reorder them.
np.random.seed(42)
N = 20000

# Core features
age = np.random.randint(21, 65, N)  # integers in [21, 64]
monthly_income = np.random.lognormal(mean=10, sigma=0.4, size=N)
# Months, integers in [0, 359].
# NOTE(review): drawn independently of age, so a 21-year-old can be assigned
# close to 30 years of tenure -- confirm whether that matters downstream.
employment_tenure = np.random.randint(0, 360, N)
income_variance = np.random.uniform(0.05, 0.6, N)

# Debt is between 2x and 10x of monthly income.
total_debt = monthly_income * np.random.uniform(2, 10, N)
credit_utilization = np.clip(np.random.beta(2, 5, N), 0, 1)
num_loans = np.random.randint(0, 8, N)  # integers in [0, 7]

# Payment history. These two are sampled independently of each other.
on_time_rate = np.random.beta(5, 2, N)
missed_payments = np.random.poisson(1.2, N)

# Savings cover 0-6 months of income; expenses take 40%-90% of income.
savings = monthly_income * np.random.uniform(0, 6, N)
monthly_expenses = monthly_income * np.random.uniform(0.4, 0.9, N)

# Stand-out engineered features
# Debt relative to one year of income; with the draws above it lies in
# [1/6, 5/6], the same 0-1 scale as credit_utilization.
# Do NOT compute this as (credit_utilization * monthly_income) / monthly_income:
# the income term cancels, which makes the column a copy of credit_utilization
# and makes the risk score count utilization twice.
# NOTE(review): debt-to-annual-income is an assumed definition -- confirm the
# intended meaning of "credit dependency".
credit_dependency = total_debt / (12 * monthly_income)
payment_momentum = np.random.normal(0, 0.15, N)
# Savings relative to three months of expenses; the +1 keeps the denominator
# strictly positive.
resilience_score = savings / (3 * monthly_expenses + 1)

# Risk score
# Utilization, credit dependency and (max-normalized) missed payments raise
# the score; resilience and payment momentum lower it.
risk_score = (
    0.35 * credit_utilization +
    0.25 * credit_dependency +
    0.20 * (missed_payments / (missed_payments.max() + 1)) -
    0.30 * resilience_score -
    0.20 * payment_momentum
)

# Gaussian noise so the label is not a deterministic function of the features.
risk_score += np.random.normal(0, 0.1, N)

# Label: 1 for records whose risk score is below the 70th percentile, else 0.
creditworthy = (risk_score < np.percentile(risk_score, 70)).astype(int)

# DataFrame
df = pd.DataFrame({
    'age': age,
    'monthly_income': monthly_income,
    'employment_tenure_months': employment_tenure,
    'income_variance_6m': income_variance,
    'total_debt': total_debt,
    'credit_utilization': credit_utilization,
    'num_active_loans': num_loans,
    'credit_dependency_ratio': credit_dependency,
    'on_time_payment_rate_6m': on_time_rate,
    'missed_payment_count': missed_payments,
    'payment_discipline_momentum': payment_momentum,
    'savings_balance': savings,
    'monthly_expenses': monthly_expenses,
    'financial_shock_resilience_score': resilience_score,
    'creditworthy': creditworthy
})

# Written relative to the current working directory, without the index column.
df.to_csv(r"hybrid_credit_scoring_dataset.csv", index=False)
