In [22]:
from pathlib import Path

import numpy as np
import pandas as pd

# Number of synthetic loan records generated by the cells below.
NUM_ROWS = 1_000

# Seed the legacy global NumPy RNG so that re-running the notebook from a
# fresh kernel reproduces the same draws.
np.random.seed(seed=42)


In [23]:
# NOTE: every call below consumes the shared global RNG, so the order of these
# statements determines the generated values -- do not reorder them.

# Integer-valued borrower / loan attributes (randint's upper bound is exclusive).
age = np.random.randint(low=21, high=60, size=NUM_ROWS)
monthly_income = np.random.randint(low=25000, high=110000, size=NUM_ROWS)
loan_amount = np.random.randint(low=300000, high=1800000, size=NUM_ROWS)

# Uniform draw in [8.0, 15.0), kept to two decimal places.
interest_rate = np.round(np.random.uniform(low=8.0, high=15.0, size=NUM_ROWS), 2)

tenure_years = np.random.randint(low=2, high=15, size=NUM_ROWS)
dependents = np.random.randint(low=0, high=4, size=NUM_ROWS)
credit_score = np.random.randint(low=600, high=850, size=NUM_ROWS)

# Categorical feature: 65% "salaried", 35% "business".
employment_type = np.random.choice(
    a=["salaried", "business"], size=NUM_ROWS, p=[0.65, 0.35]
)


In [24]:
# interest_rate is divided by 12 * 100, i.e. it is treated as an annual
# percentage and turned into a per-month fraction; tenure becomes months.
months = tenure_years * 12
monthly_rate = interest_rate / (12 * 100)

# EMI = P * r * (1 + r)^n / ((1 + r)^n - 1), evaluated element-wise.
compound_factor = (1 + monthly_rate) ** months
emi_numerator = loan_amount * monthly_rate * compound_factor
emi_denominator = compound_factor - 1

# Final value is rounded to zero decimal places (still a float array).
monthly_emi = (emi_numerator / emi_denominator).round(0)


In [25]:
# Fraction of monthly income that goes to the EMI payment.
emi_ratio = monthly_emi / monthly_income

# Cut-offs for the stress labels. Comparisons are strict, so a ratio exactly
# equal to a cut-off falls into the lower band.
HIGH_STRESS_RATIO = 0.40
MEDIUM_STRESS_RATIO = 0.25

# Vectorised replacement for the per-element Python loop. np.select takes the
# first matching condition, mirroring the original if / elif / else order, and
# anything matching neither condition (including NaN) is labelled "Low".
# .tolist() keeps stress_level a plain list of str, as before.
stress_level = np.select(
    [emi_ratio > HIGH_STRESS_RATIO, emi_ratio > MEDIUM_STRESS_RATIO],
    ["High", "Medium"],
    default="Low",
).tolist()


In [26]:
# Assemble the per-loan arrays into one table; keyword order fixes the column
# order of the resulting frame.
df = pd.DataFrame(
    dict(
        age=age,
        monthly_income=monthly_income,
        loan_amount=loan_amount,
        interest_rate=interest_rate,
        tenure_years=tenure_years,
        monthly_emi=monthly_emi,
        dependents=dependents,
        credit_score=credit_score,
        employment_type=employment_type,
        stress_level=stress_level,
    )
)


In [27]:
# Preview the first rows of the generated frame as a quick sanity check.
df.head()

Unnamed: 0,age,monthly_income,loan_amount,interest_rate,tenure_years,monthly_emi,dependents,credit_score,employment_type,stress_level
0,59,82761,1286753,8.31,12,14148.0,2,782,salaried,Low
1,49,61368,1025947,11.15,3,33661.0,2,819,salaried,High
2,35,31783,572900,14.51,2,27645.0,2,691,business,High
3,28,66914,761227,9.16,10,9709.0,0,787,salaried,Low
4,41,95507,1647098,10.73,13,19621.0,0,786,salaried,Low


In [28]:
# Class balance of the target label, as proportions (normalize=True) rather than raw counts.
df["stress_level"].value_counts(normalize=True)


stress_level
Low       0.443
High      0.295
Medium    0.262
Name: proportion, dtype: float64

In [29]:
# NOTE(review): this cell repeats the class-balance check from the previous
# cell verbatim and produces the same output; consider removing one of them.
df["stress_level"].value_counts(normalize=True)


stress_level
Low       0.443
High      0.295
Medium    0.262
Name: proportion, dtype: float64

In [30]:
# Destination of the generated dataset, relative to the notebook's working directory.
output_path = Path("../dataset/loans.csv")

# to_csv does not create missing folders and would raise OSError on a fresh
# checkout, so make sure the target directory exists first.
output_path.parent.mkdir(parents=True, exist_ok=True)

# index=False keeps the row index out of the file.
df.to_csv(output_path, index=False)
