### Imports

In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import os

##### Parameters for each class

In [10]:
# Gaussian parameters per risk class. Every feature maps to a
# (mean, standard deviation) pair; generate_class_data feeds element 0 and
# element 1 of each pair to np.random.normal as loc and scale.
params = dict(
    LowRisk=dict(
        income=(85000, 15000),
        age=(44, 10),
        credit_score=(760, 30),
        debt_ratio=(0.22, 0.10),
        loan_amount=(12000, 5000),
    ),
    MediumRisk=dict(
        income=(55000, 12000),
        age=(37, 12),
        credit_score=(660, 40),
        debt_ratio=(0.38, 0.15),
        loan_amount=(18000, 7000),
    ),
    HighRisk=dict(
        income=(32000, 8000),
        age=(30, 8),
        credit_score=(550, 50),
        debt_ratio=(0.58, 0.20),
        loan_amount=(25000, 10000),
    ),
)

##### Sample size per class

In [11]:
samples_per_class = 2_000  # rows generated per risk class (three classes -> 6,000 rows overall)

##### Function to create data for one class

In [12]:
def generate_class_data(class_name, params, n_samples, random_state=None):
    """Generate synthetic samples for a given risk class.

    Every feature listed for ``class_name`` is drawn independently from a
    normal distribution. Values are not clipped, so draws far from the mean
    (including negative ones) can occur.

    Parameters
    ----------
    class_name : str
        Key into ``params``; also written to the ``risk_class`` column.
    params : dict
        Maps class name -> {feature name -> (mean, std)}. Only the first two
        elements of each feature spec are used.
    n_samples : int
        Number of rows to generate.
    random_state : None, int or numpy.random.Generator, optional
        Source of randomness. ``None`` (the default) draws from NumPy's global
        ``np.random`` state, exactly as before, so ``np.random.seed`` still
        controls the result. Anything else is handed to
        ``np.random.default_rng`` to make this call reproducible on its own.

    Returns
    -------
    pandas.DataFrame
        One column per feature (in the order they appear in the class spec)
        followed by a ``risk_class`` column; ``n_samples`` rows.

    Raises
    ------
    KeyError
        If ``class_name`` is not a key of ``params``.
    """
    cls_params = params[class_name]

    # Keep the legacy global RNG as the default so existing seeded runs are
    # unaffected; only build a dedicated generator when one is requested.
    if random_state is None:
        draw = np.random.normal
    else:
        draw = np.random.default_rng(random_state).normal

    # Dicts preserve insertion order, so features are drawn (and appear as
    # columns) in the order they are declared in the class spec.
    data = {
        feature: draw(spec[0], spec[1], n_samples)
        for feature, spec in cls_params.items()
    }
    data["risk_class"] = [class_name] * n_samples
    return pd.DataFrame(data)

##### Generate the data for every class and combine it into a single DataFrame

In [13]:
# Seed NumPy's global RNG so that "Restart Kernel -> Run All" regenerates the
# exact same synthetic dataset; generate_class_data draws from np.random.
np.random.seed(42)

# One frame per risk class, in the order the classes are declared in params.
df_list = [
    generate_class_data(cls, params, samples_per_class)
    for cls in params
]

# Stack the per-class frames and renumber the rows from 0.
df = pd.concat(df_list, ignore_index=True)

In [14]:
# Report the (rows, columns) of the combined frame, then show its first five rows.
print("Dataset shape:", (len(df.index), len(df.columns)))
df.head(n=5)

Dataset shape: (6000, 6)


Unnamed: 0,income,age,credit_score,debt_ratio,loan_amount,risk_class
0,72923.478862,42.513884,739.756575,0.222687,14489.698884,LowRisk
1,103410.539507,41.733267,770.110477,0.256367,10186.764685,LowRisk
2,77138.024451,39.950839,768.283551,0.322879,14972.56257,LowRisk
3,98363.424949,27.243548,822.800091,0.210413,12804.894388,LowRisk
4,95397.414763,43.779875,757.619328,0.264339,7631.384561,LowRisk


##### Save to out.csv inside Dataset folder

In [16]:
folder_name = "Dataset"

# Record whether the folder was already there *before* creating it, so the
# message reflects what actually happened. isdir (rather than exists) avoids
# mistaking a plain file called "Dataset" for the output folder, and
# makedirs(exist_ok=True) does not fail if the folder appears in between.
folder_existed = os.path.isdir(folder_name)
os.makedirs(folder_name, exist_ok=True)
if folder_existed:
    print(f"Folder '{folder_name}' already exists.")
else:
    print(f"Folder '{folder_name}' created.")

# Source - https://stackoverflow.com/a
# Posted by Andy Hayden, modified by community. See post 'Timeline' for change history
# Retrieved 2025-12-11, License - CC BY-SA 4.0

# Build the path with os.path.join instead of a hard-coded "Dataset\out.csv":
# "\o" is an invalid escape sequence, and on non-Windows systems the backslash
# is part of the file name, so the CSV would land next to the folder rather
# than inside it.
output_path = os.path.join(folder_name, "out.csv")
df.to_csv(output_path, encoding='utf-8', index=False, header=True)

Folder 'Dataset' already exists.
