In [None]:
Credit_Scoring_System/
│
├── notebooks/
│   ├── 01_CRISP_DM_and_Data_Generation.ipynb
│   ├── 02_Data_Preparation_and_EDA.ipynb
│   ├── 03_Model_Development_and_MLflow.ipynb
│   ├── 04_Fairness_Analysis.ipynb
│   └── 05_API_Deployment_Prototype.ipynb
│
├── data/
│   ├── raw/
│   │   └── simulated_microfinance_data.csv
│   │
│   └── processed/
│       └── YourLastName.csv
│
├── models/
│   └── credit_model.pkl
│
├── api/
│   └── app.py
│
├── reports/
│   ├── final_report.pdf
│   └── presentation_slides.pdf
│
├── requirements.txt
└── README.md

In [None]:
# CRISP-DM framing, hypotheses, dataset sourcing, compliance

# CRISP-DM framing

# Hypotheses

# Dataset simulation

# Save raw data

In [1]:
import numpy as np
import pandas as pd
from pathlib import Path

# Resolve the raw-data folder one level above the working directory and
# create it (including missing parents) so the CSV can be written there.
BASE_DIR = Path("..")
RAW_DATA_DIR = BASE_DIR.joinpath("data", "raw")
RAW_DATA_DIR.mkdir(parents=True, exist_ok=True)

# Seed NumPy's global RNG so every draw below is repeatable.
np.random.seed(42)
n = 20000

# Draw the applicant attributes one column at a time. All draws share the
# global RNG stream, so their order must stay fixed or the data changes.
applicant_columns = {}
applicant_columns["income"] = np.random.lognormal(mean=10, sigma=0.5, size=n)
applicant_columns["loan_amount"] = np.random.lognormal(mean=9, sigma=0.6, size=n)
applicant_columns["loan_term_months"] = np.random.choice([6, 12, 18, 24, 36], size=n)
applicant_columns["employment_status"] = np.random.choice(
    ["formal", "informal", "self-employed"], size=n
)
applicant_columns["credit_history_years"] = np.random.randint(0, 10, size=n)
applicant_columns["gender"] = np.random.choice(["male", "female"], size=n)
applicant_columns["age_group"] = np.random.choice(
    ["18-25", "26-35", "36-45", "46+"], size=n
)
df = pd.DataFrame(applicant_columns)

# Derived feature: requested loan amount relative to income.
df["debt_to_income"] = df["loan_amount"].div(df["income"])

# Default probability is an additive score over three boolean risk flags;
# a row with no flag set gets probability 0, one with all three gets 0.75.
is_overleveraged = df["debt_to_income"] > 0.4
has_short_history = df["credit_history_years"] < 2
is_informal_worker = df["employment_status"] == "informal"
prob_default = (
    0.35 * is_overleveraged
    + 0.25 * has_short_history
    + 0.15 * is_informal_worker
)

# One Bernoulli draw per row, with the probability bounded to [0, 1].
df["loan_default"] = np.random.binomial(n=1, p=prob_default.clip(lower=0, upper=1))

# Persist the simulated data without the index column.
# NOTE(review): a later cell in this notebook writes the same file name to the
# same folder with the term column named "loan_term" - confirm which schema
# downstream notebooks expect.
raw_csv_path = RAW_DATA_DIR / "simulated_microfinance_data.csv"
df.to_csv(raw_csv_path, index=False)

df.head()

Unnamed: 0,income,loan_amount,loan_term_months,employment_status,credit_history_years,gender,age_group,debt_to_income,loan_default
0,28236.114017,9986.323128,24,formal,2,female,26-35,0.353672,0
1,20555.171637,9604.568556,18,formal,5,female,26-35,0.467258,0
2,30450.091226,4619.71407,24,informal,1,male,26-35,0.151714,0
3,47170.073204,11472.986293,24,informal,3,female,46+,0.243226,0
4,19592.915242,3314.1299,36,self-employed,0,male,18-25,0.169149,0


In [2]:
import numpy as np
import pandas as pd
from pathlib import Path

# Raw-data folder, one level above the working directory; created on demand.
BASE_DIR = Path("..")
RAW_DIR = BASE_DIR.joinpath("data", "raw")
RAW_DIR.mkdir(parents=True, exist_ok=True)

# Fixed seed on NumPy's global RNG keeps the simulation repeatable.
np.random.seed(42)
n = 20000

# Each column consumes the shared RNG stream in turn, so the order of these
# draws is part of the result and must not be rearranged.
simulated_columns = {}
simulated_columns["income"] = np.random.lognormal(mean=10, sigma=0.5, size=n)
simulated_columns["loan_amount"] = np.random.lognormal(mean=9, sigma=0.6, size=n)
simulated_columns["loan_term"] = np.random.choice([6, 12, 18, 24, 36], size=n)
simulated_columns["employment_status"] = np.random.choice(
    ["formal", "informal", "self-employed"], size=n
)
simulated_columns["credit_history_years"] = np.random.randint(0, 10, size=n)
simulated_columns["gender"] = np.random.choice(["male", "female"], size=n)
simulated_columns["age_group"] = np.random.choice(
    ["18-25", "26-35", "36-45", "46+"], size=n
)
df = pd.DataFrame(simulated_columns)

# Loan amount expressed as a fraction of income.
df["debt_to_income"] = df["loan_amount"].div(df["income"])

# Additive default probability built from three boolean risk indicators
# (0 when none apply, 0.75 when all three apply).
high_debt_ratio = df["debt_to_income"] > 0.4
short_credit_history = df["credit_history_years"] < 2
informal_employment = df["employment_status"] == "informal"
prob_default = (
    0.35 * high_debt_ratio
    + 0.25 * short_credit_history
    + 0.15 * informal_employment
)

# Sample the binary outcome row by row from the bounded probabilities.
df["loan_default"] = np.random.binomial(n=1, p=prob_default.clip(lower=0, upper=1))

# Write the dataset to disk without the index column.
# NOTE(review): an earlier cell in this notebook writes the same file name to
# the same folder with the term column named "loan_term_months"; this write
# replaces it - confirm which schema downstream notebooks expect.
output_csv = RAW_DIR / "simulated_microfinance_data.csv"
df.to_csv(output_csv, index=False)
df.head()

Unnamed: 0,income,loan_amount,loan_term,employment_status,credit_history_years,gender,age_group,debt_to_income,loan_default
0,28236.114017,9986.323128,24,formal,2,female,26-35,0.353672,0
1,20555.171637,9604.568556,18,formal,5,female,26-35,0.467258,0
2,30450.091226,4619.71407,24,informal,1,male,26-35,0.151714,0
3,47170.073204,11472.986293,24,informal,3,female,46+,0.243226,0
4,19592.915242,3314.1299,36,self-employed,0,male,18-25,0.169149,0
