The generated dataset is saved as a CSV file (`loan_data.csv`) and an Excel file (`loan_data.xlsx`).


In [5]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Seed NumPy's global RNG so that repeated runs generate the same dataset.
np.random.seed(42)

# Number of synthetic loan records to generate
n_cases = 10000

# Generate synthetic data.
# NOTE: np.random.randint draws from [low, high) -- the upper bound is
# exclusive -- so any bound that must be reachable is passed as "max + 1".
loan_data = {
    "Loan_ID": [f"LN{i:05}" for i in range(1, n_cases + 1)],  # LN00001 .. LN10000
    "Loan_Amount": np.random.randint(1000, 50000, size=n_cases),  # Loan amounts in $
    "Loan_Term": np.random.choice([12, 24, 36, 48, 60], size=n_cases),  # Loan term in months
    "Annual_Income": np.random.randint(20000, 200000, size=n_cases),  # Annual income in $
    # high=851 so that the maximum credit score of 850 can actually be drawn.
    "Credit_Score": np.random.randint(300, 851, size=n_cases),  # Credit score range 300-850
    "Employment_Length": np.random.randint(0, 30, size=n_cases),  # Employment length in years
    "Loan_Status": np.random.choice(["Approved", "Default"], size=n_cases, p=[0.85, 0.15]),  # 85% Approved
}

# Convert to DataFrame
loan_df = pd.DataFrame(loan_data)

# Generate random Loan Issue Dates within the range 2010-01-01 to 2024-12-31
# (both endpoints included).
start_date = datetime(2010, 1, 1)
end_date = datetime(2024, 12, 31)
date_range = (end_date - start_date).days

# Draw all day offsets in one vectorized call instead of one Python-level call
# per row; "date_range + 1" makes the offset that lands on end_date reachable.
issue_day_offsets = np.random.randint(0, date_range + 1, size=n_cases)
loan_df["Loan_Issue_Date"] = pd.Timestamp(start_date) + pd.to_timedelta(
    issue_day_offsets, unit="D"
)

# Save to CSV
loan_df.to_csv("loan_data.csv", index=False)
print("Dataset saved to 'loan_data.csv'")

# Save to Excel (the openpyxl engine must be installed)
loan_df.to_excel("loan_data.xlsx", index=False, engine='openpyxl')
print("Dataset saved to 'loan_data.xlsx'")


Dataset saved to 'loan_data.csv'
Dataset saved to 'loan_data.xlsx'
