In [2]:
import pandas as pd
import numpy as np
import random

In [13]:
def generate_customer_details(number_of_customers):
    """Generate a synthetic customer table with income, employment and bankruptcy fields.

    Parameters
    ----------
    number_of_customers : int
        Number of customer rows to generate.

    Returns
    -------
    pandas.DataFrame
        One row per customer with the columns:

        - ``Customer_ID``: sequential ids ``"1"``, ``"2"``, ... as strings.
        - ``Age``: random integer in 18..100 (inclusive).
        - ``Salary_Net_Income``: random integer in 500..5000 for roughly 80% of
          customers, 0 for the rest.
        - ``Self_employed_net_income``: random integer in 0..5000 for roughly 30%
          of customers, 0 for the rest.
        - ``Salary_frequency``: 'weekly', 'biweekly' or 'monthly' for salaried
          customers, ``None`` otherwise.
        - ``Total_experience_months`` / ``Months_current_job``: only set for
          salaried customers (missing otherwise); the current-job value never
          exceeds the total experience.
        - ``Current_Bankruptcy_status``: True for roughly 1% of customers.
        - ``Ever_Bankrupt``: always True when currently bankrupt, otherwise True
          for roughly 5% of customers.
    """
    df = pd.DataFrame({
        'Customer_ID': [f"{i+1}" for i in range(number_of_customers)],
        'Age': np.random.randint(18, 101, number_of_customers)
    })

    salary_mask = np.random.choice([True, False], number_of_customers, p=[0.80, 0.20])
    df['Salary_Net_Income'] = np.where(salary_mask, np.random.randint(500, 5001, number_of_customers), 0)
    df['Self_employed_net_income'] = np.where(np.random.random(number_of_customers) < 0.3,
                                              np.random.randint(0, 5001, number_of_customers), 0)

    df['Salary_frequency'] = [np.random.choice(['weekly', 'biweekly', 'monthly']) if r > 0 else None
                              for r in df['Salary_Net_Income']]

    months_current_job = []
    total_experience_months = []

    # Only the two columns read below are iterated; this avoids building a
    # Series per row the way iterrows() does.
    for age, salary in zip(df['Age'], df['Salary_Net_Income']):
        if salary <= 0:
            # Employment history is only generated for salaried customers.
            months_current_job.append(None)
            total_experience_months.append(None)
            continue

        # Working life is assumed to start at 18, which caps total experience.
        max_total = (age - 18) * 12
        total_exp = np.random.randint(0, max_total) if max_total > 0 else 0

        # np.random.randint excludes its upper bound, so "+ 1" is required:
        # without it a total of 0 months (or exactly 91 in the long-tenure
        # branch) produces an empty range and raises ValueError.
        if total_exp > 90 and random.random() < 0.1:
            # ~10% of sufficiently experienced customers get a long tenure (> 90 months).
            current_job = np.random.randint(91, total_exp + 1)
        else:
            current_job = np.random.randint(0, min(90, total_exp) + 1)

        months_current_job.append(current_job)
        total_experience_months.append(total_exp)

    df['Months_current_job'] = months_current_job
    df['Total_experience_months'] = total_experience_months

    df['Current_Bankruptcy_status'] = np.random.choice([False, True], number_of_customers, p=[0.99, 0.01])
    # A customer who is currently bankrupt has, by definition, been bankrupt.
    df['Ever_Bankrupt'] = np.where(
        df['Current_Bankruptcy_status'],
        True,
        np.random.choice([False, True], number_of_customers, p=[0.95, 0.05])
    )

    return df

In [14]:
# Build a 100-customer sample frame and print its first five rows
df = generate_customer_details(number_of_customers=100)
print(df.head(5))

  Customer_ID  Age  Salary_Net_Income  Self_employed_net_income  \
0           1   80               4793                         0   
1           2   25                  0                      3904   
2           3   98               4406                         0   
3           4   49               4686                       958   
4           5   45               1543                       416   

  Salary_frequency  Months_current_job  Total_experience_months  \
0         biweekly                36.0                    392.0   
1             None                 NaN                      NaN   
2          monthly                 8.0                    487.0   
3          monthly                36.0                    269.0   
4         biweekly               107.0                    226.0   

   Current_Bankruptcy_status  Ever_Bankrupt  
0                      False          False  
1                      False          False  
2                      False          False  
3         

In [16]:
## ----- payment calculation ------

def pmt_calculate(loan_amount, annual_rate, tenure, tenure_type):
  """Return the level payment per period for an amortizing loan.

  Parameters
  ----------
  loan_amount : number
      Principal being repaid.
  annual_rate : number
      Annual interest rate in percent (e.g. ``18.5`` for 18.5%).
  tenure : number
      Number of payment periods.
  tenure_type : str
      Payment frequency: 'weekly', 'biweekly' or 'monthly'. Any other value
      is treated as interest-free.

  Returns
  -------
  number
      ``loan_amount * r / (1 - (1 + r) ** -tenure)`` where ``r`` is the
      per-period rate, or ``loan_amount / tenure`` when ``r`` is zero.
  """
  # Payment periods in a year for each supported frequency
  # (biweekly = every two weeks = 52 / 2 = 26 periods).
  periods_per_year = {"weekly": 52, "biweekly": 26, "monthly": 12}
  periods = periods_per_year.get(tenure_type)

  # Unrecognised frequencies fall back to a zero rate (interest-free split).
  rate = (annual_rate / 100) / periods if periods else 0

  if rate == 0:
    # The annuity formula divides by zero at r == 0; split the principal evenly.
    return loan_amount / tenure
  return (loan_amount * rate) / (1 - (1 + rate) ** -tenure)

In [34]:
# --- Loan Details ---
def generate_loan_details(num_counts):
  """Generate a synthetic loan table with fees and recurring payment amounts.

  Parameters
  ----------
  num_counts : int
      Number of loan rows to generate.

  Returns
  -------
  pandas.DataFrame
      One row per loan with the columns:

      - ``Loan_ID``: sequential integers starting at 1000.
      - ``Loan_Amount``: random integer in 100..5000 (inclusive).
      - ``Interest_rate``: random annual rate in percent between 10 and 30,
        rounded to 2 decimals.
      - ``Tenure``: random number of payment periods in 6..25 (inclusive).
      - ``Tenure_type``: 'weekly', 'biweekly' or 'monthly'.
      - ``Fee``: 50 for loans of 1000 or less, otherwise 100.
      - ``Recurring_payment_amount``: per-period payment from
        ``pmt_calculate``, rounded to 2 decimals.
  """
  data = pd.DataFrame({
      'Loan_ID' : np.arange(1000, 1000 + num_counts),
      'Loan_Amount' : np.random.randint(100, 5001, num_counts),
      'Interest_rate' : np.random.uniform(10, 30, num_counts).round(2),
      'Tenure' : np.random.randint(6, 26, num_counts),
      'Tenure_type' : np.random.choice(['weekly','biweekly','monthly'], num_counts)
  })

  # Fees calculation: flat fee tiered on the loan amount.
  data['Fee'] = np.where(data['Loan_Amount'] <= 1000, 50, 100)

  # Iterate the four input columns directly instead of a row-wise apply;
  # this also yields a well-formed (empty) column when num_counts is 0.
  payments = [
      pmt_calculate(amount, rate, tenure, tenure_type)
      for amount, rate, tenure, tenure_type in zip(
          data['Loan_Amount'],
          data['Interest_rate'],
          data['Tenure'],
          data['Tenure_type']
      )
  ]
  data['Recurring_payment_amount'] = np.round(np.array(payments, dtype=float), 2)

  # The frame must be returned; without this callers receive None.
  return data



In [31]:
# Show the frame as this cell's output (bare last expression -> rich notebook display).
# NOTE(review): the recorded output contains loan columns (Loan_ID ... Tenure_type) that no
# visible cell adds to `df`, and this cell's execution count precedes the loan-generation
# cell's — presumably a merge step was run out of order or removed; confirm on a fresh kernel.
df

Unnamed: 0,Customer_ID,Age,Salary_Net_Income,Self_employed_net_income,Salary_frequency,Months_current_job,Total_experience_months,Current_Bankruptcy_status,Ever_Bankrupt,Loan_ID,Loan_Amount,Interest_rate,Tenure,Tenure_type
0,1,80,4793,0,biweekly,36.0,392.0,False,False,1000.0,4619.0,17.90,20.0,biweekly
1,2,25,0,3904,,,,False,False,1001.0,1964.0,27.83,20.0,weekly
2,3,98,4406,0,monthly,8.0,487.0,False,False,1002.0,4474.0,13.00,17.0,biweekly
3,4,49,4686,958,monthly,36.0,269.0,False,False,1003.0,2241.0,18.24,15.0,weekly
4,5,45,1543,416,biweekly,107.0,226.0,False,False,1004.0,1297.0,26.36,22.0,monthly
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,23,0,0,,,,False,False,,,,,
96,97,89,2916,3849,weekly,803.0,837.0,False,False,,,,,
97,98,91,992,0,monthly,1.0,258.0,False,False,,,,,
98,99,78,2297,4228,weekly,33.0,142.0,False,False,,,,,
