In [69]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

# Number of synthetic customers to generate.
N_CUSTOMERS = 1000

# Seed the global NumPy RNG so every run produces the same sample.
np.random.seed(42)

# Sequential identifiers: CUST_001, CUST_002, ...
customer_ids = [f'CUST_{i:03d}' for i in range(1, N_CUSTOMERS + 1)]

# Each attribute is drawn independently from a small set of values; the None
# entries in some option lists produce missing values in the resulting column.
# NOTE: the draws consume the seeded RNG stream in sequence, so their order
# must not change if the generated data is to stay the same.
ages = np.random.choice([25, 30, 35, 40, 45, 50, 55, None], size=N_CUSTOMERS)
employ_statuses = np.random.choice(['unemployed', 'part time', 'full time', None], size=N_CUSTOMERS)
indebtedness_scores = np.random.choice([20, 35, 50, 65, 80, 95, None], size=N_CUSTOMERS)
incomes = np.random.choice([20000, 30000, 40000, 50000, 60000, 70000, None], size=N_CUSTOMERS)
loan_amount = np.random.choice([500, 1000, 700], size=N_CUSTOMERS)
actual_decision = np.random.choice(['ACCEPT', 'DECLINE'], size=N_CUSTOMERS)

# Assemble one row per customer.
df = pd.DataFrame(
    data={
        'customer_id': customer_ids,
        'age': ages,
        'employ_status': employ_statuses,
        'indebtedness_score': indebtedness_scores,
        'income': incomes,
        'loan_amount': loan_amount,
        'actual_decision': actual_decision,
    }
)
display(df.head(2))

Unnamed: 0,customer_id,age,employ_status,indebtedness_score,income,loan_amount,actual_decision
0,CUST_001,55,part time,,30000,1000,DECLINE
1,CUST_002,40,full time,65.0,40000,1000,DECLINE


In [70]:
# Decline codes: one flag column per policy rule.
decline_codes = ['AP01', 'AP02']

# Every flag column starts at 0 for all customers (no rule has fired yet).
initial_flags = dict.fromkeys(decline_codes, 0)
for code, flag in initial_flags.items():
    df[code] = flag
display(df.head(2))

Unnamed: 0,customer_id,age,employ_status,indebtedness_score,income,loan_amount,actual_decision,AP01,AP02
0,CUST_001,55,part time,,30000,1000,DECLINE,0,0
1,CUST_002,40,full time,65.0,40000,1000,DECLINE,0,0


#### Policy Defined Rules

In [71]:
# 1 for decline and 0 for accept
def ap01(row, col = 'age', min_age = 21, max_age = 45):
    """Policy AP01: decline when the applicant's age is outside the allowed range.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row as ``pd.Series``, or a dict)
        Record that holds the age under ``col``.
    col : str, default 'age'
        Key/column to read the age from.
    min_age : number, default 21
        Youngest accepted age (inclusive); anything below is declined.
    max_age : number, default 45
        Oldest accepted age (inclusive); anything above is declined.

    Returns
    -------
    int
        1 for decline, 0 for accept. A missing age (None/NaN) never
        triggers the rule and returns 0.
    """
    age = row[col]
    # Missing values cannot be compared; treat them as "rule not triggered".
    if pd.isna(age):
        return 0
    return int(age < min_age or age > max_age)

def ap02(row, col = 'employ_status', decline_statuses = ('unemployed', 'part time')):
    """Policy AP02: decline when the employment status is on the decline list.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row as ``pd.Series``, or a dict)
        Record that holds the employment status under ``col``.
    col : str, default 'employ_status'
        Key/column to read the status from.
    decline_statuses : collection of str, default ('unemployed', 'part time')
        Statuses that trigger a decline. Matching is exact (case- and
        whitespace-sensitive).

    Returns
    -------
    int
        1 for decline, 0 for accept. A missing status (None/NaN) never
        triggers the rule and returns 0.
    """
    status = row[col]
    # Missing values are treated as "rule not triggered".
    if pd.isna(status):
        return 0
    return int(status in decline_statuses)

#### Applying Policies to Customer Bureau Data

In [72]:
# Evaluate every policy rule for every customer.
# The flags are collected in plain lists and written back only after the loop:
# pandas advises against modifying a DataFrame while iterating over it with
# iterrows(), and assigning positionally avoids depending on unique index labels.
policy_rules = {'AP01': ap01, 'AP02': ap02}
policy_flags = {code: [] for code in policy_rules}
for _, row in tqdm(df.iterrows(), total=len(df)):
    for code, rule in policy_rules.items():
        policy_flags[code].append(rule(row))

# 1 for decline and 0 for accept, one integer column per rule.
for code, flags in policy_flags.items():
    df[code] = pd.Series(flags, index=df.index, dtype='int64')

  0%|          | 0/1000 [00:00<?, ?it/s]

In [73]:
# Overall simulated outcome: 1 (decline) when at least one decline code is set
# for the customer, 0 (accept) otherwise.
any_rule_declined = df[decline_codes].eq(1).any(axis=1)
df['simulated_decision'] = any_rule_declined.astype(int)

#### Metrics

In [79]:
# Match rate: counts of actual decisions (rows) against simulated decisions
# (columns, where 1 = decline and 0 = accept).
pd.crosstab(index=df['actual_decision'], columns=df['simulated_decision'])

simulated_decision,0,1
actual_decision,Unnamed: 1_level_1,Unnamed: 2_level_1
ACCEPT,209,323
DECLINE,165,303
