In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


Dataset approval and preprocessing

In [13]:
# Load the CSV and run the basic clean-up as a single pipeline:
#   1. keep only rows without any missing value,
#   2. remove the 'ID' column,
#   3. replace DAYS_BIRTH, DAYS_EMPLOYED and BEGIN_MONTHS with their
#      absolute values,
#   4. encode the two-valued text columns as 0/1
#      (CODE_GENDER: F -> 1, M -> 0; FLAG_OWN_CAR / FLAG_OWN_REALTY: Y -> 1, N -> 0).
# Values outside those mappings become NaN, because Series.map is used.
data = (
    pd.read_csv('credit_card_approval.csv')
    .dropna()
    .drop(columns='ID')
    .assign(
        DAYS_BIRTH=lambda frame: frame['DAYS_BIRTH'].abs(),
        DAYS_EMPLOYED=lambda frame: frame['DAYS_EMPLOYED'].abs(),
        BEGIN_MONTHS=lambda frame: frame['BEGIN_MONTHS'].abs(),
        CODE_GENDER=lambda frame: frame['CODE_GENDER'].map({'F': 1, 'M': 0}),
        FLAG_OWN_CAR=lambda frame: frame['FLAG_OWN_CAR'].map({'Y': 1, 'N': 0}),
        FLAG_OWN_REALTY=lambda frame: frame['FLAG_OWN_REALTY'].map({'Y': 1, 'N': 0}),
    )
)


# Preprocess the children column: map "No children" to 0, "1 children" to 1, and "2+ children" to 2.
def process_children(x):
    """Convert a CNT_CHILDREN text label into an integer child count.

    Labels are recognised by substring match, checked in this order:
    'No children' -> 0, '1 children' -> 1, '2+ children' -> 2.

    Parameters
    ----------
    x : object
        Raw cell value taken from the CNT_CHILDREN column.

    Returns
    -------
    int or None
        The mapped count, or None when ``x`` is not a string or contains
        none of the recognised labels.
    """
    # A non-string cell (e.g. NaN/None for a missing value) would make the
    # substring checks below raise TypeError; treat it like any other
    # unrecognised label instead of crashing the whole ``apply``.
    if not isinstance(x, str):
        return None

    if 'No children' in x:
        return 0
    if '1 children' in x:
        return 1
    if '2+ children' in x:
        return 2
    return None
# Turn the textual child-count labels into integers via process_children.
data['CNT_CHILDREN'] = data['CNT_CHILDREN'].apply(process_children)

# One-hot encode the categorical columns: each listed column is replaced by
# one indicator column per category.
data = pd.get_dummies(
    data,
    columns=[
        'NAME_EDUCATION_TYPE',
        'NAME_FAMILY_STATUS',
        'NAME_HOUSING_TYPE',
        'JOB',
    ],
)

# Define the STATUS encoding. Source code -> numeric value:
#   '0' -> 1   1-29 days past due
#   '1' -> 2   30-59 days past due
#   '2' -> 3   60-89 days overdue
#   '3' -> 4   90-119 days overdue
#   '4' -> 5   120-149 days overdue
#   '5' -> 6   overdue or bad debts, write-offs for more than 150 days
#   'C' -> 0   paid off that month
#   'X' -> -1  no loan for the month
# The six overdue buckets are simply "digit + 1", so they are generated.
status_mapping = {str(bucket): bucket + 1 for bucket in range(6)}
status_mapping.update({'C': 0, 'X': -1})

# Codes missing from status_mapping become NaN (Series.map semantics).
data['STATUS'] = data['STATUS'].map(status_mapping)

# Preview the first rows of the processed frame.
print(data.head())


   CODE_GENDER  FLAG_OWN_CAR  FLAG_OWN_REALTY  CNT_CHILDREN  AMT_INCOME_TOTAL  \
0            1             1                0             2          270000.0   
1            1             0                0             0           81000.0   
2            0             1                1             0          270000.0   
3            1             1                1             1          112500.0   
4            0             1                1             2          139500.0   

   DAYS_BIRTH  DAYS_EMPLOYED  FLAG_MOBIL  FLAG_WORK_PHONE  FLAG_PHONE  ...  \
0       13258           2300           1                0           0  ...   
1       17876            377           1                1           1  ...   
2       19579           1028           1                0           1  ...   
3       15109           1956           1                0           0  ...   
4       17281           5578           1                1           0  ...   

   JOB_Laborers  JOB_Low-skill Laborers  JOB