In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import SimpleImputer
pd.set_option('display.max_rows',None)
pd.set_option('display.max_columns',None)

In [2]:
pre_app = pd.read_csv("C:/Users/rajpu/OneDrive/Desktop/Bank Loan Case Study/Uncleaned Datasets/previous_application.csv")

In [4]:
def wrangle(filepath):
    # Read CSV file
    df = pd.read_csv(filepath)

    #removing higher null value columns
    null_df = (round((df.isnull().sum()/1670214)*100,2))
    threshold = 40 
    max_null_columns_to_drop = null_df[null_df  > threshold].index
    df_cleaned_null = df.drop(columns=max_null_columns_to_drop)

    #coverting these columns to positive because days cannot be negative
    for col in df_cleaned_null.select_dtypes(include='number').columns:
        df_cleaned_null[col] = df_cleaned_null[col].abs()
    
    #cleaning unnecessary columns
    unecessary_columns_to_remove = ["WEEKDAY_APPR_PROCESS_START","HOUR_APPR_PROCESS_START","FLAG_LAST_APPL_PER_CONTRACT","NFLAG_LAST_APPL_IN_DAY"]
    df_cleaned = df_cleaned_null.drop(columns = unecessary_columns_to_remove)

    # Separate numeric and non-numeric columns
    numeric_cols = df_cleaned.select_dtypes(include=['number']).columns
    non_numeric_cols = df_cleaned.select_dtypes(exclude=['number']).columns

    # Impute missing values in numeric columns
    imputer = SimpleImputer(strategy='mean')
    df_cleaned[numeric_cols] = imputer.fit_transform(df_cleaned[numeric_cols])

    # Impute missing values in non-numeric columns using a different strategy, e.g., 'most_frequent'
    imputer_non_numeric = SimpleImputer(strategy='most_frequent')
    df_cleaned[non_numeric_cols] = imputer_non_numeric.fit_transform(df_cleaned[non_numeric_cols])

    return df_cleaned

In [5]:
pre_app_cleaned = wrangle("C:/Users/rajpu/OneDrive/Desktop/Bank Loan Case Study/Uncleaned Datasets/previous_application.csv")
print(pre_app_cleaned.info())
pre_app_cleaned.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1670214 entries, 0 to 1670213
Data columns (total 22 columns):
 #   Column                  Non-Null Count    Dtype  
---  ------                  --------------    -----  
 0   SK_ID_PREV              1670214 non-null  float64
 1   SK_ID_CURR              1670214 non-null  float64
 2   NAME_CONTRACT_TYPE      1670214 non-null  object 
 3   AMT_ANNUITY             1670214 non-null  float64
 4   AMT_APPLICATION         1670214 non-null  float64
 5   AMT_CREDIT              1670214 non-null  float64
 6   AMT_GOODS_PRICE         1670214 non-null  float64
 7   NAME_CASH_LOAN_PURPOSE  1670214 non-null  object 
 8   NAME_CONTRACT_STATUS    1670214 non-null  object 
 9   DAYS_DECISION           1670214 non-null  float64
 10  NAME_PAYMENT_TYPE       1670214 non-null  object 
 11  CODE_REJECT_REASON      1670214 non-null  object 
 12  NAME_CLIENT_TYPE        1670214 non-null  object 
 13  NAME_GOODS_CATEGORY     1670214 non-null  object 
 14  NA

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_GOODS_PRICE,NAME_CASH_LOAN_PURPOSE,NAME_CONTRACT_STATUS,DAYS_DECISION,NAME_PAYMENT_TYPE,CODE_REJECT_REASON,NAME_CLIENT_TYPE,NAME_GOODS_CATEGORY,NAME_PORTFOLIO,NAME_PRODUCT_TYPE,CHANNEL_TYPE,SELLERPLACE_AREA,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION
0,2030495.0,271877.0,Consumer loans,1730.43,17145.0,17145.0,17145.0,XAP,Approved,73.0,Cash through the bank,XAP,Repeater,Mobile,POS,XNA,Country-wide,35.0,Connectivity,12.0,middle,POS mobile with interest
1,2802425.0,108129.0,Cash loans,25188.615,607500.0,679671.0,607500.0,XNA,Approved,164.0,XNA,XAP,Repeater,XNA,Cash,x-sell,Contact center,1.0,XNA,36.0,low_action,Cash X-Sell: low
2,2523466.0,122040.0,Cash loans,15060.735,112500.0,136444.5,112500.0,XNA,Approved,301.0,Cash through the bank,XAP,Repeater,XNA,Cash,x-sell,Credit and cash offices,1.0,XNA,12.0,high,Cash X-Sell: high
3,2819243.0,176158.0,Cash loans,47041.335,450000.0,470790.0,450000.0,XNA,Approved,512.0,Cash through the bank,XAP,Repeater,XNA,Cash,x-sell,Credit and cash offices,1.0,XNA,12.0,middle,Cash X-Sell: middle
4,1784265.0,202054.0,Cash loans,31924.395,337500.0,404055.0,337500.0,Repairs,Refused,781.0,Cash through the bank,HC,Repeater,XNA,Cash,walk-in,Credit and cash offices,1.0,XNA,24.0,high,Cash Street: high


In [3]:
pre_app.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1670214 entries, 0 to 1670213
Data columns (total 37 columns):
 #   Column                       Non-Null Count    Dtype  
---  ------                       --------------    -----  
 0   SK_ID_PREV                   1670214 non-null  int64  
 1   SK_ID_CURR                   1670214 non-null  int64  
 2   NAME_CONTRACT_TYPE           1670214 non-null  object 
 3   AMT_ANNUITY                  1297979 non-null  float64
 4   AMT_APPLICATION              1670214 non-null  float64
 5   AMT_CREDIT                   1670213 non-null  float64
 6   AMT_DOWN_PAYMENT             774370 non-null   float64
 7   AMT_GOODS_PRICE              1284699 non-null  float64
 8   WEEKDAY_APPR_PROCESS_START   1670214 non-null  object 
 9   HOUR_APPR_PROCESS_START      1670214 non-null  int64  
 10  FLAG_LAST_APPL_PER_CONTRACT  1670214 non-null  object 
 11  NFLAG_LAST_APPL_IN_DAY       1670214 non-null  int64  
 12  RATE_DOWN_PAYMENT            774370 non-nu

In [6]:
pre_app.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,FLAG_LAST_APPL_PER_CONTRACT,NFLAG_LAST_APPL_IN_DAY,RATE_DOWN_PAYMENT,RATE_INTEREST_PRIMARY,RATE_INTEREST_PRIVILEGED,NAME_CASH_LOAN_PURPOSE,NAME_CONTRACT_STATUS,DAYS_DECISION,NAME_PAYMENT_TYPE,CODE_REJECT_REASON,NAME_TYPE_SUITE,NAME_CLIENT_TYPE,NAME_GOODS_CATEGORY,NAME_PORTFOLIO,NAME_PRODUCT_TYPE,CHANNEL_TYPE,SELLERPLACE_AREA,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,DAYS_FIRST_DRAWING,DAYS_FIRST_DUE,DAYS_LAST_DUE_1ST_VERSION,DAYS_LAST_DUE,DAYS_TERMINATION,NFLAG_INSURED_ON_APPROVAL
0,2030495,271877,Consumer loans,1730.43,17145.0,17145.0,0.0,17145.0,SATURDAY,15,Y,1,0.0,0.182832,0.867336,XAP,Approved,-73,Cash through the bank,XAP,,Repeater,Mobile,POS,XNA,Country-wide,35,Connectivity,12.0,middle,POS mobile with interest,365243.0,-42.0,300.0,-42.0,-37.0,0.0
1,2802425,108129,Cash loans,25188.615,607500.0,679671.0,,607500.0,THURSDAY,11,Y,1,,,,XNA,Approved,-164,XNA,XAP,Unaccompanied,Repeater,XNA,Cash,x-sell,Contact center,-1,XNA,36.0,low_action,Cash X-Sell: low,365243.0,-134.0,916.0,365243.0,365243.0,1.0
2,2523466,122040,Cash loans,15060.735,112500.0,136444.5,,112500.0,TUESDAY,11,Y,1,,,,XNA,Approved,-301,Cash through the bank,XAP,"Spouse, partner",Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,12.0,high,Cash X-Sell: high,365243.0,-271.0,59.0,365243.0,365243.0,1.0
3,2819243,176158,Cash loans,47041.335,450000.0,470790.0,,450000.0,MONDAY,7,Y,1,,,,XNA,Approved,-512,Cash through the bank,XAP,,Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,12.0,middle,Cash X-Sell: middle,365243.0,-482.0,-152.0,-182.0,-177.0,1.0
4,1784265,202054,Cash loans,31924.395,337500.0,404055.0,,337500.0,THURSDAY,9,Y,1,,,,Repairs,Refused,-781,Cash through the bank,HC,,Repeater,XNA,Cash,walk-in,Credit and cash offices,-1,XNA,24.0,high,Cash Street: high,,,,,,


In [6]:
null_pre_app = (round((pre_app.isnull().sum()/1670214)*100,2))

In [7]:
null_pre_app

SK_ID_PREV                      0.00
SK_ID_CURR                      0.00
NAME_CONTRACT_TYPE              0.00
AMT_ANNUITY                    22.29
AMT_APPLICATION                 0.00
AMT_CREDIT                      0.00
AMT_DOWN_PAYMENT               53.64
AMT_GOODS_PRICE                23.08
WEEKDAY_APPR_PROCESS_START      0.00
HOUR_APPR_PROCESS_START         0.00
FLAG_LAST_APPL_PER_CONTRACT     0.00
NFLAG_LAST_APPL_IN_DAY          0.00
RATE_DOWN_PAYMENT              53.64
RATE_INTEREST_PRIMARY          99.64
RATE_INTEREST_PRIVILEGED       99.64
NAME_CASH_LOAN_PURPOSE          0.00
NAME_CONTRACT_STATUS            0.00
DAYS_DECISION                   0.00
NAME_PAYMENT_TYPE               0.00
CODE_REJECT_REASON              0.00
NAME_TYPE_SUITE                49.12
NAME_CLIENT_TYPE                0.00
NAME_GOODS_CATEGORY             0.00
NAME_PORTFOLIO                  0.00
NAME_PRODUCT_TYPE               0.00
CHANNEL_TYPE                    0.00
SELLERPLACE_AREA                0.00
N

In [9]:
threshold = 40 
max_null_columns_to_drop = null_pre_app[null_pre_app  > threshold].index

In [10]:
max_null_columns_to_drop

Index(['AMT_DOWN_PAYMENT', 'RATE_DOWN_PAYMENT', 'RATE_INTEREST_PRIMARY',
       'RATE_INTEREST_PRIVILEGED', 'NAME_TYPE_SUITE', 'DAYS_FIRST_DRAWING',
       'DAYS_FIRST_DUE', 'DAYS_LAST_DUE_1ST_VERSION', 'DAYS_LAST_DUE',
       'DAYS_TERMINATION', 'NFLAG_INSURED_ON_APPROVAL'],
      dtype='object')

In [11]:
pre_app_cleaned_null = pre_app.drop(columns=max_null_columns_to_drop)

In [12]:
pre_app_cleaned_null.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1670214 entries, 0 to 1670213
Data columns (total 26 columns):
 #   Column                       Non-Null Count    Dtype  
---  ------                       --------------    -----  
 0   SK_ID_PREV                   1670214 non-null  int64  
 1   SK_ID_CURR                   1670214 non-null  int64  
 2   NAME_CONTRACT_TYPE           1670214 non-null  object 
 3   AMT_ANNUITY                  1297979 non-null  float64
 4   AMT_APPLICATION              1670214 non-null  float64
 5   AMT_CREDIT                   1670213 non-null  float64
 6   AMT_GOODS_PRICE              1284699 non-null  float64
 7   WEEKDAY_APPR_PROCESS_START   1670214 non-null  object 
 8   HOUR_APPR_PROCESS_START      1670214 non-null  int64  
 9   FLAG_LAST_APPL_PER_CONTRACT  1670214 non-null  object 
 10  NFLAG_LAST_APPL_IN_DAY       1670214 non-null  int64  
 11  NAME_CASH_LOAN_PURPOSE       1670214 non-null  object 
 12  NAME_CONTRACT_STATUS         1670214 non-n

In [13]:
pre_app_cleaned_null.shape

(1670214, 26)

In [14]:
pre_app_cleaned_null.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_GOODS_PRICE,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,FLAG_LAST_APPL_PER_CONTRACT,NFLAG_LAST_APPL_IN_DAY,NAME_CASH_LOAN_PURPOSE,NAME_CONTRACT_STATUS,DAYS_DECISION,NAME_PAYMENT_TYPE,CODE_REJECT_REASON,NAME_CLIENT_TYPE,NAME_GOODS_CATEGORY,NAME_PORTFOLIO,NAME_PRODUCT_TYPE,CHANNEL_TYPE,SELLERPLACE_AREA,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION
0,2030495,271877,Consumer loans,1730.43,17145.0,17145.0,17145.0,SATURDAY,15,Y,1,XAP,Approved,-73,Cash through the bank,XAP,Repeater,Mobile,POS,XNA,Country-wide,35,Connectivity,12.0,middle,POS mobile with interest
1,2802425,108129,Cash loans,25188.615,607500.0,679671.0,607500.0,THURSDAY,11,Y,1,XNA,Approved,-164,XNA,XAP,Repeater,XNA,Cash,x-sell,Contact center,-1,XNA,36.0,low_action,Cash X-Sell: low
2,2523466,122040,Cash loans,15060.735,112500.0,136444.5,112500.0,TUESDAY,11,Y,1,XNA,Approved,-301,Cash through the bank,XAP,Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,12.0,high,Cash X-Sell: high
3,2819243,176158,Cash loans,47041.335,450000.0,470790.0,450000.0,MONDAY,7,Y,1,XNA,Approved,-512,Cash through the bank,XAP,Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,12.0,middle,Cash X-Sell: middle
4,1784265,202054,Cash loans,31924.395,337500.0,404055.0,337500.0,THURSDAY,9,Y,1,Repairs,Refused,-781,Cash through the bank,HC,Repeater,XNA,Cash,walk-in,Credit and cash offices,-1,XNA,24.0,high,Cash Street: high


In [15]:
#Checking which columns have negative values
for column in pre_app_cleaned_null.select_dtypes(include='number').columns:
    if (pre_app_cleaned_null[column] < 0).any():
        print(f"Column '{column}' contains negative values.")

Column 'DAYS_DECISION' contains negative values.
Column 'SELLERPLACE_AREA' contains negative values.


In [16]:
#coverting these columns to positive because days cannot be negative
for col in pre_app_cleaned_null.select_dtypes(include='number').columns:
    pre_app_cleaned_null[col] = pre_app_cleaned_null[col].abs()

In [17]:
#rechecking to ensure
for column in pre_app_cleaned_null.select_dtypes(include='number').columns:
    if (pre_app_cleaned_null[column] < 0).any():
        print(f"Column '{column}' contains negative values.")
else:
    print("All Columns contains positive values")

All Columns contains positive values


In [18]:
pre_app_cleaned_null.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1670214 entries, 0 to 1670213
Data columns (total 26 columns):
 #   Column                       Non-Null Count    Dtype  
---  ------                       --------------    -----  
 0   SK_ID_PREV                   1670214 non-null  int64  
 1   SK_ID_CURR                   1670214 non-null  int64  
 2   NAME_CONTRACT_TYPE           1670214 non-null  object 
 3   AMT_ANNUITY                  1297979 non-null  float64
 4   AMT_APPLICATION              1670214 non-null  float64
 5   AMT_CREDIT                   1670213 non-null  float64
 6   AMT_GOODS_PRICE              1284699 non-null  float64
 7   WEEKDAY_APPR_PROCESS_START   1670214 non-null  object 
 8   HOUR_APPR_PROCESS_START      1670214 non-null  int64  
 9   FLAG_LAST_APPL_PER_CONTRACT  1670214 non-null  object 
 10  NFLAG_LAST_APPL_IN_DAY       1670214 non-null  int64  
 11  NAME_CASH_LOAN_PURPOSE       1670214 non-null  object 
 12  NAME_CONTRACT_STATUS         1670214 non-n

In [19]:
unecessary_columns_to_remove = ["WEEKDAY_APPR_PROCESS_START","HOUR_APPR_PROCESS_START","FLAG_LAST_APPL_PER_CONTRACT","NFLAG_LAST_APPL_IN_DAY"]

In [20]:
pre_app_cleaned = pre_app_cleaned_null.drop(columns = unecessary_columns_to_remove)

In [21]:
pre_app_cleaned.shape

(1670214, 22)

In [22]:
print(pre_app_cleaned.isnull().sum().sort_values(ascending = True))

SK_ID_PREV                     0
SK_ID_CURR                     0
NAME_CONTRACT_TYPE             0
AMT_APPLICATION                0
NAME_CASH_LOAN_PURPOSE         0
NAME_PAYMENT_TYPE              0
DAYS_DECISION                  0
NAME_CONTRACT_STATUS           0
NAME_CLIENT_TYPE               0
NAME_GOODS_CATEGORY            0
NAME_PORTFOLIO                 0
CODE_REJECT_REASON             0
NAME_PRODUCT_TYPE              0
NAME_SELLER_INDUSTRY           0
SELLERPLACE_AREA               0
CHANNEL_TYPE                   0
NAME_YIELD_GROUP               0
AMT_CREDIT                     1
PRODUCT_COMBINATION          346
CNT_PAYMENT               372230
AMT_ANNUITY               372235
AMT_GOODS_PRICE           385515
dtype: int64


In [23]:
# Separate numeric and non-numeric columns
numeric_cols = pre_app_cleaned.select_dtypes(include=['number']).columns
non_numeric_cols = pre_app_cleaned.select_dtypes(exclude=['number']).columns

In [24]:
numeric_cols

Index(['SK_ID_PREV', 'SK_ID_CURR', 'AMT_ANNUITY', 'AMT_APPLICATION',
       'AMT_CREDIT', 'AMT_GOODS_PRICE', 'DAYS_DECISION', 'SELLERPLACE_AREA',
       'CNT_PAYMENT'],
      dtype='object')

In [25]:
non_numeric_cols

Index(['NAME_CONTRACT_TYPE', 'NAME_CASH_LOAN_PURPOSE', 'NAME_CONTRACT_STATUS',
       'NAME_PAYMENT_TYPE', 'CODE_REJECT_REASON', 'NAME_CLIENT_TYPE',
       'NAME_GOODS_CATEGORY', 'NAME_PORTFOLIO', 'NAME_PRODUCT_TYPE',
       'CHANNEL_TYPE', 'NAME_SELLER_INDUSTRY', 'NAME_YIELD_GROUP',
       'PRODUCT_COMBINATION'],
      dtype='object')

In [26]:
# Impute missing values in numeric columns
imputer = SimpleImputer(strategy='mean')
pre_app_cleaned[numeric_cols] = imputer.fit_transform(pre_app_cleaned[numeric_cols])

In [27]:
# Impute missing values in non-numeric columns using a different strategy, e.g., 'most_frequent'
imputer_non_numeric = SimpleImputer(strategy='most_frequent')
pre_app_cleaned[non_numeric_cols] = imputer_non_numeric.fit_transform(pre_app_cleaned[non_numeric_cols])

In [28]:
pre_app_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1670214 entries, 0 to 1670213
Data columns (total 22 columns):
 #   Column                  Non-Null Count    Dtype  
---  ------                  --------------    -----  
 0   SK_ID_PREV              1670214 non-null  float64
 1   SK_ID_CURR              1670214 non-null  float64
 2   NAME_CONTRACT_TYPE      1670214 non-null  object 
 3   AMT_ANNUITY             1670214 non-null  float64
 4   AMT_APPLICATION         1670214 non-null  float64
 5   AMT_CREDIT              1670214 non-null  float64
 6   AMT_GOODS_PRICE         1670214 non-null  float64
 7   NAME_CASH_LOAN_PURPOSE  1670214 non-null  object 
 8   NAME_CONTRACT_STATUS    1670214 non-null  object 
 9   DAYS_DECISION           1670214 non-null  float64
 10  NAME_PAYMENT_TYPE       1670214 non-null  object 
 11  CODE_REJECT_REASON      1670214 non-null  object 
 12  NAME_CLIENT_TYPE        1670214 non-null  object 
 13  NAME_GOODS_CATEGORY     1670214 non-null  object 
 14  NA

In [29]:
pre_app_cleaned.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_GOODS_PRICE,NAME_CASH_LOAN_PURPOSE,NAME_CONTRACT_STATUS,DAYS_DECISION,NAME_PAYMENT_TYPE,CODE_REJECT_REASON,NAME_CLIENT_TYPE,NAME_GOODS_CATEGORY,NAME_PORTFOLIO,NAME_PRODUCT_TYPE,CHANNEL_TYPE,SELLERPLACE_AREA,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION
0,2030495.0,271877.0,Consumer loans,1730.43,17145.0,17145.0,17145.0,XAP,Approved,73.0,Cash through the bank,XAP,Repeater,Mobile,POS,XNA,Country-wide,35.0,Connectivity,12.0,middle,POS mobile with interest
1,2802425.0,108129.0,Cash loans,25188.615,607500.0,679671.0,607500.0,XNA,Approved,164.0,XNA,XAP,Repeater,XNA,Cash,x-sell,Contact center,1.0,XNA,36.0,low_action,Cash X-Sell: low
2,2523466.0,122040.0,Cash loans,15060.735,112500.0,136444.5,112500.0,XNA,Approved,301.0,Cash through the bank,XAP,Repeater,XNA,Cash,x-sell,Credit and cash offices,1.0,XNA,12.0,high,Cash X-Sell: high
3,2819243.0,176158.0,Cash loans,47041.335,450000.0,470790.0,450000.0,XNA,Approved,512.0,Cash through the bank,XAP,Repeater,XNA,Cash,x-sell,Credit and cash offices,1.0,XNA,12.0,middle,Cash X-Sell: middle
4,1784265.0,202054.0,Cash loans,31924.395,337500.0,404055.0,337500.0,Repairs,Refused,781.0,Cash through the bank,HC,Repeater,XNA,Cash,walk-in,Credit and cash offices,1.0,XNA,24.0,high,Cash Street: high


In [30]:
pre_app_cleaned.nunique().sort_values()

NAME_PRODUCT_TYPE               3
NAME_CONTRACT_TYPE              4
NAME_CONTRACT_STATUS            4
NAME_CLIENT_TYPE                4
NAME_PAYMENT_TYPE               4
NAME_PORTFOLIO                  5
NAME_YIELD_GROUP                5
CHANNEL_TYPE                    8
CODE_REJECT_REASON              9
NAME_SELLER_INDUSTRY           11
PRODUCT_COMBINATION            17
NAME_CASH_LOAN_PURPOSE         25
NAME_GOODS_CATEGORY            28
CNT_PAYMENT                    50
SELLERPLACE_AREA             2096
DAYS_DECISION                2922
AMT_CREDIT                  86804
AMT_APPLICATION             93885
AMT_GOODS_PRICE             93886
SK_ID_CURR                 338857
AMT_ANNUITY                357960
SK_ID_PREV                1670214
dtype: int64

Saving Cleaned Dataset

In [5]:
pre_app_cleaned.to_csv("C:/Users/rajpu/OneDrive/Desktop/Bank Loan Case Study/Cleaned Datasets/Previous_applicants_cleaned.csv", index = False)