In [65]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)


df_raw = pd.read_csv(r'https://support.sas.com/documentation/onlinedoc/viya/exampledatasets/hmeq.csv')

df = (df_raw
      .fillna(df_raw[df_raw.select_dtypes(include = np.number).columns.to_list()].mean())         ## Fill all missing values with the mean
      .fillna(df_raw[df_raw.select_dtypes(include = object).columns.to_list()].mode().iloc[0])
      .assign(DIFF = lambda _df: _df.MORTDUE - _df.VALUE,                                         ## Difference between mortgage due and value
              BAD_CAT = lambda _df: _df.BAD.map({1:'Good Loan', 0:'Bad Loan'})                    ## Map values of 1 and 0
       )
      .rename(columns=lambda colName:colName.lower().replace("_",""))                             ## Lowercase column names and remove underscores
)

In [67]:
df_raw.isna().sum()

BAD           0
LOAN          0
MORTDUE     518
VALUE       112
REASON      252
JOB         279
YOJ         515
DEROG       708
DELINQ      580
CLAGE       308
NINQ        510
CLNO        222
DEBTINC    1267
dtype: int64

In [68]:
df.isna().sum()

bad        0
loan       0
mortdue    0
value      0
reason     0
job        0
yoj        0
derog      0
delinq     0
clage      0
ninq       0
clno       0
debtinc    0
diff       0
badcat     0
dtype: int64

In [53]:
df_raw.head()

Unnamed: 0,BAD,LOAN,MORTDUE,VALUE,REASON,JOB,YOJ,DEROG,DELINQ,CLAGE,NINQ,CLNO,DEBTINC
0,1,1100,25860.0,39025.0,HomeImp,Other,10.5,0.0,0.0,94.366667,1.0,9.0,
1,1,1300,70053.0,68400.0,HomeImp,Other,7.0,0.0,2.0,121.833333,0.0,14.0,
2,1,1500,13500.0,16700.0,HomeImp,Other,4.0,0.0,0.0,149.466667,1.0,10.0,
3,1,1500,,,,,,,,,,,
4,0,1700,97800.0,112000.0,HomeImp,Office,3.0,0.0,0.0,93.333333,0.0,14.0,


In [41]:
df.head()

Unnamed: 0,bad,loan,mortdue,value,reason,job,yoj,derog,delinq,clage,ninq,clno,debtinc,diff,badcat
0,1,1100,25860.0,39025.0,HomeImp,Other,10.5,0.0,0.0,94.366667,1.0,9.0,33.779915,-13165.0,Good Loan
1,1,1300,70053.0,68400.0,HomeImp,Other,7.0,0.0,2.0,121.833333,0.0,14.0,33.779915,1653.0,Good Loan
2,1,1500,13500.0,16700.0,HomeImp,Other,4.0,0.0,0.0,149.466667,1.0,10.0,33.779915,-3200.0,Good Loan
3,1,1500,73760.8172,101776.048741,,,8.922268,0.25457,0.449442,179.766275,1.186055,21.296096,33.779915,-28015.231542,Good Loan
4,0,1700,97800.0,112000.0,HomeImp,Office,3.0,0.0,0.0,93.333333,0.0,14.0,33.779915,-14200.0,Bad Loan


In [42]:
df_raw.isna().sum()

BAD           0
LOAN          0
MORTDUE     518
VALUE       112
REASON      252
JOB         279
YOJ         515
DEROG       708
DELINQ      580
CLAGE       308
NINQ        510
CLNO        222
DEBTINC    1267
dtype: int64

In [43]:
df.isna().sum()

bad          0
loan         0
mortdue      0
value        0
reason     252
job        279
yoj          0
derog        0
delinq       0
clage        0
ninq         0
clno         0
debtinc      0
diff         0
badcat       0
dtype: int64

In [30]:
df.mean()

bad             0.199497
loan        18607.969799
mortdue     73760.817200
value      101776.048741
yoj             8.922268
derog           0.254570
delinq          0.449442
clage         179.766275
ninq            1.186055
clno           21.296096
debtinc        33.779915
diff       -28015.231542
dtype: float64

In [26]:
df.head()

Unnamed: 0,bad,loan,mortdue,value,reason,job,yoj,derog,delinq,clage,ninq,clno,debtinc,diff,badcat
0,1,1100,25860.0,39025.0,HomeImp,Other,10.5,0.0,0.0,94.366667,1.0,9.0,33.779915,-13165.0,Good Loan
1,1,1300,70053.0,68400.0,HomeImp,Other,7.0,0.0,2.0,121.833333,0.0,14.0,33.779915,1653.0,Good Loan
2,1,1500,13500.0,16700.0,HomeImp,Other,4.0,0.0,0.0,149.466667,1.0,10.0,33.779915,-3200.0,Good Loan
3,1,1500,73760.8172,101776.048741,DebtCon,Other,8.922268,0.25457,0.449442,179.766275,1.186055,21.296096,33.779915,-28015.231542,Good Loan
4,0,1700,97800.0,112000.0,HomeImp,Office,3.0,0.0,0.0,93.333333,0.0,14.0,33.779915,-14200.0,Bad Loan


In [18]:
df.fillna(df.mode().iloc[0])

Unnamed: 0,bad,loan,mortdue,value,reason,job,yoj,derog,delinq,clage,ninq,clno,debtinc,diff,badcat
0,1,1100,25860.0000,39025.000000,HomeImp,Other,10.500000,0.00000,0.000000,94.366667,1.000000,9.000000,33.779915,-13165.000000,Good Loan
1,1,1300,70053.0000,68400.000000,HomeImp,Other,7.000000,0.00000,2.000000,121.833333,0.000000,14.000000,33.779915,1653.000000,Good Loan
2,1,1500,13500.0000,16700.000000,HomeImp,Other,4.000000,0.00000,0.000000,149.466667,1.000000,10.000000,33.779915,-3200.000000,Good Loan
3,1,1500,73760.8172,101776.048741,DebtCon,Other,8.922268,0.25457,0.449442,179.766275,1.186055,21.296096,33.779915,-28015.231542,Good Loan
4,0,1700,97800.0000,112000.000000,HomeImp,Office,3.000000,0.00000,0.000000,93.333333,0.000000,14.000000,33.779915,-14200.000000,Bad Loan
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5955,0,88900,57264.0000,90185.000000,DebtCon,Other,16.000000,0.00000,0.000000,221.808718,0.000000,16.000000,36.112347,-32921.000000,Bad Loan
5956,0,89000,54576.0000,92937.000000,DebtCon,Other,16.000000,0.00000,0.000000,208.692070,0.000000,15.000000,35.859971,-38361.000000,Bad Loan
5957,0,89200,54045.0000,92924.000000,DebtCon,Other,15.000000,0.00000,0.000000,212.279697,0.000000,15.000000,35.556590,-38879.000000,Bad Loan
5958,0,89800,50370.0000,91861.000000,DebtCon,Other,14.000000,0.00000,0.000000,213.892709,0.000000,16.000000,34.340882,-41491.000000,Bad Loan


In [19]:
df.reason.value_counts()

DebtCon    3928
HomeImp    1780
Name: reason, dtype: int64

In [20]:
df.job.value_counts()

Other      2388
ProfExe    1276
Office      948
Mgr         767
Self        193
Sales       109
Name: job, dtype: int64

In [4]:
df.mode()

Unnamed: 0,bad,loan,mortdue,value,reason,job,yoj,derog,delinq,clage,ninq,clno,debtinc,diff,badcat
0,0,15000,73760.8172,101776.048741,DebtCon,Other,8.922268,0.0,0.0,179.766275,0.0,16.0,33.779915,-28015.231542,Bad Loan


In [9]:
df.reason.mode().iloc[0]

'DebtCon'