### Example of Data Waterfall
---

In [1]:
import sys, os
sys.path.insert(1, '..')

import pandas as pd
import numpy as np
from IPython.display import display

%load_ext autoreload
%autoreload 2

from rdsutils.data_waterfall import DataWaterfallReport

In [2]:
# Load the modeling data from a parquet file. The path is relative, so it
# resolves against the notebook's current working directory.
modeling_df = pd.read_parquet('artifact/ach_modeling_data.parquet')


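Define the sequence of filter functions that produces the waterfall. Each function takes a DataFrame and returns the filtered DataFrame; the functions are applied in the order they are listed.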

In [4]:
def drop_non_ach(df):
    """Keep only ACHDD transactions with a strictly positive amount.

    A row survives when its ``transaction_code`` is ``'ACHDD'`` and its
    ``transaction_amount`` is greater than zero; every other row is dropped.
    Returns the filtered frame; the frame passed in is not modified.
    """
    is_achdd = df['transaction_code'].isin(['ACHDD'])
    has_positive_amount = df['transaction_amount'] > 0
    return df[is_achdd & has_positive_amount]

def drop_bad_but_pos_balance(df):
    """Drop rows where ``target_10d`` is True and ``bal_after_10d`` > 0.

    All other rows are returned unchanged, in their original order.
    """
    target_is_true = df['target_10d']
    balance_is_positive = df['bal_after_10d'] > 0
    to_drop = target_is_true & balance_is_positive
    return df[~to_drop]

def drop_good_but_has_returns(df):
    """Drop rows where ``target_10d`` is False and ``nr_returns_in_next_10d`` > 0.

    All other rows are returned unchanged, in their original order.
    """
    target_is_false = ~df['target_10d']
    has_returns = df['nr_returns_in_next_10d'] > 0
    return df[~(target_is_false & has_returns)]

def drop_good_but_chg_wrt_off(df):
    """Waterfall step that, as written, filters on ``target_10d`` only.

    NOTE(review): the mask is ``target_10d & ~target_10d``, which is False for
    every row with a plain boolean target, so this step removes nothing. That
    is consistent with the 0 excluded rows shown for this step in the report
    output. The name suggests a charge/write-off condition was presumably
    meant to be combined with ``~target_10d``, but no such column is
    referenced here. TODO: confirm the intended column and fix the condition.
    """
    return df[~(df[f'target_10d'] & ~df['target_10d'])]

def drop_good_but_neg_balance(df):
    """Drop rows where ``target_10d`` is False and ``bal_after_10d`` < 0.

    All other rows are returned unchanged, in their original order.
    """
    target_is_false = ~df['target_10d']
    balance_is_negative = df['bal_after_10d'] < 0
    to_drop = target_is_false & balance_is_negative
    return df[~to_drop]

def drop_good_but_closed_by_risk(df):
    """Drop rows where ``target_10d`` is False and
    ``account_closed_by_risk_in_next_10d`` is True.

    All other rows are returned unchanged, in their original order.
    """
    target_is_false = ~df['target_10d']
    closed_by_risk = df['account_closed_by_risk_in_next_10d']
    return df[~(target_is_false & closed_by_risk)]

In [5]:
# Waterfall steps; the rows of the report shown below follow this list order.
fns = [drop_non_ach,
       drop_bad_but_pos_balance, 
       drop_good_but_has_returns,
       drop_good_but_chg_wrt_off,
       drop_good_but_neg_balance,
       drop_good_but_closed_by_risk]
# NOTE(review): 'is_returned' is presumably the column tallied in the report's
# "# False" / "# True" columns, while the filters above key off 'target_10d'.
# TODO: confirm against DataWaterfallReport.
wf = DataWaterfallReport(modeling_df, fns, 'is_returned')

Note: `DataWaterfallReport` does not count `None` values automatically. Compute that count manually from the report.

e.g. `df["# Remain"] - df["# False"] - df["# True"]`, where `df` is the report returned by `get_report`.

In [6]:
# Build the report with the flag set to False (the next cell passes True to
# convert all cells to strings); the bare name below displays it as output.
waterfall = wf.get_report(False)
waterfall

Unnamed: 0,# Excluded,# Remain,# False,# True
Starting Population,,3349862,108825,7424
Drop Non Ach,3237263.0,112599,108825,3774
Drop Bad But Pos Balance,1961.0,110638,108825,1813
Drop Good But Has Returns,667.0,109971,108158,1813
Drop Good But Chg Wrt Off,0.0,109971,108158,1813
Drop Good But Neg Balance,74.0,109897,108084,1813
Drop Good But Closed By Risk,11.0,109886,108073,1813


In [7]:
# Passing True converts all cells to strings for better visualization.
# Note: this rebinds `waterfall`, replacing the report built in the previous cell.
waterfall = wf.get_report(True)
waterfall

Unnamed: 0,# Excluded,# Remain,# False,# True
Starting Population,,3349862,108825,7424
Drop Non Ach,3237263.0,112599,108825,3774
Drop Bad But Pos Balance,1961.0,110638,108825,1813
Drop Good But Has Returns,667.0,109971,108158,1813
Drop Good But Chg Wrt Off,0.0,109971,108158,1813
Drop Good But Neg Balance,74.0,109897,108084,1813
Drop Good But Closed By Risk,11.0,109886,108073,1813
