In [2]:
#Vega Setup
import logging
import pandas as pd
import numpy as np
from afterpay_gdp_interfaces import RedshiftHook

import datetime
import pytz
# Timezone object for Asia/Shanghai.
# NOTE(review): CST is not referenced again in the visible cells - confirm it is still needed.
CST = pytz.timezone('Asia/Shanghai')

# Raise the pandas display limits so wide/long frames render in cell output.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 100)
# Root logger at INFO so the query timing messages from vega_execute are shown.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Shared Redshift hook used by every query in this notebook.
vega = RedshiftHook(cluster='vega', okta_username='songli@squareup.com') # for vega connection
def vega_execute(query):
    """
    Execute a SQL statement on Vega and commit it.

    A connection is obtained from the module-level ``vega`` hook for every
    call, the statement is executed on a fresh cursor, the transaction is
    committed, and the elapsed wall-clock time is logged at INFO level.

    Parameters
    ----------
    query : str
        SQL text passed unchanged to ``cursor.execute``.

    Returns
    -------
    None
    """
    # Imported locally (and after the docstring, so the docstring is real):
    # the timing keeps working even if a later cell rebinds the global name
    # ``datetime``, e.g. through ``from datetime import datetime``.
    import datetime

    t_start = datetime.datetime.now()
    with vega.get_conn() as vega_conn:
        with vega_conn.cursor() as cur:
            cur.execute(query)
        vega_conn.commit()
        t_end = datetime.datetime.now()
        logging.info("Vega Query Finished. Time used: {}".format(str(t_end - t_start)))
        # vega_conn.close()

# Get last score ind

In [11]:
#Create Feature Table
# Rebuild the staging table from scratch on every run: it is dropped and then
# recreated with all columns (including the thresholds and update_time) as varchar.
query = """
drop table if exists sandbox_analytics_au.ato_v1_model_last_time_result;
create table if not exists sandbox_analytics_au.ato_v1_model_last_time_result (
    last_time_model_version varchar(20),
    rule_name varchar(60),
    threshold_a varchar(20),
    threshold_b varchar(20),
    update_time varchar(20)
);
"""
vega_execute(query)

# Give the feature science team group full access to the rebuilt table.
query = """
GRANT ALL PRIVILEGES ON TABLE sandbox_analytics_au.ato_v1_model_last_time_result TO feature_science_team_all;
"""
vega_execute(query)
# Load the table from the CSV on S3 (this file is produced by engineering);
# the first line of the file is skipped as a header.
query = """
COPY sandbox_analytics_au.ato_v1_model_last_time_result 
    FROM 's3://risk-data-prod/risk-strategy-refresh/ato_v1_model_last_time_result.csv'
    IAM_ROLE 'arn:aws:iam::900305707269:role/prod-datalake_redshift_access_to_unload_s3_analytics,arn:aws:iam::545788091248:role/RedshiftAssumedIAMRole'
    IGNOREHEADER 1
    FORMAT AS csv
    ;
"""

vega_execute(query)

INFO:root:Vega Query Finished. Time used: 0:00:11.155857
INFO:root:Vega Query Finished. Time used: 0:00:03.473528
INFO:root:Vega Query Finished. Time used: 0:17:40.703828


In [26]:
#Parameters Setting - sample month
import datetime

# Everything is anchored on the first day of the current calendar month.
today = datetime.date.today()
first = datetime.date(today.year, today.month, 1)

# The window ends on the last day of the previous month and starts on the
# first day of the month that contains (first - 80 days).
end_time = first - datetime.timedelta(days=1)
start_time = (first - datetime.timedelta(days=80)).replace(day=1)
Sample_window = f"'{start_time}' and '{end_time}'"

# YYYYMM labels for the previous and the current month.
last_month = f"{end_time:%Y%m}"
this_month = f"{first:%Y%m}"

#Get last version parameters
params = dict(last_month=last_month, this_month=this_month)



In [27]:
# Fetch the rows written for the current month (this_month is a YYYYMM string).
# NOTE(review): update_time is declared varchar(20) in the create-table cell but is
# compared here with an unquoted value - confirm the implicit cast is intended.
query = """
    select * from sandbox_analytics_au.ato_v1_model_last_time_result where update_time = {this_month}
""".format(**params)

last_version_info = vega.get_pandas_df(query)
#Version Check
import sys

# Stop when there is no row for the current month.
if len(last_version_info) == 0:
    sys.exit();

# The live version letter is the last character of last_time_model_version
# in the first row, upper-cased (e.g. 'using_b' -> 'B').
Live_model_version = last_version_info['last_time_model_version'][0][-1:].upper()

In [28]:
Live_model_version

'B'

Set up the rule names and the model name.

In [36]:
# The candidate version is always the letter that is not currently live;
# anything other than 'A' or 'B' stops the run.
if Live_model_version not in ('A', 'B'):
    sys.exit()
Model_version = 'B' if Live_model_version == 'A' else 'A'

#Parameters Setting - Model, rule and last version
Model_name = 'ato_v1_model'
Model_feature_name = 'model_online_ato_global_auto_v1'
Main_strategy_word = '_fraud_online_ato_online_model_V1'
Strategy_key_word = '_fraud_online_ato_online_model_auto_V1_'

# One rule name per region, in the fixed order AU, GB, US.
Live_rule_name = [region + Strategy_key_word + Live_model_version for region in ('AU', 'GB', 'US')]

New_rule_name = [region + Strategy_key_word + Model_version for region in ('AU', 'GB', 'US')]

# Version-less rule name: the key word without its trailing underscore.
rule_name = [region + Strategy_key_word[:-1] for region in ('AU', 'GB', 'US')]

# Upper-case the stored rule names, then keep the thresholds of the rules whose
# name ends with the live version letter, ordered by rule name.
last_version_info['rule_name'] = last_version_info['rule_name'].str.upper()
Live_model_threshold_list = (
    last_version_info
    .loc[last_version_info['rule_name'].str[-1:] == Live_model_version]
    .sort_values('rule_name')
    [['threshold_a', 'threshold_b']]
)

In [37]:
Live_rule_name

['AU_fraud_online_ato_online_model_auto_V1_B',
 'GB_fraud_online_ato_online_model_auto_V1_B',
 'US_fraud_online_ato_online_model_auto_V1_B']

In [38]:
# Remove the pandas display limits so frames are shown in full
# (this replaces the 500-row / 100-column limits set in the setup cell).
pd.set_option('display.max_columns', None)  # or 1000
pd.set_option('display.max_rows', None)  # or 1000
pd.set_option('display.max_colwidth', None)  # or 199

In [39]:
last_version_info

Unnamed: 0,last_time_model_version,rule_name,threshold_a,threshold_b,update_time
0,using_b,AU_FRAUD_ONLINE_ATO_ONLINE_MODEL_AUTO_V1_A,780,780,202306
1,using_b,AU_FRAUD_ONLINE_ATO_ONLINE_MODEL_AUTO_V1_B,870,900,202306
2,using_b,GB_FRAUD_ONLINE_ATO_ONLINE_MODEL_AUTO_V1_A,750,990,202306
3,using_b,GB_FRAUD_ONLINE_ATO_ONLINE_MODEL_AUTO_V1_B,830,840,202306
4,using_b,US_FRAUD_ONLINE_ATO_ONLINE_MODEL_AUTO_V1_A,680,820,202306
5,using_b,US_FRAUD_ONLINE_ATO_ONLINE_MODEL_AUTO_V1_B,790,790,202306


# model data

In [None]:
# Pull the full modelling base table into memory.
query="""select * from sandbox_analytics_au.ato_model_base_202306  """
#this table is generated based on sample data + tagging
model_data=vega.get_pandas_df(query)
#control decline ind
# Mask: 'Control Group' rows whose control_group value contains a comma and one of the
# two control-group names, but is not exactly the two-name list.
# NOTE(review): `ind` is only used by the commented-out re-weighting below.
ind=((model_data['control_grp_catq']=='Control Group') & (model_data['control_group'].str.contains(',', na=False)) & (model_data['control_group'].str.contains('global_control_group|account_control_group', na=False)) \
     & (model_data['control_group'] != '["global_control_group", "account_control_group"]'))
# model_data.loc[ind, 'weight']=\
#     40*model_data.loc[ind, 'weight']
#Pre Treatment
# Regions and the main-rule thresholds are positional: index i of one list
# goes with index i of the other.
region_list = ['AU', 'GB', 'US']
Main_model_threshold = [800, 410, 970]

# Column-name aliases used throughout the rest of the notebook.
gmv='order_amount_local'
cb_amt='chargeback_utd_local'
gw_cost='gwr_net'
main_scr='online_ato_global_v1'
old_scr='online_ato_global_v1_auto_old'
new_scr='online_ato_global_v1_auto_new'

In [15]:
# Whole days between the order date and the latest successful 2FA login.
# A missing timestamp is replaced by epoch 0 before conversion.
# NOTE(review): datetime.datetime.fromtimestamp converts using the machine's local
# timezone - confirm that matches the timezone of order_date.
# NOTE(review): this needs the name `datetime` to be the module; the
# `from datetime import datetime` further down rebinds it, so re-running this cell
# after that cell would fail.
model_data['tfa_days']=(pd.to_datetime(model_data['order_date'])-pd.to_datetime((pd.to_numeric(model_data['c_latest_login_2fa_success_timestamp']))\
                                                                                .apply(lambda x:datetime.datetime.fromtimestamp(0 if np.isnan(x) else x)))).dt.days
# Same calculation for the latest successful password reset.
model_data['pw_days']=(pd.to_datetime(model_data['order_date'])-pd.to_datetime((pd.to_numeric(model_data['c_latest_pwd_reset_success_timestamp']))\
                                                                               .apply(lambda x:datetime.datetime.fromtimestamp(0 if np.isnan(x) else x)))).dt.days
from pandas.tseries.offsets import MonthEnd
# Roll each parsed order date forward to its month end (MonthEnd(0) leaves dates already on a month end unchanged).
model_data['order_month'] = pd.to_datetime(model_data['order_date'], format="%Y%m") + MonthEnd(0)
#model_data['order_month']=model_data['order_month'].dt.date

Pre-treatment: fill missing amounts and define derived columns.

In [16]:
# Treat missing order amount, chargeback amount and gateway cost as 0.
# The result is assigned back to the frame instead of calling
# fillna(inplace=True) on a column selection: that chained form warns on
# pandas 2.2+ and no longer updates the frame under Copy-on-Write.
amount_cols = [gmv, cb_amt, gw_cost]
model_data[amount_cols] = model_data[amount_cols].fillna(0)

In [17]:
# ato_related_loss is the full order amount (gmv) when the order has a positive
# chargeback amount or a positive gateway cost, and 0 otherwise.
model_data.loc[:, 'ato_related_loss'] = ((model_data[cb_amt]>0) | (model_data[gw_cost]>0))*model_data[gmv]
#model_data.loc[:, 'ato_related_loss'] = model_data[[gmv, 'ato_related_loss']].min(1)

# define rule here

In [94]:
#mode='2fa'
# Score column evaluated for the candidate rule.
score=new_scr
# Live_model_version comes from the version-check cell (read from
# ato_v1_model_last_time_result). It is deliberately not hard-coded here:
# Live_rule_name and Live_model_threshold_list were built from that value, so
# overriding it in this cell could make the outputs disagree with each other.
if Live_model_version == 'A':
    Model_version = 'B'
elif Live_model_version == 'B':
    Model_version = 'A'
else:
    raise ValueError("Live_model_version must be 'A' or 'B', got {!r}".format(Live_model_version))
# Optional extra clause appended to the DataFrame.query filter of each segment.
inc=''

In [131]:
for mode in ['2fa', 'n2fa']:
    # Segment selection: '2fa' keeps rows with tfa_days<=7 or pw_days<=7 and uses
    # the live threshold_a values; any other mode keeps the complement and uses
    # threshold_b.
    is_2fa_segment = (mode == '2fa')
    if is_2fa_segment:
        Live_model_threshold = Live_model_threshold_list['threshold_a'].to_list()
        raw = model_data.query(f""" (tfa_days<=7 or pw_days<=7) {inc}""").copy()
    else:
        Live_model_threshold = Live_model_threshold_list['threshold_b'].to_list()
        raw = model_data.query(f""" ~(tfa_days<=7 or pw_days<=7) {inc}""").copy()

    def _segment_mask(data, keep_recent=is_2fa_segment):
        """Boolean mask of the rows of `data` that belong to the current segment."""
        recent = (data['tfa_days'] <= 7) | (data['pw_days'] <= 7)
        return recent if keep_recent else ~recent

    #rejected by the old/main rule.
    # Each flag starts at 0 and is set to 1 region by region, where the score
    # (scaled by 1000) is above that region's threshold inside the segment.
    for flag_col, score_col, cutoffs in (
        ('old_rule_reject', old_scr, Live_model_threshold),
        ('main_rule_reject', main_scr, Main_model_threshold),
    ):
        raw[flag_col] = 0
        for i in [0, 1, 2]:
            fires = (
                (raw.par_region == region_list[i])
                & (raw[score_col] * 1000 > cutoffs[i])
                & _segment_mask(raw)
            )
            raw[flag_col] = np.where(fires, 1, raw[flag_col])

    raw.loc[:, 'score'] = raw[score] * 1000
    def ato_rule(data, score_cutoff):
        """Rows at or above the cutoff that also fall in the current segment."""
        return (data['score'] >= score_cutoff) & _segment_mask(data)

    #Calculate performance by region and by cutoff
    from report import metrics
    from itertools import product
    from report.rule_reporter import RuleReporter

    reporter = RuleReporter()

    reporter.register_metric('ato_related_loss', metrics.BaseMetric)

    # Evaluation inputs: amounts come from the gmv column, rows are weighted by
    # 'weight', and the live rule flag is both the benchmark and the only
    # decline decision.
    amt_name = gmv
    weight_name = 'weight'#,'weight'
    benchmark_name = 'old_rule_reject'
    dcl_dcsn_names = [benchmark_name]

    metric_names = ['ato_related_loss']
    extra = [
        {
            'metric_name': 'ato_related_loss',
            'base_name': 'ato_related_loss',
            'accuracy_denominator_name': gmv, # if not provided, accuracy would not be computed
            'coverage_denominator_name': 'ato_related_loss',
            'impact_denominator_name': gmv,
        }
    ]
    # NOTE(review): this definition replaces the segment-specific ato_rule defined
    # in the if/else above before it is ever used; only the score cutoff is applied
    # here (raw has already been filtered to the segment by the query).
    def ato_rule(data, score_cutoff):
        """Return a boolean mask of rows whose 'score' is at or above score_cutoff."""
        return (data['score'] >= score_cutoff) 

    rules = {
        'ato_rule': {
            'rule': ato_rule,
        }
    }

    # Candidate cutoffs: 0, 10, ..., 990.
    score_cutoffs = list(range(0, 1000, 10))
    rule_params = []
    # product() over a single iterable yields 1-tuples, so each cutoff is
    # stored as (value,) rather than as a bare integer.
    for score_cutoff in product(score_cutoffs):
        rule_params.append(dict(ato_rule=dict(score_cutoff=score_cutoff)))

    # NOTE(review): this writes 'score' on model_data, while the evaluation below
    # runs on `raw`, which already has its own 'score' column - confirm this line is needed.
    model_data.loc[:, 'score'] = model_data[score] * 1000



    # Evaluate every cutoff in rule_params on the segment, grouped by region,
    # with the live rule flag as benchmark.
    res = reporter.evaluate(
        raw, amt_name, params=rule_params, metric_names=metric_names, 
        weight_name=weight_name, 
        dcl_dcsn_names=dcl_dcsn_names, 
        rules=rules, 
        benchmark_name=benchmark_name, 
        n_jobs=10, 
        extra=extra, group_name='par_region'
    )

    #res

    #res.to_csv('data.csv',index=False)

    #Performance Table Pretreatment
    # Absolute gaps used further down to pick cutoffs: acc_diff is the gap between
    # the candidate's and the benchmark's 'acc' value; cov_diff and gmv_diff are the
    # absolute values of the reporter's 'diff' columns.
    res['acc_diff'] = abs(res[('ato_rule', 'ato_related_loss','acc')] - res[(benchmark_name, 'ato_related_loss','acc')]) 
    res['cov_diff'] = abs(res[('ato_rule', 'ato_related_loss','diff')]) 
    res['gmv_diff'] = abs(res[('ato_rule', 'gmv','diff')]) 

    #Get best threshold by region
    par_region = region_list
    # Accumulators, extended once per region in the loop below:
    #   off_perf     - rule-level performance rows (live rule vs candidate)
    #   rule_content - thresholds per rule and rule-set version
    #   threshold    - one row per region with the three chosen cutoffs
    off_perf = pd.DataFrame(columns = ['model_name', 'rule_set_version', 'performance_level', 'rule_name',
           'action_rate', 'loss_coverage', 'accuracy', 'update_time'])

    rule_content = pd.DataFrame(columns = ['model_name', 'model_version', 'model_score_name_set', 'rule_set_version',
           'rule_name', 'threshold_a', 'rule_content', 'update_time'])

    threshold = pd.DataFrame(columns = ['par_region', 'version1', 'version2', 'version3'])

    #by region
    for i in list(range(0, 3, 1)):
        # All cutoff rows for this region.
        best = res.loc[res[('group_name', 'group_name', 'par_region')] == par_region[i]]
        # Benchmark (live rule) performance, taken from the first row of the region.
        # The same values are repeated under Version_1/2/3 so each candidate version
        # has a live-rule row next to it.
        old_perf1 = pd.DataFrame(best[[(benchmark_name, 'op', 'op'), 
                   (benchmark_name, 'ato_related_loss', 'cov'), 
                   (benchmark_name, 'ato_related_loss', 'acc')]].reset_index(drop = True).loc[0,].values).T
        old_perf1.columns = ['action_rate', 'loss_coverage', 'accuracy']
        old_perf1['rule_set_version'] = 'Version_1'
        old_perf1['rule_name'] = Live_rule_name[i]

        old_perf2 = pd.DataFrame(best[[(benchmark_name, 'op', 'op'), 
                   (benchmark_name, 'ato_related_loss', 'cov'), 
                   (benchmark_name, 'ato_related_loss', 'acc')]].reset_index(drop = True).loc[0,].values).T
        old_perf2.columns = ['action_rate', 'loss_coverage', 'accuracy']
        old_perf2['rule_set_version'] = 'Version_2'
        old_perf2['rule_name'] = Live_rule_name[i]

        old_perf3 = pd.DataFrame(best[[(benchmark_name, 'op', 'op'), 
                   (benchmark_name, 'ato_related_loss', 'cov'), 
                   (benchmark_name, 'ato_related_loss', 'acc')]].reset_index(drop = True).loc[0,].values).T
        old_perf3.columns = ['action_rate', 'loss_coverage', 'accuracy']
        old_perf3['rule_set_version'] = 'Version_3'
        old_perf3['rule_name'] = Live_rule_name[i]
        #accuracy
        # Version_1: the cutoff with the smallest acc_diff; when several rows tie,
        # the last of the tied rows is taken.
        best1 = best.loc[best.acc_diff == best.acc_diff.min()]
        best1 = best1.reset_index(drop = True).loc[best1.shape[0] - 1]
        best1_perf = pd.DataFrame(best1[[('ato_rule', 'op', 'op'), 
                   ('ato_rule', 'ato_related_loss', 'cov'), 
                   ('ato_rule', 'ato_related_loss', 'acc')]].values).T
        best1_perf.columns = ['action_rate', 'loss_coverage', 'accuracy']
        # score_cutoff is stored as a 1-tuple in rule_params, hence the [0].
        best1_thresh = best1[('ato_rule', 'param', 'score_cutoff')][0]
        best1_perf['rule_set_version'] = 'Version_1'
        best1_perf['rule_name'] = New_rule_name[i]
        #coverage
        # Version_2: same selection on cov_diff.
        best2 = best.loc[best.cov_diff == best.cov_diff.min()]
        best2 = best2.reset_index(drop = True).loc[best2.shape[0] - 1]
        best2_perf = pd.DataFrame(best2[[('ato_rule', 'op', 'op'), 
                   ('ato_rule', 'ato_related_loss', 'cov'), 
                   ('ato_rule', 'ato_related_loss', 'acc')]].values).T
        best2_perf.columns = ['action_rate', 'loss_coverage', 'accuracy']
        best2_thresh = best2[('ato_rule', 'param', 'score_cutoff')][0]
        best2_perf['rule_set_version'] = 'Version_2'
        best2_perf['rule_name'] = New_rule_name[i]
        #action
        # Version_3: same selection on gmv_diff.
        best3 = best.loc[best.gmv_diff == best.gmv_diff.min()]
        best3 = best3.reset_index(drop = True).loc[best3.shape[0] - 1]
        best3_perf = pd.DataFrame(best3[[('ato_rule', 'op', 'op'), 
                   ('ato_rule', 'ato_related_loss', 'cov'), 
                   ('ato_rule', 'ato_related_loss', 'acc')]].values).T
        best3_perf.columns = ['action_rate', 'loss_coverage', 'accuracy']
        best3_thresh = best3[('ato_rule', 'param', 'score_cutoff')][0]
        best3_perf['rule_set_version'] = 'Version_3'
        best3_perf['rule_name'] = New_rule_name[i]

        # Live and candidate rows interleaved per version.
        out2 = pd.concat([old_perf1, best1_perf, old_perf2, best2_perf, old_perf3,best3_perf])
        out2['model_name'] = Model_name
        out2['performance_level'] = 'Rule_level'
        out2['update_time'] = ''
        out2 = out2[['model_name', 'rule_set_version', 'performance_level', 'rule_name', 
                     'action_rate', 'loss_coverage', 'accuracy', 'update_time']]
        #offline performance
        off_perf = pd.concat([off_perf, out2]) 
        #for rule content
        # First three rows: candidate cutoffs; last three rows: the live threshold
        # of this region repeated for each version.
        out1 = pd.DataFrame([[best1_thresh,'Version_1'],
                             [best2_thresh, 'Version_2'],
                             [best3_thresh, 'Version_3'],
                             [Live_model_threshold[i],'Version_1'],
                             [Live_model_threshold[i], 'Version_2'],
                             [Live_model_threshold[i], 'Version_3']], columns = ['threshold_a', 'rule_set_version'])
        out1['model_name'] = Model_name

        # Row order matches out1 above: candidate version x3, then live version x3.
        out1['model_version'] = [Model_version, Model_version, Model_version, 
                                 Live_model_version, Live_model_version, Live_model_version]   
        out1['model_score_name_set'] = Model_feature_name
        out1['rule_name'] = rule_name[i]    
        out1['rule_content'] = ''
        out1['update_time'] = ''
        out1 = out1[['model_name', 'model_version', 'model_score_name_set', 'rule_set_version', 
           'rule_name', 'threshold_a', 'rule_content', 'update_time']]
        # The index is not reset, so it repeats 0..5 for every region.
        rule_content =  pd.concat([rule_content, out1]) 
        #threshold
        out3 =  pd.DataFrame([par_region[i], best1_thresh, best2_thresh, best3_thresh]).T
        out3.columns = ['par_region', 'version1', 'version2', 'version3']
        threshold = pd.concat([threshold, out3])



    # Render the three rule-level performance measures as strings with two decimals.
    perf_cols = ['action_rate', 'loss_coverage', 'accuracy']
    off_perf[perf_cols] = off_perf[perf_cols].applymap("{:,.2f}".format)

    # Give the threshold table three-level column labels so it can be merged
    # with the reporter output, whose columns are three-level tuples.
    threshold2 = threshold.copy()
    threshold2.columns = pd.MultiIndex.from_product([threshold2.columns, [''], ['']])

    res_w_thresh = pd.merge(
        res,
        threshold2,
        'left',
        left_on=[('group_name', 'group_name', 'par_region')],
        right_on=[('par_region', '', '')],
    )

    # Unwrap the 1-tuple cutoffs into plain scores.
    res_w_thresh['score'] = [cutoff[0] for cutoff in res_w_thresh['ato_rule']['param']['score_cutoff']]

    # Rule Set Level
    #Input rule content here
    rule_content_text = """def execute_rule():
    actions = []
    if not uuid_split_placeholder : 
        return actions
        
    if (days_since_first_order_date > 14 
        and (c_latest_login_2fa_success_timestamp >= (time.time() - 7*24*60*60) or c_latest_pwd_reset_success_timestamp >= (time.time() - 7*24*60*60)) 
        and model_online_ato_global_auto_v1 > threshold_a):
        
        actions.append({
        ''action_name'': ''is_rejected_assign''
        })
    if (days_since_first_order_date > 14 
        and not (c_latest_login_2fa_success_timestamp >= (time.time() - 7*24*60*60) or c_latest_pwd_reset_success_timestamp >= (time.time() - 7*24*60*60)) 
        and model_online_ato_global_auto_v1 > threshold_b):
        
        actions.append({
        ''action_name'': ''is_rejected_assign''
        })
    return actions"""


    # NOTE(review): the template above contains no 'model_score' token (it already
    # spells out model_online_ato_global_auto_v1), so this replace leaves the text unchanged.
    rule_content_text = rule_content_text.replace('model_score', Model_feature_name)

    #rule_content_text = rule_content_text.replace('\n', '\\n')
    # The same rule text is stored on every row of rule_content.
    rule_content['rule_content'] = rule_content_text

    #Rule set level result
    # Live rule set: rejected by the live auto rule or by the main rule.
    live_or_main = (raw.old_rule_reject > 0) | (raw.main_rule_reject > 0)
    raw['old_rule_set'] = np.where(live_or_main, 1, 0)

    # Attach the per-region candidate cutoffs (version1..version3).
    raw = pd.merge(raw, threshold, 'left', 'par_region')

    # Candidate rule alone: score at or above the region's cutoff.
    for version_col in ('version1', 'version2', 'version3'):
        raw[version_col + '_set_reject'] = np.where(raw.score >= raw[version_col], 1, 0)

    # Candidate rule combined with the main rule.
    for version_col in ('version1', 'version2', 'version3'):
        candidate_or_main = (raw[version_col + '_set_reject'] > 0) | (raw.main_rule_reject > 0)
        raw[version_col + '_set_reject_combined'] = np.where(candidate_or_main, 1, 0)

    #Performance for rule set level - by region

    from report.rule_reporter import RuleReporter

    reporter = RuleReporter()
    reporter.register_metric('ato_related_loss', metrics.BaseMetric)

    amt_name = gmv
    weight_name = 'weight'
    benchmark_name = None
    combined_versions = [f'version{n}_set_reject_combined' for n in (1, 2, 3)]
    dcl_dcsn_names = ['old_rule_set'] + combined_versions

    metric_names = ['ato_related_loss']
    extra = [
        dict(
            metric_name='ato_related_loss',
            base_name='ato_related_loss',
            accuracy_denominator_name=gmv,  # if not provided, accuracy would not be computed
            coverage_denominator_name='ato_related_loss',
            impact_denominator_name=gmv,
        )
    ]

    res = reporter.evaluate(
        raw, amt_name,
        metric_names=metric_names,
        weight_name=weight_name,
        dcl_dcsn_names=dcl_dcsn_names,
        benchmark_name=benchmark_name,
        n_jobs=10,
        extra=extra,
        group_name='par_region',
    )

    # Live rule set followed by each combined candidate, once per version.
    iters = []
    for combined_name in combined_versions:
        iters.extend(['old_rule_set', combined_name])

    # One frame per decision; rule_name becomes '<region>_<decision>'.
    region_frames = []
    for k, decision_name in enumerate(iters):
        a = res[[('group_name', 'group_name', 'par_region'),
                 (decision_name, 'op', 'op'),
                 (decision_name, 'ato_related_loss', 'cov'),
                 (decision_name, 'ato_related_loss', 'acc')]].copy()
        a.columns = ['rule_name', 'action_rate', 'loss_coverage', 'accuracy']
        a['rule_name'] = a.rule_name + '_' + decision_name
        region_frames.append(a)
    off_perf_set_by_region = pd.concat(region_frames)

    off_perf_set_by_region = off_perf_set_by_region.sort_values('rule_name')

    off_perf_set_by_region['model_name'] = Model_name
    # Version labels are assigned by position after the sort.
    off_perf_set_by_region['rule_set_version'] = ['Version_1', 'Version_2', 'Version_3'] * 6
    off_perf_set_by_region['performance_level'] = 'Rule_set_level'
    off_perf_set_by_region['update_time'] = ''
    output_columns = ['model_name', 'rule_set_version', 'performance_level', 'rule_name',
                      'action_rate', 'loss_coverage', 'accuracy', 'update_time']
    off_perf_set_by_region = off_perf_set_by_region[output_columns]

    #Performance for rule set level, not by region

    from report.rule_reporter import RuleReporter

    reporter = RuleReporter()
    reporter.register_metric('ato_related_loss', metrics.BaseMetric)

    amt_name = gmv
    weight_name = 'weight'
    benchmark_name = None
    standalone_names = ['old_rule_reject'] + [f'version{n}_set_reject' for n in (1, 2, 3)]
    combined_names = ['old_rule_set'] + [f'version{n}_set_reject_combined' for n in (1, 2, 3)]
    dcl_dcsn_names = standalone_names + combined_names

    metric_names = ['ato_related_loss']
    extra = [
        dict(
            metric_name='ato_related_loss',
            base_name='ato_related_loss',
            accuracy_denominator_name=gmv,  # if not provided, accuracy would not be computed
            coverage_denominator_name='ato_related_loss',
            impact_denominator_name=gmv,
        )
    ]

    res = reporter.evaluate(
        raw, amt_name,
        metric_names=metric_names,
        weight_name=weight_name,
        dcl_dcsn_names=dcl_dcsn_names,
        benchmark_name=benchmark_name,
        n_jobs=10,
        extra=extra,
    )

    #Rule set performance table
    # Row order: (live, candidate) for each version, first for the standalone
    # rules and then for the rules combined with the main rule.
    row_order = []
    for family in (standalone_names, combined_names):
        live_name = family[0]
        for candidate_name in family[1:]:
            row_order.extend([live_name, candidate_name])

    off_perf_set = pd.DataFrame([
        res[[(decision_name, 'op', 'op'),
             (decision_name, 'ato_related_loss', 'cov'),
             (decision_name, 'ato_related_loss', 'acc')]].loc[0,].tolist()
        for decision_name in row_order
    ])
    off_perf_set.columns = ['action_rate', 'loss_coverage', 'accuracy']
    off_perf_set['model_name'] = Model_name
    off_perf_set['rule_set_version'] = ['Version_1', 'Version_1', 'Version_2', 'Version_2', 'Version_3', 'Version_3'] * 2
    off_perf_set['performance_level'] = 'Rule_set_level'
    off_perf_set['rule_name'] = (
        ['Global_old', 'Global_new'] * 3
        + ['Global_old - combined with main rule', 'Global_new - combined with main rule'] * 3
    )
    off_perf_set['update_time'] = ''
    output_columns = ['model_name', 'rule_set_version', 'performance_level', 'rule_name',
                      'action_rate', 'loss_coverage', 'accuracy', 'update_time']
    off_perf_set = off_perf_set[output_columns]



    #output 
    # No `from datetime import datetime` here: it was unused, and it rebound the
    # global name `datetime` from the module to the class, which breaks any cell
    # that uses datetime.date / datetime.datetime / datetime.timedelta when re-run.
    rule_content['update_time'] = this_month
    # Stack rule-level, by-region rule-set and global rule-set performance.
    off_perf_all = pd.concat([off_perf, off_perf_set_by_region, off_perf_set])
    off_perf_all['update_time'] = this_month

    # Presentation formatting: coverage/accuracy are divided by 100 and shown as
    # percentages; action_rate is multiplied by 100 and labelled bps.
    # NOTE(review): assumes the reporter returns coverage/accuracy on a 0-100 scale - confirm.
    off_perf_all[[ 'loss_coverage', 'accuracy']]=((off_perf_all[[ 'loss_coverage', 'accuracy']]).apply(pd.to_numeric)/100).applymap("{:,.2%}".format)
    off_perf_all[[ 'action_rate']]=((off_perf_all[[ 'action_rate']]).apply(pd.to_numeric)*100).applymap("{:,.1f} bps".format)
    # Keep a per-segment copy; the loop overwrites off_perf_all and rule_content
    # on the next iteration.
    if mode=='2fa':
        off_perf_all_2fa=off_perf_all.copy()
        off_perf_all_2fa['segment']='2FA'
        rule_content_2fa=rule_content.copy()
    else: 
        off_perf_all_n2fa=off_perf_all.copy()
        off_perf_all_n2fa['segment']='Non-2FA'
        rule_content_n2fa=rule_content.copy()

In [169]:
# Show frames in full: no column, row or cell-width limits.
for display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(display_option, None)

# Stack both segments and order the rows for reading; rule names are lower-cased.
segment_frames = [off_perf_all_2fa, off_perf_all_n2fa]
sort_keys = ['segment', 'performance_level', 'rule_set_version', 'rule_name']
final_off_perf = pd.concat(segment_frames).sort_values(sort_keys)
final_off_perf['rule_name'] = final_off_perf['rule_name'].str.lower()
#add region column

In [170]:
# The non-2FA run stored its cutoffs in its own 'threshold_a' column; copy them
# over as threshold_b of the combined rule table.
# NOTE(review): the assignment aligns on the index, which repeats 0..5 per region in
# both frames - it relies on both frames having identical row order.
rule_content_2fa['threshold_b']=rule_content_n2fa['threshold_a']

# Region code = first two characters of the rule name.
rule_content_2fa['rule_region']=rule_content_2fa['rule_name'].str[:2]

# Final column order of the rule-content table.
rule_content_2fa=rule_content_2fa[['model_name', 'model_version', 'model_score_name_set',
       'rule_set_version', 'rule_name', 'threshold_a', 'threshold_b','rule_content',
       'update_time', 'rule_region']]
rule_content_2fa.rule_name=rule_content_2fa.rule_name.str.lower()

In [171]:
rule_content_2fa.head()

Unnamed: 0,model_name,model_version,model_score_name_set,rule_set_version,rule_name,threshold_a,threshold_b,rule_content,update_time,rule_region
0,ato_v1_model,A,model_online_ato_global_auto_v1,Version_1,au_fraud_online_ato_online_model_auto_v1,590,720,def execute_rule():\n actions = []\n if not uuid_split_placeholder : \n return actions\n \n if (days_since_first_order_date > 14 \n and (c_latest_login_2fa_success_timestamp >= (time.time() - 7*24*60*60) or c_latest_pwd_reset_success_timestamp >= (time.time() - 7*24*60*60)) \n and model_online_ato_global_auto_v1 > threshold_a):\n \n actions.append({\n ''action_name'': ''is_rejected_assign''\n })\n if (days_since_first_order_date > 14 \n and not (c_latest_login_2fa_success_timestamp >= (time.time() - 7*24*60*60) or c_latest_pwd_reset_success_timestamp >= (time.time() - 7*24*60*60)) \n and model_online_ato_global_auto_v1 > threshold_b):\n \n actions.append({\n ''action_name'': ''is_rejected_assign''\n })\n return actions,202305,AU
1,ato_v1_model,A,model_online_ato_global_auto_v1,Version_2,au_fraud_online_ato_online_model_auto_v1,780,780,def execute_rule():\n actions = []\n if not uuid_split_placeholder : \n return actions\n \n if (days_since_first_order_date > 14 \n and (c_latest_login_2fa_success_timestamp >= (time.time() - 7*24*60*60) or c_latest_pwd_reset_success_timestamp >= (time.time() - 7*24*60*60)) \n and model_online_ato_global_auto_v1 > threshold_a):\n \n actions.append({\n ''action_name'': ''is_rejected_assign''\n })\n if (days_since_first_order_date > 14 \n and not (c_latest_login_2fa_success_timestamp >= (time.time() - 7*24*60*60) or c_latest_pwd_reset_success_timestamp >= (time.time() - 7*24*60*60)) \n and model_online_ato_global_auto_v1 > threshold_b):\n \n actions.append({\n ''action_name'': ''is_rejected_assign''\n })\n return actions,202305,AU
2,ato_v1_model,A,model_online_ato_global_auto_v1,Version_3,au_fraud_online_ato_online_model_auto_v1,640,740,def execute_rule():\n actions = []\n if not uuid_split_placeholder : \n return actions\n \n if (days_since_first_order_date > 14 \n and (c_latest_login_2fa_success_timestamp >= (time.time() - 7*24*60*60) or c_latest_pwd_reset_success_timestamp >= (time.time() - 7*24*60*60)) \n and model_online_ato_global_auto_v1 > threshold_a):\n \n actions.append({\n ''action_name'': ''is_rejected_assign''\n })\n if (days_since_first_order_date > 14 \n and not (c_latest_login_2fa_success_timestamp >= (time.time() - 7*24*60*60) or c_latest_pwd_reset_success_timestamp >= (time.time() - 7*24*60*60)) \n and model_online_ato_global_auto_v1 > threshold_b):\n \n actions.append({\n ''action_name'': ''is_rejected_assign''\n })\n return actions,202305,AU
3,ato_v1_model,B,model_online_ato_global_auto_v1,Version_1,au_fraud_online_ato_online_model_auto_v1,870,900,def execute_rule():\n actions = []\n if not uuid_split_placeholder : \n return actions\n \n if (days_since_first_order_date > 14 \n and (c_latest_login_2fa_success_timestamp >= (time.time() - 7*24*60*60) or c_latest_pwd_reset_success_timestamp >= (time.time() - 7*24*60*60)) \n and model_online_ato_global_auto_v1 > threshold_a):\n \n actions.append({\n ''action_name'': ''is_rejected_assign''\n })\n if (days_since_first_order_date > 14 \n and not (c_latest_login_2fa_success_timestamp >= (time.time() - 7*24*60*60) or c_latest_pwd_reset_success_timestamp >= (time.time() - 7*24*60*60)) \n and model_online_ato_global_auto_v1 > threshold_b):\n \n actions.append({\n ''action_name'': ''is_rejected_assign''\n })\n return actions,202305,AU
4,ato_v1_model,B,model_online_ato_global_auto_v1,Version_2,au_fraud_online_ato_online_model_auto_v1,870,900,def execute_rule():\n actions = []\n if not uuid_split_placeholder : \n return actions\n \n if (days_since_first_order_date > 14 \n and (c_latest_login_2fa_success_timestamp >= (time.time() - 7*24*60*60) or c_latest_pwd_reset_success_timestamp >= (time.time() - 7*24*60*60)) \n and model_online_ato_global_auto_v1 > threshold_a):\n \n actions.append({\n ''action_name'': ''is_rejected_assign''\n })\n if (days_since_first_order_date > 14 \n and not (c_latest_login_2fa_success_timestamp >= (time.time() - 7*24*60*60) or c_latest_pwd_reset_success_timestamp >= (time.time() - 7*24*60*60)) \n and model_online_ato_global_auto_v1 > threshold_b):\n \n actions.append({\n ''action_name'': ''is_rejected_assign''\n })\n return actions,202305,AU


In [None]:
# Persist the offline performance summary (final_off_perf) to Vega:
# create the target table if needed, grant access, then append every row.
table_name_perf = 'ato_v1_model_auto_offline_perf'

# The table is only ever appended to here. To rebuild it from scratch, run
#   drop table if exists sandbox_analytics_au.ato_v1_model_auto_offline_perf;
# by hand before executing this cell.

# One plain tuple per DataFrame row, in column order. itertuples() walks the
# frame once (the previous per-row reset_index().loc[i] rebuilt the frame on
# every iteration) and yields native Python scalars for numeric columns, so
# the literals rendered below do not depend on NumPy's scalar repr.
value_list = list(final_off_perf.itertuples(index=False, name=None))

query = f"""
create table if not exists sandbox_analytics_au.{table_name_perf} (
                                                                     model_name varchar(2000),
                                                                     rule_set_version varchar(2000),
                                                                     performance_level varchar(2000),
                                                                     rule_name varchar(2000),
                                                                     action_rate varchar(2000),
                                                                     loss_coverage varchar(2000),
                                                                     accuracy varchar(2000),
                                                                     update_time varchar(2000),
                                                                     segment varchar(2000))
"""
vega_execute(query)

query = f"""
GRANT ALL PRIVILEGES ON TABLE sandbox_analytics_au.{table_name_perf} TO feature_science_team_all;
;
"""
vega_execute(query)

# An empty frame would render "VALUES " with no tuples and fail with an
# opaque SQL syntax error; stop with an explicit message instead.
if not value_list:
    raise ValueError(
        "final_off_perf has no rows; nothing to insert into " + table_name_perf
    )

# The VALUES clause is the Python repr of the row tuples with the list
# brackets stripped.
# NOTE(review): this relies on repr() producing valid SQL literals; values
# such as None/NaN or strings containing quotes will not render correctly.
params = {
    'insert_val': str(value_list)[1:-1],
    'table_name': table_name_perf
}

query = """
    insert into sandbox_analytics_au.{table_name}
    (model_name, rule_set_version, performance_level, rule_name,
    action_rate, loss_coverage, accuracy, update_time, segment)
        VALUES {insert_val}
""".format(**params)

vega_execute(query)



In [151]:
# Persist the generated rule definitions (rule_content_2fa) to Vega:
# create the target table if needed, grant access, then append every row.
table_name_cont = 'ato_v1_model_auto_rule_content'

#output to vega
query = f"""
create table if not exists sandbox_analytics_au.{table_name_cont}
(
    model_name         varchar(2000),
    model_version      varchar(2000),
    model_score_name_set varchar(2000),
    rule_set_version   varchar(2000),
    rule_name          varchar(2000),
    threshold_a         float,
    threshold_b         float,
    rule_content       varchar(5000),
    update_time        varchar(2000),
    rule_region       varchar(5000)
);
"""
vega_execute(query)

query = f"""
GRANT ALL PRIVILEGES ON TABLE sandbox_analytics_au.{table_name_cont} TO feature_science_team_all;
;
"""
vega_execute(query)

# One plain tuple per row of the frame being inserted. The previous loop took
# its length from `rule_content` while reading rows from `rule_content_2fa`,
# so a size mismatch either raised KeyError or silently dropped rows.
# NOTE(review): assumes rule_content_2fa is the frame meant to be uploaded
# (it is the one the rows were already read from) - confirm.
value_list = list(rule_content_2fa.itertuples(index=False, name=None))

# An empty frame would render "VALUES " with no tuples and fail with an
# opaque SQL syntax error; stop with an explicit message instead.
if not value_list:
    raise ValueError(
        "rule_content_2fa has no rows; nothing to insert into " + table_name_cont
    )

# The VALUES clause is the Python repr of the row tuples with the list
# brackets stripped. repr() switches to double-quote delimiters for strings
# that contain single quotes, hence the blanket '"' -> "'" replacement.
# NOTE(review): that replacement also rewrites any double quote inside the
# data itself - confirm rule_content never contains one.
params = {
    'insert_val': str(value_list)[1:-1].replace('"', "'"),
    'table_name': table_name_cont
}

query = """
    insert into sandbox_analytics_au.{table_name}
    (model_name, model_version, model_score_name_set, rule_set_version, rule_name,
    threshold_a,threshold_b, rule_content, update_time,rule_region)
        VALUES {insert_val}
""".format(**params)

vega_execute(query)

In [152]:
# Export this month's rows of the performance and rule-content tables to S3.
import datetime

# Rows are selected by the YYYYMM stamp of today's date.
today = datetime.date.today()
this_month = today.strftime("%Y%m")
params = dict(this_month=this_month)


def _unload_statement(source_table, s3_suffix):
    """Render the UNLOAD statement exporting this month's rows of one table.

    Reads the notebook globals `this_month` and `Model_name` at call time.
    The S3 object prefix is `<Model_name>_<s3_suffix>_`.
    """
    # NOTE(review): the filter value is rendered unquoted although the
    # create-table cells declare update_time as varchar - confirm the
    # implicit comparison is intended.
    return f"""
unload ('select distinct * from sandbox_analytics_au.{source_table} where update_time = {this_month}')
    to 's3://risk-data-prod/risk-strategy-refresh/{Model_name}_{s3_suffix}_'
    iam_role 'arn:aws:iam::900305707269:role/prod-datalake_redshift_access_to_unload_s3_analytics,arn:aws:iam::545788091248:role/RedshiftAssumedIAMRole'
    ALLOWOVERWRITE
    header
    parallel off
    csv;
"""


# Offline performance figures.
query = _unload_statement(table_name_perf, "simu_offline_performance")
vega_execute(query)

# Rule content / thresholds.
query = _unload_statement(table_name_cont, "simu_result")
vega_execute(query)