In [None]:
# !pip install s3fs

# Have to install vowpal wabbit using the wheels in this link
# https://github.com/VowpalWabbit/vowpal_wabbit/issues/1764#issuecomment-701379336

import pandas as pd
import sklearn as sk
import numpy as np
import boto3
from sagemaker import get_execution_role
pd.set_option('max.columns', None)
pd.set_option('max.rows',None)

# data location
# role = get_execution_role()  # Don't run with SM Studio; needed for notebook instances
bucket_name = 'sagemaker-datascience-payments-dev'
data_key = 'VW_CMAB_data_NoRetry.csv'
data_location = 's3://{}/{}'.format(bucket_name, data_key)

# Input S3 data into dataframe
df = pd.read_csv(data_location, index_col=0)
print(df.shape)

# Subset out manual pays
df = df[df['if_mannual_pay']==0]
print(f"Length of DF for only auto pay: {df.shape}")

# Find historical probability of each arm: the fraction of logged rows in which
# that arm was played. The previous code normalised (arm, reward) counts within
# each arm, which yields P(reward | arm) rather than the probability of the arm
# that the VW "action:cost:probability" label refers to.
arm_counts = df.groupby('arms').size()
df_pct = (arm_counts / arm_counts.sum()).rename('probability').reset_index()

# Merge original df with probability of each arm (already a 0-1 fraction)
df_merged = df.merge(df_pct, on='arms', how='inner')

# Move action(arms), cost(reward), probability calculation to front as vowpal wabbit expects
cols_to_move = ['arms', 'reward', 'probability']
df_merged = df_merged[ cols_to_move + [ col for col in df_merged.columns if col not in cols_to_move ] ]

# Adding sequential 1-based index as vowpal wabbit expects
df_merged['index'] = range(1, len(df_merged) + 1)
df_merged = df_merged.set_index("index")

# Dropping excess features already represented in data elsewhere
df_merged = df_merged.drop(['txn_timestamp', 'application_id'], axis=1)

# VW optimizes to minimize COST which is negative of reward. Therefore reward * -1 = cost.
df_merged['reward'] = df_merged['reward'] * -1
# display(df_merged[df_merged['arms']==310])  # every row of one arm now shares one probability
df_merged.head(3)

# !pip install vowpalwabbit

import pandas as pd
import sklearn as sk
import numpy as np
from vowpalwabbit import pyvw

# Quick sanity summary of the prepared training frame.
total_rows = len(df_merged)
unique_arm_count = len(df_merged['arms'].unique())
print(f"Length of total dataframe: {total_rows}")
print(f"Number of Unique Arms: {unique_arm_count}") # 502 unique arms

# Candidate training features: every column except the ones listed in exclude_feats.
exclude_feats = ['pre_txn_provider', 'pre_response_code', 'direction_change_processor']
feats = df_merged.columns.tolist()
train_feats = [name for name in feats if name not in exclude_feats]
train_feats

### Train/Learn

# initialize empty history 
# (offline eval means you can only add to history when rec matches historic data)
# history = pd.DataFrame(data=None, columns=df.columns)
# history = history.astype({ARM: 'int32', REWARD: 'float'})

import matplotlib.pyplot as plt

# decode_df # Chase 0-164, Stripe 165-332, Wells 333-500
def replay_score(history, df, t, batch_size, recs):
    """
    Replay-style offline scoring step (https://arxiv.org/pdf/1003.5956.pdf).

    Takes the logged rows ``df[t:t+batch_size]``, keeps only those whose ARM
    value is one of ``recs`` and appends them to ``history``. Rows whose logged
    arm was not recommended are ignored.
    I.E. If MAB recommended 5 movies & historical viewer data showed they watched 3 of the 5 then we'd
    only pull the rewards for the 3 movies they played; [1, 0, 1] = liked, disliked, liked for example.

    Parameters
    ----------
    history : DataFrame of previously matched rows.
    df : DataFrame of logged data; must contain the ARM and REWARD columns
        (both names are module-level constants defined outside this function).
    t : start position of the current batch (positional slice).
    batch_size : number of logged rows considered in this step.
    recs : collection of arms recommended by the policy for this step.

    Returns
    -------
    (history, action_liked) : the grown history, and the ARM/REWARD columns
    of the rows matched in this step.
    """
    # Logged rows for the current time step
    batch = df[t:t+batch_size]
    # Core of "Replay": keep only logged rows whose arm the policy also recommended.
    # .copy() so the column assignment below writes to an independent frame
    # instead of a view of df.
    matched = batch.loc[batch[ARM].isin(recs)].copy()
    matched['scoring_round'] = t
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    history = pd.concat([history, matched])  # cumulatively grows as algo steps through time
    action_liked = matched[[ARM, REWARD]]
    return history, action_liked

def train_vw(vw, train_df):
    """
    Feed every row of ``train_df`` to ``vw.learn`` and track the running reward.

    Each row becomes one example string of the form
    ``"<arms>:<reward>:<probability> | <f0> <f1> ... <f16>"`` where the
    ``reward`` column is used as the cost and ``f0..f16`` are the raw values of
    the feature columns listed below, in that order.

    Parameters
    ----------
    vw : object exposing ``learn(example_str)``.
    train_df : DataFrame containing ``arms``, ``reward``, ``probability`` and
        the 17 feature columns listed in ``feature_cols``.

    Returns
    -------
    (vw, ctr) : the same ``vw`` object, and a list with one entry per row
    holding the running average of ``-cost`` over the rows seen so far.

    NOTE(review): feature values are written without a feature name, so equal
    values coming from different columns produce identical tokens (presumably
    the same hashed feature in VW) -- confirm this is intended. Changing the
    format here requires the same change wherever prediction examples are built.
    """
    label_cols = ["arms", "reward", "probability"]
    feature_cols = [
        "loan_id", "hour", "dayofweek", "transaction_provider", "txn_duration",
        "type", "bank", "transaction_amount", "financed_amount", "term", "fico",
        "payment_index", "destination_account", "if_mannual_pay",
        "if_change_card", "if_change_processor", "monthly_vintage",
    ]

    cost_sum = 0.
    ctr = []
    # Walk the needed columns in lockstep instead of doing one .loc lookup per
    # cell; this also works when index labels are not unique.
    rows = zip(*(train_df[col] for col in label_cols + feature_cols))
    for n_seen, (action, cost, probability, *features) in enumerate(rows, start=1):
        # Add to cumulative sum for each cost
        cost_sum += cost

        # Construct the example in the required vw format.
        learn_example = (
            str(action) + ":" + str(cost) + ":" + str(probability) + " | "
            + " ".join(str(value) for value in features)
        )

        # Here we do the actual learning.
        vw.learn(learn_example)

        # Running average reward (= -1 * cost). Divide by the number of rows
        # seen, not by the index label, so slices / re-indexed frames are
        # averaged correctly.
        ctr.append(-1 * cost_sum / n_seen)

    return vw, ctr
    
def plot_ctr(num_iterations, ctr):
    """Plot ``ctr`` against iteration numbers 1..num_iterations on a 0-1 y-axis."""
    iteration_axis = range(1, num_iterations + 1)
    plt.plot(iteration_axis, ctr)
    # Same font size for both axis labels.
    label_style = {'fontsize': 14}
    plt.xlabel('num_iterations', **label_style)
    plt.ylabel('ctr', **label_style)
    plt.ylim([0, 1])

# 10% exploration w/ epsilon greedy policy (--epsilon 0.1)
# NOTE(review): "-q UA" asks VW for quadratic interactions between namespaces
# starting with U and A, but the examples built in this file only use the
# unnamed default namespace -- confirm whether the flag is still intended.
vw = pyvw.vw("--cb_explore 502 -q UA --quiet --epsilon 0.1")

# Learn
train_df = df_merged # [-100000:]
vw, ctr = train_vw(vw, train_df)

# Plot Learning
num_iterations = len(train_df)
plot_ctr(num_iterations, ctr)

### Need to print CTR explicitly

# Test on last 20 records (label columns removed, re-indexed 1..20)
exclude = ['arms', 'reward', 'probability']
test_df = df_merged[ [ col for col in df_merged.columns if col not in exclude ] ]
test_df = test_df[-20:].reset_index(drop=True)
test_df['index'] = range(1, len(test_df) + 1)
test_df = test_df.set_index("index")
test_df.head(3)

# Feature columns in the same order train_vw writes them into an example.
test_feature_cols = [
    'loan_id', 'hour', 'dayofweek', 'transaction_provider', 'txn_duration',
    'type', 'bank', 'transaction_amount', 'financed_amount', 'term', 'fico',
    'payment_index', 'destination_account', 'if_mannual_pay',
    'if_change_card', 'if_change_processor', 'monthly_vintage',
]

# Read the features from test_df (the last 20 rows). The loop previously
# indexed train_df with test_df's 1..20 labels, which scored the FIRST 20
# training rows instead of the rows selected above.
test_rows = zip(*(test_df[col] for col in test_feature_cols))
for i, feature_values in zip(test_df.index, test_rows):
    # Construct the example in the required vw format (no label part).
    test_example = " | " + " ".join(str(value) for value in feature_values)
#     print(test_example)

    choice = vw.predict(test_example)
#     print(choice)
    # Position of the largest entry in the returned list.
    # NOTE(review): VW numbers cb actions from 1, so position k presumably
    # corresponds to action k+1 -- confirm against the arm encoding.
    arm = choice.index(max(choice))
    print(i, arm)

# Save CMAB
# vw.save('/root/kevink/payments/cmab_full_autopay.model')
# del vw

# Load CMAB for more predictions
# vw = pyvw.vw("--cb 4 -i cb.model")
# print(vw.predict('| a b'))

!pip install vowpalwabbit

import pandas as pd
import sklearn as sk
import numpy as np
import boto3
from sagemaker import get_execution_role
pd.set_option('max.columns', None)
pd.set_option('max.rows',None)

# data location
# role = get_execution_role()  # Don't run with SM Studio; needed for notebook instances
bucket_name = 'sagemaker-datascience-payments-dev'
data_key = 'last20txn.csv'
data_location = 's3://{}/{}'.format(bucket_name, data_key)

# Input S3 data into dataframe, oldest transaction first
df = pd.read_csv(data_location)
df = df.sort_values(by='TXN_TIMESTAMP').reset_index(drop=True)
display(df.head(3))

# This file uses upper-case column names, so upper-case the feature names.
feat_list = ["hour", "dayofweek","transaction_provider","txn_duration","type","bank",
             "transaction_amount","financed_amount","term","fico","payment_index",
             "destination_account","if_mannual_pay", "if_change_card","if_change_processor",
             "monthly_vintage"]
feat_list = [x.upper() for x in feat_list]

# Subset to features only
test_df = df[feat_list]

# Load locally saved cmab_full_autopay.model
from vowpalwabbit import pyvw
vw = pyvw.vw("--cb 502 -i /root/kevink/payments/cmab_full_autopay.model")

# One prediction per row: build the unlabeled example from the feature values
# in feat_list order and keep the position of the largest entry returned.
# NOTE(review): train_vw also feeds loan_id as the first feature; it is not
# part of feat_list here -- confirm that is intended.
predictions=[]
for feature_values in zip(*(test_df[col] for col in feat_list)):
    test_example = "| " + " ".join(str(value) for value in feature_values)
#     print(test_example)

    choice = vw.predict(test_example)
#     print(choice)
    arm = choice.index(max(choice))
    predictions.append(arm)

# columns must be an ordered list-like; a set is not a valid column spec.
preds_df = pd.DataFrame(predictions, columns=['predicted_arm'])

# Chase 0-164, Stripe 165-332, Wells 333-500

p_df = preds_df.copy()
# Add What Transaction Actions we actually took
p_df['actual_provider'] = list(df['TRANSACTION_PROVIDER'])
p_df['actual_success?'] = list(df['TRANSACTION_STATUS'])
# Arm key format: <hour>_<dayofweek>_<provider>
p_df['actual_arm'] = df['HOUR'].astype(str)+'_'+df['DAYOFWEEK']+'_'+df['TRANSACTION_PROVIDER']

print(p_df.dtypes)
p_df.head()

def arm_mapping(arm):
    """Return the provider name for an encoded arm id.

    Arms up to 164 map to "Chase", arms up to 332 map to "Stripe" and
    everything else maps to "Wells Fargo".
    """
    # (inclusive upper bound, provider) pairs checked in ascending order
    provider_bounds = ((164, "Chase"), (332, "Stripe"))
    for upper_bound, provider in provider_bounds:
        if arm <= upper_bound:
            return provider
    return "Wells Fargo"

# Add Theoretical Provider
p_df['pred_provider'] = p_df['predicted_arm'].apply(arm_mapping)
p_df['processor_match'] = np.where(p_df['actual_provider']==p_df['pred_provider'], "YES", "NO")

# Get decode_df of arms
bucket_name = 'sagemaker-datascience-payments-dev'
data_key = 'decode_arms.csv'
data_location = 's3://{}/{}'.format(bucket_name, data_key)

# Input S3 data into dataframe; suffix every column with "_encoded" and build
# the same <hour>_<dayofweek>_<provider> key used for actual_arm.
decode_df = pd.read_csv(data_location, index_col=0)
decode_df.columns = [str(col) + '_encoded' for col in decode_df.columns]
decode_df['encode_arm'] = decode_df['hour_encoded'].astype(str)+'_'+decode_df['dayofweek_encoded']+'_'+decode_df['transaction_provider_encoded']
print(decode_df.shape, decode_df.dtypes)
display(decode_df.head(3))

# Merge decoded_df so we can decode predicted arm
# (default inner merge: rows whose predicted_arm has no match in decode_df are dropped)
p_df = p_df.merge(decode_df, left_on=['predicted_arm'], right_on=['arms_encoded'])
p_df['pred_arm'] = p_df['hour_encoded'].astype(str)+'_'+p_df['dayofweek_encoded']+'_'+p_df['transaction_provider_encoded']
p_df = p_df.drop(['hour_encoded', 'dayofweek_encoded', 'transaction_provider_encoded',
                  'arms_encoded', 'actual_provider', 'pred_provider'], axis=1)

p_df = p_df[['actual_arm', 'actual_success?', 'predicted_arm', 'pred_arm', 'processor_match']]


# actual arm -> 3 columns, merged decode_df to get arms_encoded, delete old cols
# actual_arm is <hour>_<dayofweek>_<provider>, so the pieces are hour, day,
# provider in that order (the day/provider names were previously swapped).
arm_parts = p_df['actual_arm'].str.split('_', expand=True)
p_df[['act_hour', 'act_day', 'act_trans']] = arm_parts.iloc[:, :3]
# Make sure same types
p_df['act_hour'] = p_df['act_hour'].astype(int)
# Left merge keeps every prediction row even when the actual arm is not in decode_df
p_df = p_df.merge(decode_df,
                  left_on=['actual_arm'],
                  right_on=['encode_arm'],
                  how='left')

# Subset to cols we care about
p_df = p_df[['actual_arm', 'arms_encoded', 'actual_success?', 'predicted_arm', 'pred_arm', 'processor_match']]
p_df