# The Adyen Contextual Multi-Armed Bandit (CMAB) Challenge

- CMAB as better alternative to A/B testing
- CMAB best described as a repeated interaction over a number of rounds $T$. Formally, at each round $t=1,2, \ldots, T$:
* The environment (i.e. the real world) reveals a context $x_t$ (i.e. payment features). 
* The learner chooses an action $a_t$ (i.e. an optimization). 
* The environment reveals a reward $r_t \in \{0,1\}$ (i.e. a non-successful/successful payment). 

- Goal is to choose actions which maximize cumulative reward, that is $\sum_{t=1}^T r_t$.
- Specific Aim
    - Build action selection algorithm that can pick best actions on the basis of past data 
    - Select actions for unseen data
    - Output = `unseen_data.csv` with columns attached `y_pred`, `p(a)` and `a` 
- Data descriptions
    - reference column (index: `ref`)
    - reward label (`y`, a payment getting authorized)
    - chosen action (`a`, an optimization decision)
    - probability of an action being chosen (`p(a)`, the latent probability of an optimization being chosen)
    - contextual features (`x`, payment related features)

- Ordinarily would have a feedback loop to test the algorithm; however, in the context of this challenge, there is no explicit feedback loop.


# Summary Conclusions

- New action selection strategy when applied to OOT Test set performs better than current approach
- Uplift of 17% with predicted Success rate of 74% using new strategy vs 63% using current approach
- OOT Test Set refers to 10% of historic data that occurred on day  8

__Install Packages__

In [None]:
# !pip install lightgbm
# !pip install scikit-learn

__Load packages__

In [None]:
from typing import Any, Optional, Dict

import pandas as pd
import lightgbm as lgbm
from sklearn.metrics import roc_auc_score,accuracy_score
import numpy as np
import pickle
import _pickle as cPickle
from datetime import datetime

from sklearn.model_selection import train_test_split

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

### Set parameters

In [None]:
# Run date as YYYYMMDD, used to stamp the saved model file #
today_date = f"{datetime.today():%Y%m%d}"

### Load user-defined functions

- Used for training the oracle and finding the action selection policy based on the historic train set

In [None]:
def softmax(probas_array):
    """
    Map one set of scores onto positive weights that sum to 1
    The largest score is subtracted before exponentiating: the weights are unchanged by the shift,
    but every exponent is <= 0 so the exponentials cannot overflow
    A score far above the others takes most of the weight and squeezes the rest into a small range
    """
    shifted_scores = probas_array - np.max(probas_array)
    exponentials = np.exp(shifted_scores)
    normaliser = exponentials.sum(axis=0)
    return exponentials / normaliser

def softmax_temp(probas_array, temp):
    """
    Temperature-scaled softmax for one set of scores
    Each score is divided by `temp` before the softmax, so the weights are proportional to exp(score/temp)
    A small temp sharpens the weights towards the highest score (exploit),
    a large temp flattens them towards uniform (explore)

    probas_array: 1-d sequence of scores (list, numpy array or pandas Series)
    temp: strictly positive temperature
    return: numpy float array of weights summing to 1 (an empty array for empty input)
    raises: ValueError if temp is not strictly positive
    """
    if temp <= 0:
        raise ValueError('temp must be strictly positive, got {}'.format(temp))

    scores = np.asarray(probas_array, dtype=float)
    if scores.size == 0:
        return scores

    # Shifting by the max leaves the weights unchanged but keeps every exponent <= 0, #
    # so a large score/temp ratio can no longer overflow to inf and produce NaN weights #
    exp_scaled = np.exp((scores - scores.max()) / temp)
    return exp_scaled / exp_scaled.sum()

def _sample_one_per_ref(candidates, rng, weight_fn=None):
    """Draw one row label per ref from `candidates`, uniformly or with the weights returned by `weight_fn(rows)`."""
    picks = []
    for _, rows in candidates.groupby('ref', sort=False):
        weights = None if weight_fn is None else np.asarray(weight_fn(rows), dtype=float)
        picks.append(rng.choice(rows.index.to_numpy(), p=weights))
    return picks


def action_selection_rec(df,
                         method:str,
                         epsilon=0.1,
                         temp=0.02,
                         seed=42,
                         output=False,
                         file=None) -> pd.DataFrame:
    """ 
    Pick one action per ref from a frame holding one row per (ref, candidate action)

    df: dataframe containing 'ref', the one-hot 'choice*' action columns and 'y_pred' (probability of success
        for that action). The eps_* methods also need an existing 'p(a)' column. df is not modified.
        The index must be unique because rows are selected by index label.
    method: eps_uniform, eps_weighted, softmax_temp
        eps_uniform = Epsilon-greedy with uniformly sampled exploratory action
        eps_weighted = Epsilon-greedy with weight-based sampling of exploratory action
        softmax_temp = After applying the softmax transformation (including the addition of a 
                        temperature parameter that trades off between exploit vs explore), will sample actions using weights
    epsilon: share of refs that explore (eps_* methods only); int(n_refs*(1-epsilon)) distinct refs exploit
    temp: temperature passed to softmax_temp (softmax_temp method only)
    seed: seed of a local random generator; the global numpy random state is left untouched
    output: if True, write the recommendations to '<file>.csv'
    file: file name without extension, required when output=True
    return: dataframe with the recommended action for each ref ('a' = name of the chosen 'choice*' column),
            associated probability of success ('y_pred'), and latent action probability ('p(a)')
    raises: ValueError for an unknown method, epsilon outside [0, 1], output=True without file,
            a non-unique index, or when the number of recommendations differs from the number of refs
            (e.g. an exploring ref that has no alternative to its best action)

    NOTE(review): for the eps_* methods 'p(a)' is passed through from df unchanged; it is not the probability
    with which the epsilon-greedy policy itself selects the action. Confirm this is the intended output.
    """
    if method not in ('eps_uniform', 'eps_weighted', 'softmax_temp'):
        raise ValueError('This method does not exist. Choose from eps_uniform, eps_weighted, softmax_temp')
    if output and file is None:
        raise ValueError('file must be provided when output=True')
    if not df.index.is_unique:
        raise ValueError('df must have a unique index')

    rng = np.random.default_rng(seed)
    rec_df = df.copy()  # work on a copy so the caller's frame keeps its columns and its p(a) #
    refs = rec_df['ref'].unique()

    # Flag the highest-scoring action of every ref, whatever the method #
    greedy_idx = rec_df.groupby('ref')['y_pred'].idxmax()
    rec_df['max_action'] = 0
    rec_df.loc[greedy_idx.to_numpy(), 'max_action'] = 1

    if method == 'softmax_temp':
        # transform keeps the weights aligned on the frame's own index, whatever the row order #
        rec_df['p(a)'] = rec_df.groupby('ref')['y_pred'].transform(lambda s: softmax_temp(s.to_numpy(), temp))
        chosen = _sample_one_per_ref(rec_df, rng, weight_fn=lambda rows: rows['p(a)'].to_numpy())
    else:
        if not 0 <= epsilon <= 1:
            raise ValueError('epsilon must be between 0 and 1, got {}'.format(epsilon))
        # For 1-epsilon of refs, choose action according to greedy approach #
        # Sampling the actual refs without replacement gives exactly that share of distinct refs #
        n_exploit = int(len(refs) * (1 - epsilon))
        exploit_refs = rng.choice(refs, size=n_exploit, replace=False)
        chosen = greedy_idx.loc[exploit_refs].tolist()

        # Remaining refs explore among their non-winning actions #
        explore_pool = rec_df.loc[~rec_df['ref'].isin(exploit_refs) & (rec_df['max_action'] != 1)]
        if method == 'eps_weighted':
            # Scaled probas after excluding the winning strategy #
            chosen += _sample_one_per_ref(explore_pool, rng,
                                          weight_fn=lambda rows: softmax(rows['y_pred'].to_numpy()))
        else:
            chosen += _sample_one_per_ref(explore_pool, rng)

    df_rec = rec_df.loc[rec_df.index.isin(chosen)].copy()
    df_rec['a'] = df_rec[[i for i in df_rec.columns if 'choice' in i]].idxmax(axis=1)

    if method == 'softmax_temp':
        # What proportion of the times is the winning strategy chosen ? #
        winning_strategy_prop = (df_rec['max_action'] == 1).sum() / len(df_rec)
        print('Proportion of times winning strategy is chosen is: {:.1%}'.format(winning_strategy_prop))

    if len(refs) != df_rec.shape[0]:
        print('Recommendations = {}, Expected recommendations = {}'.format(df_rec.shape[0],len(refs) ))
        raise ValueError('Incorrect number of recommendations')

    df_rec = df_rec.loc[:,['ref','a','y_pred','p(a)']]

    if output:
        df_rec.to_csv('{}.csv'.format(file))

    return df_rec

def action_exploder(df,
                    distinct_actions:list,
                    prefix_col:str) -> pd.DataFrame:
    """
    Creates a counterfactual dataframe i.e. has all the possible action policies for each ref so as to evaluate each scenario
    Are then able to separately calculate probabilities for each & incorporate into action selection strategy

    df: dataframe with one row per 'ref' (refs must be unique); it is not modified
    distinct_actions: the action values to simulate for every ref
    prefix_col: prefix of the one-hot action columns, so the input space aligns with the fitted classifier
    return: new dataframe with len(distinct_actions) rows per input row (input row order, then action order),
            holding the input columns followed by the one-hot action columns
    raises: ValueError if refs are duplicated or the exploded row count is not len(distinct_actions)*len(df)
    """
    if df['ref'].duplicated().any():
        raise ValueError('Incorrect number of actions created: ref values must be unique')

    action_df = pd.DataFrame({'Action':distinct_actions})
    dummy_combinations = pd.get_dummies(action_df['Action'],prefix=prefix_col)

    # Cross join through a constant key added to copies only, so the caller's frame is left untouched #
    key = '__joining_key__'
    df_exploded = (df.assign(**{key: 1})
                     .merge(dummy_combinations.assign(**{key: 1}), on=key, how='inner')
                     .drop(columns=key))
    # Check # 
    if len(distinct_actions)*df.shape[0] == df_exploded.shape[0]:
        print('Correct number of actions created')
    else:
        raise ValueError('Incorrect number of actions created')

    return df_exploded

def preprocess(df,
               cat_col:str,
               prefix_col:str) -> pd.DataFrame:
    """
    One-hot encode `cat_col`: returns a new dataframe with `cat_col` removed and its
    dummy columns (named with `prefix_col`) appended on the right. The input is not modified.
    """
    remaining_columns = df.drop(columns=cat_col, inplace=False)
    one_hot_columns = pd.get_dummies(df[cat_col],prefix=prefix_col)
    return pd.concat([remaining_columns, one_hot_columns], axis=1)

def model_eval(df,
               target:pd.Series,
               model_features:list,
               model:Any,
               data_descr:str) -> None:
    """
    Print the AUC and accuracy of a fitted binary classifier on one dataset

    df: dataframe containing at least the `model_features` columns
    target: true labels, aligned row-for-row with df
    model_features: columns passed to the model
    model: fitted classifier exposing predict_proba (positive class in column 1) and predict
    data_descr: label prefixed to each printed metric (e.g. 'Train')
    """
    features = df[model_features]  # slice once, reuse for both calls #
    probas = model.predict_proba(features)[:,1]
    preds = model.predict(features)
    print('{} AUC: {}'.format(data_descr, roc_auc_score(target,probas)))
    print('{} Accuracy: {}'.format(data_descr, accuracy_score(target,preds)))

### Load Data

In [None]:
TRAINING_DATA_URL = 'https://hr-projects-assets-prod.s3.amazonaws.com/dh0sr099ohk/d1509ac0ad592a793b6b56456fe226aa/training_data.csv'
UNSEEN_DATA_URL = 'https://hr-projects-assets-prod.s3.amazonaws.com/dh0sr099ohk/fb501b17739e3c5457da79dd1b155b8c/unseen_data.csv'

# Labelled history used to fit the model; the first csv column is the index and is also kept as a 'ref' column #
training_data = pd.read_csv(TRAINING_DATA_URL, index_col=0)
training_data = training_data.assign(ref=training_data.index)

# Data the policy is applied to, loaded the same way #
unseen_data = pd.read_csv(UNSEEN_DATA_URL, index_col=0)
unseen_data = unseen_data.assign(ref=unseen_data.index)

### Explore the data
- Ordinarily would spend more time here, bringing in real-world variables & domain knowledge, but given that the CMAB task takes priority and the data is synthesised, will proceed


In [None]:
 # historic_data # 
# Each bare expression below is displayed because ast_node_interactivity is set to "all" above #
# The trailing notes record what was observed when the cell was run #
training_data.head(5)
training_data.shape # 506K rows
training_data.day.value_counts(normalize=True) # share of rows per day
training_data['a'].value_counts() # actions 1-4
training_data['P(a)'].value_counts() # only 4 distinct probabilities
training_data.groupby('a')['P(a)'].mean() # mean probability of being chosen per action (not the reward): 2 highest on avg, followed by 3
training_data['P(a)'].max() # 31%
training_data['P(a)'].min() # 16%
# Unseen & unlabelled data #
unseen_data.head(5)
unseen_data.shape # 65K rows

In [None]:
# Sorted list of every action value seen in the historic data #
distinct_actions = sorted(training_data['a'].unique().tolist())

__Create dummy features using the action/policy field__

In [None]:
# Replace the action column 'a' by its one-hot 'choice_*' columns #
training_data = preprocess(training_data, cat_col='a', prefix_col='choice')

# Train test split & Temporal Considerations

- Want to assess model performance on out-of-sample & in-time set (OOS) & out-of-time set (OOT)

- Options
    1. OOS & OOT Split (CHOSEN APPROACH)
        - Approx 70% Train, 20% Test, 10% OOT Test
        - Can then better assess how well the model will generalise to unseen data in current period
        - And will then be able to assess my action-selection approach vs the current existing CMAB approach
            - would not be fair to use data after the event to train the model as a form of leakage
            - hence why train model on subperiod and compare performance in OOT so can better benchmark against current approach
            - benchmarking done using probability predictions since cannot of course obtain counterfactual outcomes
    2. OOS Split Only
        - 70% Train : 30% Test
        - Allows me to train data on most up-to-date period, so should better predict current + future period(s)
        - N.B Could potentially weight samples when fitting LGBM using 'day'

In [None]:
# Out-of-time set = day 8, in-time set = every earlier day; 'day' itself is not kept #
is_oot_day = training_data['day'] == 8
is_intime_day = training_data['day'] < 8
oot_validation_df = training_data.loc[is_oot_day].drop(columns='day')
intime_df = training_data.loc[is_intime_day].drop(columns='day')

# Reproducible shuffle of the in-time rows #
intime_df = intime_df.sample(frac=1, random_state=42)

# Features vs label ('ref' is an identifier, not a feature) #
y = intime_df['y']
X = intime_df.drop(columns=['y', 'ref'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

__Model Fitting__

- Would ordinarily compare multiple off-the-shelf algorithms using their default config to see if one algorithm was starkly more performant:
    - logistic
    - random forest
    - Boosting (XGB/LGBM/....)
    
- Might consider including a weighting in the model fitting process according to the recency of the results 
    - i.e. the larger the 'day' feature, the higher importance placed on correcting any residual error

In [None]:
# Columns are picked by name: 'x' marks context features, 'choice' the one-hot action columns #
contextual_features = [col for col in X.columns if 'x' in col]
decision_variable = [col for col in X.columns if 'choice' in col]
model_inputs = contextual_features + decision_variable

# Default LightGBM classifier, no sample weighting (weighting by recency or by p(a) was considered) #
lgb_clf = lgbm.LGBMClassifier()
lgb_clf.fit(X_train[model_inputs], y_train)


EVALUATION

In [None]:
# Same metrics on the train, in-time test and out-of-time sets, in that order #
eval_features = lgb_clf.feature_name_
eval_sets = ((X_train, y_train, 'Train'),
             (X_test, y_test, 'Test InTime'),
             (oot_validation_df, oot_validation_df['y'], 'Test OOT'))
for eval_frame, eval_target, eval_label in eval_sets:
    model_eval(eval_frame, target=eval_target, model_features=eval_features, model=lgb_clf, data_descr=eval_label)

# Predicted success probability of the actions actually taken, kept for the benchmark later on #
oot_validation_df['y_pred'] = lgb_clf.predict_proba(oot_validation_df[eval_features])[:,1]

# Save Model

In [None]:
import os

# open() does not create missing folders, so make sure 'models/' exists before writing #
os.makedirs('models', exist_ok=True)
with open('models/oracle_{date}.pkl'.format(date=today_date), 'wb') as file:
    pickle.dump(lgb_clf, file = file)

__Additional Steps (with more time)__

- To enhance model performance 
    - Hyperparameter Tuning
- To better validate robustness of results use different splits
    - Cross validation

# Action Selection Policy #

- The ultimate action is then randomly chosen using these scaled/normalized probabilities as weights
- We take the normalized probabilities (i.e. weights) into account by sample weighting the observations by the inverse of the probability of the action that was taken

### Softmax Action Selection Rules
- Softmax maps our actions to a set of associated probabilities as is common in reinforcement learning, which is then used to randomly select the next action
- Issue with epsilon-greedy is that when it explores it chooses equally among all actions (i.e. as likely to pick worst action as second best - the implications of the worst action could be very bad so not a good choice)

In [None]:
# Explode out rows to consider all possible action scenarios #
oot_simulation = oot_validation_df.copy().loc[:,contextual_features+['ref']]
oot_simulation = action_exploder(oot_simulation,distinct_actions,prefix_col='choice')

# Predict probas for each simulation #
oot_simulation['y_pred'] = lgb_clf.predict_proba(oot_simulation[lgb_clf.feature_name_])[:,1]
# Normalise these so can interpret actions as probabilities #
# transform (rather than apply) returns a result on the frame's own index, so the assignment aligns row by row #
oot_simulation['p(a)'] = oot_simulation.groupby('ref')['y_pred'].transform(softmax)

# Consider subset of action selection policies #
oot_recs_eps = action_selection_rec(oot_simulation,method='eps_uniform',epsilon=0.1,temp=None)
oot_recs_eps_w = action_selection_rec(oot_simulation,method='eps_weighted',epsilon=0.1,temp=None)
oot_recs_softmax = action_selection_rec(oot_simulation,method='softmax_temp',epsilon=0.1,temp=0.02) # Ideally would tune this temp param if more time #

strategies_dict = {'eps_uniform':oot_recs_eps,
                   'eps_weighted':oot_recs_eps_w,
                   'softmax_temp':oot_recs_softmax}

# Expected success rate using these strategies #
expected_rates = {}
for k,v in strategies_dict.items():
    print('######', k, '#######')
    expected_rates[k] = v['y_pred'].mean()
    print('Expected success rate: ',expected_rates[k],'\n')

# Best strategy = highest expected rate (first one wins a tie); always defined, even if every rate is 0 #
top_strategy = max(expected_rates, key=expected_rates.get)
max_success_rate = expected_rates[top_strategy]

actual_success_rate = oot_validation_df['y'].mean()
print('actual_success_rate: ',actual_success_rate)
pred_success_rate = oot_validation_df['y_pred'].mean()
print('pred_success_rate: ',pred_success_rate)

print('Action strategy is {}'.format(top_strategy))
# Uplift is measured against the predicted (not the actual) success rate of the actions really taken #
print('Uplift in success rate for OOT sample of {:.2%}'.format((max_success_rate-pred_success_rate)/pred_success_rate))

# Apply the chosen strategy on the unseen data set ##

- The top strategy was to use the softmax temperature parameter setup
- However, this was starkly different to the current action selection policy in place and may have led to too much of a radical change
- Instead, have opted for a more conservative approach by choosing the greedy epsilon with weighted sampling approach
- In this way can better balance exploit vs explore trade-off, and if finding performing well in real world can move onto something perhaps more radical 

In [None]:
# One row per (ref, candidate action) #
unseen_data = action_exploder(unseen_data,distinct_actions,prefix_col='choice')
# Predict probas 
unseen_data['y_pred'] = lgb_clf.predict_proba(unseen_data[lgb_clf.feature_name_])[:,1]
# Normalise these so can interpret actions as probabilities # 
# transform (rather than apply) returns a result on the frame's own index, so the assignment aligns row by row #
unseen_data['p(a)'] = unseen_data.groupby('ref')['y_pred'].transform(softmax)

##################################################
# Recommendations are also written to 'QA_output.csv' #
unseen_recs_eps = action_selection_rec(unseen_data,method='eps_weighted',epsilon=0.1,temp=None,seed=42,output=True,file='QA_output')

### Create a class so this can easily be implemented in a productionised pipeline
- To "serve" the model in production & provides recommended actions for new data

In [None]:
class ContextualBanditScoringObject():
    
    """
    Given an input feature set, recommends the best action/policy per observation (be it user/customer etc.) 
    where best is defined according to our exploitation vs exploration appetite

    Typical call order: action_exploder -> predict -> action_selection_rec
    (preprocess is only needed when the data still carries a raw action column)
    """
    
    def __init__(self,
                 model_object: Any,
                 df: pd.DataFrame):
        """
        model_object: fitted classifier exposing predict_proba and feature_name_ (e.g. an LGBMClassifier)
        df: dataframe with one row per 'ref' plus the contextual features
        """
        self.model_object = model_object
        self.df = df

    def preprocess(self,
                   cat_col:str,
                   prefix_col:str):
        """
        This would ordinarily contain a bunch of preprocessing steps,
        but since data is synthetically generated (and w/o domain knowledge) limited preprocessing is applied:
        `cat_col` is replaced in self.df by its one-hot columns named with `prefix_col`
        """
        self.df = pd.concat([self.df.drop(columns=cat_col, inplace=False),
                             pd.get_dummies(self.df[cat_col],prefix=prefix_col)],
                            axis=1)
        
    def action_exploder(self,
                        distinct_actions:list,
                        prefix_col:str) -> None:
        """
        Creates a counterfactual dataframe i.e. has all the possible action policies for each ref so as to evaluate each scenario
        Are then able to separately calculate probabilities for each & incorporate into action selection strategy

        Replaces self.df by the exploded frame: len(distinct_actions) rows per original row
        (original row order, then action order), original columns followed by the one-hot action columns
        raises: ValueError if refs are duplicated or the exploded row count is unexpected
        """
        if self.df['ref'].duplicated().any():
            raise ValueError('Incorrect number of actions created: ref values must be unique')

        action_df = pd.DataFrame({'Action':distinct_actions})
        dummy_combinations = pd.get_dummies(action_df['Action'],prefix=prefix_col)

        # Cross join through a constant key added to copies only, so no helper column is left behind #
        key = '__joining_key__'
        df_exploded = (self.df.assign(**{key: 1})
                              .merge(dummy_combinations.assign(**{key: 1}), on=key, how='inner')
                              .drop(columns=key))
        # Check # 
        if len(distinct_actions)*self.df.shape[0] == df_exploded.shape[0]:
            print('Correct number of actions created')
        else:
            raise ValueError('Incorrect number of actions created')
            
        self.df = df_exploded
    
    def predict(self):
        """ 
        Predict probabilities of contextual information and associated action given a fitted model
        Also predicts latent probabilities of actions

        Adds 'y_pred' (positive-class probability) and 'p(a)' (softmax of y_pred within each ref) to self.df
        """
        self.df['y_pred'] = self.model_object.predict_proba(self.df[self.model_object.feature_name_])[:,1] # may not work if different object type
        # Normalise these so can interpret actions as probabilities #
        # transform (rather than apply) returns a result on the frame's own index, so the assignment aligns row by row #
        self.df['p(a)'] = self.df.groupby('ref')['y_pred'].transform(softmax)

    @staticmethod
    def _sample_one_per_ref(candidates, rng, weight_fn=None):
        """Draw one row label per ref from `candidates`, uniformly or with the weights returned by `weight_fn(rows)`."""
        picks = []
        for _, rows in candidates.groupby('ref', sort=False):
            weights = None if weight_fn is None else np.asarray(weight_fn(rows), dtype=float)
            picks.append(rng.choice(rows.index.to_numpy(), p=weights))
        return picks
    
    def action_selection_rec(self,
                         method:str,
                         epsilon=0.1,
                         temp=0.02,
                         seed=42,
                         output=False,
                         file=None) -> pd.DataFrame:
        """ 
        Pick one action per ref from self.df (one row per ref and candidate action); self.df is not modified

        self.df must contain 'ref', the one-hot 'choice*' columns and 'y_pred'; the eps_* methods also need 'p(a)'.
        Its index must be unique because rows are selected by index label.
        method: eps_uniform, eps_weighted, softmax_temp
        eps_uniform = Epsilon-greedy with uniformly sampled exploratory action
        eps_weighted = Epsilon-greedy with weight-based sampling of exploratory action
        softmax_temp = After applying the softmax transformation (including the addition of a 
                        temperature parameter that trades off between exploit vs explore), will sample actions using weights
        epsilon: share of refs that explore (eps_* methods only); int(n_refs*(1-epsilon)) distinct refs exploit
        temp: temperature passed to softmax_temp (softmax_temp method only)
        seed: seed of a local random generator; the global numpy random state is left untouched
        output: if True, write the recommendations to '<file>.csv'
        file: file name without extension, required when output=True
        return: dataframe with the recommended action for each ref, associated probability of success, and latent action probability
        raises: ValueError for an unknown method, epsilon outside [0, 1], output=True without file,
                a non-unique index, or when the number of recommendations differs from the number of refs

        NOTE(review): for the eps_* methods 'p(a)' is passed through from self.df unchanged; it is not the
        probability with which the epsilon-greedy policy itself selects the action. Confirm this is intended.
        """
        if method not in ('eps_uniform', 'eps_weighted', 'softmax_temp'):
            raise ValueError('This method does not exist. Choose from eps_uniform, eps_weighted, softmax_temp')
        if output and file is None:
            raise ValueError('file must be provided when output=True')
        if not self.df.index.is_unique:
            raise ValueError('df must have a unique index')

        rng = np.random.default_rng(seed)
        rec_df = self.df.copy()  # work on a copy so repeated calls always start from the same frame #
        refs = rec_df['ref'].unique()

        # Flag the highest-scoring action of every ref, whatever the method #
        greedy_idx = rec_df.groupby('ref')['y_pred'].idxmax()
        rec_df['max_action'] = 0
        rec_df.loc[greedy_idx.to_numpy(), 'max_action'] = 1

        if method == 'softmax_temp':
            rec_df['p(a)'] = rec_df.groupby('ref')['y_pred'].transform(lambda s: softmax_temp(s.to_numpy(), temp))
            chosen = self._sample_one_per_ref(rec_df, rng, weight_fn=lambda rows: rows['p(a)'].to_numpy())
        else:
            if not 0 <= epsilon <= 1:
                raise ValueError('epsilon must be between 0 and 1, got {}'.format(epsilon))
            # For 1-epsilon of refs, choose action according to greedy approach #
            # Sampling the actual refs without replacement gives exactly that share of distinct refs #
            n_exploit = int(len(refs) * (1 - epsilon))
            exploit_refs = rng.choice(refs, size=n_exploit, replace=False)
            chosen = greedy_idx.loc[exploit_refs].tolist()

            # Remaining refs explore among their non-winning actions #
            explore_pool = rec_df.loc[~rec_df['ref'].isin(exploit_refs) & (rec_df['max_action'] != 1)]
            if method == 'eps_weighted':
                # Scaled probas after excluding the winning strategy #
                chosen += self._sample_one_per_ref(explore_pool, rng,
                                                   weight_fn=lambda rows: softmax(rows['y_pred'].to_numpy()))
            else:
                chosen += self._sample_one_per_ref(explore_pool, rng)

        df_rec = rec_df.loc[rec_df.index.isin(chosen)].copy()
        df_rec['a'] = df_rec[[i for i in df_rec.columns if 'choice' in i]].idxmax(axis=1)

        if method == 'softmax_temp':
            #What proportion of the times is the winning strategy chosen ? #
            winning_strategy_prop = (df_rec['max_action'] == 1).sum() / len(df_rec)
            print('Proportion of times winning strategy is chosen is: {:.1%}'.format(winning_strategy_prop))

        if len(refs) != df_rec.shape[0]:
            print('Recommendations = {}, Expected recommendations = {}'.format(df_rec.shape[0],len(refs) ))
            raise ValueError('Incorrect number of recommendations')
        
        # Make sure it has y_pred, p(a) and a as columns #
        df_rec = df_rec.loc[:,['ref','a','y_pred','p(a)']]
    
        if output:
            df_rec.to_csv('{}.csv'.format(file))
        
        return df_rec

## Mockup of implementation in a production-style setting (.py file to run in the console)

- Need to flesh out more to run properly, but underlines the general idea!!!

In [None]:
# Settings for one scoring run: where the data and the pickled model live, how the action #
# column is encoded, which actions to simulate, and the action-selection arguments #
cmab_config = dict(
    input_path='https://hr-projects-assets-prod.s3.amazonaws.com/dh0sr099ohk/fb501b17739e3c5457da79dd1b155b8c/unseen_data.csv',
    model_path='models/oracle_20230219.pkl',
    preprocessing_dict=dict(cat_col='a', prefix_col='choice'),
    distinct_actions=[1.0, 2.0, 3.0, 4.0],
    action_dict=dict(
        method='eps_weighted',
        epsilon=0.1,
        temp=None,
        seed=42,
        output=True,
        file='unseen_data',
    ),
)

__This just verifies that this runs correctly in notebook before implementing in python script__

In [None]:
# def cmab_orchestration(input_data,model_path,preprocessing_dict,distinct_actions,action_dict):
    
#     # Load Model #
#     with open("models/oracle_20230219.pkl", "rb") as input_file:
#         oracle_model = cPickle.load(input_file)
#     # model_file = open(model_path,'rb').close()
#     # oracle_model = pickle.load(file)
    
#     # Load Data #
#     unseen_data = pd.read_csv(input_data, index_col=0)
#     unseen_data['ref'] = unseen_data.index
    
#     # Instantiate Contextual Multi-armed Bandit Class
#     cmab = ContextualBanditScoringObject(oracle_model,unseen_data)
#     # cmab.preprocess(**preprocessing_dict)
#     cmab.action_exploder(distinct_actions=distinct_actions,prefix_col='choice')
#     cmab.predict()
#     cmab_recommendations = cmab.action_selection_rec(**action_dict)

# cmab_orchestration(cmab_config['input_path'],
#                    cmab_config['model_path'],
#                    cmab_config['preprocessing_dict'],
#                    cmab_config['distinct_actions'],
#                    cmab_config['action_dict'])

### QA TO CHECK THAT THIS METHOD PRODUCES SAME OUTPUTS AS NOTEBOOK APPROACH
# merged_foo = pd.merge(foo,foo2,how='inner',on='ref')
# np.where(merged_foo['a_x']!=merged_foo['a_y'],1,0).sum()
# np.where(merged_foo['y_pred_x']!=merged_foo['y_pred_y'],1,0).sum()
# np.where(merged_foo['p(a)_x']!=merged_foo['p(a)_y'],1,0).sum()

__Define params in config file called cmab_config.json - see cmab_config dict above__

In [None]:
import argparse
import json
import pickle
import _pickle as cPickle

import pandas as pd
# etc. (ContextualBanditScoringObject and its helper functions must also be importable here) # 

def cmab_orchestration(input_data,model_path,preprocessing_dict,distinct_actions,action_dict):
    """
    Score one input file end to end and return the recommended action per ref

    input_data: path/URL of the csv to score (first column is used as the index and copied to 'ref')
    model_path: path of the pickled fitted model
    preprocessing_dict: kept for interface compatibility; unused because preprocessing is only needed for the historic train set
    distinct_actions: action values to simulate for every ref
    action_dict: keyword arguments forwarded to ContextualBanditScoringObject.action_selection_rec
    return: the recommendations dataframe produced by action_selection_rec
    """
    # Load Model #
    # NOTE(security): unpickling runs arbitrary code - only load model files from a trusted location #
    with open(model_path, 'rb') as input_file:
        oracle_model = cPickle.load(input_file)
    
    # Load Data #
    unseen_data = pd.read_csv(input_data, index_col=0)
    unseen_data['ref'] = unseen_data.index
    
    # Instantiate Contextual Multi-armed Bandit Class
    cmab = ContextualBanditScoringObject(oracle_model,unseen_data)
    # cmab.preprocess(**preprocessing_dict) # this step is only required for the historic train set #
    cmab.action_exploder(distinct_actions=distinct_actions,prefix_col='choice')
    cmab.predict()
    # Hand the recommendations back to the caller instead of discarding them #
    return cmab.action_selection_rec(**action_dict)

def main():
    """Read the json configuration named on the command line and run the scoring pipeline."""
    parser = argparse.ArgumentParser(description='CMAB Recommendations')
    # required=True gives a clear usage error instead of failing later on open(None) #
    parser.add_argument('--setup_file', type=str, required=True, help='Path to the json configuration file, to determine inputs and preference for exploitation vs exploration')
    
    args = parser.parse_args()
    
    with open(args.setup_file) as json_file:
        config_dictionary = json.load(json_file) # this would be the cmab_config dictionary as per above
    
    cmab_orchestration(config_dictionary['input_path'],
                       config_dictionary['model_path'],
                       config_dictionary['preprocessing_dict'],
                       config_dictionary['distinct_actions'],
                       config_dictionary['action_dict']
                      )
    
if __name__ == "__main__":
    
    main()
    
# $ python3 cmab_script.py --setup_file cmab_config.json