In [8]:
import vowpal_wabbit_next as vw
import random
import matplotlib.pyplot as plt
import pandas as pd
import itertools

from typing import List, Tuple

In [9]:
# VW tries to minimize loss/cost, so rewards are passed to it as cost = -reward:
# a liked article is encoded as cost -1.0 and a disliked article as cost 0.0.
USER_LIKED_ARTICLE = -1.0
USER_DISLIKED_ARTICLE = 0.0

In [10]:
def get_cost(context, action):
    '''
    Simulate a user's reaction to a recommended article, expressed as a cost.

    Tom prefers politics in the morning and music in the afternoon,
    while Anna enjoys sports in the morning and politics in the afternoon.
    A recommendation that matches these preferences counts as a
    simulated "click".

    Args:
        context: mapping with a "user" key and, for known users, a
            "time_of_day" key.
        action: the article topic that was recommended.

    Returns:
        USER_LIKED_ARTICLE when `action` is the user's preferred topic for
        the given time of day, otherwise USER_DISLIKED_ARTICLE. A user with
        no known preferences also yields USER_DISLIKED_ARTICLE, so the
        result is always a numeric cost (an unknown user used to fall
        through every branch and return None).

    Raises:
        KeyError: if "user" is missing from `context`, or "time_of_day" is
            missing for a known user.
    '''
    # user -> time of day -> the one topic that user clicks on at that time
    preferred_topic = {
        "Tom": {"morning": "politics", "afternoon": "music"},
        "Anna": {"morning": "sports", "afternoon": "politics"},
    }

    user_preferences = preferred_topic.get(context["user"])
    if user_preferences is None:
        # Unknown user: nothing can match, so report "no click" rather than None.
        return USER_DISLIKED_ARTICLE

    time_of_day = context["time_of_day"]
    # Membership is checked explicitly so an unknown time of day can never
    # "match" an action of None via a defaulted lookup.
    if time_of_day in user_preferences and user_preferences[time_of_day] == action:
        return USER_LIKED_ARTICLE
    return USER_DISLIKED_ARTICLE

In [11]:
def parse_lines(parser: vw.TextFormatParser, input_str: str) -> List[vw.Example]:
    """Parse a multi-line text block into one example per line.

    The text is split on the newline character only, and every resulting
    piece (including an empty one, e.g. after a trailing newline) is passed
    to ``parser.parse_line``.

    Args:
        parser: object whose ``parse_line`` method converts one text line.
        input_str: newline-separated text to parse.

    Returns:
        The results of ``parser.parse_line`` in line order.
    """
    examples = []
    for text_line in input_str.split("\n"):
        examples.append(parser.parse_line(text_line))
    return examples

In [14]:
# This function converts a context, the candidate actions, and an optional (chosen_action, cost, probability) label into VW's multi-line text format
def to_vw_example_format(context, actions, cb_label=None):
    """Render a context and its candidate actions as a VW multi-line text example.

    Args:
        context: mapping with "user" and "time_of_day" keys, written to the
            leading ``shared`` line.
        actions: iterable of article names; each one becomes an ``|Action`` line.
        cb_label: optional ``(chosen_action, cost, prob)`` tuple. When given,
            the line of the action equal to ``chosen_action`` is prefixed
            with ``0:<cost>:<prob> ``.

    Returns:
        The example text with its final newline removed.
    """
    labelled = cb_label is not None
    if labelled:
        chosen_action, cost, prob = cb_label

    fragments = [
        "shared |User user={} time_of_day={}\n".format(
            context["user"], context["time_of_day"]
        )
    ]
    for candidate in actions:
        # Only the action that was actually taken carries the label.
        if labelled and candidate == chosen_action:
            fragments.append("0:{}:{} ".format(cost, prob))
        fragments.append("|Action article={} \n".format(candidate))

    # Every fragment line ends in a newline; drop the very last one.
    return "".join(fragments)[:-1]

In [None]:
# Example context: which user is being served and at what time of day.
context = {
    "user": "Tom", 
    "time_of_day": "morning"
}
# Candidate article topics the learner can choose between.
actions = ["politics", "sports", "music", "food"]

# No cb_label is passed, so the printed example carries no cost/probability label.
print(to_vw_example_format(context, actions))

> [0;32m/var/folders/j4/6wq0rt0j1d7d92jfxsxrxh680000gn/T/ipykernel_96990/2238426710.py[0m(11)[0;36mto_vw_example_format[0;34m()[0m
[0;32m      9 [0;31m    [0;32mimport[0m [0mpdb[0m[0;34m;[0m [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     10 [0;31m[0;34m[0m[0m
[0m[0;32m---> 11 [0;31m    [0;32mfor[0m [0maction[0m [0;32min[0m [0mactions[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     12 [0;31m        [0;32mif[0m [0mcb_label[0m [0;32mis[0m [0;32mnot[0m [0;32mNone[0m [0;32mand[0m [0maction[0m [0;34m==[0m [0mchosen_action[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     13 [0;31m            [0mexample_string[0m [0;34m+=[0m [0;34m"0:{}:{} "[0m[0;34m.[0m[0mformat[0m[0;34m([0m[0mcost[0m[0;34m,[0m [0mprob[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  print(example_string)


shared |User user=Tom time_of_day=morning



ipdb>  print(cb_label)


None


ipdb>  n


> [0;32m/var/folders/j4/6wq0rt0j1d7d92jfxsxrxh680000gn/T/ipykernel_96990/2238426710.py[0m(12)[0;36mto_vw_example_format[0;34m()[0m
[0;32m     10 [0;31m[0;34m[0m[0m
[0m[0;32m     11 [0;31m    [0;32mfor[0m [0maction[0m [0;32min[0m [0mactions[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 12 [0;31m        [0;32mif[0m [0mcb_label[0m [0;32mis[0m [0;32mnot[0m [0;32mNone[0m [0;32mand[0m [0maction[0m [0;34m==[0m [0mchosen_action[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     13 [0;31m            [0mexample_string[0m [0;34m+=[0m [0;34m"0:{}:{} "[0m[0;34m.[0m[0mformat[0m[0;34m([0m[0mcost[0m[0;34m,[0m [0mprob[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     14 [0;31m        [0mexample_string[0m [0;34m+=[0m [0;34m"|Action article={} \n"[0m[0;34m.[0m[0mformat[0m[0;34m([0m[0maction[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  print(action)


politics


ipdb>  n


> [0;32m/var/folders/j4/6wq0rt0j1d7d92jfxsxrxh680000gn/T/ipykernel_96990/2238426710.py[0m(14)[0;36mto_vw_example_format[0;34m()[0m
[0;32m     12 [0;31m        [0;32mif[0m [0mcb_label[0m [0;32mis[0m [0;32mnot[0m [0;32mNone[0m [0;32mand[0m [0maction[0m [0;34m==[0m [0mchosen_action[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     13 [0;31m            [0mexample_string[0m [0;34m+=[0m [0;34m"0:{}:{} "[0m[0;34m.[0m[0mformat[0m[0;34m([0m[0mcost[0m[0;34m,[0m [0mprob[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 14 [0;31m        [0mexample_string[0m [0;34m+=[0m [0;34m"|Action article={} \n"[0m[0;34m.[0m[0mformat[0m[0;34m([0m[0maction[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     15 [0;31m    [0;31m# Strip the last newline[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     16 [0;31m    [0;32mreturn[0m [0mexample_string[0m[0;34m[[0m[0;34m:[0m[0;34m-[0m[0;36m1[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m

ipdb>  print(example_string)


shared |User user=Tom time_of_day=morning



ipdb>  n


> [0;32m/var/folders/j4/6wq0rt0j1d7d92jfxsxrxh680000gn/T/ipykernel_96990/2238426710.py[0m(11)[0;36mto_vw_example_format[0;34m()[0m
[0;32m      9 [0;31m    [0;32mimport[0m [0mpdb[0m[0;34m;[0m [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     10 [0;31m[0;34m[0m[0m
[0m[0;32m---> 11 [0;31m    [0;32mfor[0m [0maction[0m [0;32min[0m [0mactions[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     12 [0;31m        [0;32mif[0m [0mcb_label[0m [0;32mis[0m [0;32mnot[0m [0;32mNone[0m [0;32mand[0m [0maction[0m [0;34m==[0m [0mchosen_action[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     13 [0;31m            [0mexample_string[0m [0;34m+=[0m [0;34m"0:{}:{} "[0m[0;34m.[0m[0mformat[0m[0;34m([0m[0mcost[0m[0;34m,[0m [0mprob[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  print(example_string)


shared |User user=Tom time_of_day=morning
|Action article=politics 



ipdb>  n


> [0;32m/var/folders/j4/6wq0rt0j1d7d92jfxsxrxh680000gn/T/ipykernel_96990/2238426710.py[0m(12)[0;36mto_vw_example_format[0;34m()[0m
[0;32m     10 [0;31m[0;34m[0m[0m
[0m[0;32m     11 [0;31m    [0;32mfor[0m [0maction[0m [0;32min[0m [0mactions[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 12 [0;31m        [0;32mif[0m [0mcb_label[0m [0;32mis[0m [0;32mnot[0m [0;32mNone[0m [0;32mand[0m [0maction[0m [0;34m==[0m [0mchosen_action[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     13 [0;31m            [0mexample_string[0m [0;34m+=[0m [0;34m"0:{}:{} "[0m[0;34m.[0m[0mformat[0m[0;34m([0m[0mcost[0m[0;34m,[0m [0mprob[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     14 [0;31m        [0mexample_string[0m [0;34m+=[0m [0;34m"|Action article={} \n"[0m[0;34m.[0m[0mformat[0m[0;34m([0m[0maction[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  n


> [0;32m/var/folders/j4/6wq0rt0j1d7d92jfxsxrxh680000gn/T/ipykernel_96990/2238426710.py[0m(14)[0;36mto_vw_example_format[0;34m()[0m
[0;32m     12 [0;31m        [0;32mif[0m [0mcb_label[0m [0;32mis[0m [0;32mnot[0m [0;32mNone[0m [0;32mand[0m [0maction[0m [0;34m==[0m [0mchosen_action[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     13 [0;31m            [0mexample_string[0m [0;34m+=[0m [0;34m"0:{}:{} "[0m[0;34m.[0m[0mformat[0m[0;34m([0m[0mcost[0m[0;34m,[0m [0mprob[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 14 [0;31m        [0mexample_string[0m [0;34m+=[0m [0;34m"|Action article={} \n"[0m[0;34m.[0m[0mformat[0m[0;34m([0m[0maction[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     15 [0;31m    [0;31m# Strip the last newline[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     16 [0;31m    [0;32mreturn[0m [0mexample_string[0m[0;34m[[0m[0;34m:[0m[0;34m-[0m[0;36m1[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m

ipdb>  n


> [0;32m/var/folders/j4/6wq0rt0j1d7d92jfxsxrxh680000gn/T/ipykernel_96990/2238426710.py[0m(11)[0;36mto_vw_example_format[0;34m()[0m
[0;32m      9 [0;31m    [0;32mimport[0m [0mpdb[0m[0;34m;[0m [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     10 [0;31m[0;34m[0m[0m
[0m[0;32m---> 11 [0;31m    [0;32mfor[0m [0maction[0m [0;32min[0m [0mactions[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     12 [0;31m        [0;32mif[0m [0mcb_label[0m [0;32mis[0m [0;32mnot[0m [0;32mNone[0m [0;32mand[0m [0maction[0m [0;34m==[0m [0mchosen_action[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     13 [0;31m            [0mexample_string[0m [0;34m+=[0m [0;34m"0:{}:{} "[0m[0;34m.[0m[0mformat[0m[0;34m([0m[0mcost[0m[0;34m,[0m [0mprob[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  r


--Return--
'shared |User...article=food '
> [0;32m/var/folders/j4/6wq0rt0j1d7d92jfxsxrxh680000gn/T/ipykernel_96990/2238426710.py[0m(16)[0;36mto_vw_example_format[0;34m()[0m
[0;32m     12 [0;31m        [0;32mif[0m [0mcb_label[0m [0;32mis[0m [0;32mnot[0m [0;32mNone[0m [0;32mand[0m [0maction[0m [0;34m==[0m [0mchosen_action[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     13 [0;31m            [0mexample_string[0m [0;34m+=[0m [0;34m"0:{}:{} "[0m[0;34m.[0m[0mformat[0m[0;34m([0m[0mcost[0m[0;34m,[0m [0mprob[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     14 [0;31m        [0mexample_string[0m [0;34m+=[0m [0;34m"|Action article={} \n"[0m[0;34m.[0m[0mformat[0m[0;34m([0m[0maction[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     15 [0;31m    [0;31m# Strip the last newline[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 16 [0;31m    [0;32mreturn[0m [0mexample_string[0m[0;34m[[0m[0;34m:[0m[0;34m-[0m[0;36m1

ipdb>  r


shared |User user=Tom time_of_day=morning
|Action article=politics 
|Action article=sports 
|Action article=music 
|Action article=food 
--Return--
None
> [0;32m/var/folders/j4/6wq0rt0j1d7d92jfxsxrxh680000gn/T/ipykernel_96990/3267972512.py[0m(7)[0;36m<module>[0;34m()[0m
[0;32m      3 [0;31m    [0;34m"time_of_day"[0m[0;34m:[0m [0;34m"morning"[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      4 [0;31m}
[0m[0;32m      5 [0;31m[0mactions[0m [0;34m=[0m [0;34m[[0m[0;34m"politics"[0m[0;34m,[0m [0;34m"sports"[0m[0;34m,[0m [0;34m"music"[0m[0;34m,[0m [0;34m"food"[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      6 [0;31m[0;34m[0m[0m
[0m[0;32m----> 7 [0;31m[0mprint[0m[0;34m([0m[0mto_vw_example_format[0m[0;34m([0m[0mcontext[0m[0;34m,[0m [0mactions[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  r


[0;31m    [... skipped 1 hidden frame][0m

> [0;32m/Users/cjw/Development/code/vowpal-wabbit-notebook/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py[0m(3511)[0;36mrun_code[0;34m()[0m
[0;32m   3509 [0;31m            [0;32mfinally[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3510 [0;31m                [0;31m# Reset our crash handler in place[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 3511 [0;31m                [0msys[0m[0;34m.[0m[0mexcepthook[0m [0;34m=[0m [0mold_excepthook[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3512 [0;31m        [0;32mexcept[0m [0mSystemExit[0m [0;32mas[0m [0me[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3513 [0;31m            [0;32mif[0m [0mresult[0m [0;32mis[0m [0;32mnot[0m [0;32mNone[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  r


[0;31m    [... skipped 1 hidden frame][0m

[0;31m    [... skipped 1 hidden frame][0m

[0;31m    [... skipped 1 hidden frame][0m

StopIteration: <ExecutionResult object at 11d915390, execution_count=13 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 11d915d50, raw_cell="context = {
    "user": "Tom", 
    "time_of_day":.." store_history=True silent=False shell_futures=True cell_id=4fb6260a-6e51-4ba1-8b37-55852fbc0974> result=None>
> [0;32m/Users/cjw/Development/code/vowpal-wabbit-notebook/venv/lib/python3.11/site-packages/IPython/core/async_helpers.py[0m(129)[0;36m_pseudo_sync_runner[0;34m()[0m
[0;32m    127 [0;31m    """
[0m[0;32m    128 [0;31m    [0;32mtry[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 129 [0;31m        [0mcoro[0m[0;34m.[0m[0msend[0m[0;34m([0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    130 [0;31m    [0;32mexcept[0m [0mStopIteration[0m [0;32mas[0m [0mexc[0m[0;34m:[0m[0;34m

ipdb>  r


--Return--
<ExecutionRes...> result=None>
> [0;32m/Users/cjw/Development/code/vowpal-wabbit-notebook/venv/lib/python3.11/site-packages/IPython/core/async_helpers.py[0m(131)[0;36m_pseudo_sync_runner[0;34m()[0m
[0;32m    129 [0;31m        [0mcoro[0m[0;34m.[0m[0msend[0m[0;34m([0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    130 [0;31m    [0;32mexcept[0m [0mStopIteration[0m [0;32mas[0m [0mexc[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 131 [0;31m        [0;32mreturn[0m [0mexc[0m[0;34m.[0m[0mvalue[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    132 [0;31m    [0;32melse[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    133 [0;31m        [0;31m# TODO: do not raise but return an execution result with the right info.[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  r


> [0;32m/Users/cjw/Development/code/vowpal-wabbit-notebook/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py[0m(3073)[0;36m_run_cell[0;34m()[0m
[0;32m   3071 [0;31m            [0mself[0m[0;34m.[0m[0mshowtraceback[0m[0;34m([0m[0mrunning_compiled_code[0m[0;34m=[0m[0;32mTrue[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3072 [0;31m        [0;32mfinally[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 3073 [0;31m            [0;32mreturn[0m [0mresult[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3074 [0;31m[0;34m[0m[0m
[0m[0;32m   3075 [0;31m    def should_run_async(
[0m


ipdb>  q
