In [1]:
from transformers import GPT2Tokenizer, GPT2Model
import torch
import numpy as np
import pandas as pd
import json
import re

Init Plugin
Init Graph Optimizer
Init Kernel


In [2]:
# Load the train/test bandit arrays from disk.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
# can execute arbitrary code -- only use this on trusted files.
X_train = np.load('data/bandit_train.npy', allow_pickle=True)
X_test = np.load('data/bandit_test.npy', allow_pickle=True)

In [4]:
# Inspect the dimensions of the training array (recorded output: (100, 100, 3)).
X_train.shape

(100, 100, 3)

In [7]:
# Translate integer action ids 0..9 into the letters 'a'..'j' that are used
# when episodes are rendered as text.
action_mapper = dict(enumerate("abcdefghij"))
print(action_mapper)

{0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e', 5: 'f', 6: 'g', 7: 'h', 8: 'i', 9: 'j'}


In [11]:
# Marker that flatten() and flatten_ar() put at the beginning of every
# rendered episode.
ep_start_token = "Start: "

In [31]:
# Natural-language instruction that flatten_prompt() prepends to a history.
# Spelling fixed ("chose" -> "choose") so the text written into the datasets
# is grammatical.
prompt = "The goal is to choose the action which maximizes the reward."

In [14]:
def flatten(ep):
    """Render one episode as a compact ``"<action> <reward>"`` text sequence.

    Args:
        ep: Iterable of steps; each step is unpacked into exactly two values,
            ``(action, reward)``. ``action`` must be a key of
            ``action_mapper``.

    Returns:
        ``ep_start_token`` followed by the space-separated steps, with any
        trailing whitespace removed (e.g. ``"Start: a 1 c 0"`` for integer
        rewards).

    Raises:
        KeyError: If an action is not present in ``action_mapper``.
    """
    steps = [f"{action_mapper[action]} {reward}" for action, reward in ep]
    # rstrip also removes the start token's trailing space when `ep` is empty.
    return (ep_start_token + " ".join(steps)).rstrip()

In [25]:
def flatten_ar(ep):
    """Render one episode with explicit ``action``/``reward`` keywords.

    Args:
        ep: Iterable of steps; each step is unpacked into exactly two values,
            ``(action, reward)``. ``action`` must be a key of
            ``action_mapper``.

    Returns:
        ``ep_start_token`` followed by space-separated
        ``"action <letter> reward <reward>"`` fragments, with trailing
        whitespace removed.

    Raises:
        KeyError: If an action is not present in ``action_mapper``.
    """
    steps = [
        f"action {action_mapper[action]} reward {reward}"
        for action, reward in ep
    ]
    # rstrip also removes the start token's trailing space when `ep` is empty.
    return (ep_start_token + " ".join(steps)).rstrip()

In [16]:
def flatten_normal(ep1, ep2):
    """Join two episodes, each rendered by ``flatten``, with a single space.

    Args:
        ep1: First episode (rendered first).
        ep2: Second episode.

    Returns:
        The two compact episode strings separated by one space.
    """
    return " ".join((flatten(ep1), flatten(ep2)))

In [29]:
def flatten_explicit(ep1, ep2):
    """Join two episodes, each rendered by ``flatten_ar``, with a single space.

    Args:
        ep1: First episode (rendered first).
        ep2: Second episode.

    Returns:
        The two keyword-annotated episode strings separated by one space.
    """
    first = flatten_ar(ep1)
    second = flatten_ar(ep2)
    return f"{first} {second}"

In [33]:
def flatten_prompt(ep1, ep2):
    """Render two episodes with ``flatten_ar`` and prefix the task prompt.

    The prompt and the first episode are separated by exactly one space.
    Previously they were concatenated directly, which fused the prompt's
    final word with the start token (``"...reward.Start: ..."``).

    Args:
        ep1: First episode (rendered first).
        ep2: Second episode.

    Returns:
        ``"<prompt> <episode 1> <episode 2>"`` as a single string.
    """
    # rstrip so the separator stays a single space even if `prompt` is later
    # given its own trailing whitespace.
    return " ".join((prompt.rstrip(), flatten_ar(ep1), flatten_ar(ep2)))

In [34]:
def preprocess(X, data_size=300, flattener=flatten_normal):
    """Build a DataFrame of text histories from random pairs of episodes.

    ``data_size`` distinct ordered pairs ``(i1, i2)`` of episode indices are
    drawn uniformly with ``np.random.randint``; an episode may be paired with
    itself. Each pair is turned into one string by ``flattener``.

    Args:
        X: Array indexed as ``X[episode][step]``. If its last axis has length
            3, the first entry of every step is dropped so two values per
            step remain (the flatteners in this notebook unpack them as
            ``(action, reward)``).
            # NOTE(review): the meaning of the dropped first column is not
            # visible here -- confirm against the data generator.
        data_size: Number of distinct episode pairs (rows) to produce.
        flattener: Callable ``(ep1, ep2) -> str``.

    Returns:
        ``pandas.DataFrame`` with a single column ``'history'`` holding one
        string per sampled pair, in sampling order.

    Raises:
        ValueError: If ``data_size`` exceeds the number of distinct ordered
            pairs available (previously this looped forever).
    """
    if X.shape[-1] == 3:
        X = X[:, :, 1:]

    # Sample over the episodes actually present instead of a hard-coded 100.
    n_episodes = len(X)
    if data_size > n_episodes ** 2:
        raise ValueError(
            f"data_size={data_size} exceeds the {n_episodes ** 2} distinct "
            f"episode pairs available from {n_episodes} episodes"
        )

    indexes = []
    seen = set()  # O(1) duplicate check; `indexes` preserves sampling order
    while len(indexes) < data_size:
        pair = tuple(np.random.randint(0, n_episodes, size=2).tolist())
        if pair not in seen:
            seen.add(pair)
            indexes.append(pair)

    data = [flattener(X[i1], X[i2]) for i1, i2 in indexes]
    return pd.DataFrame({'history': data})


In [35]:
# Row counts of the datasets to export; each value is passed to preprocess()
# as `data_size`.
data_sizes = [25, 100, 500]

In [40]:
# Write one CSV per (dataset size, text format, split) combination.
for size in data_sizes:
    # The compact format is always exported; the two verbose formats are only
    # exported for sizes below 200.
    flatteners = [(flatten_normal, 'normal')]
    if size < 200:
        flatteners.extend([
            (flatten_explicit, 'explicit'),
            (flatten_prompt, 'prompt'),
        ])

    for flattener, fl_name in flatteners:
        # Train is processed before test, so the order of random draws is
        # fixed for a given RNG state.
        for typ, X in (('train', X_train), ('test', X_test)):
            fname = f'data/{typ}_{fl_name}_{size}.csv'
            df = preprocess(X, data_size=size, flattener=flattener)
            df.to_csv(fname, index=False)