In [None]:
import pandas as pd

# Load the two raw mission logs and stack them into one frame with a fresh
# 0..n-1 index.
df = pd.concat((
    pd.read_csv('./data/October_missions_full.csv'),
    pd.read_csv('./data/November_1stW_missions_full.csv')
), ignore_index=True)

# A mission is identified by its type together with its target, joined as
# "<type>_<target>".
df['mission'] = df['type'] + '_' + df['target'].astype(str)

# Keep only the columns used downstream.
df = df[['user', 'mission', 'createdAtT', 'type', 'target', 'performance']]
# createdAtT is parsed as milliseconds since the epoch and truncated to the
# calendar date, so all events of one day share the same value.
df['createdAtT'] = pd.to_datetime(df['createdAtT'], unit='ms').dt.date
# Drop users whose rows all fall on a single date.
df = df.groupby('user').filter(lambda x: len(x['createdAtT'].unique()) > 1)

# Replace raw identifiers with integer category codes. The codes are assigned
# after the filter above, so they only cover the users/missions that remain.
df['user'] = df['user'].astype('category').cat.codes
df['mission'] = df['mission'].astype('category')
df['missionID'] = df['mission'].cat.codes
df['type'] = df['type'].astype('category')

def reward(x):
    """Map a performance value to a reward.

    Values up to and including 1 are returned unchanged; anything above 1 is
    mapped to ``2 - x**2``, so the reward peaks at ``x == 1`` and decreases
    beyond it.
    """
    return x if x <= 1 else 2 - x**2

# Attach the reward derived from each row's performance, expose the event day
# under the name 'date', and order the log chronologically (ties broken by
# user) with a fresh 0..n-1 index.
df = (
    df.assign(reward=df['performance'].apply(reward))
      .rename(columns={'createdAtT': 'date'})
      .sort_values(by=['date', 'user'], ignore_index=True)
)
df

In [None]:
# Number of distinct users and distinct missions left after preprocessing;
# these are passed to the matrix-factorization models as their dimensions.
n_users, n_missions = (df[column].nunique() for column in ('user', 'mission'))

n_users, n_missions

In [None]:
from src.tree import TreeNode

# Build the two-level mission hierarchy handed to the tree bandit:
#   root -> one node per mission type -> one leaf per distinct mission.
missions = df[['missionID', 'type', 'target']].drop_duplicates()
root = TreeNode('root')
# The per-type frame is deliberately not named `round`: at notebook (module)
# scope that name would overwrite the built-in round() for every later cell.
for name, type_missions in missions.groupby('type', observed=True):
    node = TreeNode(name)
    root.add_child(node)
    for _, mission in type_missions.iterrows():
        # Each leaf stores its mission row as a dict with the keys
        # 'missionID', 'type' and 'target'.
        node.add_child(TreeNode(mission.to_dict()))

print(root)

In [None]:
from src import policy as pol
from src import models as mod
from src.tree import TreeBandit
from tqdm.auto import tqdm

def replay(df: pd.DataFrame, policy: pol.Policy, root: TreeNode):
    """Replay a logged interaction history day by day to evaluate ``policy``.

    For every date in ``df`` (ascending, as produced by ``groupby``) the tree
    bandit recommends missions for each user that appears on that date. Only
    logged rows whose ``(user, missionID)`` pair matches a recommendation are
    kept; they are appended to the running history, and the policy is updated
    on the full history before the next date is processed.

    Args:
        df: Interaction log. Must contain the columns ``'date'``, ``'user'``
            and ``'missionID'``; every column is carried into the result.
        policy: Policy driving the selection. ``init()`` and
            ``update(train_df=...)`` are called on it.
        root: Root of the mission tree given to ``TreeBandit``. Selected
            nodes must expose ``value['missionID']``.

    Returns:
        DataFrame with the matched rows of all dates, in replay order.
    """
    history = pd.DataFrame()
    tree_bandit = TreeBandit(root, policy)
    # `day_log` rather than `round`, so the built-in round() is not shadowed.
    for _, day_log in tqdm(df.groupby('date')):
        day_recs = []
        for u in tqdm(day_log['user'].unique(), leave=False):
            # NOTE(review): init() runs before every per-user selection;
            # presumably it only resets per-selection state and not what
            # update() learned — confirm against src.policy.
            policy.init()
            day_recs.extend(
                {'user': u, 'missionID': m.value['missionID']}
                for m in tree_bandit.select(n=(3, 1), user=u)
            )

        if day_recs:
            actions = day_log.merge(pd.DataFrame(day_recs), on=['user', 'missionID'], how='inner')
        else:
            # Without any recommendation pd.DataFrame([]) has no columns and
            # merging on the key columns would raise KeyError; an empty slice
            # of the day's log keeps the schema instead.
            actions = day_log.iloc[:0]
        history = pd.concat((history, actions), ignore_index=True)
        policy.update(train_df=history)

    return history

In [None]:
def evaluate(policy) -> pd.Series:
    """Replay the logged data with ``policy`` and return its cumulative reward.

    Reads the notebook-level ``df`` and ``root`` and delegates the simulation
    to ``replay``.

    Args:
        policy: Policy instance to evaluate; passed straight to ``replay``.

    Returns:
        Series indexed by date whose value is the running total of the reward
        collected up to and including that date.
    """
    history = replay(df[['user', 'missionID', 'date', 'reward', 'performance']], policy, root)
    # Sum the reward within each date, then accumulate across dates.
    return history.groupby('date')['reward'].sum().cumsum()

In [None]:
import torch
import numpy

# Seed the torch and numpy global RNGs once, before any model is created.
torch.manual_seed(0)
numpy.random.seed(0)

# Policies under comparison, keyed by the label used in the results and plot.
policies = {
    'Random':               pol.RandomBandit(),
    'Epsilon-Greedy':       pol.MeanEpsilonGreedy(epsilon=0.1),
    'MF':                   pol.ModelEpsilonGreedy(model=mod.MissionMatrixFactorization(n_users, n_missions, embedding_dim=8), epsilon=0),
    'Softmax-MF':           pol.SoftmaxBandit(model=mod.MissionMatrixFactorization(n_users, n_missions, embedding_dim=8)),
    'Epsilon-Greedy-MF':    pol.ModelEpsilonGreedy(model=mod.MissionMatrixFactorization(n_users, n_missions, embedding_dim=8), epsilon=0.1),
}

# Run the full replay 10 times. Rows are indexed by (policy name, date) and
# each repetition contributes one column.
# NOTE(review): the same policy instances are reused for all 10 repetitions;
# unless replay()/policy.init() fully resets them, later repetitions start
# from already-updated models — confirm this is intended.
results = pd.concat([
    pd.concat({name: evaluate(policy) for name, policy in tqdm(policies.items())})
    for _ in tqdm(range(10))
], axis=1)

# Persist first, then leave `results` as the last expression so the notebook
# actually displays it (a bare expression in the middle of a cell shows
# nothing).
results.to_csv('./results/missions.csv')
results

In [None]:
from matplotlib import pyplot as plt

# One curve per policy: mean cumulative reward across repetitions, with a
# shaded band of one standard deviation on either side.
plt.figure(figsize=(10, 5))
for name, group in results.groupby(level=0):
    dates = group.index.get_level_values(1)
    mean_reward = group.mean(axis=1)
    spread = group.std(axis=1)
    plt.plot(dates, mean_reward, label=name)
    plt.fill_between(dates, mean_reward - spread, mean_reward + spread, alpha=0.2)

plt.title('Cumulative reward over time')
plt.xlabel('Date')
plt.ylabel('Cumulative reward')
plt.xticks(rotation=45)
plt.legend()
plt.show()