In [1]:
import pandas as pd

# Mission-catalog entries that are excluded from the analysis before any other processing.
EXCLUDED_CATALOG_IDS = [
    'f1455712-fa2b-4feb-b7f9-ab8ddfa29e8d',
    'd090d147-1ac9-4963-9c2b-1f5e663bad44',
]

# Build the interaction log in one chain. Every step returns a new frame, so nothing is
# mutated in place on a filtered slice (no SettingWithCopyWarning) and the cell can be
# re-run safely.
# NOTE(review): the "> 7" activity filter counts distinct *raw* `createdAt` values, i.e.
# before they are truncated to calendar dates below. If the CSV stores full timestamps this
# is not a count of active days - confirm which one is intended.
df = (
    pd.read_csv('./data/October_November_missions_full.csv')
    .loc[lambda d: ~d['missionCatalog_id'].isin(EXCLUDED_CATALOG_IDS)]
    .rename(columns={'sub': 'user'})
    # A mission is identified by its kind together with its target, e.g. "quiz_2".
    .assign(mission=lambda d: d['kind'] + '_' + d['TARGET'].astype(str))
    .loc[:, ['user', 'mission', 'createdAt', 'kind', 'TARGET', 'performance']]
    .groupby('user')
    .filter(lambda g: g['createdAt'].nunique() > 7)
    # `assign` evaluates its keyword arguments in order, so `missionID` below already sees
    # the categorical `mission` column created just before it.
    .assign(
        createdAt=lambda d: pd.to_datetime(d['createdAt']).dt.date,
        user=lambda d: d['user'].astype('category').cat.codes,  # dense integer user ids
        mission=lambda d: d['mission'].astype('category'),
        missionID=lambda d: d['mission'].cat.codes,  # dense integer mission ids
        kind=lambda d: d['kind'].astype('category'),
    )
)

def reward(x):
    """Map a performance value to a reward.

    Values up to and including 1 are returned unchanged. Above 1 the reward is
    ``2 - x**2`` and is never allowed to drop below 0, so any value with
    ``x**2 >= 2`` yields 0.
    """
    return x if x <= 1 else max(0, 2 - x**2)

# Score every logged mission with `reward`, then order the log by day and, within a day,
# by user; the index is renumbered from 0 after sorting.
df = (
    df.assign(reward=df['performance'].apply(reward))
      .sort_values(by=['createdAt', 'user'], ignore_index=True)
)
df

Unnamed: 0,user,mission,createdAt,kind,TARGET,performance,missionID,reward
0,4,quiz_2,2024-10-02,quiz,2,2.500000,25,0.0
1,4,episode_3,2024-10-02,episode,3,2.333333,14,0.0
2,4,activity_8,2024-10-02,activity,8,2.125000,10,0.0
3,4,activity_10,2024-10-02,activity,10,1.700000,3,0.0
4,4,exp_50,2024-10-02,exp,50,3.200000,19,0.0
...,...,...,...,...,...,...,...,...
127594,1564,activity_8,2024-11-30,activity,8,0.000000,10,0.0
127595,1564,quiz_3,2024-11-30,quiz,3,0.000000,26,0.0
127596,1566,activity_3,2024-11-30,activity,3,0.000000,5,0.0
127597,1566,episode_5,2024-11-30,episode,5,0.000000,16,0.0


In [None]:
# `user` and `missionID` are zero-based category codes, so the largest code plus one is the
# number of distinct users / missions.
# The maxima are converted to built-in ints before adding 1: category codes are stored in a
# narrow NumPy integer dtype, and adding to such a scalar can wrap around under NumPy 2
# promotion rules. Plain ints are also the safer thing to hand to the model constructors.
n_users = int(df['user'].max()) + 1
n_missions = int(df['missionID'].max()) + 1

print(n_users, n_missions)

# MABTree

In [None]:
from src.tree import TreeNode

# One row per distinct mission (id, kind, target).
missions = df[['missionID', 'kind', 'TARGET']].drop_duplicates()

# Two-level tree: root -> one node per mission kind -> one leaf per mission of that kind.
# The loop variables deliberately avoid the name `round`: at cell level it would rebind the
# builtin `round` for the rest of the kernel session.
root = TreeNode('root')
for kind_name, kind_missions in missions.groupby('kind', observed=True):
    kind_node = TreeNode(kind_name)
    root.add_child(kind_node)
    for _, mission in kind_missions.iterrows():
        # Each leaf carries the mission row as a dict with keys missionID, kind and TARGET.
        kind_node.add_child(TreeNode(mission.to_dict()))

print(root)

In [3]:
from src import policy as pol
from src import models as mod
from src.tree import TreeBandit
from tqdm.auto import tqdm

def replay(df: pd.DataFrame, policy: pol.Policy, root: TreeNode):
    """Replay the logged interactions day by day against a tree bandit.

    For every ``createdAt`` group and every user seen in it, ``policy.init()`` is called
    and the bandit built from ``root`` and ``policy`` is asked for a selection. Only the
    logged rows whose ``(user, missionID)`` pair was selected are kept (inner merge). They
    are appended to the running history, which is then passed to ``policy.update``.

    NOTE(review): another ``replay`` is defined further down in this notebook and replaces
    this one once its cell has run.

    Args:
        df: logged interactions; must contain ``createdAt``, ``user`` and ``missionID``.
        policy: policy object; ``init()`` and ``update(train_df=..., day=...)`` are called.
        root: root node of the mission tree handed to ``TreeBandit``.

    Returns:
        DataFrame with all retained logged rows, in replay order.
    """
    history = pd.DataFrame()
    tree_bandit = TreeBandit(root, policy)

    for day, day_logs in tqdm(df.groupby('createdAt'), leave=False):
        day_recs = []
        for user in tqdm(day_logs['user'].unique(), leave=False):
            policy.init()
            # NOTE(review): n=(3, 1) presumably sets how many nodes are picked per tree
            # level - confirm against TreeBandit.select.
            picked = tree_bandit.select(n=(3, 1), user=user)
            day_recs.extend(
                {'user': user, 'missionID': node.value['missionID']} for node in picked
            )

        # Keep only the logged rows that match one of today's recommendations.
        recs_df = pd.DataFrame(day_recs)
        matched = day_logs.merge(recs_df, on=['user', 'missionID'], how='inner')
        history = pd.concat((history, matched), ignore_index=True)
        policy.update(train_df=history, day=day)

    return history


def evaluate(policy) -> pd.Series:
    """Replay the logged missions under ``policy`` and return the cumulative reward per day.

    Reads the notebook-level ``df`` (logged interactions) and ``root`` (mission tree) and
    delegates the replay itself to ``replay``.

    NOTE(review): ``evaluate`` and ``replay`` are defined a second time further down in
    this notebook; the name lookup happens at call time, so run the cells in order.

    Args:
        policy: policy object forwarded to ``replay``.

    Returns:
        Series indexed by ``createdAt``: the rewards of the replayed rows summed per day
        and then accumulated over days.
    """
    logged = df[['user', 'missionID', 'createdAt', 'reward']]
    history = replay(logged, policy, root)
    return history.groupby('createdAt')['reward'].sum().cumsum()

In [None]:
import torch
import numpy

from pathlib import Path

# Seed both RNG sources once, before the models are created.
torch.manual_seed(0)
numpy.random.seed(0)

policies = {
    '\u03B5-Greedy-MF':      pol.ModelEpsilonGreedy(model=mod.MF(n_users, n_missions, embedding_dim=8)),
    '\u03B5-Greedy-NeuMF':   pol.ModelEpsilonGreedy(model=mod.NeuMF(n_users, n_missions, embedding_dim=8, hidden_dim=8, dropout=0.1)),
    '\u03B5-Greedy-Mean':    pol.MeanEpsilonGreedy(),
    'Random':                pol.RandomBandit(),
}

# Five passes over the log; each pass contributes one column, and the rows are indexed by
# (policy name, day).
# NOTE(review): `policies` is built once, so all five passes use the same policy/model
# objects. Whether learned state carries over between passes depends on what
# `policy.init()` / `policy.update()` do - confirm that this is intended.
results = pd.concat([
    pd.concat({name: evaluate(policy) for name, policy in tqdm(policies.items(), leave=False)})
    for _ in tqdm(range(5))
], axis=1)

# Create the output directory first so a long replay is not lost to a missing folder.
results_path = './out/replay_results.csv'
Path(results_path).parent.mkdir(parents=True, exist_ok=True)
results.to_csv(results_path, index=True)

# Keep the frame as the last expression so the cell actually displays it.
results

# Baseline

In [None]:
# Lookup table of the distinct missions, keyed by `missionID`, with their kind and target.
missions = (
    df[['missionID', 'kind', 'TARGET']]
    .drop_duplicates()
    .set_index('missionID')
)
missions

In [None]:
from src import contextual as ctx
from tqdm.auto import tqdm

def recommend(rank: list, missions: pd.DataFrame, n=1) -> list:
    """Pick up to ``n`` missions from a ranking, using at most one mission per kind.

    Args:
        rank: index labels of ``missions`` in order of preference, best first.
        missions: mission table indexed by mission id, with a ``kind`` column.
        n: maximum number of mission ids to return.

    Returns:
        The best-ranked mission id of each kind, in ranking order, cut to ``n`` entries.
    """
    # Put the mission table into ranking order.
    ordered = missions.loc[rank]
    # The first row of every kind is that kind's best-ranked mission; row order is kept.
    best_per_kind = ordered.groupby('kind', observed=True, sort=False).head(1)
    return best_per_kind.index[:n].tolist()

def replay(df: pd.DataFrame, policy: ctx.LinUCB):
    """Replay the logged interactions day by day against the baseline policy.

    For every ``createdAt`` group and every user seen in it, the policy's ranking is
    reduced to at most three mission ids with ``recommend`` (using the notebook-level
    ``missions`` table). Only the logged rows whose ``(user, missionID)`` pair was
    recommended are kept (inner merge). They are appended to the running history, which
    is then passed to ``policy.update``.

    NOTE(review): this redefines the ``replay`` declared earlier in the notebook.

    Args:
        df: logged interactions; must contain ``createdAt``, ``user`` and ``missionID``.
        policy: policy object; ``select(user)`` and ``update(train_df=..., day=...)`` are
            called.

    Returns:
        DataFrame with all retained logged rows, in replay order.
    """
    history = pd.DataFrame()

    for day, day_logs in tqdm(df.groupby('createdAt'), leave=False):
        day_recs = []
        for user in tqdm(day_logs['user'].unique(), leave=False):
            ranking = policy.select(user)
            top_ids = recommend(ranking, missions, n=3)
            day_recs.extend({'user': user, 'missionID': mission_id} for mission_id in top_ids)

        # Keep only the logged rows that match one of today's recommendations.
        recs_df = pd.DataFrame(day_recs)
        matched = day_logs.merge(recs_df, on=['user', 'missionID'], how='inner')
        history = pd.concat((history, matched), ignore_index=True)
        policy.update(train_df=history, day=day)

    return history


def evaluate(policy) -> pd.Series:
    """Replay the logged missions under ``policy`` and return the cumulative reward per day.

    Reads the notebook-level ``df`` (logged interactions) and delegates the replay itself
    to ``replay``.

    NOTE(review): this redefines the ``evaluate`` declared earlier in the notebook.

    Args:
        policy: policy object forwarded to ``replay``.

    Returns:
        Series indexed by ``createdAt``: the rewards of the replayed rows summed per day
        and then accumulated over days.
    """
    logged = df[['user', 'missionID', 'createdAt', 'reward']]
    history = replay(logged, policy)
    return history.groupby('createdAt')['reward'].sum().cumsum()

In [None]:
import torch
import numpy

from pathlib import Path

# Seed both RNG sources once, before the policy is created.
torch.manual_seed(0)
numpy.random.seed(0)

policies = {
    'LinUCB': ctx.LinUCB(n_missions, context_manager=ctx.ContextManager(n_users, n_missions)),
}

# Five passes over the log; each pass contributes one column, and the rows are indexed by
# (policy name, day).
# NOTE(review): `policies` is built once, so all five passes use the same policy object.
# Whether learned state carries over between passes depends on what `policy.update()`
# does - confirm that this is intended.
results = pd.concat([
    pd.concat({name: evaluate(policy) for name, policy in tqdm(policies.items(), leave=False)})
    for _ in tqdm(range(5))
], axis=1)

# Create the output directory first so a long replay is not lost to a missing folder.
results_path = './out/replay_results_baseline.csv'
Path(results_path).parent.mkdir(parents=True, exist_ok=True)
results.to_csv(results_path, index=True)

# Keep the frame as the last expression so the cell actually displays it.
results