In [1]:
import pandas as pd

# Load both exports and stack them into a single frame with a fresh 0..n-1 index.
df = pd.concat((
    pd.read_csv('./data/October_missions_full.csv'),
    pd.read_csv('./data/November_1stW_missions_full.csv')
), ignore_index=True)

# A mission is identified by its type together with its target, e.g. 'quiz_3'.
df['mission'] = df['type'] + '_' + df['target'].astype(str)

# Explicit .copy(): the column subset is modified right below, and assigning into
# a bare selection triggers SettingWithCopyWarning on pandas without copy-on-write.
df = df[['user', 'mission', 'createdAtT', 'type', 'target', 'performance']].copy()
# 'createdAtT' is parsed as epoch milliseconds; only the calendar date is kept.
df['createdAtT'] = pd.to_datetime(df['createdAtT'], unit='ms').dt.date
# Keep only users that have rows on more than one distinct date.
df = df.groupby('user').filter(lambda x: len(x['createdAtT'].unique()) > 1)

# Replace raw identifiers with compact integer codes (computed after the filter,
# so the codes only cover users/missions that are still present).
df['user'] = df['user'].astype('category').cat.codes
df['mission'] = df['mission'].astype('category')
df['missionID'] = df['mission'].cat.codes
df['type'] = df['type'].astype('category')
# Binary reward: 1 when performance lies in [0.7, 1.05] (both bounds inclusive), else 0.
# Series.between is the vectorised equivalent of the per-element chained comparison.
df['performance'] = df['performance'].between(0.7, 1.05).astype(int)
df = df.rename(columns={'createdAtT': 'date', 'performance': 'reward'})

# Chronological order, ties broken by user code; the index is renumbered.
df = df.sort_values(by=['date', 'user'], ignore_index=True)
df

Unnamed: 0,user,mission,date,type,target,reward,missionID
0,0,episode_2,2024-10-01,episode,2,0,13
1,0,action_2,2024-10-01,action,2,0,1
2,0,episode_1,2024-10-01,episode,1,1,12
3,0,quiz_1,2024-10-01,quiz,1,1,24
4,0,quiz_4,2024-10-01,quiz,4,1,27
...,...,...,...,...,...,...,...
93879,2960,activity_7,2024-11-07,activity,7,0,9
93880,2960,quiz_1,2024-11-07,quiz,1,0,24
93881,2968,mobility_3,2024-11-07,mobility,3,0,22
93882,2968,quiz_3,2024-11-07,quiz,3,0,26


In [2]:
# Number of distinct user codes and of distinct missions in the prepared frame.
n_users, n_missions = (df[column].nunique() for column in ('user', 'mission'))

n_users, n_missions

(2969, 31)

In [3]:
from src.tree import TreeNode

# One row per distinct (missionID, type, target) combination.
missions = df[['missionID', 'type', 'target']].drop_duplicates()

# Two-level hierarchy: root -> one node per mission type -> one leaf per mission.
# Each leaf stores the mission row as a dict with keys 'missionID', 'type', 'target'.
root = TreeNode('root')
for type_name, type_missions in missions.groupby('type', observed=True):
    type_node = TreeNode(type_name)
    root.add_child(type_node)
    for _, mission_row in type_missions.iterrows():
        leaf = TreeNode(mission_row.to_dict())
        type_node.add_child(leaf)

print(root)

root
	action
		{'missionID': 1, 'type': 'action', 'target': 2}
		{'missionID': 0, 'type': 'action', 'target': 1}
	activity
		{'missionID': 11, 'type': 'activity', 'target': 9}
		{'missionID': 10, 'type': 'activity', 'target': 8}
		{'missionID': 7, 'type': 'activity', 'target': 5}
		{'missionID': 2, 'type': 'activity', 'target': 1}
		{'missionID': 3, 'type': 'activity', 'target': 10}
		{'missionID': 9, 'type': 'activity', 'target': 7}
		{'missionID': 5, 'type': 'activity', 'target': 3}
		{'missionID': 8, 'type': 'activity', 'target': 6}
		{'missionID': 6, 'type': 'activity', 'target': 4}
		{'missionID': 4, 'type': 'activity', 'target': 2}
	episode
		{'missionID': 13, 'type': 'episode', 'target': 2}
		{'missionID': 12, 'type': 'episode', 'target': 1}
		{'missionID': 16, 'type': 'episode', 'target': 5}
		{'missionID': 17, 'type': 'episode', 'target': 6}
		{'missionID': 15, 'type': 'episode', 'target': 4}
		{'missionID': 14, 'type': 'episode', 'target': 3}
	exp
		{'missionID': 18, 'type': 

In [4]:
from src.bandit.policy import Policy
from src.tree_bandit import TreeBandit
from tqdm.auto import tqdm

def replay_score(history: pd.DataFrame, df: pd.DataFrame, recs: pd.DataFrame):
    """Match logged interactions against recommendations and extend the history.

    Rows of ``df`` whose ``(user, missionID)`` pair also occurs in ``recs`` are
    kept (inner join) and appended to ``history``.

    Args:
        history: Matched interactions accumulated so far; may be an empty frame.
        df: Logged interactions; must contain 'user' and 'missionID' columns.
        recs: Recommended pairs with 'user' and 'missionID' columns. A frame
            without rows and without those columns (e.g. ``pd.DataFrame()``)
            is treated as "nothing was recommended".

    Returns:
        A tuple ``(history, hit_rate)``: the extended history with a renumbered
        index, and ``len(matched rows) / len(df)``. The rate is 0.0 when ``df``
        has no rows. If ``recs`` lists the same pair more than once, the join
        repeats the matching rows, so the rate is not capped at 1.
    """
    keys = ['user', 'missionID']
    if recs.empty and not set(keys).issubset(recs.columns):
        # A column-less empty frame has no keys to merge on (merge would raise
        # KeyError); the join result is simply "no rows", with df's columns.
        actions = df.iloc[:0]
    else:
        actions = df.merge(recs, on=keys, how='inner')
    history = pd.concat((history, actions), ignore_index=True)
    # Guard the ratio: an empty batch of logged interactions has no hits.
    hit_rate = len(actions) / len(df) if len(df) else 0.0
    return history, hit_rate


def replay(df: pd.DataFrame, policy: Policy, root: TreeNode):
    """Replay the logged interactions day by day against a tree bandit.

    For every date group of ``df`` the bandit is asked for recommendations for
    each user that appears on that date. The logged rows that coincide with a
    recommendation are appended to the history, and the policy is then updated
    with the full history collected so far.

    Args:
        df: Logged interactions; the code reads the 'date', 'user' and
            'missionID' columns. Any further columns are carried into the history.
        policy: Policy handed to ``TreeBandit`` and updated after each date
            via ``policy.update(train_df=history)``.
        root: Root of the mission tree; the nodes returned by
            ``tree_bandit.select`` are expected to expose ``value['missionID']``.

    Returns:
        The frame of matched interactions accumulated over all dates.
    """
    history = pd.DataFrame()
    tree_bandit = TreeBandit(root, policy)
    for _, group in tqdm(df.groupby('date')):
        # Collect plain records and build the frame once per day; growing a
        # DataFrame with pd.concat inside the user loop is quadratic.
        day_records = []
        for u in tqdm(group['user'].unique(), leave=False):
            # NOTE(review): n=(3, 1) is passed through to TreeBandit.select unchanged;
            # presumably a per-tree-level selection count - confirm against TreeBandit.
            day_records.extend(
                {'user': u, 'missionID': m.value['missionID']}
                for m in tree_bandit.select(n=(3, 1), user=u)
            )
        # Explicit columns keep the merge keys present even when nothing was recommended.
        day_recs = pd.DataFrame(day_records, columns=['user', 'missionID'])

        # The per-day hit rate returned by replay_score is not used here.
        history, _ = replay_score(history, group, day_recs)
        policy.update(train_df=history)

    return history

In [None]:
def evaluate(policy):
    """Replay the global ``df`` with ``policy`` and summarise the reward per date.

    Returns a frame indexed by date with two columns under 'reward':
    'sum' holds the running (cumulative) total of reward over the dates,
    'mean' holds the average reward of the matched rows on that date.
    """
    replay_columns = ['user', 'missionID', 'date', 'reward']
    matched = replay(df[replay_columns], policy, root)

    per_date = matched.groupby('date').agg({'reward': ('sum', 'mean')})

    # Turn the per-date totals into a cumulative curve; the mean stays per date.
    total_column = ('reward', 'sum')
    per_date[total_column] = per_date[total_column].cumsum()

    return per_date

In [None]:
from src.bandit.softmax import SoftmaxBandit
from src.bandit.model_greedy import EpsilonGreedyWithModel
from src.models.mf import MissionMatrixFactorization

def _new_mf_model():
    """Build a fresh MissionMatrixFactorization and move it to 'cuda'.

    Each policy gets its own instance so that no model is shared between policies.
    The third constructor argument (8) is presumably the latent dimension - TODO confirm.
    NOTE(review): the device is hard-coded to 'cuda', so this needs a GPU to run.
    """
    return MissionMatrixFactorization(n_users, n_missions, 8).to('cuda')


# (label, policy) pairs to compare.
# NOTE(review): under the usual epsilon-greedy convention (epsilon = probability of a
# random pick) epsilon=0 is the purely model-driven policy and epsilon=1 the purely
# random one, which is the reverse of the 'MF-Random' / 'MF-only' labels below -
# confirm against EpsilonGreedyWithModel before relying on the labels.
policies = [
    ('MF-Softmax', SoftmaxBandit(model=_new_mf_model())),
    ('MF-Random', EpsilonGreedyWithModel(model=_new_mf_model(), epsilon=0)),
    ('MF-only', EpsilonGreedyWithModel(model=_new_mf_model(), epsilon=1)),
]