In [1]:
import pandas as pd
import numpy as np

# Load the raw mission log and normalise it into the frame used by the rest
# of the notebook. Written as a single chain so the cell is idempotent and
# never assigns into a filtered view.
#
# NOTE(review): the "> 7" activity filter runs on the raw `createdAt` values,
# i.e. before they are truncated to dates below. If the CSV stores full
# timestamps this counts distinct timestamps rather than distinct days --
# confirm which one is intended before changing it.
df = (
    pd.read_csv('./data/bq-results-20241223-153559-1734968168534.csv')
    # Drop the two excluded mission catalog entries.
    .loc[lambda d: ~d['missionCatalog_id'].isin([
        'f1455712-fa2b-4feb-b7f9-ab8ddfa29e8d',
        'd090d147-1ac9-4963-9c2b-1f5e663bad44',
    ])]
    .rename(columns={'sub': 'user', 'missionCatalog_id': 'missionID'})
    .loc[:, ['user', 'missionID', 'createdAt', 'kind', 'TARGET', 'performance']]
    # Keep only users with more than 7 distinct `createdAt` values.
    .groupby('user')
    .filter(lambda g: g['createdAt'].nunique() > 7)
    .assign(
        createdAt=lambda d: pd.to_datetime(d['createdAt']).dt.date,
        # Replace raw identifiers with dense 0-based integer codes.
        user=lambda d: d['user'].astype('category').cat.codes,
        missionID=lambda d: d['missionID'].astype('category').cat.codes,
        kind=lambda d: d['kind'].astype('category'),
    )
)

def reward(x):
    """Map a performance ratio to a reward.

    Args:
        x: Scalar performance value.

    Returns:
        ``x`` itself when ``x <= 1``; otherwise ``2 - x**2`` clipped below at
        0, so the reward reaches 0 once ``x**2 >= 2``. A NaN input fails the
        ``x <= 1`` comparison and ends up as 0.
    """
    if x <= 1:
        return x
    # Use the ** operator rather than np.pow: np.pow only exists in
    # NumPy >= 2.0, so the original raised AttributeError on NumPy 1.x.
    return max(0, 2 - x ** 2)

# Score every logged mission attempt from its performance value.
df['reward'] = df['performance'].apply(reward)

# Order chronologically (ties broken by user) and renumber the rows 0..N-1.
df = df.sort_values(['createdAt', 'user'], ignore_index=True)
df

Unnamed: 0,user,missionID,createdAt,kind,TARGET,performance,reward
0,4,26,2024-10-02,quiz,2,2.500000,0.0
1,4,28,2024-10-02,episode,3,2.333333,0.0
2,4,6,2024-10-02,activity,8,2.125000,0.0
3,4,14,2024-10-02,activity,10,1.700000,0.0
4,4,18,2024-10-02,exp,50,3.200000,0.0
...,...,...,...,...,...,...,...
186390,1883,12,2024-12-22,episode,5,1.600000,0.0
186391,1883,18,2024-12-22,exp,50,1.700000,0.0
186392,1884,0,2024-12-22,episode,2,2.500000,0.0
186393,1884,10,2024-12-22,quiz,1,126.000000,0.0


In [2]:
# Both columns hold 0-based category codes, so "largest code + 1" is the
# number of distinct users / missions.
n_users, n_missions = (df[col].max() + 1 for col in ('user', 'missionID'))

print(n_users, n_missions)

1898 30


In [3]:
from src.tree import TreeNode

# One row per distinct (missionID, kind, TARGET) combination.
missions = df[['missionID', 'kind', 'TARGET']].drop_duplicates()

# Build a two-level tree: root -> one node per mission kind -> one leaf per
# mission, each leaf carrying the mission row as a dict.
# The loop variables are deliberately NOT called `round`: at notebook top
# level that would rebind the builtin `round` for the rest of the kernel.
root = TreeNode('root')
for kind_name, kind_missions in missions.groupby('kind', observed=True):
    kind_node = TreeNode(kind_name)
    root.add_child(kind_node)
    for _, mission in kind_missions.iterrows():
        kind_node.add_child(TreeNode(mission.to_dict()))

print(root)

# Index the mission table by mission id so it can be looked up with `.loc`.
missions = missions.set_index('missionID')
print(missions)

root
	action
		{'missionID': 13, 'kind': 'action', 'TARGET': 1}
		{'missionID': 19, 'kind': 'action', 'TARGET': 2}
	activity
		{'missionID': 6, 'kind': 'activity', 'TARGET': 8}
		{'missionID': 14, 'kind': 'activity', 'TARGET': 10}
		{'missionID': 3, 'kind': 'activity', 'TARGET': 9}
		{'missionID': 17, 'kind': 'activity', 'TARGET': 2}
		{'missionID': 5, 'kind': 'activity', 'TARGET': 5}
		{'missionID': 24, 'kind': 'activity', 'TARGET': 4}
		{'missionID': 1, 'kind': 'activity', 'TARGET': 3}
		{'missionID': 27, 'kind': 'activity', 'TARGET': 1}
		{'missionID': 7, 'kind': 'activity', 'TARGET': 7}
		{'missionID': 22, 'kind': 'activity', 'TARGET': 6}
	episode
		{'missionID': 28, 'kind': 'episode', 'TARGET': 3}
		{'missionID': 0, 'kind': 'episode', 'TARGET': 2}
		{'missionID': 12, 'kind': 'episode', 'TARGET': 5}
		{'missionID': 16, 'kind': 'episode', 'TARGET': 6}
		{'missionID': 8, 'kind': 'episode', 'TARGET': 4}
		{'missionID': 2, 'kind': 'episode', 'TARGET': 1}
	exp
		{'missionID': 18, 'kind'

In [4]:
from src import policy as pol
from src.tree import TreeBandit
from tqdm.auto import tqdm
 
class MABRecommender:
    """Base recommender that simply holds a bandit policy.

    The subclasses defined alongside it override ``recommend`` to take a
    user and return a list of mission ids.
    """

    def __init__(self, policy: pol.Policy):
        """Store ``policy`` on the instance as ``self.policy``."""
        self.policy = policy

    def recommend(self):
        """Placeholder hook: does nothing and returns ``None``."""

class MABTreeRecommender(MABRecommender):
    """Recommender that selects missions through a ``TreeBandit``."""

    def __init__(self, policy: pol.Policy, root: TreeNode):
        """Keep the tree root and wrap it, with the policy, in a ``TreeBandit``."""
        super().__init__(policy)
        self.root = root
        self.tree_bandit = TreeBandit(root, policy)

    def recommend(self, user, n=(3, 1)):
        """Return the mission ids of the nodes the tree bandit selects for ``user``.

        ``n`` is passed through unchanged to ``TreeBandit.select``.
        NOTE(review): its per-level meaning is defined in ``src.tree`` -- confirm there.
        """
        mission_ids = []
        for selected in self.tree_bandit.select(n=n, user=user):
            # Leaf values are mission dicts; only the id is returned.
            mission_ids.append(selected.value['missionID'])
        return mission_ids

class MABPolicyRecommender(MABRecommender):
    """Recommender that ranks a flat mission table with a (non-tree) policy."""

    def __init__(self, policy: pol.Policy, missions: pd.DataFrame):
        """Keep the mission table; its index labels are what ``recommend`` returns."""
        super().__init__(policy)
        self.missions = missions

    def recommend(self, user, n=3):
        """Return at most ``n`` mission ids for ``user``, at most one per kind.

        The output of ``policy.select(user)`` is used as ``.loc`` labels on the
        mission table, so it fixes the row order. The first row of each kind is
        kept in that order and the result is truncated to ``n`` entries.
        """
        order = self.policy.select(user)
        best_per_kind = (
            self.missions.loc[order]
            .groupby('kind', observed=True, sort=False)
            .head(1)
        )
        return best_per_kind.index[:n].tolist()


def replay(df: pd.DataFrame, recommeder: MABRecommender):
    """Replay the logged interactions day by day against a recommender.

    For every ``createdAt`` group, the recommender is asked for missions for
    each user active that day. Logged rows whose (user, missionID) pair was
    recommended are appended to the history, and the policy is then updated
    with the full history so far.

    Args:
        df: Interaction log with at least ``user``, ``missionID`` and
            ``createdAt`` columns.
        recommeder: Recommender to evaluate. The misspelled parameter name is
            kept so existing keyword callers keep working.

    Returns:
        ``(history, recommendations)``: the matched logged rows over all days,
        and a frame with one (user, missionID) row per recommendation made.
    """
    rec_columns = ['user', 'missionID']
    history = pd.DataFrame()
    matched_per_day = []
    policy_recommendations = []

    recommeder.policy.init()
    # `day_df` instead of `round`, to avoid shadowing the builtin.
    for day, day_df in tqdm(df.groupby('createdAt'), leave=False):
        day_recs = []
        for u in tqdm(day_df['user'].unique(), leave=False):
            day_recs += [{'user': u, 'missionID': m} for m in recommeder.recommend(u)]

        policy_recommendations += day_recs

        if day_recs:
            actions = day_df.merge(pd.DataFrame(day_recs), on=rec_columns, how='inner')
        else:
            # No recommendations at all today: pd.DataFrame([]) has no columns,
            # so merging on them would raise KeyError. Nothing can match.
            actions = day_df.iloc[:0]

        # Rebuild the history from the per-day pieces instead of growing it
        # from a column-less placeholder frame.
        matched_per_day.append(actions)
        history = pd.concat(matched_per_day, ignore_index=True)
        recommeder.policy.update(train_df=history, day=day)

    return history, pd.DataFrame(policy_recommendations, columns=rec_columns)

In [5]:
def avg_coverage(df: pd.DataFrame):
    """Mean, over users, of the fraction of the catalogue each user appears with.

    For each user, counts the distinct ``missionID`` values in ``df``, divides
    by the notebook-level ``n_missions`` and averages the per-user fractions.
    """
    distinct_per_user = df.groupby('user')['missionID'].nunique()
    return (distinct_per_user / n_missions).mean()

def evaluate(recommender: MABRecommender):
    """Replay the notebook-level ``df`` through ``recommender`` and summarise it.

    Returns:
        ``(cumulated_reward, coverage)``: the running sum of the per-day reward
        totals of the replay history, and ``avg_coverage`` of the
        recommendations that were made.
    """
    replay_columns = ['user', 'missionID', 'createdAt', 'reward']
    history, policy_recommendations = replay(df[replay_columns], recommender)

    daily_reward = history.groupby('createdAt')['reward'].sum()
    return daily_reward.cumsum(), avg_coverage(policy_recommendations)

In [6]:
import torch
import numpy
from src import models as mod
from src import baseline as base

# Seed both RNG sources once, before any policy or model is constructed.
# NOTE(review): the seeds are set only here; whether repeated evaluations of
# the same policy object are independent depends on what `policy.init()`
# resets -- confirm in `src.policy` / `src.baseline`.
torch.manual_seed(0)
numpy.random.seed(0)

# Label -> recommender under evaluation. The labels become the keys of the
# exported result tables.
# NOTE(review): every tree recommender receives the same `root` object; if
# `TreeBandit` stores state on the nodes, the policies would share it -- confirm.
# NOTE(review): the 'MABTree 0.1-greedy MF' entry passes no epsilon, so the
# "0.1" in its label presumably relies on the class default -- verify.
policies = {
    'MABTree Random': MABTreeRecommender(pol.MABTReeRandom(), root),
    'MABTree UCB': MABTreeRecommender(pol.MABTreeUCB(), root),
    'MABTree 0.1-greedy': MABTreeRecommender(pol.MABTreeEpsilonGreedy(0.1), root),
    'MABTree 0.1-greedy MF': MABTreeRecommender(pol.MABTreeEpsilonGreedyML(model_class=mod.MF, num_users=n_users, num_missions=n_missions, embedding_dim=8), root),
    # Flat (non-tree) baselines operating on the mission table.
    '0.1-greedy': MABPolicyRecommender(base.EpsilonGreedy(num_arms=n_missions, epsilon=0.1), missions),
    'UCB': MABPolicyRecommender(base.UCB1(num_arms=n_missions), missions),
    'LinUCB': MABPolicyRecommender(base.LinUCB(num_arms=n_missions, context_manager=base.ContextManager(n_users, n_missions)), missions),
}

In [None]:
# Ten repetitions; each one maps a policy label to the
# (cumulated reward, coverage) pair returned by `evaluate`.
metrics = [
    {
        label: evaluate(rec)
        for label, rec in tqdm(policies.items(), leave=False)
    }
    for _run in tqdm(range(10))
]

In [8]:
# Cumulated reward dataframe
import os

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing (e.g. on a fresh checkout).
os.makedirs('./out', exist_ok=True)

# Rows: (policy label, index of that policy's reward series).
# Columns: one per repetition in `metrics`.
pd.concat([
    pd.concat({
        name: cum_reward for name, (cum_reward, _) in metric.items()
    }) for metric in metrics
], axis=1).to_csv('./out/cumulated_reward.csv', index=True)

Unnamed: 0,Unnamed: 1,0,1,0.1,1.1,0.2,1.2,0.3,1.3,0.4,1.4,0.5,1.5,0.6,1.6,0.7,1.7,0.8,1.8,0.9,1.9
MABTree Random,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
MABTree Random,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
MABTree UCB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
MABTree UCB,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
MABTree 0.1-greedy,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
MABTree 0.1-greedy,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
MABTree 0.1-greedy MF,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
MABTree 0.1-greedy MF,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0.1-greedy,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0.1-greedy,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [9]:
# Coverage dataframe
import os

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing (e.g. on a fresh checkout).
os.makedirs('./out', exist_ok=True)

# One row per repetition in `metrics`, one column per policy label.
pd.DataFrame([
    {name: coverage for name, (_, coverage) in metric.items()}
    for metric in metrics
]).to_csv('./out/coverage.csv', index=True)