In [None]:
import pandas as pd

# Users must have activity on strictly more than this many distinct dates to be kept.
MIN_DISTINCT_DAYS = 2
# A performance value inside this closed interval is encoded as 1, anything else as 0.
PERFORMANCE_OK_RANGE = (0.7, 1.05)

# Load the raw mission log. The tuple handed to pd.concat holds a single file
# today; more exports can be appended to it without touching the rest of the cell.
df = pd.concat((
    pd.read_csv('./data/October_missions_full.csv'),
), ignore_index=True)

# A mission is identified by its type combined with its target value.
df['mission'] = df['type'] + '_' + df['target'].astype(str)

# Keep only the columns used below. The explicit copy() makes the frame independent
# of the loaded one, so the column assignments that follow do not raise
# SettingWithCopyWarning.
df = df[['user', 'mission', 'createdAtT', 'type', 'target', 'performance']].copy()

# 'createdAtT' is parsed as milliseconds since the epoch and reduced to a calendar date.
df['createdAtT'] = pd.to_datetime(df['createdAtT'], unit='ms').dt.date

# Drop users whose rows span too few distinct dates. dropna=False keeps the count
# identical to len(unique()); copy() again detaches the filtered frame.
df = df.groupby('user').filter(
    lambda x: x['createdAtT'].nunique(dropna=False) > MIN_DISTINCT_DAYS
).copy()

# Replace raw identifiers with compact integer codes / categorical dtypes.
df['user'] = df['user'].astype('category').cat.codes
df['mission'] = df['mission'].astype('category')
df['missionID'] = df['mission'].cat.codes
df['type'] = df['type'].astype('category')

# Binarize performance: 1 inside PERFORMANCE_OK_RANGE (both ends inclusive), else 0.
# Series.between is the vectorized equivalent of the chained comparison and, like it,
# yields False for missing values.
df['performance'] = df['performance'].between(*PERFORMANCE_OK_RANGE).astype(int)

# Chronological order, ties broken by user code; reassign instead of sorting in place.
df = df.sort_values(by=['createdAtT', 'user'], ignore_index=True)
df

In [None]:
from src.tree import TreeNode

# Build a two-level tree under a synthetic root: one child node per mission type,
# and under each type node one leaf per distinct (missionID, type, target) row.
# Each leaf's value is that row converted to a dict.
missions = df.loc[:, ['missionID', 'type', 'target']].drop_duplicates()
root = TreeNode('root')

# observed=True: only iterate over type categories that actually occur in `missions`.
for type_name, type_rows in missions.groupby('type', observed=True):
    type_node = TreeNode(type_name)
    root.add_child(type_node)
    for _, row in type_rows.iterrows():
        leaf = TreeNode(row.to_dict())
        type_node.add_child(leaf)

print(root)

In [None]:
import torch
import numpy as np
import torch.nn as nn
from scipy.special import softmax
from src.policy import Policy
from src import mf

class EpsilonGreedyWithModel(Policy):
    """Epsilon-greedy selection policy whose node scores come from a model.

    A leaf node is scored by calling the model with the user and the leaf's
    ``missionID``; an inner node is scored by the best score among its children.
    Leaf scores are cached until ``init`` is called again.
    """

    def __init__(self, model: nn.Module, epsilon=0.1):
        """
        Args:
            model: Module called as ``model(user_ids, mission_ids)``; its single
                output value is used as the score of a leaf.
            epsilon: Probability, for each individual pick, of choosing a random
                node instead of the highest-scoring one.
        """
        super().__init__()
        self.model = model
        self.epsilon = epsilon
        # Cache of leaf scores for the current round, keyed by (user, node).
        self.__round_stats = {}

    def init(self, **kwargs):
        """Reset the round stats"""
        self.__round_stats = {}

    def select(self, nodes, n, **kwargs):
        """Pick up to ``n`` distinct nodes out of ``nodes`` for one user.

        Args:
            nodes: Iterable of candidate tree nodes.
            n: Number of nodes to pick. If fewer candidates are available,
                all of them are returned instead of raising.
            **kwargs: Must contain ``user``, the user id passed to the model.

        Returns:
            A set with the selected nodes.
        """
        user = kwargs['user']
        selectable_nodes = {node: self.__retrieve_node_data(user, node) for node in nodes}
        selected_nodes = set()

        # Clamp the number of picks: once the candidates are exhausted, max() and
        # np.random.choice would both raise on the empty collection.
        for _ in range(min(n, len(selectable_nodes))):
            if np.random.rand() < self.epsilon:
                # Explore: uniform draw among the nodes not picked yet.
                node = np.random.choice(list(selectable_nodes.keys()))
            else:
                # Exploit: node with the highest score among the remaining ones.
                node = max(selectable_nodes, key=selectable_nodes.get)
            selected_nodes.add(node)
            selectable_nodes.pop(node)

        return selected_nodes

    def __retrieve_node_data(self, user, node: TreeNode):
        """Return the score of ``node`` for ``user``.

        Inner nodes take the maximum score over their children (recursively).
        Leaf scores are computed once per (user, node) pair and then served from
        the round cache.
        """
        if not node.is_leaf:
            return max(self.__retrieve_node_data(user, child) for child in node.children)

        # The prediction depends on the user, so the user has to be part of the
        # cache key; keying by node alone would hand one user's score to another
        # user queried in the same round.
        key = (user, node)
        if key not in self.__round_stats:
            mission_ids = torch.tensor([node.value['missionID']], dtype=torch.long, device=mf.DEVICE)
            user_ids = torch.tensor([user], dtype=torch.long, device=mf.DEVICE)
            # Pure inference: no autograd graph is needed for a value read via .item().
            with torch.no_grad():
                self.__round_stats[key] = self.model(user_ids, mission_ids).item()

        return self.__round_stats[key]

    def update(self, **kwargs):
        """Refit the model on the interactions passed as ``train_df``.

        Args:
            **kwargs: Must contain ``train_df``, a DataFrame with the columns
                ``missionID``, ``user`` and ``performance``.
        """
        train_df: pd.DataFrame = kwargs['train_df']
        dataset = mf.MissionDataset(train_df['missionID'].values, train_df['user'].values, train_df['performance'].values)
        # mf.fit returns the model to use from now on.
        self.model = mf.fit(self.model, dataset)

    
class SoftmaxBandit(Policy):
    """Selection policy that samples nodes with softmax probabilities of model scores.

    A leaf node is scored by calling the model with the user and the leaf's
    ``missionID``; an inner node is scored by the best score among its children.
    Leaf scores are cached until ``init`` is called again.
    """

    def __init__(self, model: nn.Module, temperature=1.0):
        """
        Args:
            model: Module called as ``model(user_ids, mission_ids)``; its single
                output value is used as the score of a leaf.
            temperature: Positive divisor applied to the scores before the
                softmax. The default of 1.0 leaves the scores unchanged; smaller
                values make the sampling greedier, larger values more uniform.

        Raises:
            ValueError: If ``temperature`` is not strictly positive.
        """
        super().__init__()
        if temperature <= 0:
            raise ValueError('temperature must be strictly positive')
        self.model = model
        self.temperature = temperature
        # Cache of leaf scores for the current round, keyed by (user, node).
        self.__round_stats = {}

    def init(self, **kwargs):
        """Reset the round stats"""
        self.__round_stats = {}

    def select(self, nodes, n, **kwargs):
        """Sample up to ``n`` distinct nodes out of ``nodes`` for one user.

        Args:
            nodes: Sequence of candidate tree nodes.
            n: Number of nodes to sample. If fewer candidates are available,
                all of them are returned instead of raising.
            **kwargs: Must contain ``user``, the user id passed to the model.

        Returns:
            A NumPy array holding the sampled nodes (empty if ``nodes`` is empty).
        """
        user = kwargs['user']
        scores = np.asarray([self.__retrieve_node_data(user, node) for node in nodes], dtype=float)

        # softmax cannot be evaluated on an empty array; nothing to sample anyway.
        if scores.size == 0:
            return np.empty(0, dtype=object)

        p = softmax(scores / self.temperature)
        # Sampling without replacement cannot return more items than exist.
        selected_nodes = np.random.choice(nodes, min(n, scores.size), p=p, replace=False)
        return selected_nodes

    def __retrieve_node_data(self, user, node: TreeNode):
        """Return the score of ``node`` for ``user``.

        Inner nodes take the maximum score over their children (recursively).
        Leaf scores are computed once per (user, node) pair and then served from
        the round cache.
        """
        if not node.is_leaf:
            return max(self.__retrieve_node_data(user, child) for child in node.children)

        # The prediction depends on the user, so the user has to be part of the
        # cache key; keying by node alone would hand one user's score to another
        # user queried in the same round.
        key = (user, node)
        if key not in self.__round_stats:
            mission_ids = torch.tensor([node.value['missionID']], dtype=torch.long, device=mf.DEVICE)
            user_ids = torch.tensor([user], dtype=torch.long, device=mf.DEVICE)
            # Pure inference: no autograd graph is needed for a value read via .item().
            with torch.no_grad():
                self.__round_stats[key] = self.model(user_ids, mission_ids).item()

        return self.__round_stats[key]

    def update(self, **kwargs):
        """Refit the model on the interactions passed as ``train_df``.

        Args:
            **kwargs: Must contain ``train_df``, a DataFrame with the columns
                ``missionID``, ``user`` and ``performance``.
        """
        train_df: pd.DataFrame = kwargs['train_df']
        dataset = mf.MissionDataset(train_df['missionID'].values, train_df['user'].values, train_df['performance'].values)
        # mf.fit returns the model to use from now on.
        self.model = mf.fit(self.model, dataset)