In [None]:
import pandas as pd

# Load both raw mission exports and stack them into a single frame.
df = pd.concat(
    [
        pd.read_csv(csv_path)
        for csv_path in (
            './data/October_missions_full.csv',
            './data/November_1stW_missions_full.csv',
        )
    ],
    ignore_index=True,
)

# A mission is identified by its type combined with its target value.
df['mission'] = df['type'] + '_' + df['target'].astype(str)

# Keep only the columns used downstream and reduce the millisecond
# timestamp to a calendar date.
df = df[['user', 'mission', 'createdAtT', 'type', 'target', 'performance']].assign(
    createdAtT=lambda frame: pd.to_datetime(frame['createdAtT'], unit='ms').dt.date
)

# Drop users that were active on fewer than three distinct days.
df = df.groupby('user').filter(
    lambda user_rows: len(user_rows['createdAtT'].unique()) > 2
)

# Integer-encode users and missions; `assign` evaluates its keyword
# arguments in order, so `missionID` sees the already-categorical `mission`.
df = df.assign(
    user=lambda frame: frame['user'].astype('category').cat.codes,
    mission=lambda frame: frame['mission'].astype('category'),
    missionID=lambda frame: frame['mission'].cat.codes,
    type=lambda frame: frame['type'].astype('category'),
)

def reward(x):
    """Convert a performance value into a reward.

    Values up to and including 1 are returned unchanged. Above 1 the
    reward falls off as ``2 - x**2`` and is floored at 0.
    """
    return x if x <= 1 else max(0, 2 - x**2)

# Attach the reward column, expose the day column as `date`, and order
# the rows chronologically (ties broken by user) with a fresh index.
df = (
    df.assign(reward=lambda frame: frame['performance'].apply(reward))
    .rename(columns={'createdAtT': 'date'})
    .sort_values(by=['date', 'user'], ignore_index=True)
)
display(df)

In [None]:
import torch
import numpy as np
from src import models as m
from sklearn.metrics import mean_squared_error

# Keep a single row per (user, mission) pair -- the last one in the
# current row order -- and renumber the index.
df = df.drop_duplicates(
    subset=['user', 'mission'],
    keep='last',
    ignore_index=True,
)
def fold(d):
    """Train on every day before ``d`` and score the models on day ``d``.

    Reads the module-level ``df``. Rows whose ``date`` is strictly earlier
    than ``d`` form the training set; rows dated exactly ``d`` form the test
    set, restricted to users and missions that also appear in training.

    Parameters
    ----------
    d
        The evaluation day; must be comparable with the values stored in
        ``df['date']``.

    Returns
    -------
    pd.DataFrame
        One column named ``str(d)`` and one row per model
        (``'AutoRec'``, ``'MF'``, ``'MLP'``) holding the mean squared error
        between the test rows' ``performance`` and the model predictions
        clipped at 0. All values are NaN when no test row can be evaluated
        (no training data, or no test row with a known user and mission).
    """
    model_names = ['AutoRec', 'MF', 'MLP']

    train_df = df[df['date'] < d]
    test_df: pd.DataFrame = df[df['date'] == d]

    # Remove test rows whose user or mission is not in the training set.
    # The model sizes below are derived from the training rows only, so an
    # unseen mission could carry an ID >= n_missions (out of range) or, at
    # best, be scored with a representation that was never trained.
    known_user = test_df['user'].isin(train_df['user'])
    known_mission = test_df['missionID'].isin(train_df['missionID'])
    test_df = test_df[known_user & known_mission]

    # Nothing to evaluate: report NaN instead of failing in `max()` or
    # `mean_squared_error`, so one empty day does not abort a whole sweep.
    if test_df.empty:
        return pd.DataFrame(
            index=model_names,
            columns=[str(d)],
            data=[np.nan] * len(model_names),
        )

    n_users = train_df['user'].max() + 1
    n_missions = train_df['missionID'].max() + 1

    # Fit order is kept fixed (MF, AutoRec, MLP) so that seeded runs consume
    # random numbers in the same sequence.
    mf = m.MF(n_users, n_missions, embedding_dim=8).fit(train_df, lr=0.001, epochs=10, weight_decay=1e-4)
    autorec = m.AutoRec(d=n_missions, k=16, dropout=0.1).fit(train_df, lr=0.0001, epochs=100, weight_decay=1e-4)
    mlp = m.MLP(n_users, n_missions, embedding_dim=16, hidden_dim=32, dropout=0.1).fit(train_df, lr=0.001, epochs=10, weight_decay=1e-4)

    # Build the index tensors once; every model is queried with the same pairs.
    user_idx = torch.tensor(test_df['user'].values, dtype=torch.long)
    mission_idx = torch.tensor(test_df['missionID'].values, dtype=torch.long)

    def _clipped_prediction(predict):
        """Query one model and clip its predictions to be non-negative."""
        raw = predict(user_idx, mission_idx).cpu().detach().numpy()
        return np.clip(raw, a_min=0, a_max=None)

    # NOTE(review): MF and MLP are called directly rather than through a
    # `predict` method; if `fit` leaves them in training mode, dropout would
    # still be active here -- confirm against src.models.
    with torch.no_grad():
        y_hat_autorec = _clipped_prediction(autorec.predict)
        y_hat_mf = _clipped_prediction(mf)
        y_hat_mlp = _clipped_prediction(mlp)

    # NOTE(review): errors are measured against the raw `performance` column;
    # confirm this is the target the models are fitted on.
    y_true = test_df['performance']
    return pd.DataFrame(
        index=model_names,
        columns=[str(d)],
        data=[
            mean_squared_error(y_true, y_hat_autorec),
            mean_squared_error(y_true, y_hat_mf),
            mean_squared_error(y_true, y_hat_mlp),
        ],
    )

In [None]:
from tqdm.auto import tqdm

# Fix both random number generators so the sweep is repeatable.
np.random.seed(0)
torch.manual_seed(0)

# Evaluate one fold per day of the first week of November 2024 and place
# the per-day error columns side by side.
pd.concat(
    [
        fold(day.date())
        for day in tqdm(pd.date_range(start='2024-11-01', end='2024-11-07'))
    ],
    axis='columns',
)