In [1]:
import pandas as pd

df = pd.concat((
    pd.read_csv('./data/October_missions_full.csv'),
    pd.read_csv('./data/November_1stW_missions_full.csv')
), ignore_index=True)

df['mission'] = df['type'] + '_' + df['target'].astype(str)

df = df[['user', 'mission', 'createdAtT', 'type', 'target', 'performance']]
df['createdAtT'] = pd.to_datetime(df['createdAtT'], unit='ms').dt.date
df = df.groupby('user').filter(lambda x: len(x['createdAtT'].unique()) > 2)

df['user'] = df['user'].astype('category').cat.codes
df['mission'] = df['mission'].astype('category')
df['missionID'] = df['mission'].cat.codes
df['type'] = df['type'].astype('category')

def reward(x):
    if x <= 1:
        return x
    return max(0, 2 - x**2)

df['reward'] = df['performance'].apply(reward)
df.rename(columns={'createdAtT': 'date'}, inplace=True)

df.sort_values(by=['date', 'user'], inplace=True, ignore_index=True)
display(df)

n_users = df['user'].nunique()
n_missions = df['mission'].nunique()

n_users, n_missions

Unnamed: 0,user,mission,date,type,target,performance,missionID,reward
0,0,action_1,2024-10-01,action,1,1.000000,0,1.000000
1,0,quiz_5,2024-10-01,quiz,5,1.000000,28,1.000000
2,0,exp_100,2024-10-01,exp,100,1.200000,18,0.560000
3,2,episode_1,2024-10-01,episode,1,1.000000,12,1.000000
4,2,quiz_1,2024-10-01,quiz,1,1.000000,24,1.000000
...,...,...,...,...,...,...,...,...
87477,2043,activity_7,2024-11-07,activity,7,0.000000,9,0.000000
87478,2043,quiz_1,2024-11-07,quiz,1,0.000000,24,0.000000
87479,2049,mobility_3,2024-11-07,mobility,3,0.000000,22,0.000000
87480,2049,quiz_3,2024-11-07,quiz,3,0.333333,26,0.333333


(2050, 31)

In [2]:
df.drop_duplicates(subset=['user', 'mission'], keep='last', inplace=True, ignore_index=True)
display(df)

test_df = df[df['date'] == df['date'].max()]
train_df = df.drop(test_df.index)

display(train_df)
display(test_df)

Unnamed: 0,user,mission,date,type,target,performance,missionID,reward
0,0,action_1,2024-10-01,action,1,1.000000,0,1.000000
1,0,quiz_5,2024-10-01,quiz,5,1.000000,28,1.000000
2,2,episode_1,2024-10-01,episode,1,1.000000,12,1.000000
3,2,quiz_1,2024-10-01,quiz,1,1.000000,24,1.000000
4,2,mobility_3,2024-10-01,mobility,3,0.666667,22,0.666667
...,...,...,...,...,...,...,...,...
38199,2043,activity_7,2024-11-07,activity,7,0.000000,9,0.000000
38200,2043,quiz_1,2024-11-07,quiz,1,0.000000,24,0.000000
38201,2049,mobility_3,2024-11-07,mobility,3,0.000000,22,0.000000
38202,2049,quiz_3,2024-11-07,quiz,3,0.333333,26,0.333333


Unnamed: 0,user,mission,date,type,target,performance,missionID,reward
0,0,action_1,2024-10-01,action,1,1.000000,0,1.000000
1,0,quiz_5,2024-10-01,quiz,5,1.000000,28,1.000000
2,2,episode_1,2024-10-01,episode,1,1.000000,12,1.000000
3,2,quiz_1,2024-10-01,quiz,1,1.000000,24,1.000000
4,2,mobility_3,2024-10-01,mobility,3,0.666667,22,0.666667
...,...,...,...,...,...,...,...,...
35753,2045,activity_1,2024-11-06,activity,1,0.000000,2,0.000000
35754,2045,quiz_4,2024-11-06,quiz,4,0.000000,27,0.000000
35755,2049,episode_3,2024-11-06,episode,3,0.333333,14,0.333333
35756,2049,activity_6,2024-11-06,activity,6,1.000000,8,1.000000


Unnamed: 0,user,mission,date,type,target,performance,missionID,reward
35758,1,action_1,2024-11-07,action,1,1.000000,0,1.000000
35759,1,activity_8,2024-11-07,activity,8,1.000000,10,1.000000
35760,1,exp_100,2024-11-07,exp,100,0.750000,18,0.750000
35761,4,episode_1,2024-11-07,episode,1,1.000000,12,1.000000
35762,4,mobility_3,2024-11-07,mobility,3,1.000000,22,1.000000
...,...,...,...,...,...,...,...,...
38199,2043,activity_7,2024-11-07,activity,7,0.000000,9,0.000000
38200,2043,quiz_1,2024-11-07,quiz,1,0.000000,24,0.000000
38201,2049,mobility_3,2024-11-07,mobility,3,0.000000,22,0.000000
38202,2049,quiz_3,2024-11-07,quiz,3,0.333333,26,0.333333


In [3]:
from src import models as m

mf = m.MF(n_users, n_missions, embedding_dim=8).fit(train_df, lr=0.001, epochs=15, weight_decay=1e-4)
autorec = m.AutoRec(d=n_missions, k=16, dropout=0.1).fit(train_df, lr=0.0001, epochs=100, weight_decay=1e-4)
mlp = m.MLP(n_users, n_missions, embedding_dim=16, hidden_dim=32, dropout=0.1).fit(train_df, lr=0.001, epochs=20, weight_decay=1e-5)

  0%|          | 0/15 [00:00<?, ?it/s]

Final loss: 0.052093904834467256


  0%|          | 0/100 [00:00<?, ?it/s]

Final loss: 0.054488312854216646


  0%|          | 0/20 [00:00<?, ?it/s]

Final loss: 0.06306093929698674


In [4]:
import torch
import numpy as np

test_df['autorec'] = autorec.predict(
    torch.tensor(test_df['user'].values, dtype=torch.long),
    torch.tensor(test_df['missionID'].values, dtype=torch.long)
).cpu().detach().numpy()

test_df['mf'] = mf(
    torch.tensor(test_df['user'].values, dtype=torch.long),
    torch.tensor(test_df['missionID'].values, dtype=torch.long)
).cpu().detach().numpy()

test_df['mlp'] = mlp(
    torch.tensor(test_df['user'].values, dtype=torch.long),
    torch.tensor(test_df['missionID'].values, dtype=torch.long)
).cpu().detach().numpy()

test_df['autorec'] = np.clip(test_df['autorec'], a_min=0, a_max=None)
test_df['mf'] = np.clip(test_df['mf'], a_min=0, a_max=None)
test_df['mlp'] = np.clip(test_df['mlp'], a_min=0, a_max=None)

display(test_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['autorec'] = autorec.predict(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['mf'] = mf(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['mlp'] = mlp(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

S

Unnamed: 0,user,mission,date,type,target,performance,missionID,reward,autorec,mf,mlp
35758,1,action_1,2024-11-07,action,1,1.000000,0,1.000000,0.703925,1.568632,0.988914
35759,1,activity_8,2024-11-07,activity,8,1.000000,10,1.000000,0.642404,0.763437,0.890753
35760,1,exp_100,2024-11-07,exp,100,0.750000,18,0.750000,0.487858,0.657557,0.432683
35761,4,episode_1,2024-11-07,episode,1,1.000000,12,1.000000,0.809174,0.830259,0.954841
35762,4,mobility_3,2024-11-07,mobility,3,1.000000,22,1.000000,0.158183,0.075821,0.136447
...,...,...,...,...,...,...,...,...,...,...,...
38199,2043,activity_7,2024-11-07,activity,7,0.000000,9,0.000000,0.253497,0.258476,0.283198
38200,2043,quiz_1,2024-11-07,quiz,1,0.000000,24,0.000000,0.226557,0.376103,0.029994
38201,2049,mobility_3,2024-11-07,mobility,3,0.000000,22,0.000000,0.055343,0.143017,0.037506
38202,2049,quiz_3,2024-11-07,quiz,3,0.333333,26,0.333333,0.190988,0.760945,0.894267


In [5]:
from sklearn.metrics import mean_squared_error

pd.DataFrame({
    'Model': ['AutoRec', 'MF', 'MLP'],
    'MSE': [
        mean_squared_error(test_df['reward'], test_df['autorec']),
        mean_squared_error(test_df['reward'], test_df['mf']),
        mean_squared_error(test_df['reward'], test_df['mlp'])
    ]
})


Unnamed: 0,Model,MSE
0,AutoRec,0.113442
1,MF,0.128569
2,MLP,0.122445
