In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the October mission log and normalise it into the frame used by the
# rest of the notebook.
#
# The whole pipeline is one method chain: every `.assign` works on a fresh
# copy, so no column is ever written into a column-subset of another frame
# (the pattern that raises SettingWithCopyWarning), and re-running the cell
# always starts again from the CSV on disk.
df = (
    pd.read_csv('./October_missions_full.csv')
    # Mission id = "<type>_<target>". Built while `type` still has the dtype
    # it was read with, i.e. before the categorical conversion below.
    .assign(mission=lambda raw: raw['type'] + '_' + raw['target'].astype(str))
    # Keep only the columns used downstream, in display order.
    .loc[:, ['user', 'mission', 'createdAtT', 'type', 'target', 'completed', 'performance']]
    .assign(
        # `createdAtT` is parsed as a millisecond epoch timestamp.
        createdAtT=lambda kept: pd.to_datetime(kept['createdAtT'], unit='ms'),
        # Replace the original user identifiers with integer category codes.
        user=lambda kept: kept['user'].astype('category').cat.codes,
        mission=lambda kept: kept['mission'].astype('category'),
        type=lambda kept: kept['type'].astype('category'),
    )
    # Chronological order, ties broken by user code; rows are renumbered 0..n-1.
    .sort_values(by=['createdAtT', 'user'], ignore_index=True)
)
df

Unnamed: 0,user,mission,createdAtT,type,target,completed,performance
0,4299,quiz_2,2024-10-01 00:00:07.698,quiz,2,False,0.000000
1,4299,mobility_1,2024-10-01 00:00:07.698,mobility,1,True,1.000000
2,4299,activity_6,2024-10-01 00:00:07.698,activity,6,False,0.666667
3,2566,mobility_1,2024-10-01 00:00:17.857,mobility,1,False,0.000000
4,2566,episode_3,2024-10-01 00:00:17.857,episode,3,False,0.666667
...,...,...,...,...,...,...,...
91353,4978,mobility_2,2024-10-31 23:55:25.309,mobility,2,False,0.000000
91354,4978,quiz_5,2024-10-31 23:55:25.309,quiz,5,False,0.000000
91355,1593,episode_2,2024-10-31 23:59:41.228,episode,2,False,0.000000
91356,1593,activity_3,2024-10-31 23:59:41.228,activity,3,False,0.000000


In [14]:
# Per-(user, mission) outcome summary.
#
# `mission` is built from `type` and `target` when the data is loaded, so
# grouping on all three only carries `type` and `target` through to the
# result; `observed=True` keeps just the category combinations that occur.
mission_groups = df.groupby(['user', 'mission', 'type', 'target'], observed=True)

# Built-in aggregations instead of Python lambdas: the lambdas were invoked
# once per group and only re-derived `count - sum` and `mean`.
summary = mission_groups.agg(
    completed=('completed', 'sum'),
    attempts=('completed', 'count'),
    success_ratio=('completed', 'mean'),  # same value as sum / count
    avg_performance=('performance', 'mean'),
)

# failed = attempts - completed. `attempts` is only an intermediate; `failed`
# takes its place right after `completed`, so the column order is unchanged.
summary.insert(1, 'failed', summary.pop('attempts') - summary['completed'])
summary = summary.reset_index()

# score = x * (2 - x) = 1 - (1 - x)^2, floored at 0.
avg_performance = summary['avg_performance']
summary['score'] = (avg_performance * (2 - avg_performance)).clip(lower=0)

# Relevancy 1..5 from five equal-width score bins with FIXED edges on [0, 1].
# x * (2 - x) never exceeds 1 and the clip floors it at 0, so [0, 1] covers
# every possible score. With `bins=5` the edges were derived from the observed
# min/max, which made the label for a given score depend on which rows happened
# to be in the data. `include_lowest=True` keeps a score of exactly 0 in bin 1.
relevancy_labels = [1, 2, 3, 4, 5]
summary['relevancy'], bins = pd.cut(
    summary['score'],
    bins=np.linspace(0, 1, len(relevancy_labels) + 1),
    labels=relevancy_labels,
    include_lowest=True,
    retbins=True,
)
summary

Unnamed: 0,user,mission,type,target,completed,failed,success_ratio,avg_performance,score,relevancy
0,0,action_2,action,2,0,1,0.0,0.000000,0.000000,1
1,0,activity_4,activity,4,1,0,1.0,1.000000,1.000000,5
2,0,activity_9,activity,9,0,1,0.0,0.555556,0.802469,5
3,0,episode_1,episode,1,1,0,1.0,1.000000,1.000000,5
4,0,episode_2,episode,2,0,1,0.0,0.000000,0.000000,1
...,...,...,...,...,...,...,...,...,...,...
53584,7276,quiz_2,quiz,2,1,1,0.5,0.500000,0.750000,4
53585,7276,quiz_3,quiz,3,1,1,0.5,0.500000,0.750000,4
53586,7276,quiz_4,quiz,4,0,2,0.0,0.375000,0.609375,4
53587,7276,quiz_5,quiz,5,0,1,0.0,0.400000,0.640000,4


In [18]:
# Draw one user code at random from those present in `summary`.
# No seed is set in this cell, so the pick comes from NumPy's global RNG state.
candidate_users = summary['user'].unique()
user = np.random.choice(candidate_users)

# Keep only the rows of `summary` that belong to the drawn user.
belongs_to_user = summary['user'] == user
user_summary: pd.DataFrame = summary.loc[belongs_to_user]

# Show that user's missions ordered from highest to lowest relevancy.
ranked_missions = user_summary.sort_values(by=['relevancy'], ascending=False)
display(ranked_missions)

Unnamed: 0,user,mission,type,target,completed,failed,success_ratio,avg_performance,score,relevancy
43557,5997,action_1,action,1,1,0,1.0,1.0,1.0,5
43559,5997,activity_1,activity,1,4,0,1.0,1.0,1.0,5
43583,5997,streak_1,streak,1,2,0,1.0,1.0,1.0,5
43560,5997,activity_2,activity,2,0,2,0.0,0.5,0.75,4
43558,5997,action_2,action,2,0,3,0.0,0.5,0.75,4
43561,5997,activity_3,activity,3,1,4,0.2,0.533333,0.782222,4
43563,5997,activity_6,activity,6,0,2,0.0,0.25,0.4375,3
43562,5997,activity_5,activity,5,0,2,0.0,0.2,0.36,2
43574,5997,mobility_2,mobility,2,0,4,0.0,0.0,0.0,1
43582,5997,quiz_6,quiz,6,0,5,0.0,0.0,0.0,1
