In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the mission log. The path is relative to the notebook's working directory.
df = pd.read_csv('./October_missions_full.csv')

# Mission identifier is "<type>_<target>"; built while `type` is still a plain
# (non-categorical) column so that `+` performs string concatenation.
df['mission'] = df['type'] + '_' + df['target'].astype(str)

# Keep only the columns used below. The explicit .copy() makes the frame
# independent of the full CSV frame, so the column assignments that follow do
# not trigger pandas' SettingWithCopyWarning.
df = df[['user', 'mission', 'createdAtT', 'type', 'target', 'completed', 'performance']].copy()
df['createdAtT'] = pd.to_datetime(df['createdAtT'], unit='ms')  # epoch milliseconds -> datetime64
df['user'] = df['user'].astype('category').cat.codes  # replace user ids with integer category codes
df['mission'] = df['mission'].astype('category')
df['type'] = df['type'].astype('category')

# score = 1 - (1 - performance)^2, floored at 0.
# Written with pandas operators instead of `np.pow`, which only exists in
# NumPy >= 2.0 and raises AttributeError on NumPy 1.x.
df['score'] = (1 - (1 - df['performance']) ** 2).clip(lower=0)

# Chronological order, ties broken by user code; reassign rather than sorting in place.
df = df.sort_values(by=['createdAtT', 'user'], ignore_index=True)
df

Unnamed: 0,user,mission,createdAtT,type,target,completed,performance,score
0,4299,quiz_2,2024-10-01 00:00:07.698,quiz,2,False,0.000000,0.000000
1,4299,mobility_1,2024-10-01 00:00:07.698,mobility,1,True,1.000000,1.000000
2,4299,activity_6,2024-10-01 00:00:07.698,activity,6,False,0.666667,0.888889
3,2566,mobility_1,2024-10-01 00:00:17.857,mobility,1,False,0.000000,0.000000
4,2566,episode_3,2024-10-01 00:00:17.857,episode,3,False,0.666667,0.888889
...,...,...,...,...,...,...,...,...
91353,4978,mobility_2,2024-10-31 23:55:25.309,mobility,2,False,0.000000,0.000000
91354,4978,quiz_5,2024-10-31 23:55:25.309,quiz,5,False,0.000000,0.000000
91355,1593,episode_2,2024-10-31 23:59:41.228,episode,2,False,0.000000,0.000000
91356,1593,activity_3,2024-10-31 23:59:41.228,activity,3,False,0.000000,0.000000


In [2]:
# Per-(user, mission) statistics.
#   completed       - sum of the `completed` flag within the group
#   failed          - non-null `completed` entries minus `completed`
#   success_ratio   - completed / non-null `completed` entries
#   avg_performance - mean of `performance`
#   avg_score       - mean of `score`
# Only built-in aggregations are used in .agg(); `failed` and `success_ratio`
# are then derived with vectorised column arithmetic instead of Python lambdas
# that would be invoked once per group.
summary = (
    df.groupby(['user', 'mission'], observed=True)
    .agg(
        completed=('completed', 'sum'),
        attempts=('completed', 'count'),
        avg_performance=('performance', 'mean'),
        avg_score=('score', 'mean'),
    )
    .assign(
        failed=lambda stats: stats['attempts'] - stats['completed'],
        success_ratio=lambda stats: stats['completed'] / stats['attempts'],
    )
    # Drop the helper `attempts` column and restore the intended column order.
    [['completed', 'failed', 'success_ratio', 'avg_performance', 'avg_score']]
    .reset_index()
)

summary

Unnamed: 0,user,mission,completed,failed,success_ratio,avg_performance,avg_score
0,0,action_2,0,1,0.0,0.000000,0.000000
1,0,activity_4,1,0,1.0,1.000000,1.000000
2,0,activity_9,0,1,0.0,0.555556,0.802469
3,0,episode_1,1,0,1.0,1.000000,1.000000
4,0,episode_2,0,1,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...
53584,7276,quiz_2,1,1,0.5,0.500000,0.500000
53585,7276,quiz_3,1,1,0.5,0.500000,0.500000
53586,7276,quiz_4,0,2,0.0,0.375000,0.593750
53587,7276,quiz_5,0,1,0.0,0.400000,0.640000


In [5]:
# Spot-check one user's per-mission statistics.
# The draw uses a locally seeded Generator so that re-running the notebook
# shows the same user; change SAMPLE_SEED to inspect a different one.
SAMPLE_SEED = 0
rng = np.random.default_rng(SAMPLE_SEED)
user = rng.choice(summary['user'].unique())

# Rows of `summary` for the selected user, displayed best average score first.
user_summary = summary[summary['user'] == user]
user_summary.sort_values(by='avg_score', ascending=False)

Unnamed: 0,user,mission,completed,failed,success_ratio,avg_performance,avg_score
43037,5923,activity_2,1,0,1.0,1.0,1.0
43054,5923,mobility_3,0,1,0.0,0.666667,0.888889
43040,5923,activity_5,1,1,0.5,0.7,0.82
43053,5923,mobility_2,1,2,0.333333,0.5,0.583333
43052,5923,mobility_1,2,2,0.5,0.5,0.5
43061,5923,streak_1,2,2,0.5,0.5,0.5
43042,5923,activity_8,0,3,0.0,0.166667,0.25
43036,5923,activity_1,1,3,0.25,0.25,0.25
43041,5923,activity_7,0,3,0.0,0.142857,0.22449
43038,5923,activity_3,0,4,0.0,0.166667,0.222222
