In [37]:
import json
import pandas as pd
import numpy as np

In [84]:
# Load the study data from phase1-data.json into `d`.
# The context manager closes the file as soon as parsing finishes instead of
# leaving the handle open for the lifetime of the kernel.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
with open('/Users/stefan/Downloads/phase1-data.json') as f:
    d = json.load(f)

In [86]:
# Exclude this one session from the analysis
# (presumably the author's own test run — TODO confirm).
# pop() with a default keeps the cell re-runnable: a plain `del` raises KeyError
# the second time it is executed. The trailing `;` stops the notebook from
# echoing the removed record as cell output.
d.pop('Stefan, Wed Nov 14 2018 19:17:18 GMT-0800 (Pacific Standard Time)', None);

In [128]:
def iterate_replays(variant_d, key):
    """Yield ``variant_d[name][key]`` for every replay entry that has ``key``.

    Only entries of ``variant_d`` whose name starts with ``'replay'`` are
    visited, in the mapping's iteration order; replays that do not contain
    ``key`` are skipped silently.
    """
    # Snapshot the matching names before yielding anything.
    replay_names = tuple(name for name in variant_d if name.startswith('replay'))
    yield from (
        variant_d[name][key]
        for name in replay_names
        if key in variant_d[name]
    )

In [161]:
# Build one summary record per (user, variant) and one record per user,
# then turn both record lists into DataFrames.
variant_rows, user_rows = [], []

for user, user_d in d.items():
    # Only the part of the session key before the first comma is used as the user label.
    u = user.split(',')[0]

    # The game type is the same across all replays, so we just look at the
    # first replay of variant 3 to decide which variant was the affective one.
    if user_d['variant-3']['replay-1']['game_type'] == 'Affective':
        affective_variant = 3
    else:
        affective_variant = 4

    for variant, variant_d in user_d.items():
        if not variant.startswith('variant'):
            continue

        ratings = variant_d['ratings']

        # A replay's score is the number of entries in its 'x' list.
        scores = list(map(len, iterate_replays(variant_d, 'x')))

        # Span between the first and last 'time' entry of each replay, divided
        # by 1000 (presumably milliseconds -> seconds; TODO confirm the unit).
        replay_durations = []
        for times in iterate_replays(variant_d, 'time'):
            last, first = int(times[-1]), int(times[0])
            replay_durations.append((last - first) / 1000)

        row = {
            'user': u,
            # The variant number is taken from the last character of the key.
            'variant': int(variant[-1]),
            'max_score': max(scores),
            'avg_score': sum(scores) / len(scores),
            'total_duration': sum(replay_durations),
        }
        # The ratings are merged last, exactly as `**ratings` at the end of a dict literal.
        row.update(ratings)
        variant_rows.append(row)

    user_rows.append(dict(
        user=u,
        rated_best_variant=user_d['subjectInfo']['bestVariant'],
        affective_variant=affective_variant,
        affective_variant_first=affective_variant == 3,
    ))

variant_df = pd.DataFrame(variant_rows)
user_df = pd.DataFrame(user_rows).set_index('user')

In [182]:
# Show the per-(user, variant) summary table: scores, total duration and ratings.
variant_df

Unnamed: 0,avg_score,difficulty,enjoyment,max_score,total_duration,user,valence,variant
0,15.333333,3,5,17,57.231,Anupriya,4,1
1,7.666667,5,5,11,30.487,Anupriya,5,2
2,7.5,4,5,10,42.732,Anupriya,5,3
3,6.6,5,4,8,51.013,Anupriya,4,4
4,23.333333,3,4,25,86.746,Daniel,4,1
5,9.666667,5,4,10,32.377,Daniel,3,2
6,18.8,4,4,24,133.398,Daniel,4,3
7,14.333333,4,4,16,53.78,Daniel,3,4
8,25.5,3,4,30,63.591,Jared Zhao,4,1
9,8.0,5,5,10,16.074,Jared Zhao,3,2


In [187]:
# For every user, record in `user_df` which variant produced their highest score
# and how long they played the affective vs. the classic adaptive variant.
# The group frame is deliberately NOT called `user_rows`: that name holds the
# list of records `user_df` was built from, and rebinding it to a DataFrame
# here would leave confusing hidden state behind for any cell that is re-run.
for user, user_variant_rows in variant_df.groupby('user'):
    top_score = user_variant_rows.max_score.max()
    highest_score_rows = user_variant_rows[user_variant_rows.max_score == top_score]
    highest_score = highest_score_rows.max_score.iloc[0]
    # On a tie the first matching variant wins; the tie itself is reported below.
    highest_variant = highest_score_rows.variant.iloc[0]
    if len(highest_score_rows) > 1:
        print(f'{user} has the same highest score ({highest_score}) on multiple variants: {highest_score_rows.variant.tolist()}')

    user_df.loc[user, 'highest_score_variant'] = highest_variant

    affective_variant_row = user_variant_rows[
        user_variant_rows.variant == user_df.loc[user].affective_variant
    ].iloc[0]
    # The affective variant is either 3 or 4, so (3 + 4) minus it gives the
    # other one of the pair, i.e. the classic adaptive variant.
    classic_variant = 3 + 4 - affective_variant_row.variant
    classic_variant_row = user_variant_rows[user_variant_rows.variant == classic_variant].iloc[0]

    user_df.loc[user, 'affective_variant_duration'] = affective_variant_row.total_duration
    user_df.loc[user, 'classic_variant_duration'] = classic_variant_row.total_duration

In [188]:
# Show the per-user table, including the columns added by the loop above.
user_df

Unnamed: 0_level_0,affective_variant,affective_variant_first,rated_best_variant,highest_score_variant,affective_variant_duration,classic_variant_duration
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Anupriya,3,True,3,1.0,42.732,51.013
Daniel,4,False,3,1.0,53.78,133.398
Jared Zhao,4,False,1,1.0,121.694,160.301
Jiaoyang,4,False,1,1.0,103.286,192.293
Justin Ly,4,False,1,1.0,45.346,89.305
Minha,4,False,4,3.0,28.965,29.089
Serena Zhu,4,False,1,1.0,62.908,94.362
Timothy Wang,3,True,3,1.0,43.489,59.906
Yilei,4,False,1,1.0,71.69,76.686
jeffrey stone,3,True,1,1.0,98.578,167.149


In [165]:
# Share of users whose self-rated best variant is also the one they scored highest on.
f'{user_df["rated_best_variant"].eq(user_df["highest_score_variant"]).mean():.0%} users rated their highest scoring variant as the best one'

'60% users rated their highest scoring variant as the best one'

In [168]:
# Share of users whose highest score was reached on variant 1.
f'{user_df["highest_score_variant"].eq(1).mean():.0%} got their highest score on the easiest variant'

'90% got their highest score on the easiest variant'

In [166]:
# Share of users who picked the affective variant as their best variant.
f'{user_df["rated_best_variant"].eq(user_df["affective_variant"]).mean():.0%} users rated the affective variant as the best one'

'30% users rated the affective variant as the best one'

In [191]:
# Share of users whose total play time on the affective variant exceeds that on the classic one.
f'{user_df["affective_variant_duration"].gt(user_df["classic_variant_duration"]).mean():.0%} users played the physiologically-adaptive variant longer than the classically-adaptive one'

'0% users played the physiologically-adaptive variant longer than the classically-adaptive one'