In [19]:
import pathlib
import json

In [20]:
from textrec.paths import paths

In [21]:
import glob
import os
import json
import dateutil.parser
import datetime
import toolz

In [32]:
# Participant IDs to exclude from the analysis, each with the reason it was discarded.
INVALID = (
    '3vf5fg',  # already done as 9fmfm4
    '73qq5q',  # already done as 77j4mf
    'ffhgxm',  # something messed up in analysis; logfile is out of sync
    'mhh838',  # reconnected during experiment, messed up
)


In [33]:
def get_log_data(log_file, earliest):
    """Summarise a single JSON-lines session log.

    Only lines whose raw text mentions ``next``, ``login`` or ``finalData``
    are JSON-decoded; everything else is skipped without parsing.

    Parameters
    ----------
    log_file
        Path of the ``.jsonl`` log to read.
    earliest : datetime.datetime
        Cut-off; as soon as a login event older than this is seen the whole
        log is ignored.

    Returns
    -------
    dict or None
        ``None`` when the log has no login event, or when a login happened
        before ``earliest``. Otherwise a dict with the keys ``timestamp``,
        ``batch``, ``config``, ``platform_id``, ``participant_id``, ``size``
        (file size in bytes), ``complete`` (a ``finalData`` event followed the
        most recent login) and ``num_nexts`` (number of ``next`` events in the
        whole file).
    """
    size = os.path.getsize(log_file)
    meta = None
    num_nexts = 0
    with open(log_file) as f:
        for raw_line in f:
            # Cheap substring pre-filter so only candidate lines are decoded.
            if 'next' not in raw_line and 'login' not in raw_line and 'finalData' not in raw_line:
                continue
            event = json.loads(raw_line)
            event_type = event.get('type')
            if event_type == 'next':
                num_nexts += 1
            elif event_type == 'login':
                if 'jsTimestamp' in event:
                    # jsTimestamp is divided by 1000 (milliseconds -> seconds);
                    # the result is a naive datetime in local time.
                    timestamp = datetime.datetime.fromtimestamp(event['jsTimestamp'] / 1000)
                else:
                    # NOTE(review): if this string carries a UTC offset the parsed
                    # value is timezone-aware and comparing it with a naive
                    # `earliest` raises TypeError -- confirm the log format.
                    timestamp = dateutil.parser.parse(event['timestamp'])
                if timestamp < earliest:
                    return None
                # A later login replaces the metadata and resets `complete`.
                meta = dict(
                    timestamp=timestamp,
                    batch=event.get('batch'),
                    config=event['config'],
                    platform_id=event['platform_id'],
                    participant_id=event['participant_id'],
                    size=size,
                    complete=False)  # overridden once finalData is seen
            elif event_type == 'finalData':
                # A finalData line with no preceding login has nothing to mark
                # complete; previously this crashed on `None['complete'] = ...`.
                if meta is not None:
                    meta['complete'] = True
    if meta is None:
        return None
    return dict(meta, num_nexts=num_nexts)


def get_logs(log_path, earliest):
    """Collect the summaries of every ``*.jsonl`` log directly under ``log_path``.

    Each file is passed to ``get_log_data`` together with ``earliest``; files
    for which it returns ``None`` are left out of the resulting list.
    """
    summaries = (
        get_log_data(candidate, earliest)
        for candidate in log_path.glob('*.jsonl')
    )
    return [summary for summary in summaries if summary is not None]

In [34]:
# Summarise every log under logs-gcp1 whose login is not before 2018-05-02.
log_files = get_logs(
    log_path=paths.top_level / 'logs-gcp1',
    earliest=datetime.datetime(2018, 5, 2),
)

In [35]:
# Drop the sessions whose participant ID is listed in INVALID.
not_invalid = [
    log
    for log in log_files
    if log['participant_id'] not in INVALID
]

In [36]:
# Keep sessions with more than 4 'next' events (arbitrary cutoff).
did_some_work = [
    log
    for log in not_invalid
    if log['num_nexts'] > 4
]

In [37]:
# Of those, the sessions that never logged a finalData event.
incomplete = [
    log
    for log in did_some_work
    if not log['complete']
]

In [38]:
# Number of sessions that passed the num_nexts cutoff but are not marked complete.
len(incomplete)

10

In [13]:
from textrec import analysis_util

In [23]:
# Fetch the log analysis for the first incomplete participant.
an = analysis_util.get_log_analysis(incomplete[0]['participant_id'])

In [24]:
# List the fields available on the analysis result.
an.keys()

dict_keys(['participant_id', 'isComplete', 'byExpPage', 'pageSeq', 'screenTimes', 'allControlledInputs', 'git_rev'])

In [25]:
# Completion flag as reported by the analysis itself.
an['isComplete']

False

In [26]:
# Inputs recorded for this participant.
an['allControlledInputs']

[['intro-use_predictive', 'Yes'],
 ['intro-I like to solve complex problems.', 1],
 ['intro-I have difficulty understanding abstract ideas.', 1],
 ['intro-I feel comfortable around people.', 2],
 ['intro-I have little to say.', 2]]

In [32]:
# Run the batch_analysis module inside this kernel.
# NOTE(review): `summarize`, used in the next cell, presumably comes from this run -- confirm.
%run -m textrec.batch_analysis

In [35]:
# Summarise every incomplete session, allowing unfinished logs.
summarize(
    [entry['participant_id'] for entry in incomplete],
    incomplete_ok=True,
)


3wjx7c
practice-0:general:a black cat napping on a sunny unpainted wood bench in front of a red wall 
final-0-0:general:there is a cat that is laying on top of a tablet in front of a cup of wine.? the cat is looking at its master. 
final-0-1:general:this is a small bathroom and there is someone in tne shower and the bathroom is also a modern toilet. 
final-0-2:general:

intro-use_predictive: Yes

Total time: 4.7m
ExperimentScreen: 245.0
IntroSurvey: 16.3
Welcome: 12.2
TaskDescription: 4.2
PostPractice: 1.9
StudyDesc: 1.8
Instructions: 1.1

5xc2f3
practice-0:specific:a black cat napping on a sunny unpainted wood bench in front of a red wall 
final-0-0:specific:a cat sitting in front of a glass of wine looking up at the camera. 
final-0-1:specific:a bathroom shower with dirty glass doors with a towel hanging on it 
final-0-2:specific:there is no image here 
practice-1:general:a man with black hair and glasses placing a large turkey into an upper oven

intro-use_predictive: Yes
postTask-

In [39]:
# Completed, non-excluded sessions in chronological order of login.
complete = sorted(
    (log for log in not_invalid if log['complete']),
    key=lambda log: log['timestamp'],
)

In [40]:
# Space-separated participant IDs of all completed sessions, in login order.
' '.join(entry['participant_id'] for entry in complete)

'h52x67 jvccx2 36x2r3 gg65g6 692c8j qmwvwv 77j4mf 4ggxj8 5c39rx fvwhpc 26w4jv 7g8xw8 533r6c 74v545 vxjcf7 9f5xwx 3267ww wf4c3m 7jcm37 cf9p8m phqcw9 5jj59g gw3w72 559x69 gvwqp6 8fwr56 qxqm88 pmr872 x9m97g 48mr5g h6c27m pfqcmf fcc4fp 3qjwxv 5p7cf7 xxxp65 94wwrq vg926m fjr9xf rp3jwh 46gc8v 8v9qq5 vj2c8r 7fwgf9 26f62q wxf9cx gmvv6r w3xwhj 3pcfjp 85c66x'

In [41]:
# Print, for each batch, how many sessions completed and their participant IDs
# ordered by login timestamp.
for batch_name, members in toolz.groupby('batch', complete).items():
    ordered = sorted(members, key=lambda log: log['timestamp'])
    joined_ids = ' '.join(log['participant_id'] for log in ordered)
    print()
    print(f'{len(ordered)} completed in {batch_name}')
    print(f'{batch_name}:', joined_ids)


28 completed in None
None: h52x67 jvccx2 36x2r3 gg65g6 692c8j qmwvwv 77j4mf 4ggxj8 5c39rx fvwhpc 26w4jv 7g8xw8 533r6c 74v545 vxjcf7 9f5xwx 3267ww wf4c3m 7jcm37 cf9p8m phqcw9 5jj59g gw3w72 559x69 gvwqp6 8fwr56 qxqm88 pmr872

22 completed in gc1
gc1: x9m97g 48mr5g h6c27m pfqcmf fcc4fp 3qjwxv 5p7cf7 xxxp65 94wwrq vg926m fjr9xf rp3jwh 46gc8v 8v9qq5 vj2c8r 7fwgf9 26f62q wxf9cx gmvv6r w3xwhj 3pcfjp 85c66x


In [42]:
from textrec.counterbalancing import get_completion_data

In [44]:
import pandas as pd

In [65]:
# Completion records for batch 'gc1', with login_timestamp converted from
# epoch seconds and rows ordered by login time.
completions = (
    pd.DataFrame(get_completion_data('gc1'))
    .assign(login_timestamp=lambda frame: pd.to_datetime(frame['login_timestamp'], unit='s'))
    .sort_values('login_timestamp')
)

In [68]:
# Login time of the fifth row from the end (rows are sorted ascending by login_timestamp).
completions.login_timestamp.iloc[-5]

Timestamp('2018-06-29 19:31:11.057917')

In [70]:
# Count completed sessions per assignment, restricted to logins before 2018-06-29.
completions.loc[
    lambda frame: frame['completed']
    & (frame['login_timestamp'] < pd.Timestamp(year=2018, month=6, day=29)),
    'assignment',
].value_counts()


2    4
1    4
0    4
4    3
3    3
5    2
Name: assignment, dtype: int64

In [66]:
# Display the completions table (sorted by login_timestamp).
completions

Unnamed: 0,assignment,completed,login_timestamp,participant_id
5,0,False,2018-06-11 16:52:46.376846000,cc67cg
31,1,False,2018-06-11 17:02:25.574920000,h53736
46,2,True,2018-06-11 17:03:54.946232000,r6xh7g
35,0,False,2018-06-13 16:58:30.992824000,fh7wf7
6,1,True,2018-06-13 17:02:00.581406000,567vjj
21,2,False,2018-06-13 17:09:01.195205000,g22xw9
29,0,False,2018-06-14 15:56:20.811908000,vx6c3q
22,3,False,2018-06-14 15:58:52.523238000,8xm39m
36,1,False,2018-06-14 19:43:29.298186000,8v3fmf
10,1,False,2018-06-15 16:13:40.956185000,9qcf68
