In [1]:
import os
# Switch into the sibling mturk directory so the relative paths used below
# (e.g. 'configs/...') resolve. The guard keeps a re-run of this cell from
# changing directory a second time.
# NOTE(review): this is a substring test on the whole working-directory path,
# so any ancestor directory containing 'mturk' also suppresses the chdir.
if 'mturk' not in os.getcwd():
    os.chdir('../mturk/')

In [19]:
from mturktools import open_mturk_connection, search_hits_by_title, adhoc_fix
from pyhocon import ConfigFactory
from tqdm import tqdm
from collections import Counter
import pandas as pd

In [3]:
# The conf files need this environment variable to be defined, but its value
# is not going to be used here, so an empty string is enough.
os.environ['mturk_input_dir'] = ''

In [4]:
def disable_all_hits(conf_path):
    """Disable every HIT whose title matches the one in a config file.

    Parses the config at ``conf_path``, opens an MTurk connection from it,
    and calls ``disable_hit`` on each HIT that ``search_hits_by_title``
    yields for the configured ``title``.

    Args:
        conf_path: Path to the config file; it must define ``title``.
    """
    conf = ConfigFactory.parse_file(conf_path)
    conn = open_mturk_connection(conf)
    try:
        hit_title = conf.get_string('title')
        matching_hits = search_hits_by_title(conn, hit_title)
        for hit in tqdm(matching_hits, desc="Disabling", unit="HIT"):
            conn.disable_hit(hit.HITId)
    finally:
        # Release the connection even if the search or a disable call raises.
        conn.close()

In [5]:
def show_progress(conf_path):
    """Print how many HITs' worth of assignments have been submitted.

    For every HIT whose title matches the ``title`` in the config at
    ``conf_path``, the fraction ``len(assignments) / MaxAssignments`` is
    added to a running total. The total and the number of HITs seen are
    printed as ``Completed <total> of <count> HITs``.

    Args:
        conf_path: Path to the config file; it must define ``title``.
    """
    conf = ConfigFactory.parse_file(conf_path)
    conn = open_mturk_connection(conf)
    try:
        hit_title = conf.get_string('title')
        hit_count, progress = 0, 0

        matching_hits = search_hits_by_title(conn, hit_title)
        for h in tqdm(matching_hits, desc="Downloading", unit="HIT"):
            # NOTE(review): if get_assignments pages its results, len(assgs)
            # undercounts HITs with many assignments -- confirm.
            assgs = conn.get_assignments(h.HITId)
            # float() keeps this a true division whatever type MaxAssignments has.
            progress += len(assgs) / float(h.MaxAssignments)
            hit_count += 1

        print("Completed %.1f of %d HITs" %(progress, hit_count))
    finally:
        # Release the connection even if a request fails part-way through.
        conn.close()

In [6]:
# Report completion for the HITs described by the unmasked config.
show_progress('configs/cltl-2019-01-23-unmasked.conf')

Downloading: 28HIT [00:13,  2.15HIT/s]

Completed 13.0 of 28 HITs





In [7]:
# Report completion for the HITs described by the masked config.
show_progress('configs/cltl-2019-01-23-masked.conf')

Downloading: 96HIT [00:33,  2.86HIT/s]

Completed 30.0 of 96 HITs





## Check when people submitted

In [7]:
# Open a notebook-level connection for the unmasked config; the following
# cells reuse `conn` and `hit_title`.
# NOTE(review): no close() for this connection appears in the visible cells.
conf = ConfigFactory.parse_file('configs/cltl-2019-01-23-unmasked.conf')
conn = open_mturk_connection(conf)
hit_title = conf.get_string('title')

In [26]:
# Materialise the search results once so later cells can iterate them again.
hits = list(search_hits_by_title(conn, hit_title))

In [12]:
# Assignments fetched per HIT, keyed by HIT id.
hit2assgs = {
    hit.HITId: conn.get_assignments(hit.HITId)
    for hit in hits
}
# Flatten to one submit timestamp per assignment, across all HITs.
submit_times = [
    submitted.SubmitTime
    for batch in hit2assgs.values()
    for submitted in batch
]

In [30]:
# Tally submissions by the first 10 characters of each timestamp.
Counter(stamp[:10] for stamp in submit_times)

Counter({'2019-01-23': 12, '2019-02-21': 1})

In [16]:
# Tally how many assignments each worker submitted, across all HITs.
Counter(
    submitted.WorkerId
    for batch in hit2assgs.values()
    for submitted in batch
)

Counter({'A19NP6Q2D7YHSV': 3,
         'A3V5LQ8PYCTKQB': 2,
         'A1WF8OS5DR3JRB': 3,
         'A2ZSJQ8UYSJ8LX': 1,
         'A1I8TJRTQ5NI2I': 3,
         'A12H2L507TPSUE': 1})

In [23]:
# Load the student-assistant results; the 'manipulation' column is exposed
# under the name 'worker'.
df = (
    pd.read_csv('../output/mturk/student-assistants-results.csv')
    .rename(columns={'manipulation': 'worker'})
)
df

Unnamed: 0,auto_or_gold,dataset,f1_bcub,f1_ceafe,f1_conll,f1_muc,worker,mention_p,mention_r,system
0,,dev,84.52,69.4,81.433333,90.38,A12H2L507TPSUE,100,100,human
1,,dev,90.66,81.24,88.423333,93.37,A19NP6Q2D7YHSV,100,100,human
2,,dev,92.99,89.03,93.133333,97.38,A1I8TJRTQ5NI2I,100,100,human
3,,dev,87.13,77.67,85.763333,92.49,A1WF8OS5DR3JRB,100,100,human
4,,dev,91.75,82.13,88.59,91.89,A2ZSJQ8UYSJ8LX,100,100,human
5,,dev,96.98,92.95,95.493333,96.55,A3V5LQ8PYCTKQB,100,100,human


In [27]:
def get_conll_file(assg):
    """Return the first field of the 'conll_file' answer of an assignment.

    ``assg.answers`` must hold exactly one element (the real list of answers
    is wrapped in a singleton list). Each answer contributes its ``qid``
    mapped to ``fields[0]``; a later answer with the same ``qid`` overrides
    an earlier one.

    Raises:
        ValueError: if ``assg.answers`` does not have exactly one element.
        KeyError: if no answer has the qid 'conll_file'.
    """
    [form_answers] = assg.answers
    first_field_by_qid = {}
    for form_answer in form_answers:
        first_field_by_qid[form_answer.qid] = form_answer.fields[0]
    return first_field_by_qid['conll_file']

In [31]:
# One row per submitted assignment: the first 10 characters of its submit
# time and the CoNLL file it refers to, with the output-directory part removed.
submission_df = pd.DataFrame([
    {
        'date': submitted.SubmitTime[:10],
        'conll_file': get_conll_file(submitted).replace(
            'output/conll-2012-transformed.v2/', ''),
    }
    for batch in hit2assgs.values()
    for submitted in batch
])

In [32]:
# Display the per-submission table built in the previous cell.
submission_df

Unnamed: 0,conll_file,date
0,orig/dev/bn_cnn_02_cnn_0200___part_000.m_auto_...,2019-01-23
1,orig/dev/bn_voa_00_voa_0090___part_000.m_auto_...,2019-01-23
2,orig/dev/pt_nt_40_nt_4020___part_001.m_auto_conll,2019-01-23
3,orig/dev/mz_sinorama_10_ectb_1020___part_001.m...,2019-01-23
4,orig/dev/wb_eng_00_eng_0010___part_004.m_auto_...,2019-01-23
5,orig/dev/mz_sinorama_10_ectb_1050___part_004.m...,2019-01-23
6,orig/dev/tc_ch_00_ch_0030___part_001.m_auto_conll,2019-01-23
7,orig/dev/pt_nt_58_nt_5810___part_000.m_auto_conll,2019-01-23
8,orig/dev/nw_xinhua_01_chtb_0160___part_000.m_a...,2019-01-23
9,orig/dev/bc_cctv_00_cctv_0000___part_011.m_aut...,2019-01-23
