# In [None]:
"""Scripts for managing the MTurk annotation piepline via a command line interface"""

from hit_info import *
from hit_results import *
import create
import delete
import log
import qualifications
import review
import scores
import mturk
import boto3
import logging

# Configure the root logger first so records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# File-system locations that the CLI passes on to the pipeline helper modules.
ANNOTATION_LOG_PATH = 'logs/annotation_log.csv'
BANK_ANNOTATIONS_LOG = 'logs/gold_annotations_bank.csv'
HIT_LOGS_DIR = 'logs/hits_json'


# Actions accepted at the top-level prompt, in the order they are advertised.
_ACTIONS = ('publish', 'republish', 'create', 'review', 'qualifications',
            'stats', 'delete', 'expire', 'exit')

# Dataset base names available for each split.
_TRAIN_DATASETS = ('GEO', 'ATIS', 'DROP', 'CLEVR', 'SPIDER', 'HOTPOT', 'CWQ',
                   'ACADEMIC', 'LOW_CWQ', 'LOW_DROP', 'NLVR2', 'COMQA', 'TEMPQ')
_DEV_DATASETS = ('GEO', 'ATIS', 'DROP', 'CLEVR', 'SPIDER', 'HOTPOT', 'CWQ',
                 'LOW_CWQ', 'LOW_DROP', 'NLVR2', 'COMQA')
_TEST_DATASETS = ('GEO', 'ATIS', 'CLEVR', 'CWQ', 'LOW_CWQ', 'NLVR2', 'COMQA')
_EXTRA_DATASETS = ('LOW_DROP_train_history', 'LOW_DROP_dev_history',
                   'LOW_DROP_train_nfl', 'LOW_DROP_dev_nfl')


def _valid_dataset_names():
    """Return every dataset identifier accepted by the dataset prompts.

    The list holds 'all', each '<NAME>_<split>' combination and the extra
    LOW_DROP subsets.
    """
    names = ['all']
    names += [name + '_train' for name in _TRAIN_DATASETS]
    names += [name + '_dev' for name in _DEV_DATASETS]
    names += [name + '_test' for name in _TEST_DATASETS]
    names += _EXTRA_DATASETS
    return names


def _prompt_choice(prompt, choices, error_message, lowercase=True):
    """Ask `prompt` repeatedly until the stripped answer is one of `choices`.

    Args:
        prompt: Text shown to the user.
        choices: Collection of accepted answers.
        error_message: Text printed after each rejected answer.
        lowercase: Lower-case the answer before checking it; disabled for
            dataset names, which are case-sensitive.

    Returns:
        The accepted answer.
    """
    while True:
        answer = input(prompt).strip()
        if lowercase:
            answer = answer.lower()
        if answer in choices:
            return answer
        print(error_message)


def _prompt_yes_no(prompt):
    """Ask a y/n question until answered; return True for 'y'."""
    answer = _prompt_choice(prompt, ('y', 'n'), 'Please type either "y" or "n".')
    return answer == 'y'


def _prompt_count(prompt, error_message):
    """Ask for a non-negative integer until one is typed; return it as int."""
    while True:
        answer = input(prompt).strip()
        # isdecimal() (unlike isdigit()) only accepts characters that int()
        # can parse, so inputs such as a superscript digit no longer crash.
        if answer.isdecimal():
            return int(answer)
        print(error_message)


def _prompt_non_empty(prompt, error_message):
    """Ask `prompt` until a non-blank answer is typed; return it stripped."""
    while True:
        answer = input(prompt).strip()
        if answer:
            return answer
        print(error_message)


def _log_outcome(description, success):
    """Log '-- <description> - successful.' or '... - failed.' at INFO level."""
    logger.info('-- %s - %s.', description, 'successful' if success else 'failed')


def _publish(valid_dataset_names):
    """Mark a quota of generation or validation HITs as to-be-published in the log."""
    logger.info('-- Publishing HITs to annotation log - via pipeline CLI.')
    hit_type = _prompt_choice('Set to publish which HIT types? [gen/val]',
                              ('gen', 'val'),
                              'Please type either "gen" or "val".')
    dataset_name = None
    if hit_type == 'gen':
        chosen = _prompt_choice(
            'Which dataset? [all/GEO/ATIS/DROP/CLEVR/SPIDER/HOTPOT/CWQ/ACADEMIC]',
            valid_dataset_names,
            'Please type a valid dataset prefix ([DATASET]_[split]).',
            lowercase=False)
        # 'all' is passed on as an empty dataset prefix.
        dataset_name = "" if chosen == "all" else chosen
    quota_to_publish = _prompt_count('How many HITs to publish?',
                                     'Please type in a valid number.')
    if hit_type == 'gen':
        success = log.set_to_publish_gen_hits(ANNOTATION_LOG_PATH, dataset_name,
                                              quota_to_publish)
    else:
        success = log.set_to_publish_val_hits(ANNOTATION_LOG_PATH, quota_to_publish)
    _log_outcome('Set to publish HITs on log', success)


def _republish():
    """Re-queue rejected annotations in the annotation log."""
    logger.info('-- Republishing rejected annotations to annotation log - via pipeline CLI.')
    success = log.republish_rejected(ANNOTATION_LOG_PATH)
    _log_outcome('Republishing of rejected annotations', success)


def _create(client):
    """Create the HITs marked for publishing on MTurk."""
    logger.info('-- Creating HITs on MTurk - via pipeline CLI.')
    success = create.publish_hits_to_mturk(ANNOTATION_LOG_PATH, BANK_ANNOTATIONS_LOG,
                                           client, True, HIT_LOGS_DIR)
    _log_outcome('Creating HITs on MTurk', success)


def _review(client):
    """Review pending HITs, manually or automatically as chosen by the user."""
    logger.info('-- Reviewing pending HITs - via pipeline CLI.')
    manual_review = _prompt_yes_no('Manually review HITs? [y/n]')
    success = review.review_pending_hits(manual_review, client, ANNOTATION_LOG_PATH,
                                         BANK_ANNOTATIONS_LOG, True, HIT_LOGS_DIR)
    _log_outcome('Reviewing pending HITs', success)


def _update_qualifications(client):
    """Update gen/val qualification scores for one worker or for all workers."""
    logger.info('-- Updating MTurk workers qualification scores - via pipeline CLI.')
    score_type = _prompt_choice('Which score type to update? [gen/val]',
                                ('gen', 'val'),
                                'Please type either "gen" or "val".')
    update_all = _prompt_yes_no('Update ' + score_type + ' scores for all workers? [y/n]')
    if update_all:
        worker_ids = scores.get_all_worker_scores(ANNOTATION_LOG_PATH, score_type).keys()
    else:
        # A blank id is rejected instead of being sent on to the score update.
        worker_ids = [_prompt_non_empty('Please enter single worker id to update?',
                                        'Please type a non-empty worker id.')]
    success = scores.update_scores(client, ANNOTATION_LOG_PATH, score_type, worker_ids)
    _log_outcome('Updating scores', success)


def _stats(valid_dataset_names):
    """Show annotation log statistics, optionally for a single dataset."""
    logger.info('-- Retreiving annotation log status - via pipeline CLI.')
    dataset_name = None
    if _prompt_yes_no('To show stats for specific dataset? [y/n]'):
        dataset_name = _prompt_choice(
            'Which dataset? [GEO/ATIS/DROP/CLEVR/SPIDER/HOTPOT/CWQ/ACADEMIC]',
            valid_dataset_names,
            'Please type a valid dataset prefix ([DATASET]_[split]).',
            lowercase=False)
    success = log.annotation_stats(ANNOTATION_LOG_PATH, split_by_dataset=dataset_name)
    _log_outcome('Retreiving annotation log status', success)


def _delete(client):
    """Delete a single HIT identified by the id the user types in."""
    logger.info('-- Deleting HIT via pipeline CLI [only for reviewable HITs!].')
    hit_id = _prompt_non_empty('Insert HIT id to be deleted',
                               'Please type a non-empty HIT id.')
    success = delete.delete_hit(client, hit_id)
    _log_outcome(f'Deletion of HIT: {hit_id}', success)


def _expire(client):
    """Expire a single HIT identified by the id the user types in."""
    logger.info('-- Expiring HIT via pipeline CLI.')
    hit_id = _prompt_non_empty('Insert HIT id to be expired',
                               'Please type a non-empty HIT id.')
    success = delete.expire_hit(client, hit_id)
    _log_outcome(f'Expiration of HIT: {hit_id}', success)


def main():
    """Run the interactive MTurk annotation pipeline prompt.

    Repeatedly asks which action to take, runs the matching handler and logs
    whether it reported success, until the user types "exit".

    Returns:
        True once the user has chosen to exit.
    """
    # NOTE(review): 'live' presumably selects the production MTurk endpoint,
    # so every action below affects real HITs -- confirm in mturk.get_mturk_client.
    client = mturk.get_mturk_client('live')
    valid_dataset_names = _valid_dataset_names()

    # Dispatch table keyed by the action typed at the top-level prompt.
    handlers = {
        'publish': lambda: _publish(valid_dataset_names),
        'republish': _republish,
        'create': lambda: _create(client),
        'review': lambda: _review(client),
        'qualifications': lambda: _update_qualifications(client),
        'stats': lambda: _stats(valid_dataset_names),
        'delete': lambda: _delete(client),
        'expire': lambda: _expire(client),
    }

    while True:
        action = _prompt_choice(
            'Welcome to the MTurk annotation piepline \nWhat action would you like to take? \n[publish/republish/create/review/qualifications/stats/delete/expire/exit]',
            _ACTIONS,
            'Please type either "publish", "republish", "create", "review", "qualifications", "stats", "delete", "expire", or "exit".')
        if action == 'exit':
            logger.info('-- Exiting pipeline CLI.')
            break
        handlers[action]()

    return True

# Start the interactive CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()

