# In [8]:
import requests
from datetime import datetime
import json
import decimal
import os
from tqdm import tqdm
import traceback
import re

import pandas as pd
import toloka.client as toloka

import psycopg2
from contextlib import closing

from PIL import Image
from io import BytesIO

# credentials
# Connection string handed to psycopg2.connect().
DB_data = '''
    host=host
    port=port
    sslmode=require
    dbname=dbname
    user=user
    password=password
    target_session_attrs=read-write
'''

# Moment the script started; `date` is the day.month.year form (no zero
# padding) used for the download folder name and the dates written to the DB.
e = datetime.now()
date = f'{e.day}.{e.month}.{e.year}'

# Toloka access: the raw token, REST headers built from it, and the client.
OAUTH_TOKEN = 'OAUTH_TOKEN1'
HEADERS = {
    "Authorization": f"OAuth {OAUTH_TOKEN}",
    "Content-Type": "application/JSON",
}
toloka_client = toloka.TolokaClient(OAUTH_TOKEN, 'PRODUCTION')

# Skill id passed to set_user_skill() after a set has been rejected.
skill_id_reject = 1

# Workbook with the assignments to process; it also holds the reject-message
# texts and the measurement-name translations read further below.
working_excel = pd.read_excel('working_excel 1.2.xlsx', sheet_name='Sheet1')

# Translation table: every column from the English to the Arabian measurement
# names, keeping only the rows where all of these columns are filled.
measurement_df = working_excel.loc[
    :, 'measurements_english_translate':'measurements_arabian_translate'
].dropna(axis=0, how='any')

# Maps a column header of the working sheet to the field it describes:
# "1".."23" are the numbered photo/measurement columns (in this exact order),
# the single letters are the profile columns.
images_order_dict = {
    **{
        str(order): field_name
        for order, field_name in enumerate(
            (
                "front_img",
                "side_img",
                "selfie_img",
                "arm_circumference_cm",
                "arm_length_cm",
                "back_build_cm",
                "calf_circumference_cm",
                "chest_circumference_cm",
                "crotch_height_cm",
                "front_build_cm",
                "hips_circumference_cm",
                "leg_length_cm",
                "neck_circumference_cm",
                "neck_pelvis_length_front_cm",
                "neck_waist_length_back_cm",
                "neck_waist_length_front_cm",
                "pelvis_circumference_cm",
                "shoulder_length_cm",
                "shoulder_width_cm",
                "thigh_circumference_cm",
                "under_chest_circumference_cm",
                "upper_arm_length_cm",
                "waist_circumference_cm",
            ),
            start=1,
        )
    },
    "H": "height",
    "W": "weight",
    "A": "age",
    "G": "gender",
    "E": "race",
    "P": "profession",
}

# Empty result record: every known field starts as None and is filled in
# while a set is processed.
measurements_results = dict.fromkeys(
    (
        "front_img",
        "side_img",
        "selfie_img",
        "arm_circumference_cm",
        "arm_length_cm",
        "back_build_cm",
        "calf_circumference_cm",
        "chest_circumference_cm",
        "crotch_height_cm",
        "front_build_cm",
        "hips_circumference_cm",
        "leg_length_cm",
        "neck_circumference_cm",
        "neck_pelvis_length_front_cm",
        "neck_waist_length_back_cm",
        "neck_waist_length_front_cm",
        "pelvis_circumference_cm",
        "shoulder_length_cm",
        "shoulder_width_cm",
        "thigh_circumference_cm",
        "under_chest_circumference_cm",
        "upper_arm_length_cm",
        "waist_circumference_cm",
        "height",
        "weight",
        "age",
        "gender",
        "race",
    )
)

# One-letter gender code from the working sheet -> stored value.
gender_dict = dict(F='female', M='male')

# One-character ethnicity code from the working sheet -> stored value.
ethnicity_dict = {
    'I': 'indian',
    'C': 'caucasian',
    'B': 'black',
    'L': 'latino',
    'A': 'asian',
    'M': 'maghreb',
    '0': 'null',
}

# query templates
# Column lists for the INSERT templates; they fill the first, unquoted %s.
download_db_columns = (
    '(assignment_id, worker_id, project_id,'
    ' toloka_submit_date, download_date, gender,'
    ' age, nation, status, assignment_link)'
)

send_db_columns = (
    '(assignment_id, worker_id, project_id,'
    ' toloka_submit_date, download_date, gender,'
    ' age, nation, status, send_date, assignment_link)'
)

# New row for a downloaded set (status DOWNLOADED).
query_download_insert = ''' INSERT INTO public.sets %s
                    VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','DOWNLOADED','%s') '''

# Existing row switched to DOWNLOADED; the last slot is the assignment_id filter.
query_download_update = ''' UPDATE public.sets SET worker_id='%s',
                                 project_id='%s', toloka_submit_date='%s',
                                  download_date='%s', gender='%s',
                                   age='%s', nation='%s', status='DOWNLOADED',
                                    assignment_link='%s' WHERE assignment_id ='%s' '''

# New row for a set sent to the requester (status INWORK).
query_send_insert = ''' INSERT INTO public.sets %s
                                VALUES ('%s','%s','%s','%s',
                                '%s','%s','%s','%s','INWORK', '%s', '%s') '''

# Existing row switched to INWORK; the last slot is the assignment_id filter.
query_send_update = ''' UPDATE public.sets SET worker_id='%s',
                         project_id='%s', toloka_submit_date='%s',
                          gender='%s',age='%s', status = 'INWORK',
                           assignment_link='%s',nation='%s',
                            send_date = '%s' WHERE assignment_id ='%s' '''

# Snapshot of every row of public.sets, taken once at start-up; it is used
# later to detect sets and workers that are already known.
with closing(psycopg2.connect(DB_data)) as conn, closing(conn.cursor()) as cursor:
    cursor.execute('''SELECT assignment_id, worker_id, project_id, toloka_submit_date,
                                              download_date, gender, age,
                                              nation, status, send_date, assignment_link FROM public.sets''')
    all_sets_in_db_df = pd.DataFrame(
        cursor.fetchall(),
        columns=[
            'assignment_id',
            'worker_id',
            'project_id',
            'toloka_submit_date',
            'download_date',
            'gender',
            'age',
            'nation',
            'status',
            'send_date',
            'assignment_link',
        ],
    )


# Start every run with empty report files: opening in 'w' mode truncates an
# existing file or creates a missing one.
for _report_path in ('errors.tsv', 'need_manual.tsv'):
    with open(_report_path, 'w', encoding='utf-8'):
        pass

def error_writer(request: str) -> None:
    """Append *request* to ``errors.tsv``, framed by a line of 50 asterisks.

    The record is written as: newline, the text, newline, the asterisks.
    """
    separator = '*' * 50
    with open('errors.tsv', mode='a', encoding='utf-8') as report:
        report.write(''.join(('\n', request, '\n', separator)))

def need_manual_writer(request: str) -> None:
    """Append *request* as one line to ``need_manual.tsv``."""
    with open('need_manual.tsv', mode='a', encoding='utf-8') as report:
        report.write(request + '\n')

# decorator for trying different Toloka tokens
def decorator_change_account(func):
    """Retry *func* under the other Toloka account when access is denied.

    The wrapped call is attempted at most five times. After each
    ``AccessDeniedApiError`` the module-level ``OAUTH_TOKEN``, ``HEADERS`` and
    ``toloka_client`` are rebuilt for the other token before the next attempt.

    Returns:
        A wrapper that returns whatever *func* returns.

    Raises:
        toloka.exceptions.AccessDeniedApiError: if the last attempt is still
            denied. Any other exception from *func* propagates immediately.
    """
    # Local import so this block does not depend on the file-level imports.
    from functools import wraps

    max_tries = 5

    @wraps(func)
    def wrapper(*args, **kwargs):
        global OAUTH_TOKEN, HEADERS, toloka_client
        for attempt in range(1, max_tries + 1):
            try:
                return func(*args, **kwargs)
            except toloka.exceptions.AccessDeniedApiError:
                if attempt == max_tries:
                    # Give up instead of looping forever on a permanent denial.
                    raise
                if OAUTH_TOKEN == 'OAUTH_TOKEN1':
                    OAUTH_TOKEN = 'OAUTH_TOKEN2'
                elif OAUTH_TOKEN == 'OAUTH_TOKEN2':
                    OAUTH_TOKEN = 'OAUTH_TOKEN1'
                HEADERS = {"Authorization": "OAuth %s" % OAUTH_TOKEN, "Content-Type": "application/JSON"}
                toloka_client = toloka.TolokaClient(OAUTH_TOKEN, 'PRODUCTION')
                print('Change Toloka-account')
    return wrapper


# save set to database
def db_update(assignment_id: str,
              worker_id: str,
              project_id: str,
              assignment_data: toloka.Assignment,
              worker_data: dict,
              assignment_link: str,
              check_working_df: pd.DataFrame) -> None:
    """Insert or update the ``public.sets`` row for one assignment.

    Age and nation come from the first solution of the assignment, gender from
    the worker profile; non-empty 'A', 'G' and 'E' cells of the validator row
    override them. A 'send' mark stores the set as INWORK, a '+' mark without
    'send' stores it as DOWNLOADED. Whether the row is inserted or updated
    depends on ``all_sets_in_db_df`` already holding this ``assignment_id``.

    Values are passed to psycopg2 as query parameters rather than formatted
    into the SQL text, so quotes in worker-supplied data cannot break or alter
    the statement. Database errors are reported through ``error_writer`` and
    not re-raised.
    """

    def parameterized(template: str, columns: str = '') -> str:
        # The shared templates quote every value slot for %-formatting.
        # Dropping those quotes turns the slots into driver placeholders.
        sql = template.replace("'%s'", '%s')
        if columns:
            # The INSERT templates start with one unquoted slot: the column list.
            sql = sql.replace('%s', columns, 1)
        return sql

    toloka_date = assignment_data.created
    toloka_submit_date = '%s.%s.%s' % (toloka_date.day, toloka_date.month, toloka_date.year)

    dublicate_set = all_sets_in_db_df.loc[all_sets_in_db_df['assignment_id'] == assignment_id]

    solution = assignment_data.solutions[0]

    # Columns that carry a validator value, and the row with a 0-based index.
    filled_columns = check_working_df.dropna(axis=1)
    validator_row = check_working_df.reset_index()

    age = solution.output_values['age']
    if 'A' in filled_columns:
        age_number = float(validator_row['A'][0])
        # 25.0 -> 25; real fractions are kept instead of having ".0" cut out.
        age = int(age_number) if age_number.is_integer() else age_number

    gender = worker_data.get('gender') or ''
    if 'G' in filled_columns:
        gender = gender_dict[validator_row['G'][0].upper()]

    nation = solution.output_values['race']
    if 'E' in filled_columns:
        nation = ethnicity_dict[validator_row['E'][0].upper()]

    scenario_date = date
    marks = check_working_df.values
    is_new_set = dublicate_set.empty

    if 'send' in marks:
        print('Update sets status to "send to requester"')
        if is_new_set:
            query = parameterized(query_send_insert, send_db_columns)
            params = (assignment_id, worker_id, project_id, toloka_submit_date,
                      scenario_date, gender.upper(), age, nation, scenario_date, assignment_link)
            print('There is no such set, add new')
        else:
            query = parameterized(query_send_update)
            params = (worker_id, project_id, toloka_submit_date, gender.upper(), age,
                      assignment_link, nation, scenario_date, assignment_id)
            print('There is already set in DB, update its status')
    elif '+' in marks:
        print('Update sets status to "download"')
        if is_new_set:
            query = parameterized(query_download_insert, download_db_columns)
            params = (assignment_id, worker_id, project_id, toloka_submit_date,
                      scenario_date, gender.upper(), age, nation, assignment_link)
        else:
            query = parameterized(query_download_update)
            params = (worker_id, project_id, toloka_submit_date, scenario_date,
                      gender.upper(), age, nation, assignment_link, assignment_id)
            print('There is already set in DB, update its status')
    else:
        # Neither mark present: there is no statement to run.
        print('Some error')
        error_writer(f'{assignment_id}\tno "+" or "send" mark, nothing to update')
        return

    # Send every value as text, exactly as the %-formatted statement rendered it.
    params = tuple(str(value) for value in params)

    try:
        with closing(psycopg2.connect(DB_data)) as conn:
            with closing(conn.cursor()) as cursor:
                cursor.execute(query, params)
                conn.commit()
                print('Update data')
    except psycopg2.Error as e:
        print('Some error')
        error_writer(f'{assignment_id}\t{e}')

# get Assignment data
def get_assignment_data(assignment_id: str) -> toloka.Assignment:
    """Return the assignment with this id, fetched through ``toloka_client``."""
    return toloka_client.get_assignment(assignment_id=assignment_id)

# get Pool data
def get_pool_data(pool_id: str) -> toloka.Pool:
    """Return the pool with this id, fetched through ``toloka_client``."""
    return toloka_client.get_pool(pool_id=pool_id)

# get Worker data
def get_worker_data(worker_id: str) -> dict:
    """Fetch the worker profile from the Toloka requester REST API.

    Args:
        worker_id: Toloka id of the worker.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: on a timeout, a connection failure or a
            non-2xx HTTP status, so an error payload is never mistaken for a
            worker profile.
    """
    response = requests.get(
        url='https://toloka.dev/api/new/requester/workers/' + worker_id,
        headers=HEADERS,
        # Without a timeout a stalled connection blocks the whole run.
        timeout=30,
    )
    response.raise_for_status()
    return response.json()


# skill giving
@decorator_change_account
def skill_give(assignment_id: str, worker_id: str) -> None:
    """Set the reject skill (``skill_id_reject``) of the worker to 100.

    ``assignment_id`` is accepted but not used.
    """
    skill_value = decimal.Decimal('100')
    toloka_client.set_user_skill(
        skill_id=skill_id_reject,
        user_id=worker_id,
        value=skill_value,
    )
    print('Skill added')


# send message
def message_send(assignment_data: toloka.Assignment,
                 reject_message: str,
                 reject_topic: str,
                 worker_id: str,
                 refusal_reassons_column: str,
                 check_working_df: pd.DataFrame,
                 measurements_translate: dict) -> None:
    """Send the worker a direct message listing the reject reasons.

    Every filled validator column except 'reject_reasons' is mapped through
    ``images_order_dict`` to a field name, and its cell is split on whitespace
    into reason numbers. The resulting HTML list replaces the
    ``{reject_reasons}`` marker in *reject_message*, which is then posted to
    the Toloka message-threads endpoint.

    If the request fails, or the response has no 'created' key, the assignment
    is recorded through ``need_manual_writer``.
    """
    assignment_id = assignment_data.id

    validator_row = check_working_df.reset_index()
    # split() without arguments ignores repeated or surrounding spaces, which
    # would otherwise produce empty reason tokens that cannot be parsed.
    image_and_reject_reason_dict = {
        images_order_dict[str(image_column_name)]: str(validator_row[image_column_name][0]).split()
        for image_column_name in check_working_df.dropna(axis=1)
        if image_column_name != 'reject_reasons'
    }

    reject_reasons_for_html = reject_reasons_for_html_maker(image_and_reject_reason_dict, refusal_reassons_column, measurements_translate)
    reject_message = reject_message.replace('{reject_reasons}', reject_reasons_for_html)
    message_body = {
        "topic": {
            "EN": reject_topic,
        },
        "text": {
            "EN": reject_message,
        },
        "recipients_select_type": "DIRECT",
        "recipients_ids": [worker_id],
        "answerable": True
    }
    try:
        send_msg = requests.post('https://toloka.dev/api/v1/message-threads/compose', headers=HEADERS,
                                 json=message_body, timeout=30).json()
    except (requests.RequestException, ValueError) as error:
        # Transport failures and non-JSON replies count as "not sent".
        send_msg = {'error': str(error)}

    if 'created' in send_msg:
        print('Message sended')
    else:
        print('Some error: ', send_msg)
        need_manual_writer(f"{assignment_id}\tsend message\n")

# reject set
@decorator_change_account
def reject_set(assignment_id: str) -> None:
    """Reject the assignment in Toloka with a fixed public comment."""
    public_comment = 'Have some mistakes'
    toloka_client.reject_assignment(
        assignment_id=assignment_id,
        public_comment=public_comment,
    )
    print('Отклонили сет ', assignment_id)

# make html message with reject reasons
def reject_reasons_for_html_maker(image_and_reject_reason_dict: dict,
                                  refusal_reassons_column: str,
                                  measurements_translate: dict) -> str:
    """Build the HTML ``<li>`` items for all reject reasons.

    Args:
        image_and_reject_reason_dict: field name -> list of reason numbers.
        refusal_reassons_column: ``working_excel`` column with the reason text.
        measurements_translate: field name -> name shown to the worker.

    Returns:
        One ``<li>`` per (field, reason) pair, concatenated in iteration
        order. Each item holds the reason text followed by the translated
        field name in parentheses.
    """
    item_opening = '''
            <li style="margin-top: 0cm; margin-right: 0cm; margin-bottom: 8pt; line-height: normal;
             font-size: 15px; font-family: Calibri, sans-serif; background: white;">
             <strong>
             <span style='font-size:16px;font-family:"Arial",sans-serif;color:#141824;'>'''
    item_closing = '''</span></strong></li>'''

    html_items = []
    for field_name, reject_reasons in image_and_reject_reason_dict.items():
        image_name = measurements_translate[field_name]
        for reject_reason in reject_reasons:
            # Reason numbers are compared as floats on both sides.
            reason_numbers = working_excel['refusal_reasons_number'].apply(float)
            matching_texts = working_excel.loc[reason_numbers == float(reject_reason), refusal_reassons_column]
            reject_reason_text = matching_texts.values[0]
            html_items.append(item_opening + reject_reason_text + f' ({image_name})' + item_closing)
    return ''.join(html_items)

# get assignment_id from assignment_link
def get_assignment_id_from_link(assignment_id):
    """Extract the assignment id from an assignment URL.

    The id is the text right after the first ``assignments/``, up to the next
    ``assignments/`` or the first ``?``. An ``IndexError`` is raised when the
    link contains no ``assignments/`` part.
    """
    after_marker = assignment_id.split('assignments/')[1]
    id_part, _, _ = after_marker.partition('?')
    return id_part

# select worker language
def language_select(worker_data) -> [str]:
    """Pick the message language for a worker and load the matching texts.

    The language codes below are checked in order against
    ``worker_data['languages']``; the first one present wins and English is
    the fallback.

    Returns:
        A 4-tuple ``(reject_message, reject_topic, refusal_reassons_column,
        measurements_translate)``. The message is row 0 and the topic row 1 of
        the ``reject_message_<language>`` column of ``working_excel``;
        ``measurements_translate`` maps the English measurement names to the
        names in the chosen language.
    """
    language_priority = (
        ('RU', 'russian'),
        ('ES', 'spain'),
        ('FR', 'france'),
        ('DE', 'german'),
        ('TR', 'turkey'),
        ('HI', 'hindi'),
        ('UR', 'urdu'),
        ('AR', 'arabian'),
    )
    language = next(
        (name for code, name in language_priority if code in worker_data['languages']),
        'english',
    )
    print('Язык выбран: ', language)

    refusal_reassons_column = f'refusal_reasons_text_{language}'
    measurement_column = f'measurements_{language}_translate'
    message_column = f'reject_message_{language}'
    reject_topic = working_excel.loc[1, message_column]
    reject_message = working_excel.loc[0, message_column]

    measurements_translate = dict(
        zip(measurement_df['measurements_english_translate'], measurement_df[measurement_column])
    )

    return reject_message, reject_topic, refusal_reassons_column, measurements_translate


def insert_validator_data_to_measurements(check_working_df: pd.DataFrame) -> dict:
    """Copy the validator's values into the global ``measurements_results``.

    Every filled column of the validator row except 'reject_reasons' is
    written to the field named by ``images_order_dict``. The 'G' and 'E' codes
    are expanded through ``gender_dict`` and ``ethnicity_dict``; 'A' is reduced
    to its numeric part. A value of '0' or '0.0' is stored as 'null'.

    Returns:
        The updated ``measurements_results`` dict (the same object that was
        modified in place).
    """
    validator_row = check_working_df.reset_index()
    for column_name in check_working_df.dropna(axis=1):
        if column_name == 'reject_reasons':
            continue
        value = str(validator_row.loc[0, column_name])
        if column_name == 'G':
            value = gender_dict[value.upper()]
        elif column_name == 'E':
            value = ethnicity_dict[value.upper()]
        elif column_name == 'A':
            # Keep the decimal point: a numeric cell arrives as e.g. '25.0',
            # and dropping the dot would turn it into 250.
            age_number = float(re.sub(r'[^0-9.]', '', value))
            value = str(int(age_number)) if age_number.is_integer() else str(age_number)
        if value in ('0', '0.0'):
            value = 'null'
        measurements_results[images_order_dict[str(column_name)]] = value
    return measurements_results


def manual_input_some_data(solution, measurements_results: dict, assignment_id: str, param_name: str) -> str:
    """Resolve the value of *param_name* for a set.

    Order of preference: a truthy value already in *measurements_results*,
    then the worker's ``solution.output_values[param_name]``, and finally a
    value typed in at the console when the worker's value is empty.
    """
    known_value = measurements_results.get(param_name)
    if known_value:
        return known_value

    worker_value = solution.output_values[param_name]
    if worker_value:
        return worker_value

    return input(f'{assignment_id}, {param_name}, need manual edit: ')


# download images
def download_images(assignment_data: toloka.Assignment,
                    check_working_df: pd.DataFrame) -> None:
    """Download the photos of a set and write its ``measurements.json``.

    Files go to ``measurements_sets/<date>/<assignment_id>``. If that folder
    already exists nothing is downloaded. For every solution the output keys
    are sorted into photos (``*img``), measurements (``*cm``, weight, height),
    race and age. Measurement photos are saved as
    ``<key>_<measurement>.jpg``; front, side and selfie photos as
    ``<key>.jpg``. The validator's values are applied once more before the
    JSON is written, so they override what the worker submitted.
    """
    assignment_id = assignment_data.id
    cur_dir = os.path.join('measurements_sets', date, assignment_id)
    if os.path.exists(cur_dir):
        print('Such dir already exists')
        return
    os.makedirs(cur_dir)

    measurements_results = insert_validator_data_to_measurements(check_working_df)
    for solution in assignment_data.solutions:
        # Collected per solution, so a key is only looked up in the solution
        # that actually contains it.
        photos_keys = []
        for key in tqdm(solution.output_values.keys()):
            if key.endswith('img'):
                photos_keys.append(key)
            elif key.endswith('cm') or key in ('weight', 'height'):
                raw_value = manual_input_some_data(solution, measurements_results, assignment_id, param_name=key)
                # str() lets numeric answers go through the same clean-up.
                measurements_results[key] = str(float(re.sub(r'[^0-9.]', '', str(raw_value))))
            elif key == 'race':
                measurements_results[key] = solution.output_values[key]
            elif key == 'age':
                measurements_results[key] = str(solution.output_values[key])

        for photo_key in photos_keys:
            out_f = BytesIO()
            toloka_client.download_attachment(attachment_id=solution.output_values[photo_key], out=out_f)
            img = Image.open(out_f)
            if img.mode not in ('L', 'RGB', 'CMYK'):
                # JPEG has no alpha channel or palette; saving such an image
                # unconverted raises OSError.
                img = img.convert('RGB')
            if photo_key in ('front_img', 'side_img', 'selfie_img'):
                img.save(os.path.join(cur_dir, photo_key + '.jpg'))
            else:
                # "<name>_img" is paired with the measurement "<name>_cm".
                img_to_cm_translate = measurements_results[photo_key[:-3] + 'cm']
                img.save(os.path.join(cur_dir, photo_key + '_' + img_to_cm_translate + '.jpg'))

    measurements_results = insert_validator_data_to_measurements(check_working_df)
    files_info_json = json.dumps(measurements_results)
    print(files_info_json)
    with open(os.path.join(cur_dir, 'measurements.json'), 'w') as f:
        f.write(files_info_json)

# check sets for duplicates
def predownload_set_dublicat_checking_and_download(assignment_data: toloka.Assignment,
                                                   worker_id: str,
                                                   check_working_df: pd.DataFrame) -> None:
    """Download a set, asking for confirmation when it looks like a duplicate.

    A set whose assignment id and worker id are both absent from
    ``all_sets_in_db_df`` is downloaded right away. Otherwise the matching
    rows are printed and the download happens only if the operator answers
    '1'.
    """
    assignment_id = assignment_data.id
    id_is_known = assignment_id in all_sets_in_db_df['assignment_id'].unique()
    worker_is_known = worker_id in all_sets_in_db_df['worker_id'].unique()

    if not (id_is_known or worker_is_known):
        download_images(assignment_data, check_working_df)
        return

    same_set_by_id = all_sets_in_db_df.loc[(all_sets_in_db_df['assignment_id'] == assignment_id)]
    same_set_by_worker = all_sets_in_db_df.loc[(all_sets_in_db_df['worker_id'] == worker_id)]
    reports = (
        ('There is already set with this id: ', same_set_by_id),
        ('There is already set with this worker: ', same_set_by_worker),
    )
    for header, matches in reports:
        if not matches.empty:
            print(header)
            print(matches.to_markdown())

    decision = input('Download? \n 1.Yes \n 2.No')
    if decision != '1':
        print('Cancel download')
        return
    download_images(assignment_data, check_working_df)


def start(assignment_id: str, assignment_cell_in_excel: str) -> None:
    """Process one assignment according to its marks in the working sheet.

    Args:
        assignment_id: Toloka assignment id.
        assignment_cell_in_excel: Raw 'assignment_id' cell value, used to find
            the row in ``working_excel``.

    A '+' mark downloads the set (after the duplicate check). A '+' or 'send'
    mark updates the database. Without either mark a SUBMITTED assignment is
    rejected; unless the row contains 404, the worker is also sent the reject
    message and given the reject skill. Assignments in any other status are
    skipped.
    """
    assignment_data = get_assignment_data(assignment_id)
    project_id = get_pool_data(pool_id=assignment_data.pool_id).project_id
    worker_id = assignment_data.user_id
    worker_data = get_worker_data(worker_id)

    message_parts = language_select(worker_data)
    reject_message, reject_topic, refusal_reassons_column, measurements_translate = message_parts

    row_mask = working_excel['assignment_id'] == assignment_cell_in_excel
    check_working_df = working_excel.loc[row_mask].loc[:, 'reject_reasons':'P']

    marked_plus = "+" in check_working_df.values
    marked_send = 'send' in check_working_df.values

    if marked_plus or marked_send:
        assignment_link = f'https://platform.toloka.ai/requester/project/{project_id}/pool/{assignment_data.pool_id}/assignments/{assignment_id}?direction=ASC'
        print(assignment_link)
        if marked_plus:
            print('Start sets downloading')
            predownload_set_dublicat_checking_and_download(assignment_data, worker_id, check_working_df)
        db_update(assignment_id, worker_id, project_id, assignment_data, worker_data, assignment_link, check_working_df)
    elif str(assignment_data.status) != 'Status.SUBMITTED':
        print('Set has another status: ', assignment_data.status, ', skip')
    else:
        print('Start sets rejection')
        reject_set(assignment_id)
        if 404 in check_working_df.values:
            print('No message')
            print('No skill')
        else:
            message_send(assignment_data, reject_message, reject_topic, worker_id, refusal_reassons_column, check_working_df, measurements_translate)
            skill_give(assignment_id, worker_id)
    print('-' * 50)


def main():
    """Process every non-empty 'assignment_id' cell of the working sheet.

    The global ``measurements_results`` is reset to all-None before each set.
    A cell containing 'http' is treated as a link and reduced to the bare id.
    A set missing in Toloka is only reported on the console; any other
    failure is written to the error file with its traceback and the loop
    continues.
    """
    global measurements_results
    for assignment_cell_in_excel in working_excel['assignment_id'].dropna():
        measurements_results = dict.fromkeys(measurements_results)
        assignment_id = (
            get_assignment_id_from_link(assignment_cell_in_excel)
            if 'http' in assignment_cell_in_excel
            else assignment_cell_in_excel
        )
        print('Processing set: ', assignment_id)
        try:
            start(assignment_id, assignment_cell_in_excel)
        except toloka.exceptions.DoesNotExistApiError:
            print(assignment_id, ' - no such set, may be it is on another account')
        except Exception:
            error_writer(f"{assignment_id}\t{traceback.format_exc()}\n")
            print(assignment_id, 'error - wtite to file')


# Run the batch only when the file is executed as a script.
if __name__ == '__main__':
    main()