In [5]:
import re
import os
import json
import requests
from datetime import datetime
import warnings
from contextlib import closing
import traceback

import pandas as pd
import toloka.client as toloka

# Silence FutureWarning messages for the whole run.
warnings.simplefilter(action='ignore', category=FutureWarning)

# The token name must match the one used below: the original assigned
# ``OAUTH_token`` but read ``OAUTH_TOKEN``, which raises NameError.
# YOUR_TOKEN is a placeholder to be replaced with the requester's OAuth token.
OAUTH_TOKEN = YOUR_TOKEN
HEADERS = {"Authorization": "OAuth %s" % OAUTH_TOKEN, "Content-Type": "application/JSON"}
toloka_client = toloka.TolokaClient(OAUTH_TOKEN, 'PRODUCTION')

# Run date in day.month.year form (no zero padding); used as a download sub-directory name.
e = datetime.now()
date = '%s.%s.%s' % (e.day, e.month, e.year)

# Workbook that drives the run: assignment rows, per-image marks, refusal-reason
# texts, and the message template (row 0) and message topic (row 1) stored in
# the 'reject_message_russian' column.
working_excel = pd.read_excel('work1.xlsx', sheet_name='Sheet1')
reject_message_russian = working_excel.loc[0, 'reject_message_russian']
reject_message_russian_topic = working_excel.loc[1, 'reject_message_russian']


#write to file functions
def error_writer(request: str) -> None:
    """Append ``request`` verbatim to ``errors.tsv`` in the working directory.

    The text is written unchanged as UTF-8; the caller supplies any tab
    separators and the trailing newline.
    """
    error_log = open('errors.tsv', 'a', encoding='utf-8')
    with error_log:
        error_log.write(request)


def need_manual_writer(request: str) -> None:
    """Append ``request`` verbatim to ``need_manual.tsv`` in the working directory.

    Used to record assignments that need a manual follow-up; the caller
    formats the line, including the trailing newline.
    """
    manual_log = open('need_manual.tsv', mode='a', encoding='utf-8')
    with manual_log:
        manual_log.write(request)


#get all Assignment data
def get_assignment_data(assignment_id: str) -> toloka.Assignment:
    """Fetch one assignment by ID through the module-level ``toloka_client``."""
    return toloka_client.get_assignment(assignment_id=assignment_id)

#get all Pool data
def get_pool_data(pool_id: str) -> toloka.Pool:
    """Fetch one pool by ID through the module-level ``toloka_client``."""
    return toloka_client.get_pool(pool_id=pool_id)

#get all worker data
def get_worker_data(worker_id: str) -> dict:
    """Request a worker's record from the Toloka requester API.

    Args:
        worker_id: ID appended to the ``/requester/workers/`` endpoint URL.

    Returns:
        The decoded JSON body of the response (presumably a JSON object;
        the schema is not visible here).

    Raises:
        requests.RequestException: on connection problems or when the
            request exceeds the timeout.
        ValueError: if the response body is not valid JSON.
    """
    response = requests.get(
        url='https://toloka.dev/api/new/requester/workers/' + worker_id,
        headers=HEADERS,
        # Without a timeout, requests may wait indefinitely on a stalled connection.
        timeout=30,
    )
    return response.json()


#select worker language
def language_select() -> tuple:
    """Return the messaging configuration; only Russian is currently supported.

    Returns:
        A 4-tuple ``(worker_language, reject_message, reject_topic,
        refusal_reassons_column)``: the language code ``'RU'``, the message
        template and topic read from the workbook at module load, and the
        name of the workbook column that holds the refusal-reason texts.
    """
    return (
        'RU',
        reject_message_russian,
        reject_message_russian_topic,
        'refusal_reasons_text_russian',
    )


#main function to send message
def message_send(assignment_data: toloka.Assignment,
                 reject_message: str,
                 reject_topic: str,
                 worker_id: str,
                 refusal_reassons_column: str,
                 check_working_df: pd.DataFrame) -> None:
    """Send the worker a direct message listing the reject reasons per image.

    Args:
        assignment_data: the rejected assignment; only its ``id`` is read here.
        reject_message: message template containing a ``{reject_reasons}``
            placeholder that is replaced with the generated HTML list items.
        reject_topic: message topic.
        worker_id: recipient of the message.
        refusal_reassons_column: workbook column holding refusal-reason texts.
        check_working_df: the workbook row(s) for this assignment restricted
            to the image columns; each non-empty cell holds one or more
            whitespace-separated reason codes.

    Raises:
        requests.RequestException, ValueError: re-raised when the request
            fails or the response is not JSON, after the assignment has been
            recorded in ``need_manual.tsv``.
    """
    assignment_id = assignment_data.id

    # Build {image column name: [reason codes]} from the first matching row,
    # skipping columns that contain NaN.
    # split() without an argument ignores repeated/leading/trailing whitespace,
    # so a cell like "1  2" does not produce an empty code that would later
    # fail numeric conversion.
    filled_columns = check_working_df.dropna(axis=1)
    image_and_reject_reason_dict = {
        image_column_name: str(filled_columns[image_column_name].iloc[0]).split()
        for image_column_name in filled_columns
    }
    print(image_and_reject_reason_dict)

    # Create the HTML message.
    reject_reasons_for_html = reject_reasons_for_html_maker(
        image_and_reject_reason_dict, refusal_reassons_column, assignment_data)
    reject_message = reject_message.replace('{reject_reasons}', reject_reasons_for_html)
    # NOTE(review): topic and text are sent under the "EN" key even though the
    # templates are the Russian ones — kept as-is; confirm this is intended.
    message_body = {
        "topic": {
            "EN": reject_topic,
        },
        "text": {
            "EN": reject_message,
        },
        "recipients_select_type": "DIRECT",
        "recipients_ids": [worker_id],
        "answerable": True
    }

    try:
        send_msg = requests.post('https://toloka.dev/api/v1/message-threads/compose',
                                 headers=HEADERS, json=message_body,
                                 timeout=30).json()
    except (requests.RequestException, ValueError):
        # The caller rejects the set before messaging, so a failed request
        # still needs a manual message; record it, then let the caller log
        # the error as before.
        need_manual_writer(f"{assignment_id}\tотправить сообщение\n")
        raise

    if 'created' in send_msg:
        print('Отправили сообщение')
    else:
        print('Сообщение не отправлено: ', send_msg)
        need_manual_writer(f"{assignment_id}\tотправить сообщение\n")

#create html message
def reject_reasons_for_html_maker(image_and_reject_reason_dict: dict,
                                  refusal_reassons_column: str,
                                  assignment_data: toloka.Assignment) -> str:
    """Render the reject reasons as a run of HTML ``<li>`` items.

    Args:
        image_and_reject_reason_dict: maps an image name to the list of
            reason codes (numeric strings) given for that image.
        refusal_reassons_column: column of the module-level ``working_excel``
            that holds the text for each reason.
        assignment_data: unused; kept for interface compatibility.

    Returns:
        One ``<li>`` per (image, reason) pair, concatenated in iteration
        order; an empty string when there is nothing to report.

    Raises:
        ValueError: if a reason code is not numeric.
        IndexError: if a reason code has no row in ``working_excel``.
    """
    item_open = '''
            <li style="margin-top: 0cm; margin-right: 0cm; margin-bottom: 8pt; line-height: normal;
             font-size: 15px; font-family: Calibri, sans-serif; background: white;">
             <strong>
             <span style='font-size:16px;font-family:"Arial",sans-serif;color:#141824;'>'''
    item_close = '''</span></strong></li>'''

    # Compare reason codes numerically. The previous string comparison
    # (str(float(code)).replace('.0', '')) turned e.g. 1.05 into '15' and
    # never matched when pandas loaded the column as floats ('1.0' vs '1').
    reason_numbers = pd.to_numeric(working_excel['refusal_reasons_number'], errors='coerce')
    reason_texts = working_excel[refusal_reassons_column]

    html_items = []
    for phrase, reject_reasons in image_and_reject_reason_dict.items():
        for reject_reason in reject_reasons:
            matching_texts = reason_texts[reason_numbers == float(reject_reason)]
            if matching_texts.empty:
                raise IndexError(
                    f'refusal reason {reject_reason!r} not found in refusal_reasons_number')
            reject_reason_text = matching_texts.values[0]
            html_items.append(
                item_open + reject_reason_text + f' (фото: {phrase})' + item_close)
    return ''.join(html_items)

#reject_set
def reject_set(assignment_id: str) -> None:
    """Reject the assignment via ``toloka_client`` with a fixed public comment, then log it."""
    toloka_client.reject_assignment(
        public_comment='Есть ошибки',
        assignment_id=assignment_id,
    )
    print('Отклонили сет ', assignment_id)


#download all image attachments and save the remaining answers as JSON
def download_data(assignment_data: toloka.Assignment,
                  check_working_df: pd.DataFrame) -> None:
    """Download an assignment's image attachments and save its other answers.

    Files go to ``bald_sets/<date>/<assignment_id>/``: every output field whose
    name contains ``'img'`` is downloaded as ``<assignment_id>_<n>.jpg``, and
    all remaining output fields are written to ``<assignment_id>.json``
    (with ``age`` and ``gender`` defaulting to ``None``).

    Any failure is reported on stdout and appended to ``errors.tsv`` instead
    of being raised.

    Args:
        assignment_data: the assignment; its ``id`` and first solution are read.
        check_working_df: unused; kept for interface compatibility.
    """
    assignment_id = assignment_data.id
    json_data = {'age': None, 'gender': None}
    cur_dir = os.path.join('bald_sets', date, assignment_id)
    try:
        # exist_ok=True so that a set can be retried on the same day after a
        # partial failure; previously the leftover directory made every retry
        # fail with FileExistsError.
        os.makedirs(cur_dir, exist_ok=True)
        solution = assignment_data.solutions[0]
        img_num = 1
        for toloka_output_name, output_value in solution.output_values.items():
            if 'img' in toloka_output_name:
                # For image fields the output value is the attachment ID.
                filepath = os.path.join(cur_dir, f'{assignment_id}_{img_num}.jpg')
                with open(filepath, 'wb') as out_f:
                    toloka_client.download_attachment(attachment_id=output_value, out=out_f)
                img_num += 1
            else:
                json_data[toloka_output_name] = output_value

        with open(os.path.join(cur_dir, f'{assignment_id}.json'), 'w', encoding='utf-8') as json_file:
            json.dump(json_data, json_file)

    except Exception as exc:
        print('Не удалось скачать фото')
        error_writer(f"{assignment_id}\t{exc}\n")

#extract the assignment_id from an assignment link
def get_assignment_id_from_link(assignment_link: str) -> str:
    """Return the assignment ID embedded in a requester-interface link.

    The ID is the text that follows ``assignments/``, up to (not including)
    the first ``?``. Raises IndexError when the link has no ``assignments/``
    segment.
    """
    after_marker = assignment_link.split('assignments/')[1]
    assignment_id, _, _ = after_marker.partition('?')
    return assignment_id


def main():
    """Process every assignment listed in the workbook's ``assignment_id`` column.

    For each assignment, the marks in its ``img_up``..``img_right`` cells decide
    the action:

    * ``+`` or ``send`` present: print the assignment link, and download the
      set when ``+`` is present;
    * otherwise, if the assignment is still SUBMITTED: reject it and, unless a
      ``404`` mark is present, message the worker with the reject reasons;
    * otherwise: skip it, since it already has another status.

    Errors for one assignment are written to ``errors.tsv`` and do not stop
    the remaining assignments from being processed.
    """
    for assignment_cell_in_excel in working_excel['assignment_id'].dropna():
        # Until the cell has been parsed, the raw cell value is the best
        # identifier available for the error log.
        assignment_id = assignment_cell_in_excel
        try:
            # Parsing happens inside the try so that one malformed cell is
            # logged and skipped instead of aborting the whole batch.
            if 'http' in assignment_id:
                assignment_id = get_assignment_id_from_link(assignment_id)
            print('Обрабатываем сет: ', assignment_id)

            assignment_data = get_assignment_data(assignment_id)
            pool_data = get_pool_data(pool_id=assignment_data.pool_id)
            project_id = pool_data.project_id
            worker_id = assignment_data.user_id
            # The worker record was previously fetched here but never used;
            # the call was dropped so that a failure of that extra request
            # cannot cause the set to be skipped.

            _, reject_message, reject_topic, refusal_reassons_column = language_select()

            check_working_df = working_excel.loc[
                working_excel['assignment_id'] == assignment_cell_in_excel
            ].loc[:, 'img_up':'img_right']
            marks = check_working_df.values

            if '+' in marks or 'send' in marks:
                assignment_link = f'https://platform.toloka.ai/requester/project/{project_id}/pool/{assignment_data.pool_id}/assignments/{assignment_id}?direction=ASC'
                print(assignment_link)
                if '+' in marks:
                    print('Начинаем скачивать сет')
                    download_data(assignment_data, check_working_df)
            elif str(assignment_data.status) == 'Status.SUBMITTED':
                print('Начинаем отклонение сета')
                reject_set(assignment_id)
                if '404' not in marks:
                    message_send(assignment_data, reject_message, reject_topic, worker_id,
                                 refusal_reassons_column, check_working_df)
                else:
                    print('Сообщение не отправляем')
                    print('Навык не выдаем')
            else:
                print('У сета уже другой статус: ', assignment_data.status, ', пропускаем')

            print('-' * 50)

        except toloka.exceptions.DoesNotExistApiError:
            print(assignment_id, ' - такого сета не найдено')
        except Exception:
            # Top-level boundary: record the full traceback and continue.
            error_message = traceback.format_exc()
            error_writer(f"{assignment_id}\t{error_message}\n")
            print(assignment_id, 'ошибка - прописана в файле')

if __name__ == '__main__':
    main()

Обрабатываем сет:  00025b8fd7--6482e4b3b41b45475d74b5c1
У сета уже другой статус:  Status.REJECTED , пропускаем
--------------------------------------------------
Обрабатываем сет:  00025b8fd7--6482e601ad39145d8bdb4bde
У сета уже другой статус:  Status.REJECTED , пропускаем
--------------------------------------------------
Обрабатываем сет:  00025b8fd7--6482eaa0f84f6d54ffc3ace2
У сета уже другой статус:  Status.REJECTED , пропускаем
--------------------------------------------------
Обрабатываем сет:  00025b8fd7--6482ea2eb41b45475d75c9ad
У сета уже другой статус:  Status.REJECTED , пропускаем
--------------------------------------------------
