<a href="https://colab.research.google.com/github/parus-cristatus/tolokapizza/blob/main/tolokaapi/project_transfer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import json
from tqdm import tqdm
from typing import Generator, Tuple, List

In [None]:
# Base URLs of the sandbox and production API (v1). The trailing slash is
# required: get_call/post_call build the request URL by appending the call
# path directly to the endpoint string.
SANDBOX_ENDPOINT = 'https://sandbox.toloka.dev/api/v1/'
PROD_ENDPOINT = 'https://toloka.dev/api/v1/'

In [None]:
# Placeholder tokens: replace with real values before running the notebook.
# Each token is sent as "Authorization: OAuth <token>" by get_call/post_call.
# NOTE(review): avoid committing real tokens together with the notebook.
SANDBOX_TOKEN = 'xxxxxxxxxxxx'
PROD_TOKEN = 'xxxxxxxxxxxx'

In [None]:
# ID of the production project that is fetched and re-created in the sandbox.
PROJECT_ID = '108202'

In [None]:
def get_call(endpoint: str, call: str, token: str, params: dict=None) -> dict:
    """Returns the decoded json body of a GET request.

    The request URL is the endpoint with the call path appended as-is, and
    the token is sent in an "Authorization: OAuth ..." header. The HTTP
    status code is not checked; whatever json the server answered with is
    returned.

    Arguments:
        endpoint: base API url (sandbox or prod), ending with a slash
        call: resource path appended to the endpoint, e.g. 'projects'
        token: toloka token (sandbox or prod)
        params: optional query string parameters of the request

    Examples:
        >>> endpoint_from = 'https://toloka.dev/api/v1/'
        >>> token_from = 'KpC_UWW4h17E...'
        >>> get_call(endpoint_from, 'projects', token_from, {'limit': 100})
    """
    auth_headers = {"Authorization": "OAuth " + token}
    url = f"{endpoint}{call}"
    return requests.get(url=url, headers=auth_headers, params=params).json()

In [None]:
def post_call(endpoint: str, call: str, token: str, data: str) -> dict:
    """Returns the decoded json body of a POST request.

    The request URL is the endpoint with the call path appended as-is. The
    body is sent unchanged together with an OAuth authorization header and
    a json content type. The HTTP status code is not checked; whatever json
    the server answered with is returned.

    Arguments:
        endpoint: base API url (sandbox or prod), ending with a slash
        call: resource path appended to the endpoint, e.g. 'projects'
        token: toloka token (sandbox or prod)
        data: already serialized request body (e.g. result of json.dumps)

    Examples:
        >>> endpoint_to = 'https://sandbox.toloka.dev/api/v1/'
        >>> token_to = 'HQGC_xPF6W...'
        >>> post_call(endpoint_to, 'projects', token_to, json.dumps(project_from))
    """
    request_headers = {
        "Authorization": "OAuth " + token,
        "Content-Type": "application/JSON",
    }
    url = f"{endpoint}{call}"
    reply = requests.post(url, data=data, headers=request_headers)
    return reply.json()

In [None]:
def get_values_from_dict(keys: list, data: dict) -> Generator[str, None, None]:
    """Returns generator of values found under the given keys at any depth.

    The structure is walked recursively through nested dicts and lists.
    Two kinds of matches are yielded:

    * in a dict: the value of an entry whose key is in `keys`, but only
      when that value is a string made of digits;
    * in a list: for an item that is a dict whose 'key' entry is in `keys`,
      the item's 'value' entry, yielded as-is (it may be None or not a
      digit string).

    Values that are not strings and list items that are not dicts are
    skipped instead of raising AttributeError.

    Arguments:
        keys: list keys by which looking for values
        data: dict (or list) from which get the values

    Examples:
        >>> data = {'filter': {'or': [{'category': 'skill', 'key': '29716'}]}}
        >>> list(get_values_from_dict(['key', 'skill_id'], data))
        ['29716']
    """
    if isinstance(data, dict):
        for k, v in data.items():
            # Only strings have isdigit(); ints, None, bools etc. stored under
            # a matching key are not collected.
            if k in keys and isinstance(v, str) and v.isdigit():
                yield v
            if isinstance(v, (dict, list)):
                yield from get_values_from_dict(keys, v)
    elif isinstance(data, list):
        for item in data:
            # Lists may hold plain scalars (strings, numbers); only dict items
            # can carry a 'key'/'value' pair.
            if isinstance(item, dict) and item.get('key') in keys:
                yield item.get('value')
            # Scalars fall through both branches of the recursive call and
            # yield nothing.
            yield from get_values_from_dict(keys, item)
In [None]:
def deep_upd_dict(data: dict, values: dict) -> dict:
    """Returns a copy of the structure with leaf values replaced.

    Nested dicts and lists are rebuilt recursively; the input is not
    modified. Every leaf (anything that is neither a dict nor a list) that
    is present as a key in `values` is replaced by the mapped value, other
    leaves are kept. Dict keys are never replaced.

    Arguments:
        data: dict (or list / leaf value) in which to update the values
        values: dict mapping old leaf values to new ones

    Examples:
        >>> data = {"filter": {'or': [{'category': 'skill', 'key': '29716', 'operator': 'NE'}]}}
        >>> values = {'29716': '11659'}
        >>> deep_upd_dict(data, values)
        {'filter': {'or': [{'category': 'skill', 'key': '11659', 'operator': 'NE'}]}}
    """
    if isinstance(data, list):
        return [deep_upd_dict(item, values) for item in data]

    if isinstance(data, dict):
        rebuilt = {}
        for name, nested in data.items():
            rebuilt[name] = deep_upd_dict(nested, values)
        return rebuilt

    # Leaf: swap it when a replacement is known, otherwise keep it.
    return values.get(data, data)

In [None]:
def get_pools(prj_id: str, status: str, endpoint: str, token: str) -> list:
    """Returns the 'items' list of the pools response for a project.

    A single GET request to 'pools' is made, filtered by project id and by
    the upper-cased status.

    Arguments:
        prj_id: project id
        status: pool status (OPEN, CLOSED, ARCHIVED), case-insensitive
        endpoint: endpoint (sandbox or prod)
        token: toloka token (sandbox or prod)
    """
    query = {'project_id': prj_id, 'status': status.upper()}
    pools_response = get_call(endpoint, 'pools', token, query)
    return pools_response['items']

In [None]:
def get_skill(endpoint: str, skill_id: str, token: str) -> dict:
    """Returns the json response of a GET request for one skill.

    Arguments:
        endpoint: endpoint (sandbox or prod)
        skill_id: skill id, appended to the 'skills/' path
        token: toloka token (sandbox or prod)
    """
    skill_path = f'skills/{skill_id}'
    return get_call(endpoint, skill_path, token)

In [None]:
def create_skill(name: str, endpoint: str, token: str) -> dict:
    """Returns the json response of the POST request that creates a skill.

    The request body is a json object holding only the skill name.

    Arguments:
        name: skill name
        endpoint: endpoint (sandbox or prod)
        token: toloka token (sandbox or prod)
    """
    payload = json.dumps({"name": f"{name}"})
    return post_call(endpoint, 'skills', token, data=payload)

In [None]:
# Get the source project definition from production.
project_from = get_call(PROD_ENDPOINT, f"projects/{PROJECT_ID}", PROD_TOKEN)
# Create a project in the sandbox by posting the production json unchanged.
# The response is kept because its 'id' becomes the project_id of the pool
# and training created later in the notebook.
project_to = post_call(SANDBOX_ENDPOINT, 'projects', SANDBOX_TOKEN, data=json.dumps(project_from))

In [None]:
# Get the closed pools of the production project (one 'pools' request).
pool_list = get_pools(PROJECT_ID, 'CLOSED', PROD_ENDPOINT, PROD_TOKEN)

# Pick the pool to export: the third item of the returned list. Adjust the
# index for the pool you need; fewer than three pools raises IndexError.
interest_pool = pool_list[2]

# Point the pool at the project that was created in the sandbox.
interest_pool['project_id'] = project_to['id']

# Drop the production pool id so it is not sent when the pool is created.
interest_pool.pop('id')

In [None]:
# Copy the training only when the pool has a non-empty
# quality_control.training_requirement entry.
if interest_pool['quality_control'].get('training_requirement'):
    training_pool_id = interest_pool['quality_control'].get('training_requirement')['training_pool_id']
    
    # Fetch the production training, strip its id and re-home it to the
    # sandbox project before posting it.
    prod_training = get_call(PROD_ENDPOINT, f"trainings/{training_pool_id}", PROD_TOKEN)
    prod_training.pop('id')
    prod_training['project_id'] = project_to['id']

    sandbox_training = post_call(SANDBOX_ENDPOINT, 'trainings', SANDBOX_TOKEN, data=json.dumps(prod_training))
    # Make the exported pool reference the training created in the sandbox.
    interest_pool['quality_control']['training_requirement']['training_pool_id'] = sandbox_training['id']

In [None]:
# Collect the distinct values found anywhere in the pool under the keys
# 'key', 'answer_weight_skill_id' and 'skill_id'; they are treated as
# production skill ids by the cells below.
skill_ids = set(get_values_from_dict(['key', 'answer_weight_skill_id', 'skill_id'], interest_pool))
# Bare expression: shows the collected ids as the cell output.
skill_ids

In [None]:
# Create skills with the same name in sandbox.
# new_val maps each production skill id to the id of the skill created in
# the sandbox; it is used afterwards to rewrite the pool.
new_val = {}
for i in tqdm(skill_ids):
    # Look up the production skill to get its name.
    skill = get_skill(PROD_ENDPOINT, i, PROD_TOKEN)
    new_val[i] = create_skill(skill['name'], SANDBOX_ENDPOINT, SANDBOX_TOKEN)['id']

In [None]:
# Build a copy of the pool in which every leaf value equal to a production
# skill id is replaced by the matching sandbox skill id.
upd_pool = deep_upd_dict(interest_pool, new_val)

In [None]:
# Create the pool in the sandbox from the remapped definition; the json
# response is the value of the cell.
post_call(SANDBOX_ENDPOINT, 'pools', SANDBOX_TOKEN, data=json.dumps(upd_pool))