In [1]:
import requests
import ruamel.yaml as yaml
import json
import os
import sys
from typing import List, Tuple, Dict, Union, Optional, Any

In [None]:
# Path of the YAML file that stores the Memsource username and password.
credentials_path = "memsource_credentials.yml"

In [5]:
#For .ipynb notebook: name of the Memsource project to work on when the cells are run interactively
project_name = "NLLB eval workflow test 3"

In [None]:
#For .py script: take the project name from the first command-line argument.
# NOTE(review): inside a Jupyter kernel sys.argv holds the kernel launcher's own
# arguments (typically "-f <connection file>"), so running this cell in the
# notebook would overwrite project_name with a launcher flag. Only read argv
# when no IPython kernel is loaded.
if "ipykernel" not in sys.modules:
    project_name = sys.argv[1]
print('project', project_name)

### 1- authenticate

In [6]:
def load_credentials(memsource_credentials):
    """Read the Memsource login from a YAML credentials file.

    Args:
        memsource_credentials: Path of a YAML file containing the keys
            ``memsource_username`` and ``memsource_password``.

    Returns:
        A ``(username, password)`` tuple built from those two keys.
    """
    with open(memsource_credentials, "r") as handle:
        config = yaml.safe_load(handle)

    username = config["memsource_username"]
    password = config["memsource_password"]
    return (username, password)

In [7]:
def api_get_token(username, password):
    """Log in to Memsource and return the API token.

    Args:
        username: Memsource user name (must be a ``str``).
        password: Memsource password (must be a ``str``).

    Returns:
        The value of the ``token`` field of the login response.

    Raises:
        AssertionError: If ``username`` or ``password`` is not a string.
        requests.HTTPError: If the server answers the login with an HTTP
            error status (for example wrong credentials).
    """
    assert isinstance(username, str)
    assert isinstance(password, str)

    res = requests.post(
        "https://cloud.memsource.com/web/api2/v1/auth/login",
        json={"userName": username, "password": password},
        # Without a timeout a stalled connection would block forever.
        timeout=30,
    )
    # Report a rejected login as an HTTP error instead of a confusing
    # KeyError on the missing "token" field.
    res.raise_for_status()

    return res.json()["token"]


def authenticate(memsource_credentials):
    """Load the credentials file and exchange it for an API token.

    Args:
        memsource_credentials: Path of the YAML credentials file, passed to
            ``load_credentials``.

    Returns:
        Whatever ``api_get_token`` returns for the loaded username/password.
    """
    username, password = load_credentials(memsource_credentials)
    return api_get_token(username, password)

def arg_checker(
    acceptable: Union[List[Tuple[str, type]], List[str]],
    **kwargs
) -> bool:
    """Validate keyword arguments against a list of accepted names and types.

    Args:
        acceptable: Accepted argument names. Each entry is either a
            ``(name, type)`` tuple or a bare name; a bare name accepts a
            value of any type.
        **kwargs: The keyword arguments to validate.

    Returns:
        True when every keyword argument is accepted.

    Raises:
        AssertionError: If a keyword name is not listed in ``acceptable`` or
            its value is not an instance of the declared type.
    """
    # No logger is defined at module level in this file, so create one here;
    # otherwise the error paths below would fail with NameError.
    import logging

    logger = logging.getLogger(__name__)

    ref = {}
    for entry in acceptable:
        if isinstance(entry, tuple):
            ref[entry[0]] = entry[1]
        else:
            # A bare name places no constraint on the value's type.
            ref[entry] = object

    for key, val in kwargs.items():
        # Raise explicitly rather than via `assert`, so the checks still run
        # when Python is started with -O.
        if key not in ref:
            message = f"Wrong argument name: {key} is not in {ref.keys()}"
            logger.error(message)
            raise AssertionError(message)

        if not isinstance(val, ref[key]):
            message = f"Wrong argument type: {key} = {val} should be of type {ref[key]}"
            logger.error(message)
            raise AssertionError(message)

    return True

def check_status(
    res: requests.Response,
    element: str,
    **kwargs):
    """Return the decoded JSON body of a successful response, else False.

    Args:
        res: The response to inspect.
        element: Label of what was requested; used in the 404 message.
        **kwargs: The request parameters, echoed in the printed message.

    Returns:
        ``res.json()`` when ``res.ok`` is true; otherwise ``False`` after
        printing a short diagnostic.
    """
    if not res.ok:
        if res.status_code == 404:
            message = f"Could not find {element}: parameter: `{kwargs}`"
        else:
            message = f"Problem with the request: parameters: `{kwargs}`"
        print(message)
        return False

    return res.json()

In [8]:
authentication_token = authenticate(credentials_path)
# Never print the full token: cell outputs are saved with the notebook and
# would leak a usable credential. Show only the last characters as a check.
print("token", "*" * 8 + authentication_token[-4:])

token <redacted>


### 1.5 - Get project uid

In [9]:
def api_list_projects(token, **kwargs):
    """Request the project listing and return the decoded JSON body.

    Args:
        token: API token, sent as the ``token`` query parameter.
        **kwargs: Extra query parameters forwarded unchanged.

    Returns:
        The JSON-decoded response body.
    """
    query = {"token": token, **kwargs}
    response = requests.get(
        "https://cloud.memsource.com/web/api2/v1/projects",
        params=query,
    )
    return response.json()

def get_project_uid_from_name(project_name, token):
    """Return the uid of the project whose name equals ``project_name``.

    Args:
        project_name: Exact project name to look for.
        token: API token passed to ``api_list_projects``.

    Returns:
        The ``uid`` of the first listed project with that exact name, or
        ``None`` when no project matches or the listing has no ``content``
        (in which case the error description, if any, is printed).
    """
    # Let the server narrow the listing by name.
    # NOTE(review): the project listing is expected to be paginated, so an
    # unfiltered request would only cover the first page - confirm the `name`
    # filter semantics against the API reference.
    projects_info = api_list_projects(token, name=project_name)

    if 'content' not in projects_info:
        # Fall back to the whole payload if there is no errorDescription key.
        print(projects_info.get('errorDescription', projects_info))
        return None

    # The exact comparison stays in place in case the server-side filter
    # also returns partial matches.
    for project in projects_info['content']:
        if project['name'] == project_name:
            return project['uid']

    return None

In [11]:
# Look up the project selected in project_name instead of repeating the
# literal name, so the notebook and the script variant stay in sync.
project_uid = get_project_uid_from_name(project_name, authentication_token)
print('project_uid', project_uid)

project_uid 11Q1aqH4FuqRE9H1F0DsD61


### 2- List jobs of project

In [12]:
def api_list_jobs(project_uid, token, **kwargs):
    """List the jobs of a project.

    Args:
        project_uid: Uid of the project, inserted into the request URL.
        token: API token, sent as the ``token`` query parameter.
        **kwargs: Optional filters; validated by ``arg_checker`` against the
            accepted names and types listed below and then sent as query
            parameters.

    Returns:
        The result of ``check_status`` for the response: the decoded JSON on
        success, ``False`` otherwise.
    """
    allowed_filters = [
        ("pageNumber", int),
        ("pageSize", int),
        ("count", int),
        ("workflowLevel", int),
        ("status", str),
        ("targetLang", str),
    ]
    assert arg_checker(acceptable=allowed_filters, **kwargs)

    url = f"https://cloud.memsource.com/web/api2/v2/projects/{project_uid}/jobs"
    response = requests.get(url, params={"token": token, **kwargs})

    return check_status(response, "jobs", **kwargs)

def get_completed_job_ids(job_list):
    """Collect the uids of all jobs whose status is ``"COMPLETED"``.

    Args:
        job_list: Mapping with a ``'content'`` list of job dicts, each having
            ``'status'`` and ``'uid'`` keys.

    Returns:
        List of uids, in listing order.
    """
    completed = []
    for job in job_list['content']:
        if job['status'] == "COMPLETED":
            completed.append(job['uid'])
    return completed

def get_filename_of_job(job_uid, job_list):
    """Return the file name of the job with the given uid.

    Args:
        job_uid: Uid to look for.
        job_list: Mapping with a ``'content'`` list of job dicts, each having
            ``'uid'`` and ``'filename'`` keys.

    Returns:
        The ``'filename'`` of the first matching job, or ``None`` if no job
        has that uid.
    """
    matches = (
        job['filename'] for job in job_list['content'] if job['uid'] == job_uid
    )
    return next(matches, None)

In [25]:
#DOUBT / TODO
#The same job ids come back for every workflow level.
#How can the later analyses be directed at the MT or the Tr step?

workflow_level = 3 # 1-MT, 2-Tr, 3-Rv
# Pass the variable (not a repeated literal) so editing workflow_level above
# really changes the query.
job_list = api_list_jobs(project_uid, authentication_token, workflowLevel=workflow_level)
# api_list_jobs returns False when the request failed; stop with a clear
# message instead of a TypeError on job_list['content'] below.
if not job_list:
    raise RuntimeError(f"Could not list jobs of project {project_uid}")
complete_jobs = get_completed_job_ids(job_list)
print('jobs', [j['uid'] for j in job_list['content']])
print('complete', complete_jobs)

jobs ['CxkZxeg5e2H2bFkCapu017', '3VFwgmpxss8r3pe7qVxU81']
complete ['CxkZxeg5e2H2bFkCapu017', '3VFwgmpxss8r3pe7qVxU81']


### 3 - Create analyses

In [39]:
def api_post_create_analysis(token, job_uid_list, analysis_type, countSourceUnits, analysis_name, compareWorkflowLevel=None):
    """Post a request to create an analysis and return the decoded reply.

    Args:
        token: API token, sent as the ``token`` query parameter.
        job_uid_list: Uids of the jobs to analyse; sent as ``{"uid": ...}``
            entries under ``"jobs"``.
        analysis_type: Sent as ``"type"``. When it equals ``"Compare"`` the
            body also carries ``"compareWorkflowLevel"``.
        countSourceUnits: Sent as ``"countSourceUnits"``.
        analysis_name: Sent as ``"name"``.
        compareWorkflowLevel: Only sent for ``"Compare"`` analyses.

    Returns:
        The JSON-decoded response body.
    """
    payload = {
        "jobs": [{"uid": uid} for uid in job_uid_list],
        "type": analysis_type,
        "countSourceUnits": countSourceUnits,
        "name": analysis_name,
    }
    # Only Compare analyses carry the extra workflow-level field.
    if analysis_type == "Compare":
        payload["compareWorkflowLevel"] = compareWorkflowLevel

    response = requests.post(
        "https://cloud.memsource.com/web/api2/v1/analyses",
        params={"token": token},
        json=payload,
    )

    return response.json()

In [26]:
#For .ipynb (work on a single complete job; NOTE: index 1 selects the second entry of complete_jobs, not the first)
job_id = complete_jobs[1]

In [None]:
#for .py script: process every completed job
for job_id in complete_jobs:
    #indent the cells that follow so that they become the body of this loop

In [27]:
# File name of the selected job; it becomes part of the analysis names built below.
doc_id = get_filename_of_job(job_id, job_list)
print('job', job_id, doc_id)

job 3VFwgmpxss8r3pe7qVxU81 Word file.docx


In [21]:
#Pre-analysis on MT
analysis_type = "PreAnalyse"
pre_analysis_name = f"TWBData-{analysis_type}-{job_id}-{doc_id}"
# The analysis name identifies one job, so analyse only that job (as the
# PostAnalyse cell does) and reuse analysis_type instead of repeating the literal.
res = api_post_create_analysis(authentication_token, [job_id], analysis_type=analysis_type,
                         countSourceUnits=False, analysis_name=pre_analysis_name)
print(pre_analysis_name)
print(res)

TWBData-PreAnalyse-CxkZxeg5e2H2bFkCapu017-testdoc4.odt
{'asyncRequest': {'action': 'PRE_ANALYSE', 'asyncResponse': None, 'createdBy': {'userName': 'alp.oktem', 'uid': 'NlFL3FLGprpWblWU7gWOb3', 'id': '819479', 'firstName': 'Alp', 'lastName': 'Oktem', 'role': 'PROJECT_MANAGER', 'email': 'alp.oktem@clearglobal.org'}, 'dateCreated': '2022-11-25T18:31:06+0000', 'id': '837754006', 'project': {'uid': '11Q1aqH4FuqRE9H1F0DsD61', 'name': 'NLLB eval workflow test 3'}, 'parent': None}, 'analyse': {'id': '117601653'}}


In [37]:
# Post-editing analysis on the (post-edited) translation of the selected job
analysis_type = "PostAnalyse"
post_analysis_name = f"TWBData-{analysis_type}-{job_id}-{doc_id}"
res = api_post_create_analysis(
    authentication_token,
    [job_id],
    analysis_type=analysis_type,
    countSourceUnits=False,
    analysis_name=post_analysis_name,
)
print(post_analysis_name)
print(res)

TWBData-PostAnalyse-3VFwgmpxss8r3pe7qVxU81-Word file.docx
{'asyncRequest': {'action': 'POST_ANALYSE', 'asyncResponse': None, 'createdBy': {'userName': 'alp.oktem', 'uid': 'NlFL3FLGprpWblWU7gWOb3', 'id': '819479', 'firstName': 'Alp', 'lastName': 'Oktem', 'role': 'PROJECT_MANAGER', 'email': 'alp.oktem@clearglobal.org'}, 'dateCreated': '2022-11-25T18:48:30+0000', 'id': '837758316', 'project': {'uid': '11Q1aqH4FuqRE9H1F0DsD61', 'name': 'NLLB eval workflow test 3'}, 'parent': None}, 'analyse': {'id': '117602866'}}


In [None]:
#Post editing analysis on revision 
#(How to specify it? Job ids are all the same)


In [40]:
#Compare analysis
#TODO: what should compareWorkflowLevel be?
analysis_type = "Compare"
# Keep this name in its own variable: storing it in post_analysis_name would
# make the later lookup by post_analysis_name fetch this Compare analysis
# instead of the PostAnalyse one.
compare_analysis_name = f"TWBData-{analysis_type}-{job_id}-{doc_id}"
res = api_post_create_analysis(authentication_token, [job_id], analysis_type=analysis_type,
                               countSourceUnits=False, analysis_name=compare_analysis_name,
                               compareWorkflowLevel=1)
print(compare_analysis_name)
print(res)

TWBData-Compare-3VFwgmpxss8r3pe7qVxU81-Word file.docx
{'asyncRequest': {'action': 'COMPARE_ANALYSE', 'asyncResponse': None, 'createdBy': {'userName': 'alp.oktem', 'uid': 'NlFL3FLGprpWblWU7gWOb3', 'id': '819479', 'firstName': 'Alp', 'lastName': 'Oktem', 'role': 'PROJECT_MANAGER', 'email': 'alp.oktem@clearglobal.org'}, 'dateCreated': '2022-11-25T18:49:18+0000', 'id': '837758411', 'project': {'uid': '11Q1aqH4FuqRE9H1F0DsD61', 'name': 'NLLB eval workflow test 3'}, 'parent': None}, 'analyse': {'id': '117602894'}}


### 4 - List analyses

In [None]:
def api_list_analyses(token, project_uid, job_uid):
    """Request the analyses of one job and return the decoded JSON body.

    Args:
        token: API token, sent as the ``token`` query parameter.
        project_uid: Uid of the project, inserted into the request URL.
        job_uid: Uid of the job, inserted into the request URL.

    Returns:
        The JSON-decoded response body.
    """
    url = f"https://cloud.memsource.com/web/api2/v3/projects/{project_uid}/jobs/{job_uid}/analyses"
    response = requests.get(url, params={"token": token})
    return response.json()

def get_analyseuid_from_name(analysis_name, analysis_list):
    """Return the uid of the first analysis carrying the given name.

    Warning: names are not necessarily unique; when several analyses share
    the name, only the first one in the listing is returned.

    Args:
        analysis_name: Name to look for.
        analysis_list: Mapping with a ``'content'`` list of analysis dicts,
            each having ``'name'`` and ``'uid'`` keys.

    Returns:
        The matching ``'uid'``, or ``None`` when no analysis has that name.
    """
    matching_uids = (
        entry['uid']
        for entry in analysis_list['content']
        if entry['name'] == analysis_name
    )
    return next(matching_uids, None)

In [None]:
# Analyses currently attached to the selected job.
# NOTE(review): the create calls return an 'asyncRequest', so freshly requested
# analyses may not be listed until the server has finished them - confirm.
analysis_list = api_list_analyses(authentication_token, project_uid, job_id)

In [None]:
# Show the name of every analysis returned for the job.
print('ANALYSES')
analysis_names = [entry['name'] for entry in analysis_list['content']]
for aname in analysis_names:
    print(aname)

In [None]:
#get_analyseuid_from_name(analysis_name, analysis_list)

### 5- Get analysis result

In [None]:
def api_get_analysis(token, analyse_uid):
    """Request one analysis by uid and return the decoded JSON body.

    Args:
        token: API token, sent as the ``token`` query parameter.
        analyse_uid: Uid of the analysis, inserted into the request URL.

    Returns:
        The JSON-decoded response body.
    """
    url = f"https://cloud.memsource.com/web/api2/v3/analyses/{analyse_uid}"
    response = requests.get(url, params={"token": token})
    return response.json()

In [None]:
pre_analyse_id = get_analyseuid_from_name(pre_analysis_name, analysis_list)
# The lookup yields None when no analysis has that name; stop here rather
# than requesting the non-existent analysis "None".
if pre_analyse_id is None:
    raise LookupError(f"Analysis {pre_analysis_name!r} not found for job {job_id}")
pre_analysis_result = api_get_analysis(authentication_token, pre_analyse_id)

In [None]:
# open() does not create missing folders, so make sure the output directory
# exists before writing.
os.makedirs('results', exist_ok=True)
pre_analysis_out = os.path.join('results', pre_analysis_name + '.json')

with open(pre_analysis_out, 'w') as f:
    # Serialise straight into the file (same output as dumps + write).
    json.dump(pre_analysis_result, f, indent=4)

In [None]:
post_analyse_id = get_analyseuid_from_name(post_analysis_name, analysis_list)
# The lookup yields None when no analysis has that name; stop here rather
# than requesting the non-existent analysis "None".
if post_analyse_id is None:
    raise LookupError(f"Analysis {post_analysis_name!r} not found for job {job_id}")
post_analysis_result = api_get_analysis(authentication_token, post_analyse_id)

In [None]:
# open() does not create missing folders, so make sure the output directory
# exists before writing.
os.makedirs('results', exist_ok=True)
post_analysis_out = os.path.join('results', post_analysis_name + '.json')

with open(post_analysis_out, 'w') as f:
    # Serialise straight into the file (same output as dumps + write).
    json.dump(post_analysis_result, f, indent=4)

### 6- Interpret results

- Translation edit percentage from PE analysis on translation
- Accumulative edit percentage from PE analysis on revision
- Time from PE analysis on revision?