# Watson Assistant Translation Notebook 
by: Pratyush Singh

# Watson Assistant Skill Translation 

## Installation 

In [2]:
# After running this cell once, comment out the following code. Packages only need to be installed once.
!pip install --user --upgrade -q "ibm-watson";
!pip install --user --upgrade -q "bokeh==2.0.0";
!pip install --user --upgrade -q "pandas==1.0.1";
!pip install --user --upgrade -q "tqdm==4.43.0";



In [3]:
import ibm_boto3
from ibm_botocore.client import Config, ClientError


In [4]:
# Import required libraries
import pandas as pd
import json
import numpy as np
import re
from tqdm import tqdm 

from IPython.display import display
from ibm_cloud_sdk_core.authenticators import BasicAuthenticator
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson import LanguageTranslatorV3
from ibm_watson import AssistantV1


## Authentication

In [5]:
# Watson Assistant (V1 API) client used by every workspace call in this notebook.
# Authentication is HTTP basic auth with the literal username 'apikey' and the
# service API key as password. Replace the <...> placeholders before running;
# prefer loading the key from an environment variable over pasting it here.
authenticator = BasicAuthenticator('apikey', '<API_KEY_WATSON_ASSISTANT>')
sdk_object = AssistantV1(version='2020-02-05', authenticator=authenticator)
sdk_object.set_service_url('<SERVICE_URL>')

In [1]:
# Identifiers of the skill (workspace) and assistant to translate.
# Only 'workspace_id' is read by the cells visible in this notebook.
assistant_information = {'workspace_id' : '<WORKSPACE_ID>',
                         'assistant_id' : '<ASSISTANT_ID>'}

In [None]:
# Language Translator (V3 API) client used by translate(); authenticates with an IAM API key.
# NOTE(review): the API-key placeholder below is missing its closing '>' —
# replace the whole string, not just the text between the brackets.
lt_authenticator = IAMAuthenticator('<LANGUAGE_TRANSLATOR_API_KEY')
language_translator = LanguageTranslatorV3(version='2020-04-01', authenticator=lt_authenticator)

language_translator.set_service_url('<LANGUAGE_TRANSLATOR_URL>')

In [8]:
def translate(text:str) -> str:
    """Translate English text into Spanish.

    Calls the Watson Language Translator service (model ``en-es``) through the
    module-level ``language_translator`` client.

    Args:
    - text: the input text. Values that are not strings (``None``, NaN cells
      coming from a DataFrame) and strings that are empty or contain only
      whitespace are returned unchanged without calling the service.

    Returns:
    - the first translation returned by the service, or ``text`` itself when
      there is nothing to translate
    """
    # Nothing to translate: skip the network round-trip and avoid failing on
    # missing values (len(None) raised a TypeError in the previous version).
    if not isinstance(text, str) or not text.strip():
        return text

    result = language_translator.translate(text=text, model_id='en-es').get_result()
    # Named differently from the function so the function name is not shadowed.
    translated_text = result['translations'][0]['translation']

    return translated_text

## Intent Translation

### Get the Intents and Training Examples

In [9]:
# Collect the name of every intent in the workspace.
# list_intents returns one page of results per call, so keep following the
# pagination cursor until the service stops offering a next page.
intents = []
cursor = None
while True:
    response = sdk_object.list_intents(workspace_id=assistant_information['workspace_id'],
                                       cursor=cursor).get_result()
    intents.extend(intent['intent'] for intent in response['intents'])
    # 'pagination' / 'next_cursor' are absent on the last page.
    cursor = (response.get('pagination') or {}).get('next_cursor')
    if not cursor:
        break

In [10]:
def _get_intent_examples(intent:str) -> str:
    """Retrieve the training examples of one intent as a comma-separated string.

    Fetches every example of ``intent`` from the workspace named in
    ``assistant_information`` (following the pagination cursor so intents with
    more examples than one page holds are not truncated) and joins the example
    texts with commas.

    Commas that occur inside an example are replaced by spaces first, because
    the comma is used as the separator when the string is split again later.

    Args:
    - intent: the intent to retrieve the examples for

    Returns:
    - examples_list: string of the examples joined together with a comma
      (empty string when the intent has no examples)
    """
    workspace_id = assistant_information['workspace_id']
    example_texts = []
    cursor = None
    while True:
        page = sdk_object.list_examples(workspace_id=workspace_id,
                                        intent=intent,
                                        cursor=cursor).get_result()
        example_texts.extend(example['text'].replace(',', ' ') for example in page['examples'])
        # 'pagination' / 'next_cursor' are absent on the last page.
        cursor = (page.get('pagination') or {}).get('next_cursor')
        if not cursor:
            break

    examples_list = ','.join(example_texts)

    return examples_list
    

In [11]:
# One comma-separated example string per intent, in the same order as `intents`.
# Each element triggers its own call to the Assistant service.
intents_examples = [_get_intent_examples(intent) for intent in intents]

In [12]:
# One row per intent; 'examples' holds that intent's comma-separated example string.
intent_df = pd.DataFrame({"intents": intents, "examples": intents_examples})
intent_df

Unnamed: 0,intents,examples
0,About_San_Jac,Can you provide me some information about San ...
1,Accessibility_Services,"disability services counselor,Does San Jacinto..."
2,Account_Delinquencies,"I have a delinquent account.,I owe a past due ..."
3,Advising,"advising,Am i required to come in and talk to ..."
4,Apply,"admissions,am i eligible for any scholarships,..."
...,...,...
95,Leadership,"South campus provost,Who are the board members..."
96,Library_Assistance,"article databases,ask a librarian,borrow books..."
97,Locations,"bursaries,location for central campus admissio..."
98,Locations_General_Areas,"Does San Jac have a student center?,I have to ..."


Expand the comma-separated list of examples into individual rows (one example per row).

In [13]:
# Split each comma-separated string into a list, build a wide frame with one
# column per example position (indexed by intent), then stack() the columns
# into rows so that every example ends up on its own row.
temp_df = pd.DataFrame(intent_df.examples.str.split(',').tolist(), 
                       index=intent_df.intents).stack()
# Move the 'intents' index level back into a regular column.
temp_df = temp_df.reset_index([0, 'intents'])
temp_df.columns = ['intents', 'examples']
# Work on a copy so later column additions do not touch temp_df.
intents_df = temp_df.copy()

intents_df.head()

Unnamed: 0,intents,examples
0,About_San_Jac,Can you provide me some information about San ...
1,About_San_Jac,How big is San Jac?
2,About_San_Jac,How does San Jac compare to other colleges?
3,About_San_Jac,How many students are there at the college or ...
4,About_San_Jac,How many students attend San Jacinto College?


### Translate the Intent Examples 

In [14]:
# Register tqdm's progress_apply on pandas objects (needed by this and later cells).
tqdm.pandas()

# Translate every example; translate() is called once per row, so this cell is slow.
# The new column is added to intents_df in place.
intents_df['translated_examples'] = intents_df['examples'].progress_apply(lambda x: translate(x))

intents_df.head()

  from pandas import Panel
100%|██████████| 1606/1606 [01:46<00:00, 15.13it/s]


Unnamed: 0,intents,examples,translated_examples
0,About_San_Jac,Can you provide me some information about San ...,¿Me puede proporcionar alguna información sobr...
1,About_San_Jac,How big is San Jac?,¿Qué tan grande es San Jac?
2,About_San_Jac,How does San Jac compare to other colleges?,¿Cómo se compara San Jac con otros colegios?
3,About_San_Jac,How many students are there at the college or ...,¿Cuántos estudiantes hay en el colegio o en el...
4,About_San_Jac,How many students attend San Jacinto College?,¿Cuántos estudiantes asisten a San Jacinto Col...


In [15]:
# Columns written to the intents CSV in the Save section: translated example first, then its intent.
translated_intents = intents_df[['translated_examples', 'intents']]

## Translate Entities

### Retrieve Entities

In [16]:
def get_entities() -> list:
    """Retrieve every entity of the assistant workspace.

    Calls ``list_entities`` with ``export=True`` on the workspace named in
    ``assistant_information`` and follows the pagination cursor so that
    workspaces with more entities than fit on one page are returned in full.

    Returns:
    - list of the entity dicts returned by the service
    """
    workspace_id = assistant_information['workspace_id']
    collected = []
    cursor = None
    while True:
        response = sdk_object.list_entities(workspace_id=workspace_id,
                                            export=True,
                                            cursor=cursor).get_result()
        collected.extend(response['entities'])
        # 'pagination' / 'next_cursor' are absent on the last page.
        cursor = (response.get('pagination') or {}).get('next_cursor')
        if not cursor:
            break

    return collected

In [17]:
# Entity dicts of the workspace; each one is expected to carry a 'values' list (read in the next cell).
entities = get_entities()

In [18]:
# Flatten entities -> values. entity_names, values and synonyms use the same
# filter (value type == 'synonyms') and the same iteration order, so the three
# lists are index-aligned: position i describes one entity value.
entity_names = [entity['entity'] for entity in entities for value in entity['values'] if value['type'] == 'synonyms'] # entity name, repeated once per synonym-type value
not_translated = [entity['entity'] for entity in entities for value in entity['values'] if value['type'] != 'synonyms'] # entity names of values that are not translated (per the original note: sys and contextual entities); not read by any later visible cell
values = [value['value'] for entity in entities for value in entity['values'] if value['type'] == 'synonyms'] # the value text of each synonym-type value
synonyms = [','.join(value['synonyms']) for entity in entities for value in entity['values'] if value['type'] == 'synonyms'] # that value's synonyms joined into one comma-separated string

In [19]:
# One row per entity value: entity name, value text, comma-separated synonyms.
entities_df = pd.DataFrame({"entity": entity_names, "values": values, "synonyms": synonyms})
entities_df.head()

Unnamed: 0,entity,values,synonyms
0,App,SJC,
1,App,Blackboard,
2,App,Safety Center,
3,App,Send Word Now,
4,Area,Restrooms,"Toilets,Men's Room,Women's Room,Rest Rooms,Lat..."


In [20]:
# Save the untranslated entity table as a project data asset (CSV with header, no index).
# NOTE(review): `project` is not defined in any visible cell — presumably the Watson Studio
# project object created by a hidden project-token cell; confirm it exists before running.
project.save_data(file_name="Entities_English.csv", data=entities_df.to_csv(header=True, index=False, encoding='utf-8'), overwrite=True);

In [21]:
# Split each comma-separated synonym string into one column per synonym.
entity_values_df = entities_df['synonyms'].str.split(',', expand=True)
# Transposed view: one column per entity value, one row per synonym position.
entity_values_df_t = entity_values_df.T
# applymap runs translate() on every cell; cells that are None are passed through untouched.
entity_values_translated_t = entity_values_df_t.applymap(lambda x: translate(x) if x is not None else x)

In [22]:
# Preview of the translated synonyms (still transposed: columns are entity values).
entity_values_translated_t.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,345,346,347,348,349,350,351,352,353,354
0,,,,,Aseos,,,Sala de pesas,Edificio de estudiantes,,...,Plazo de pago,,Registro,,,,,,No,Sí
1,,,,,Sala de hombres,,,Sala de trabajo,Edificio de la Unión,,...,Plazo de inscripción,,Registro,,,,,,N,Y
2,,,,,Sala de Mujeres,,,Sala Cardio,SUB,,...,,,Inscríbase en el,,,,,,Nope,Sí
3,,,,,Salas de descanso,,,Gimnasio,,,...,,,Inscríbase,,,,,,Nada,Yep
4,,,,,Letrina,,,Tribunal de baloncesto,,,...,,,,,,,,,anterior 1980,OK


In [23]:
# Transpose back: one row per entity value, one column per synonym position.
entity_values_translated = entity_values_translated_t.T
entity_values_translated.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,Aseos,Sala de hombres,Sala de Mujeres,Salas de descanso,Letrina,Commode,Sala de Niños,Habitación para niñas,Sala de baño,cuarto de baño,...,caca,Lav,dump,Polvo,pee pee,dookey,Tomar una fuga,vaciar mi vejiga,take a crap,la cabeza


In [24]:
# Prepend the entity name and the (still untranslated) value as the first two columns.
# NOTE: insert() modifies the frame in place, so this cell is not re-runnable on its own —
# re-create entity_values_translated (previous cell) before running it again.
entity_values_translated.insert(loc=0, column='entities', value=entities_df['entity'])
entity_values_translated.insert(loc=1, column='values', value=entities_df['values'])
entity_values_translated.head()

Unnamed: 0,entities,values,0,1,2,3,4,5,6,7,...,12,13,14,15,16,17,18,19,20,21
0,App,SJC,,,,,,,,,...,,,,,,,,,,
1,App,Blackboard,,,,,,,,,...,,,,,,,,,,
2,App,Safety Center,,,,,,,,,...,,,,,,,,,,
3,App,Send Word Now,,,,,,,,,...,,,,,,,,,,
4,Area,Restrooms,Aseos,Sala de hombres,Sala de Mujeres,Salas de descanso,Letrina,Commode,Sala de Niños,Habitación para niñas,...,caca,Lav,dump,Polvo,pee pee,dookey,Tomar una fuga,vaciar mi vejiga,take a crap,la cabeza


In [25]:
# Translate the entity values, overwriting the 'values' column in place.
# progress_apply is available because tqdm.pandas() was called in the intent-translation cell.
# NOTE: not idempotent — running this cell twice sends the already-translated values to translate() again.
entity_values_translated['values'] = entity_values_translated['values'].progress_apply(lambda x: translate(x))

100%|██████████| 355/355 [00:22<00:00, 16.02it/s]


## Translate Dialog Nodes

This is the only section that requires a manual upload of a Data Asset.

1. Import the skill into the workspace by clicking on *insert credentials* -- This will auto-populate the cell below with details of your COS instance
2. Next, navigate to your COS bucket where this Notebook is stored and retrieve the resource CRN.
3. In the cell below, set the `SKILL_NAME` variable to the name of your skill. It is best to keep the same name every time.

In [26]:
# Name of the uploaded skill file. Used as credentials_1['FILE'] and as the
# substring filter when searching the COS bucket for the skill; must be set before running.
SKILL_NAME = ""

In [27]:

# @hidden_cell
# The following code contains the credentials for a file in your IBM Cloud Object Storage.
# You might want to remove those credentials before you share your notebook.
# All empty strings below are placeholders that must be filled in (see the steps above).
credentials_1 = {
    'IAM_SERVICE_ID': '',
    'IBM_API_KEY_ID': '',
    'ENDPOINT': '',
    'IBM_AUTH_ENDPOINT': 'https://iam.cloud.ibm.com/oidc/token',
    'BUCKET': '',
    'FILE': SKILL_NAME
}

# Resource CRN of the COS instance; passed as ibm_service_instance_id below.
COS_RESOURCE_CRN = ""

# COS resource handle used by get_bucket_contents() and get_item().
cos = ibm_boto3.resource("s3",
    ibm_api_key_id=credentials_1['IBM_API_KEY_ID'],
    ibm_service_instance_id=COS_RESOURCE_CRN,
    ibm_auth_endpoint=credentials_1['IBM_AUTH_ENDPOINT'],
    config=Config(signature_version="oauth"),
    endpoint_url=credentials_1['ENDPOINT']
)

In [28]:
def get_bucket_contents(bucket_name:str, _filter:str) -> list:
    """List the keys of the objects in a COS bucket whose key contains ``_filter``.

    Iterates over every object of the bucket and keeps the keys that contain
    ``_filter``. Only the keys (file paths) are returned; the file contents are
    NOT downloaded.

    Args:
    bucket_name - name of bucket where the notebook and all data assets are stored
    _filter - substring to look for in the object keys (e.g. the skill file name)

    Returns:
    list of matching object keys. When the bucket cannot be read the error is
    printed (not raised) and an empty list is returned, so the result is always
    a list.
    """
    print("Retrieving bucket contents from: {0}".format(bucket_name))
    try:
        files = cos.Bucket(bucket_name).objects.all()
        matching_keys = [file.key for file in tqdm(files, desc="Retrieving Logs from COS",
                                                   position=0, leave=True) if _filter in file.key]

        return matching_keys

    except ClientError as be:
        print("CLIENT ERROR: {0}\n".format(be))
    except Exception as e:
        print("Unable to retrieve bucket contents: {0}".format(e))

    # Only reached after a reported failure: keep the declared return type
    # instead of implicitly returning None.
    return []

In [29]:
# Key of the first object in the bucket whose key contains SKILL_NAME.
# NOTE(review): the bucket name is hard-coded here rather than read from
# credentials_1['BUCKET']; indexing with [0] fails when no key matches.
skill_file = get_bucket_contents("notebookprodforaskjac-donotdelete-pr-dh0piajhgp2k3g", SKILL_NAME)[0]

Retrieving Logs from COS: 0it [00:00, ?it/s]

Retrieving bucket contents from: notebookprodforaskjac-donotdelete-pr-dh0piajhgp2k3g


Retrieving Logs from COS: 68it [00:00, 79.47it/s]


In [30]:
def get_item(bucket_name, item_name):
    """Download one object from COS and return it as parsed JSON.

    Args:
    bucket_name - name of the bucket that holds the object
    item_name - key of the object inside the bucket

    Returns:
    the object's content decoded as UTF-8 and parsed with ``json.loads``.
    When anything goes wrong the error is printed and ``None`` is returned
    implicitly.
    """
    try:
        response = cos.Object(bucket_name, item_name).get()
        raw_bytes = response['Body'].read()
        return json.loads(raw_bytes.decode("utf-8"))
    except ClientError as be:
        print("CLIENT ERROR: {0}\n".format(be))
    except Exception as e:
        print("Unable to retrieve file contents: {0}".format(e))


In [31]:
# Download and parse the skill JSON located in the previous cell.
# NOTE(review): the bucket name passed here is an empty string — fill in the
# bucket that contains skill_file before running.
skill_json = get_item("", skill_file)

In [32]:
def replace_values(obj, key):
    """Translate the user-facing strings of a nested JSON structure in place.

    Walks ``obj`` recursively and replaces the following strings with the
    output of ``translate()``:
    - the ``label`` of every entry of an ``options`` list,
    - every string stored under the key ``title``,
    - every string stored under ``key``.

    Nothing else inside an ``options`` value is visited. Values that are not
    strings (``None``, numbers, ...) are left untouched instead of being passed
    to ``translate()``, and an ``options`` value that is not a list, or an
    option without a string ``label``, is skipped.

    Args:
    - obj: nested structure of dicts and lists; it is modified in place
    - key: name of the key whose string values are translated (e.g. ``'text'``)

    Returns:
    - tuple ``(text_dict, obj)``. ``text_dict`` is always an empty dict and is
      kept only so the return shape stays unchanged; ``obj`` is the very object
      that was passed in, now translated.
    """
    text_dict = dict()

    def _walk(node):
        """Recursively translate the matching strings below ``node``."""
        if isinstance(node, dict):
            for k, v in node.items():
                # Options are handled first and never recursed into: only the
                # label shown to the user is translated, not the option value.
                if k == 'options':
                    if isinstance(v, list):
                        for option in v:
                            if isinstance(option, dict) and isinstance(option.get('label'), str):
                                option['label'] = str(translate(option['label']))
                    continue
                if isinstance(v, (dict, list)):
                    _walk(v)
                elif (k == 'title' or k == key) and isinstance(v, str):
                    # Re-assigning an existing key keeps the dict size stable,
                    # so this is safe while iterating over items().
                    node[k] = str(translate(v))
        elif isinstance(node, list):
            for item in node:
                _walk(item)

    _walk(obj)

    return text_dict, obj  # obj is returned so the translated JSON can be inspected


In [33]:
# Translate the dialog nodes in place (skill_json['dialog_nodes'] itself is modified).
# `result` is the tuple returned by replace_values: (empty dict, the translated dialog_nodes).
result = replace_values(skill_json['dialog_nodes'], 'text')

## Save 

In [34]:
# Save the three translation results as project data assets.
# The skill asset is written from skill_json (whose dialog nodes were translated
# in place), matching the dated local copy written in the next cell. The tuple
# returned by replace_values is not a skill document and is therefore not used here.
project.save_data(file_name="Translated_Skill.json", data=json.dumps(skill_json, indent=2), overwrite=True);
# Both CSVs are written without header and without index.
project.save_data(file_name="Translated_Entities.csv", data=entity_values_translated.to_csv(header=False, index=False, encoding='utf-8'), overwrite=True);
project.save_data(file_name='Translated_Intents.csv', data=translated_intents.to_csv(header=False, index=False, encoding='utf-8'), overwrite=True);

In [35]:
from datetime import datetime as dt
# Date stamp used in the output file names (two-digit year, e.g. 21-01-13).
today = dt.today().strftime("%y-%m-%d")

# NOTE: this rebinds skill_file, which until now held the COS key of the source skill.
skill_file = f"Translated_Skill_{today}.json"
entity_file = f"Translated_Entities_{today}.csv"
intents_file = f"Translated_Intents_{today}.csv"
zip_file = f"Translated_Results_{today}.zip"

# Write the skill JSON (dialog nodes already translated in place) to the local working directory.
with open(skill_file, 'w') as outfile:
    json.dump(skill_json, outfile, indent=2)

# Write both CSVs without header and without index.
entity_values_translated.to_csv(entity_file, header=False, index=False, encoding='utf-8')
translated_intents.to_csv(intents_file, header=False, index=False, encoding='utf-8')

In [36]:
from zipfile import ZipFile

# Bundle the three result files into a single archive.
# The handle is deliberately not called `zip`: binding that name at notebook
# level would shadow the built-in zip() for every cell executed afterwards.
with ZipFile(zip_file, 'w') as archive:
    # writing each file one by one
    for file in [skill_file, entity_file, intents_file]:
        archive.write(file)

In [37]:
import os
def copy_python_env_file_to_project_asset(project=project, python_env_source_dir='python environemnt source directory', python_env_filename='python environment file name'):
    """Copy a file from the notebook's local file system into the project's data assets.

    Prints the listing of the source directory and the stat result of the file,
    then uploads the file's bytes with ``project.save_data`` (overwriting an
    existing asset of the same name).

    Args:
    - project: object providing ``save_data``; defaults to the notebook-level
      ``project``, which must exist when this function is defined
    - python_env_source_dir: directory that contains the file
    - python_env_filename: name of the file inside that directory; also used as
      the asset name
    """
    filename_with_path=python_env_source_dir+'/'+python_env_filename
    print('Source directory listing: ')
    print(os.listdir(python_env_source_dir))
    print('File: ')
    print(os.stat(filename_with_path))
    print('Copying file {0} from python environment to project assets'.format(python_env_filename))
    # The context manager closes the handle even when save_data raises.
    with open(filename_with_path, 'rb') as file_data:
        project.save_data(data=file_data.read(),file_name=python_env_filename,overwrite=True)
    print('Done.')

In [38]:
# Upload the zip archive from the current working directory to the project's data assets.
copy_python_env_file_to_project_asset(project=project, python_env_source_dir='.', python_env_filename=zip_file)

Source directory listing: 
['Translated_Skill_21-01-13.json', 'Translated_Intents_21-01-13.csv', 'Translated_Entities_21-01-13.csv', 'Translated_Results_21-01-13.zip']
File: 
os.stat_result(st_mode=33184, st_ino=922964, st_dev=1048588, st_nlink=1, st_uid=1000, st_gid=2000, st_size=2015613, st_atime=1610571221, st_mtime=1610571221, st_ctime=1610571221)
Copying file Translated_Results_21-01-13.zip from python environment to project assets
Done.
