## Data

In [14]:
import pandas as pd

# Language of the input terms; it is interpolated into the prompt and the output file names.
source_language = "English"

# Expected input: a tab-separated file without a header row, holding two columns -
# 'code' and 'term' (the term written in the source language). All values are read as strings.
df = pd.read_csv(
    './sample_data/formatted_example_for_translation.tsv',
    sep='\t',
    names=['code', 'term'],
    dtype=str,
    encoding='utf8',
)

In [15]:
# Display the loaded table to check that codes and terms were parsed as expected.
df

Unnamed: 0,code,term
0,88400008,neoplastic cells
1,95550001,peritoneal thickening
2,135819008,very poor general condition
3,72885007,neutropenic
4,225581002,progressive loss of vision
5,231911000,superficial vascularisation


## Translation

In [None]:
from openai import OpenAI
import getpass

# Model used for every translation request, and the directory for generated files.
model = 'gpt-3.5-turbo-0125'
output_path = './'

# Ask for the key interactively so it is never stored in the notebook itself.
api_key = getpass.getpass('Enter your OpenAI API key:')
client = OpenAI(api_key=api_key)

In [None]:
def infer(messages):
    """Send ``messages`` to the chat completions API and return the reply text.

    Uses the module-level ``client`` and ``model``. Only the content of the
    first returned choice is handed back to the caller.
    """
    request_options = dict(
        model=model,
        messages=messages,
        temperature=0.3,
        max_tokens=4095,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    completion = client.chat.completions.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
def format_user_message(message):
    """Wrap ``message`` in a chat message dict carrying the 'user' role."""
    user_message = {'role': 'user'}
    user_message['content'] = message
    return user_message

In [None]:
def get_prompt(source_term):
    """Build the translation prompt for ``source_term``.

    The prompt consists of the instruction line (which names the module-level
    ``source_language``) followed, on a new line, by the term wrapped in
    triple backticks.
    """
    instruction = f"You are the AI assistant of a medical doctor. Given a medical term in {source_language}, translate it into the corresponding Spanish medical term. Return only the translated Spanish term."
    fenced_term = f"```{source_term}```"
    return f"{instruction}\n{fenced_term}"

In [None]:
# Render the prompt for a placeholder term to eyeball the final wording.
example_prompt = get_prompt(f'Some {source_language} term')
print(example_prompt)

In [None]:
def translate(source_term):
    """Translate one source-language term into Spanish via the chat model.

    Builds the prompt with ``get_prompt``, sends it as a single user message
    through ``infer`` and cleans up the reply.

    Args:
        source_term: The term to translate.

    Returns:
        The model's reply with surrounding whitespace and backtick fences removed.

    Raises:
        ValueError: If the model returned no text content.
    """
    print(f'Translating {source_term}')
    result = infer([format_user_message(get_prompt(source_term))])
    if result is None:
        # The reply content is optional in the API response; fail with a clear
        # message instead of an AttributeError on None.
        raise ValueError(f'No translation returned for {source_term!r}')
    # The prompt fences the term in backticks and the reply may come back fenced
    # too, possibly with whitespace or newlines outside or inside the fence.
    # Stripping backticks alone would leave those in place, so trim whitespace
    # on both sides of the fence as well.
    return result.strip().strip('`').strip()

In [None]:
# Translate each term individually. `map` is strictly element-wise, whereas
# `transform` retries a failing callable on the whole Series, which would hide
# the original API error and issue one extra request with the entire column
# as the "term".
df['translated_es'] = df['term'].map(translate)

In [None]:
from pathlib import Path

source_language_lower = source_language.lower()

# Write the results under the configured `output_path` instead of always using
# the current working directory; create the directory if it does not exist yet.
output_dir = Path(output_path)
output_dir.mkdir(parents=True, exist_ok=True)

# Full table (all columns, with a header row).
df.to_csv(output_dir / f'symptemist_task3_{source_language_lower}_test_translated.tsv', sep='\t', encoding='utf8', index=False)

# formatted - ready to be processed by the cross-lingual SapBERT
df[['code', 'translated_es']].to_csv(output_dir / f'symptemist_task3_{source_language_lower}_test_translated_formatted.tsv', sep='\t', encoding='utf8', header=False, index=False)