In [None]:
!nvidia-smi

# Common  Utility

In [None]:
!pip install -q python-dotenv

import csv, os, re, json, asyncio
from tqdm import tqdm
from pathlib import Path

HOME_DIR = Path("/content/drive/MyDrive/YOUR_HOME_DIR")
DATA_PATH = HOME_DIR/"data"/"inference_all_aksorn2.csv"

def read_data():
    """Load the CSV at ``DATA_PATH`` and return its rows as a list of dicts.

    Each row is keyed by the column names found in the file's header line.
    """
    print("Opening CSV", DATA_PATH)
    with open(DATA_PATH, mode="r", encoding="utf-8", newline="") as csv_file:
        rows = [row for row in csv.DictReader(csv_file)]
    return rows

def save_data(data):
    """Write ``data`` (a list of row dicts) to the CSV at ``DATA_PATH``.

    The header is the union of the keys of all rows in first-seen order, so
    a row that gained an extra column does not abort the write half-way;
    rows lacking a column get an empty cell (``csv.DictWriter`` default).

    When ``data`` is empty nothing is written: the file is opened in "w"
    mode, which truncates it, so writing an empty list would wipe the
    existing CSV.
    """
    if not data:
        print("No rows to write, leaving", DATA_PATH, "untouched")
        return
    # Compute the header before opening the file so a failure here cannot
    # leave a truncated CSV behind.
    fieldnames = list(dict.fromkeys(key for row in data for key in row))
    print("Writing CSV", DATA_PATH)
    with open(DATA_PATH, "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
    print("Fin writing", DATA_PATH)

from google.colab import runtime
def google_terminate():
    """Call ``google.colab.runtime.unassign()``.

    Every section of this notebook calls this last, after saving its results.
    """
    runtime.unassign()

from google.colab import drive
def google_mount():
    """Mount Google Drive at ``/content/drive`` through ``drive.mount``."""
    drive.mount('/content/drive')

def unmask_token(masked_text, mask_dict):
    """Substitute every placeholder in ``masked_text`` with its original string.

    ``mask_dict`` maps original string -> placeholder; entries are applied in
    the dict's iteration order.
    """
    restored = masked_text
    for original, placeholder in mask_dict.items():
        restored = restored.replace(placeholder, original)
    return restored

# Block until a ".env" file exists in the working directory; every pass
# prompts the user and then re-checks for the file.
while not Path(".env").exists():
    input("Put in the .env file")
from dotenv import load_dotenv
load_dotenv()

# Credentials read from the environment after load_dotenv(); os.getenv
# returns None for any variable that is not set.
GCP_CRED_PATH = os.getenv('GCP_CRED_PATH')
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
HF_API_KEY = os.getenv('HF_API_KEY')


# Instruction used for the "cs" (code-switch) runs: asks for a Thai
# translation that keeps English medical terms untranslated.
# NOTE(review): the text says "the following ... terms" but no list follows;
# it is left untouched because changing it would change model outputs.
CODE_SWITCH_TRANSLATION_PROMPT = r"""
You are a linguist with expertise in medicine and had your training in Thailand.
You are well acquainted to how's Thai MD usually code switched between Thai Language and English when they're communicating medical-related information among each other.
For instance, you never translate the following English medical terms and jargons, symptoms, technical terms, and pharmaceutical terms into Thai.

Hence, task is to examine the medical-related information text input and translated them into Thai with the previously given constraint and information.
"""

# Instruction used for the "mn" (monolingual) runs: plain translation to Thai.
MONOLINGUAL_TRANSLATION_PROMPT = r"""Translate the following text input into Thai in Medical Context"""



# How this Colab notebook works
- Note that some LLMs need to be cloned directly.
- This means that, to save as much GPU memory as possible, we have to **delete the session and rerun the notebook** so that the GPU memory is refreshed.

So the way this notebook works is:
- You run the common utility cell first.
- Then you pick which block you want to run next (and it will terminate the session when it finishes).

#  GPT4  Masking

In [None]:
!pip install -q openai

google_mount()

import openai
# Async OpenAI client used by process_item below.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
OPENAI_ASYNC_CLIENT = openai.AsyncClient(api_key=OPENAI_API_KEY)

# System prompt asking the model to wrap medical terms in HTML-like tags;
# process_mask later extracts whatever sits between a matching tag pair.
GPT4_TAG_PROMPT  = r"""
Annotate the medical report with HTML-like tags. The output should start with <annotated> and end with </annotated>.
Use the following tags to annotate the respective terms:
- <patho> for pathological and medical symptoms terms
- <pharm> for pharmaceutical terms and drugs' names
- <taxo> for scientific names and taxonomical-like names
- <anato> for anatomical terms
- <chem> for chemical names
- <med> for medical practices and jargons
FYI:
- Drug names sometimes start with a single charactor followed by full stop then full name. For example: A. Parafivir, B. Paracetamol.
- Anatomical terms must include limbs, organs, cells, and organelle.
"""
# Rows of the dataset CSV, one dict per row; mutated in place by process_item.
all_data = read_data()

def process_mask(text, raw_masked):
    """Mask the annotated keywords of ``raw_masked`` inside ``text``.

    ``raw_masked`` is the tagged version of the text (``<tag>keyword</tag>``).
    Every distinct keyword found directly between a matching opening and
    closing tag gets a placeholder ``<#X0>``, ``<#X1>``, ... in order of
    first appearance.

    Returns a tuple ``(masked_text, keyword_masks)`` where ``keyword_masks``
    maps keyword -> placeholder.
    """
    keyword_masks = {}
    # The capturing group keeps the tags themselves in the split result, so
    # text and tags alternate and can be inspected as sliding triples.
    xml_split = re.split(r"(</?\w+>)", raw_masked)
    for token_before, token, token_after in zip(xml_split, xml_split[1:], xml_split[2:]):
        is_tag_pair = (
            len(token_before) > 2 and
            len(token_after) == len(token_before) + 1 and
            token_before[0] == "<" and token_before[-1] == ">" and
            token_after[0:2] == "</" and token_after[-1] == ">" and
            token_before[1:-1] == token_after[2:-1]
        )
        # Skip empty / blank keywords (e.g. "<patho></patho>"): replacing ""
        # would insert the placeholder between every character of the text.
        if is_tag_pair and token.strip() and token not in keyword_masks:
            keyword_masks[token] = f"<#X{len(keyword_masks)}>"
    # Longest keywords first so a keyword contained in a longer one does not
    # break the longer match.
    for keyword in sorted(keyword_masks, key=len, reverse=True):
        text = text.replace(keyword, keyword_masks[keyword])
    return text, keyword_masks


async def process_item(item):
    """Annotate one row with GPT-4 and store its masked form on the row.

    Reads ``item["text"]`` and writes ``raw_masked`` (the model's reply),
    ``masked`` (text with placeholders) and ``mask_dict`` (JSON string of the
    keyword -> placeholder mapping) back into ``item``.
    """
    source_text = item["text"]
    chat_messages = [
        {"role": "system", "content": GPT4_TAG_PROMPT},
        {"role": "user", "content": source_text.strip()},
    ]
    completion = await OPENAI_ASYNC_CLIENT.chat.completions.create(
        model='gpt-4-1106-preview',
        messages=chat_messages,
        temperature=0.1,
    )
    annotated_reply = completion.choices[0].message.content
    item["raw_masked"] = annotated_reply
    masked_text, placeholder_map = process_mask(source_text, annotated_reply)
    item["masked"] = masked_text
    item["mask_dict"] = json.dumps(placeholder_map)

# Number of rows annotated concurrently per asyncio.gather call.
CHUNK_SIZE = 60
for i in tqdm(range(0, len(all_data), CHUNK_SIZE)):
    batch = all_data[i: i+CHUNK_SIZE]
    tasks = [process_item(item) for item in batch]
    # Top-level await: relies on the notebook's running event loop.
    await asyncio.gather(*tasks)
    # Debug aid: uncomment to inspect and stop after the first batch.
    # print(batch)
    # break

# Persist the new columns, then release the runtime.
save_data(all_data)
google_terminate()


# Google translate

In [None]:
!pip install -q google-cloud-translate

google_mount()
from google.cloud import translate

# Expose the credential path from .env under GOOGLE_APPLICATION_CREDENTIALS
# before the client is created.
# NOTE(review): os.environ rejects None, so this raises if GCP_CRED_PATH is unset.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = GCP_CRED_PATH
GCP_CLIENT = translate.TranslationServiceClient()

def raw_translate(text: str) -> str:
    """Translate a single English string to Thai with Google Cloud Translation.

    Sends ``text`` as plain text (en-US -> th) and returns the text of the
    first translation in the response.
    """
    request_kwargs = {
        "parent": "YOUR_PROJECT_NAME",
        "contents": [text],
        "mime_type": "text/plain",
        "source_language_code": "en-US",
        "target_language_code": "th",
    }
    response = GCP_CLIENT.translate_text(**request_kwargs)
    return response.translations[0].translated_text

def raw_translate_batch(text_list: list[str]) -> list[str]:
    """Translate several English strings to Thai in one API request.

    Returns the translated texts in the order of ``response.translations``.
    """
    request_kwargs = {
        "parent": "YOUR_PROJECT_NAME",
        "contents": text_list,
        "mime_type": "text/plain",
        "source_language_code": "en-US",
        "target_language_code": "th",
    }
    response = GCP_CLIENT.translate_text(**request_kwargs)
    translated_texts = []
    for translation in response.translations:
        translated_texts.append(translation.translated_text)
    return translated_texts

def translate_batch(text_list : list[str]) ->list[str]:
    """Translate ``text_list`` in requests of at most 60 strings each.

    Returns a list aligned with ``text_list``; a slot keeps "" when the API
    returned no translation for it.
    """
    BATCH_SIZE = 60
    results = [""] * len(text_list)
    for start in tqdm(range(0, len(text_list), BATCH_SIZE)):
        chunk = list(text_list[start:start + BATCH_SIZE])
        translated = raw_translate_batch(chunk)
        # Write each translation back to the position of its source text.
        for position, thai in zip(range(start, start + len(chunk)), translated):
            results[position] = thai
    return results

# Rows of the dataset CSV, one dict per row; process() adds columns in place.
all_data = read_data()

def process(prompt_name, is_masked):
    """Translate every row with Google Translate and store the result.

    Uses the ``masked`` column when ``is_masked`` is true (placeholders are
    restored afterwards from the row's ``mask_dict``), otherwise ``text``.
    The output column is ``google_translate_<prompt_name>``.
    """
    print(prompt_name)
    source_key = "masked" if is_masked else "text"
    translations = translate_batch([row[source_key] for row in all_data])
    column = f"google_translate_{prompt_name}"
    for row, thai in zip(all_data, translations):
        if is_masked:
            thai = unmask_token(thai, json.loads(row["mask_dict"]))
        row[column] = thai

# Fill google_translate_mask and google_translate_no_mask for every row.
process("mask", True)
process("no_mask", False)

# Persist the new columns, then release the runtime.
save_data(all_data)
google_terminate()


# GPT translate

In [None]:
!pip install -q openai

google_mount()
import openai

# Async OpenAI client used by process_item below.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
OPENAI_ASYNC_CLIENT = openai.AsyncClient(api_key=OPENAI_API_KEY)

# Rows of the dataset CSV, one dict per row; process_item adds columns in place.
all_data = read_data()


async def process_item(model, item):
    """Translate one row with the given OpenAI chat ``model`` in four settings.

    The settings combine code-switch ("cs") vs monolingual ("mn") prompt with
    masked vs unmasked input. Each answer is stored in the column
    ``<model>_<setting>``; placeholders are restored for masked inputs.
    """
    plain_text = item["text"].strip()
    masked_text = item["masked"].strip()
    placeholder_map = json.loads(item["mask_dict"])
    settings = (
        ("cs_no_mask", False, True),
        ("mn_no_mask", False, False),
        ("cs_mask", True, True),
        ("mn_mask", True, False),
    )
    for prompt_name, is_masked, is_cs in settings:
        if is_cs:
            system_prompt = CODE_SWITCH_TRANSLATION_PROMPT
        else:
            system_prompt = MONOLINGUAL_TRANSLATION_PROMPT
        user_text = masked_text if is_masked else plain_text
        completion = await OPENAI_ASYNC_CLIENT.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_text},
            ],
            temperature=0.1,
        )
        answer = completion.choices[0].message.content
        if is_masked:
            answer = unmask_token(answer, placeholder_map)
        item[f"{model}_{prompt_name}"] = answer

# Number of rows translated concurrently per asyncio.gather call.
CHUNK_SIZE = 60
# First pass: GPT-4 over the whole dataset.
for i in tqdm(range(0, len(all_data), CHUNK_SIZE)):
    batch = all_data[i: i+CHUNK_SIZE]
    tasks = [process_item("gpt-4-1106-preview", item) for item in batch]
    await asyncio.gather(*tasks)

# Second pass: GPT-3.5 over the whole dataset.
for i in tqdm(range(0, len(all_data), CHUNK_SIZE)):
    batch = all_data[i: i+CHUNK_SIZE]
    tasks = [process_item("gpt-3.5-turbo-1106", item) for item in batch]
    await asyncio.gather(*tasks)

# Persist the new columns, then release the runtime.
save_data(all_data)
google_terminate()


# Gemini  Translate

In [None]:
!pip install -q httpx


google_mount()
import httpx
import time

# Shared async HTTP client for all Gemini requests; closed after the run.
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
HTTPX_CLIENT = httpx.AsyncClient()

# Rows of the dataset CSV, one dict per row; process_item adds columns in place.
all_data = read_data()
async def async_translate(text: str) -> str:
    """Send ``text`` to the Gemini ``generateContent`` endpoint.

    Returns the text of the first candidate when it finished with "STOP".
    Returns "" when there is no candidate, when the candidate stopped for
    any other reason (including "MAX_TOKENS"), or when it carries no text;
    every such case except "MAX_TOKENS" also prints the input and the raw
    response for inspection.
    """
    url = f"https://generativelanguage.googleapis.com/v1/models/gemini-pro:generateContent?key={GEMINI_API_KEY}"
    payload = {
        "contents": [
            {"parts": [{"text": text}]}
        ],
        "safetySettings": [
            {"category": category, "threshold": "BLOCK_NONE"}
            for category in [
                "HARM_CATEGORY_HARASSMENT",
                "HARM_CATEGORY_HATE_SPEECH",
                "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                "HARM_CATEGORY_DANGEROUS_CONTENT",
            ]
        ],
        "generationConfig": {
            "temperature": 0.1,
            "maxOutputTokens": 2048,
            "topP": 1.0,
            "topK": 1,  # raise this if more diverse outputs are wanted
        },
    }
    # json= lets httpx serialise the body and set the JSON Content-Type;
    # passing a pre-serialised str through data= is deprecated in httpx.
    resp = await HTTPX_CLIENT.post(url, json=payload, timeout=None)
    response = resp.json()
    candidates = response.get("candidates") or []
    if not candidates:
        print(text, response)
        return ""
    candidate = candidates[0]
    # .get() so an unexpected candidate shape is logged instead of raising
    # KeyError and aborting every other request in the same gather batch.
    finish_reason = candidate.get("finishReason")
    if finish_reason == "MAX_TOKENS":
        return ""
    if finish_reason == "STOP":
        parts = candidate.get("content", {}).get("parts") or []
        if parts and "text" in parts[0]:
            return parts[0]["text"]
    print(text, response)
    return ""

async def process_item(item):
    """Translate one row with Gemini in four settings.

    The settings combine code-switch ("cs") vs monolingual ("mn") prompt with
    masked vs unmasked input. The instruction and the input are sent as one
    text; each answer is stored in ``gemini_<setting>`` with placeholders
    restored for masked inputs.
    """
    plain_text = item["text"].strip()
    masked_text = item["masked"].strip()
    placeholder_map = json.loads(item["mask_dict"])
    settings = (
        ("cs_no_mask", False, True),
        ("mn_no_mask", False, False),
        ("cs_mask", True, True),
        ("mn_mask", True, False),
    )
    for prompt_name, is_masked, is_cs in settings:
        if is_cs:
            instruction = CODE_SWITCH_TRANSLATION_PROMPT
        else:
            instruction = MONOLINGUAL_TRANSLATION_PROMPT
        user_text = masked_text if is_masked else plain_text
        answer = await async_translate(instruction + "\n\n" + user_text + "\n")
        if is_masked:
            answer = unmask_token(answer, placeholder_map)
        item[f"gemini_{prompt_name}"] = answer



CHUNK_SIZE = 14

for i in tqdm(range(0, len(all_data), CHUNK_SIZE)):
    start_time = time.time()
    batch = all_data[i: i+CHUNK_SIZE]
    tasks = [process_item(item) for item in batch]
    await asyncio.gather(*tasks)
    end_time = time.time()

    duration = end_time - start_time
    duration_needed_to_wait  = 62 -  duration
    if duration > 0:
        print("waiting for RPM in ", duration_needed_to_wait, "seconds")
        await asyncio.sleep(duration_needed_to_wait)

await HTTPX_CLIENT.aclose()

save_data(all_data)
google_terminate()

# NLLB Translator

In [None]:

!pip install -q sentencepiece
!pip install -q accelerate bitsandbytes transformers[sentencepiece] peft

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from collections import defaultdict
from peft import PeftModel

model_name = "facebook/nllb-200-3.3B"  # checkpoint used for the real runs

# Base model in bfloat16 with automatic device placement; the tokenizer is
# configured for English (Latin script) -> Thai.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang="eng_Latn", tgt_lang="tha_Thai")

google_mount()

# Until an adapter is loaded, "lora_model" is simply the base model, so the
# first runs measure the un-finetuned NLLB.
lora_model = model

# Translate Preprocessing code
def raw_translate_en_to_th(text: str, debug=False):
    """Translate one English string to Thai with the current ``lora_model``.

    The input is tokenized, moved to ``cuda:0`` and decoded with sampling at
    temperature 0.1, forcing the Thai language token as first output token.
    With ``debug`` true the intermediate values are printed.

    Returns the decoded translation as ``str``.
    """
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors="pt")
        inputs = {k: v.to(torch.device("cuda:0")) for k, v in inputs.items()}
        finetune_translated_tokens = lora_model.generate(
            **inputs,
            # convert_tokens_to_ids works on old and new transformers;
            # tokenizer.lang_code_to_id was removed from NLLB tokenizers.
            forced_bos_token_id=tokenizer.convert_tokens_to_ids("tha_Thai"),
            do_sample=True, temperature=0.1
        )
        finetune_out = str(tokenizer.batch_decode(finetune_translated_tokens, skip_special_tokens=True)[0])
    if debug:
        print("Text:", text)
        print("Tokenized:", inputs)
        print("Finetune Translated tokens:", finetune_translated_tokens)
        print("Finetune Translated:", finetune_out)
        print("---------------------------")
    return finetune_out

def chunkify(text: str):
    """Yield ``text`` piece by piece so that "".join(pieces) rebuilds it.

    The text is split on blank-line paragraph breaks; the separators are
    yielded too, as whitespace-only pieces that callers pass through
    untranslated. A paragraph longer than 256 tokens is split further at
    single newlines and its lines are greedily packed into sub-chunks that
    stay within the budget, with a "\\n" piece yielded between consecutive
    sub-chunks so the line break is not lost on reassembly. A single line
    that alone exceeds the budget is emitted as its own sub-chunk.
    """
    MAX_TOKENS = 256
    for en_chunk in re.split(r"(\s*\n(?:\s*\n)+\s*)", text):
        if en_chunk.strip() == "":
            yield en_chunk
            continue
        if len(tokenizer(en_chunk)["input_ids"]) <= MAX_TOKENS:
            yield en_chunk
            continue
        # Over budget: such chunks are usually lists of choices, so splitting
        # on single newlines gives natural units to pack greedily.
        en_chunklets = re.split(r"\s*\n\s*", en_chunk)
        current_length = 0
        start_idx = 0
        for i, en_chunklet in enumerate(en_chunklets):
            # Minus the 2 special tokens, plus 1 spare token for the joining newline.
            chunklet_length = len(tokenizer(en_chunklet)["input_ids"]) - 1
            # "i > start_idx" prevents emitting an empty sub-chunk when the
            # very first line of a sub-chunk is over budget on its own.
            if i > start_idx and current_length + chunklet_length > MAX_TOKENS - 1:
                yield "\n".join(en_chunklets[start_idx:i])
                yield "\n"
                start_idx = i
                current_length = chunklet_length
            else:
                # Keep a running total; without it the budget check above
                # would only ever see one line at a time.
                current_length += chunklet_length
        yield "\n".join(en_chunklets[start_idx:])

def mask_newline(masked_text: str, mask_mapping: dict[str, str]) -> str:
    """Replace every newline of ``masked_text`` with a placeholder token.

    The token is taken from ``mask_mapping["\\n"]``; when that entry is
    missing it is created in place with the token "\\x80".
    """
    newline_token = mask_mapping.setdefault("\n", "\x80")
    return masked_text.replace("\n", newline_token)




def unmask_token(translated_text: str, mask_mapping: dict[str, str]) -> str:
    """Undo token masking on a translated string.

    Arguments:
    - translated_text: the translated text that still contains mask tokens
    - mask_mapping: mapping from original string to its mask token

    Returns the text with every mask token replaced by its original string,
    applying the mapping's entries in iteration order.
    """
    restored = translated_text
    for original_str in mask_mapping:
        restored = restored.replace(mask_mapping[original_str], original_str)
    return restored

def translate_en_to_th(masked_text: str, mask_mapping: "dict[str, str] | None" = None) -> str:
    """Translate ``masked_text`` chunk by chunk and reassemble the result.

    Whitespace-only chunks are copied through untouched. In every other
    chunk the newlines are masked, the chunk is translated, and the masks in
    ``mask_mapping`` (plus the newline mask) are restored.

    ``mask_mapping`` maps original string -> mask token. It is copied first
    because the newline entry is added to it; this keeps the caller's dict
    (and a shared default) unmodified.
    """
    mask_mapping = {} if mask_mapping is None else dict(mask_mapping)
    translated_chunk_list = []
    for chunk in chunkify(masked_text):
        if chunk.isspace() or chunk == "":
            translated_chunk_list.append(chunk)
            continue
        masked_chunk = mask_newline(chunk, mask_mapping)
        masked_translated_chunk = raw_translate_en_to_th(masked_chunk)
        translated_chunk_list.append(unmask_token(masked_translated_chunk, mask_mapping))
    return "".join(translated_chunk_list)

# Batch translation preprocessing code
def raw_translate_en_to_th_batch(text_list: list[str], debug=False):
    """Translate a batch of English strings to Thai with the current ``lora_model``.

    The strings are tokenized together with padding, moved to ``cuda:0`` and
    decoded with sampling at temperature 0.1, forcing the Thai language token
    as first output token. With ``debug`` true the intermediate values are
    printed.

    Returns the list of decoded translations from ``tokenizer.batch_decode``.
    """
    with torch.no_grad():
        inputs = tokenizer(text_list, return_tensors="pt", padding=True)
        inputs = {k: v.to(torch.device("cuda:0")) for k, v in inputs.items()}
        finetune_translated_tokens = lora_model.generate(
            **inputs,
            # convert_tokens_to_ids works on old and new transformers;
            # tokenizer.lang_code_to_id was removed from NLLB tokenizers.
            forced_bos_token_id=tokenizer.convert_tokens_to_ids("tha_Thai"),
            do_sample=True, temperature=0.1
        )
        finetune_out = tokenizer.batch_decode(finetune_translated_tokens, skip_special_tokens=True)
    if debug:
        print("Text list", text_list)
        print("Tokenized:", inputs)
        print("Finetune Translated tokens:", finetune_translated_tokens)
        print("Finetune Translated:", finetune_out)
        print("---------------------------")
    return finetune_out


def translate_en_to_th_batch(text_list: list[str], mask_mapping: "dict[str, str] | None" = None) -> list[str]:
    """Translate many texts at once, batching their chunks together.

    Every text is chunked with ``chunkify``. Whitespace-only chunks are kept
    as they are; all other chunks get their newlines masked, are translated
    in batches of 20, unmasked, and written back to their position. Each
    text is then rebuilt by concatenating its chunks.

    ``mask_mapping`` maps original string -> mask token. It is copied first
    because the newline entry is added to it; this keeps the caller's dict
    (and a shared default) unmodified.

    Returns the translated texts, aligned with ``text_list``.
    """
    mask_mapping = {} if mask_mapping is None else dict(mask_mapping)
    chunklet_list_list = [list(chunkify(text)) for text in text_list]
    # Starts as a copy of the source chunks; translated chunks overwrite
    # their slot, whitespace-only chunks stay as they are.
    translated_chunk_list_list = [list(chunklet_list) for chunklet_list in chunklet_list_list]

    masked_chunklet_idx_list = [
        (i, j, mask_newline(chunklet, mask_mapping))
        for i, chunklet_list in enumerate(chunklet_list_list)
        for j, chunklet in enumerate(chunklet_list)
        if chunklet.strip() != ""
    ]
    # Shortest first, so each batch holds chunks of similar length.
    masked_chunklet_idx_list.sort(key=lambda item: len(item[2]))

    with torch.no_grad():
        BATCH_SIZE = 20
        for batch_start_idx in tqdm(range(0, len(masked_chunklet_idx_list), BATCH_SIZE)):
            batch = masked_chunklet_idx_list[batch_start_idx: batch_start_idx + BATCH_SIZE]
            translated_batch = raw_translate_en_to_th_batch([text for _, _, text in batch])
            for idx, (i, j, _) in enumerate(batch):
                translated_chunklet = unmask_token(translated_batch[idx], mask_mapping)
                print(i, j, translated_chunklet)
                translated_chunk_list_list[i][j] = translated_chunklet
    return ["".join(chunk_list) for chunk_list in translated_chunk_list_list]

# Rows of the dataset CSV, one dict per row; process() adds columns in place.
all_data = read_data()

def process(prompt_name, is_masked):
    """Translate every row with the current NLLB model and store the result.

    Uses the ``masked`` column when ``is_masked`` is true (placeholders are
    restored afterwards from the row's ``mask_dict``), otherwise ``text``.
    The output column is ``nllb_<prompt_name>``.
    """
    print(prompt_name)
    source_key = "masked" if is_masked else "text"
    nllb_translations = translate_en_to_th_batch([row[source_key] for row in all_data])
    column = f"nllb_{prompt_name}"
    for row, thai in zip(all_data, nllb_translations):
        if is_masked:
            thai = unmask_token(thai, json.loads(row["mask_dict"]))
        row[column] = thai

# Baseline: lora_model is still the plain base model here.
process("normal_mask", True)
process("normal_no_mask", False)

# (output-column prefix, adapter directory under the Cariva Drive folder)
NLLB_ADAPTERS = [
    ("mach", "nllb_finetuned_mach_2"),
    ("augmented", "nllb_finetuned_augmented"),
    ("mach_augmented", "nllb_finetuned_mach_augmented"),
    ("mach_filt", "nllb_finetuned_mach_comet"),
    ("augmented_filt", "nllb_finetuned_augmented_filt"),
    ("mach_augmented_filt", "nllb_finetuned_mach_augmented_filt"),
]
for adapter_idx, (adapter_name, adapter_dir) in enumerate(NLLB_ADAPTERS):
    adapter_path = f"/content/drive/MyDrive/Cariva/{adapter_dir}"
    if adapter_idx == 0:
        lora_model = PeftModel.from_pretrained(model, adapter_path, adapter_name=adapter_name)
    else:
        # PeftModel.from_pretrained injects the adapter into the base model
        # in place, so wrapping the same base again would stack on top of
        # the previous adapter. Load each further adapter under its own name
        # and make it the only active one instead.
        lora_model.load_adapter(adapter_path, adapter_name=adapter_name)
        lora_model.set_adapter(adapter_name)
    process(f"{adapter_name}_mask", True)
    process(f"{adapter_name}_no_mask", False)

# Persist the new columns, then release the runtime.
save_data(all_data)
google_terminate()

# GPU Model Translation

In [None]:

!pip install -q sentencepiece
!pip install -q accelerate bitsandbytes transformers[sentencepiece] peft

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from collections import defaultdict

HF_API_KEY = os.getenv("HF_API_KEY")
# Prefix of the output columns written by process() below.
model_out_name = "openthaigpt_7b"
model_name = "openthaigpt/openthaigpt-1.0.0-beta-7b-chat-ckpt-hf"  # checkpoint used for the real runs


# Causal LM in bfloat16 with automatic device placement; the tokenizer pads
# on the left.
model = AutoModelForCausalLM.from_pretrained(model_name, token = HF_API_KEY, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name, token = HF_API_KEY, padding_side = "left")
tokenizer.use_default_system_prompt = False
tokenizer.pad_token=tokenizer.eos_token  # needed for Llama 2 or Typhoon, which have no default padding token

def llama_prompt_template(**kwargs):
    """Build a Llama-2 chat prompt from the ``prompt`` and ``text`` keywords.

    ``prompt`` goes inside the <<SYS>> section and ``text`` becomes the user
    turn. Raises KeyError when either keyword is missing.
    """
    system_part = kwargs["prompt"]
    user_part = kwargs["text"]
    pieces = ["<s>[INST] <<SYS>>\n", system_part, "\n<</SYS>>\n\n", user_part, "[/INST]"]
    return "".join(f"{piece}" for piece in pieces)

def llama_deprompt(completion):
    """Return what follows the first "[/INST]" marker in ``completion``.

    The completion is returned unchanged when the marker is absent.
    """
    _, marker, answer = completion.partition("[/INST]")
    return answer if marker else completion

def openthaigpt_prompt_template(**kwargs):
    """Build an instruction/input/response prompt from ``prompt`` and ``text``.

    ``prompt`` fills the "### Instruction:" section and ``text`` the
    "### Input:" section; the result ends with an open "### Response:"
    section. Raises KeyError when either keyword is missing.
    """
    instruction = kwargs["prompt"]
    user_input = kwargs["text"]
    preamble = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request."
    return (
        f"{preamble}\n\n"
        f"### Instruction:\n{instruction}\n\n"
        f"### Input:\n{user_input}\n\n"
        "### Response:\n"
    )

def openthaigpt_deprompt(completion):
    """Return what follows the first "### Response:\\n" marker in ``completion``.

    The completion is returned unchanged when the marker is absent.
    """
    _, marker, answer = completion.partition("### Response:\n")
    return answer if marker else completion

google_mount()

# Translate Preprocessing code
def raw_inference(text:str, debug = False):
    """Run the causal LM on a single prompt and return the decoded output.

    The prompt is tokenized, moved to ``cuda:0`` and completed with sampling
    (temperature 0.1, up to 1024 new tokens, repetition penalty 1.2). With
    ``debug`` true the intermediate values are printed.
    """
    generation_options = dict(
        do_sample=True, temperature=0.1,
        max_new_tokens=1024,
        repetition_penalty=1.2,
    )
    with torch.no_grad():
        encoded = tokenizer(text, return_tensors="pt")
        target_device = torch.device("cuda:0")
        encoded = {name: tensor.to(target_device) for name, tensor in encoded.items()}
        generated_ids = model.generate(**encoded, **generation_options)
        decoded = str(tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0])
    if debug:
        print("Text:", text)
        print("Tokenized:", encoded)
        print("Finetune Translated tokens:", generated_ids)
        print("Finetune Translated:", decoded)
        print("---------------------------")
    return str(decoded)

# Batch translation preprocessing code
def raw_inference_batch(text_list:list[str], debug = False):
    """Run the causal LM on a batch of prompts and return the decoded outputs.

    The prompts are tokenized together with padding, moved to ``cuda:0`` and
    completed with sampling (temperature 0.1, up to 1024 new tokens,
    repetition penalty 1.2). With ``debug`` true the intermediate values are
    printed.
    """
    generation_options = dict(
        do_sample=True, temperature=0.1,
        max_new_tokens=1024,
        repetition_penalty=1.2,
    )
    with torch.no_grad():
        encoded = tokenizer(text_list, return_tensors="pt", padding=True)
        target_device = torch.device("cuda:0")
        encoded = {name: tensor.to(target_device) for name, tensor in encoded.items()}
        generated_ids = model.generate(**encoded, **generation_options)
        decoded_list = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    if debug:
        print("Text list", text_list)
        print("Tokenized:", encoded)
        print("Finetune Translated tokens:", generated_ids)
        print("Finetune Translated:", decoded_list)
        print("---------------------------")
    return decoded_list



def inference_batch(text_list: list[str], mask_mapping: "dict[str, str] | None" = None) -> list[str]:
    """Run the causal LM over ``text_list`` in batches of 10 prompts.

    Prompts are processed longest first, and every result is written back to
    the position of its prompt, so the returned list is aligned with
    ``text_list``. Each output has the masks of ``mask_mapping`` (original
    string -> mask token) restored; with no mapping the outputs are returned
    as decoded.
    """
    mask_mapping = {} if mask_mapping is None else mask_mapping
    # Keep each prompt's original index so sorting does not lose alignment.
    enumerated_text_list = sorted(enumerate(text_list), key=lambda item: len(item[1]), reverse=True)
    translated_text_list = [None for _ in text_list]

    BATCH_SIZE = 10

    with torch.no_grad():
        for batch_start_idx in tqdm(range(0, len(enumerated_text_list), BATCH_SIZE)):
            batch = enumerated_text_list[batch_start_idx: batch_start_idx + BATCH_SIZE]
            translated_batch = raw_inference_batch([text for _, text in batch])
            for idx, (i, _) in enumerate(batch):
                translated_text_list[i] = unmask_token(translated_batch[idx], mask_mapping)
    return translated_text_list

# Rows of the dataset CSV, one dict per row; process() adds columns in place.
all_data = read_data()


def process(prompt_name, is_masked, is_cs):
    """Translate every row with the local causal LM and store the result.

    ``is_cs`` selects the code-switch or the monolingual instruction, and
    ``is_masked`` selects the ``masked`` or the ``text`` column as input
    (placeholders are restored afterwards from the row's ``mask_dict``).
    The output column is ``<model_out_name>_<prompt_name>``.
    """
    print(prompt_name)
    if is_cs:
        instruction = CODE_SWITCH_TRANSLATION_PROMPT
    else:
        instruction = MONOLINGUAL_TRANSLATION_PROMPT
    source_key = "masked" if is_masked else "text"
    source_texts = [row[source_key] for row in all_data]
    llm_inputs = [openthaigpt_prompt_template(prompt=instruction, text=source) for source in source_texts]
    raw_outputs = inference_batch(llm_inputs)
    answers = [openthaigpt_deprompt(raw_output) for raw_output in raw_outputs]
    for row, answer in zip(all_data, answers):
        if is_masked:
            answer = unmask_token(answer, json.loads(row["mask_dict"]))
        row[f"{model_out_name}_{prompt_name}"] = answer


# One pass over the dataset per setting: code-switch / monolingual prompt
# crossed with masked / unmasked input.
process("cs_mask", True, True)
process("cs_no_mask", False, True)
process("mn_mask", True, False)
process("mn_no_mask", False, False)

# Persist the new columns, then release the runtime.
save_data(all_data)
google_terminate()

# Gradio-Client Translator

In [None]:
!pip install -q gradio_client

google_mount()
import asyncio
from gradio_client import Client

# Gradio client for a hosted Llama-2-13B chat Space.
# NOTE(review): the URL pins a specific replica path; confirm it is still live before running.
LLAMA2_13B_CLIENT = Client("https://huggingface-projects-llama-2-13b-chat.hf.space/--replicas/fmwtn/")

# Rows of the dataset CSV, one dict per row; process_item adds columns in place.
all_data = read_data()


async def process_item(item):
    """Translate one row with the hosted Llama-2-13B chat Space in four settings.

    The settings combine code-switch ("cs") vs monolingual ("mn") prompt with
    masked vs unmasked input. Each answer is printed and stored in
    ``llama2_13b_<setting>``, with placeholders restored for masked inputs.
    """
    plain_text = item["text"].strip()
    masked_text = item["masked"].strip()
    placeholder_map = json.loads(item["mask_dict"])
    settings = (
        ("cs_no_mask", False, True),
        ("mn_no_mask", False, False),
        ("cs_mask", True, True),
        ("mn_mask", True, False),
    )
    for prompt_name, is_masked, is_cs in settings:
        if is_cs:
            system_prompt = CODE_SWITCH_TRANSLATION_PROMPT
        else:
            system_prompt = MONOLINGUAL_TRANSLATION_PROMPT
        user_text = masked_text if is_masked else plain_text
        # Positional inputs of the Space's "/chat" endpoint, in order:
        # message, system prompt, max new tokens, temperature, top-p, top-k,
        # repetition penalty.
        job = LLAMA2_13B_CLIENT.submit(
            user_text,
            system_prompt,
            1024,
            0.1,
            1,
            1,
            1.2,
            api_name="/chat"
        )
        completion = await asyncio.wrap_future(job)
        print(completion)
        answer = completion
        if is_masked:
            answer = unmask_token(answer, placeholder_map)
        item[f"llama2_13b_{prompt_name}"] = answer

# Rows per asyncio.gather call; 1 means rows are sent one at a time.
CHUNK_SIZE = 1
for i in tqdm(range(0, len(all_data), CHUNK_SIZE)):
    batch = all_data[i: i+CHUNK_SIZE]
    tasks = [process_item(item) for item in batch]
    await asyncio.gather(*tasks)


# Persist the new columns, then release the runtime.
save_data(all_data)
google_terminate()
