### Import Libraries

In [11]:
import os
import ast
import csv
import time
import openai
import pandas as pd

from dotenv import load_dotenv
from tqdm import tqdm
from glob import glob

### Load Environment Variables

In [5]:
# Let python-dotenv populate the process environment before the keys are read.
load_dotenv()

# OpenAI credentials; each one is None when its environment variable is unset.
api_key = os.getenv("API_KEY")
org_key = os.getenv("ORG_KEY")

### Generate Prompt

In [28]:
def generate_translate_prompt(row):
    """Build the Indonesian-translation prompt for one dataset row.

    Args:
        row: Mapping with ``question`` and ``question_concept`` entries and a
            ``choices`` mapping whose ``text`` entry is an iterable of strings.

    Returns:
        The prompt text: the instruction line, a blank line, then the
        ``Question:``, ``Concept:`` and ``Options:`` lines, with the option
        strings joined by ``", "``.
    """
    question = row['question']
    concept = row['question_concept']
    options = ', '.join(row['choices']['text'])

    prompt_lines = [
        "Translate all the text below to Indonesian:",
        "",
        f"Question: {question}",
        f"Concept: {concept}",
        f"Options: {options}",
    ]
    return "\n".join(prompt_lines)

### OpenAI Request Functions

In [29]:
def get_openai_chat_completion(input_prompt, model_name, temp=0.3, timeout=60):
    """Send a single-turn chat request through ``openai.ChatCompletion.create``.

    Args:
        input_prompt: Text sent as the content of the only ``user`` message.
        model_name: Value passed as the ``model`` argument.
        temp: Value passed as ``temperature`` (default 0.3).
        timeout: Value passed as ``request_timeout`` (default 60).

    Returns:
        Whatever ``openai.ChatCompletion.create`` returns, unmodified.
    """
    messages = [{'role': 'user', 'content': input_prompt}]
    return openai.ChatCompletion.create(
        model=model_name,
        messages=messages,
        temperature=temp,
        request_timeout=timeout,
    )

def get_openai_completion(input_prompt, model_name, max_tokens=256, temp=0.1, timeout=60):
    """Request a text completion through ``openai.Completion.create``.

    Args:
        input_prompt: Text passed as the ``prompt`` argument.
        model_name: Value passed as the ``model`` argument.
        max_tokens: Value passed as ``max_tokens`` (default 256).
        temp: Value passed as ``temperature`` (default 0.1).
        timeout: Value passed as ``request_timeout`` (default 60).

    Returns:
        Whatever ``openai.Completion.create`` returns, unmodified.
    """
    request_args = {
        "model": model_name,
        "prompt": input_prompt,
        "max_tokens": max_tokens,
        "temperature": temp,
        "request_timeout": timeout,
    }
    return openai.Completion.create(**request_args)

def translate_csv_data(row, model_name, history, api_type="instruct"):
    """Translate one row through the OpenAI API, reusing cached responses.

    The prompt is built by ``generate_translate_prompt(row)``. If that prompt is
    already a key of ``history``, the stored response is returned and no request
    is made. Otherwise the request is sent; if it raises, the function waits
    60 seconds and tries exactly once more (a second failure propagates).

    Args:
        row: Row passed to ``generate_translate_prompt``.
        model_name: Model name forwarded to the request helper.
        history: Mapping of prompt -> ``{"response": <text>}`` from earlier calls.
        api_type: ``"instruct"`` (``get_openai_completion``, the default) or
            ``"chat"`` (``get_openai_chat_completion``).

    Returns:
        Tuple ``(input_prompt, response)``; for a fresh request ``response`` is
        the first choice's text with surrounding whitespace stripped.

    Raises:
        ValueError: If a request is needed and ``api_type`` is not supported.
    """
    input_prompt = generate_translate_prompt(row)

    if input_prompt in history:
        return input_prompt, history[input_prompt]["response"]

    # Pick the request helper up front so an unknown api_type fails with a clear
    # error instead of an unbound ``response`` at the return statement.
    if api_type == "chat":
        send_request = get_openai_chat_completion
    elif api_type == "instruct":
        send_request = get_openai_completion
    else:
        raise ValueError(
            f"Unsupported api_type {api_type!r}; expected 'chat' or 'instruct'"
        )

    try:
        completion = send_request(input_prompt, model_name)
    except Exception:
        # Single retry after a fixed pause; the cause of the failure is not inspected.
        print('Caught exception, wait for 1 min...')
        time.sleep(60)
        completion = send_request(input_prompt, model_name)

    first_choice = completion.choices[0]
    if api_type == "chat":
        text = first_choice.message.content
    else:
        text = first_choice.text

    return input_prompt, text.strip()

### CSV-Related Functions

In [30]:
def load_csv_data(file_path):
    """Read a CSV file into a list of row dicts, parsing the ``choices`` column.

    Args:
        file_path: Path of the CSV file; it must have a ``choices`` column whose
            cells are Python-literal text accepted by ``ast.literal_eval``.

    Returns:
        List of dicts, one per data row, keyed by the header names, with
        ``row["choices"]`` replaced by its evaluated literal.

    Raises:
        KeyError: If a row has no ``choices`` column.
        ValueError, SyntaxError: If a ``choices`` cell is not a valid literal.
    """
    # Explicit UTF-8 so non-ASCII text is decoded the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(file_path, newline="", encoding="utf-8") as csvfile:
        rows = []
        for row in csv.DictReader(csvfile):
            row["choices"] = ast.literal_eval(row["choices"])
            rows.append(row)

    return rows


def save_data(samples, file_path):
    """Write a list of row dicts to ``file_path`` as a CSV file.

    The header is taken from the keys of the first row. When ``samples`` is
    empty there is no row to derive a header from, so a notice is printed and
    no file is written.

    Args:
        samples: List of dicts that share the keys of the first element.
        file_path: Destination path; an existing file is overwritten.
    """
    if not samples:
        # Skip instead of failing with IndexError on samples[0].
        print(f'No data to save; CSV file "{file_path}" was not created.')
        return

    header = list(samples[0].keys())

    # Explicit UTF-8 so non-ASCII text is written identically on every platform.
    with open(file_path, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=header)
        writer.writeheader()
        writer.writerows(samples)

    print(f'CSV file "{file_path}" has been created with the data.')

### Run Translation

In [31]:
# Model name handed to translate_csv_data by the translation loop.
model_name = "gpt-3.5-turbo-instruct"

# Give the OpenAI client the credentials read from the environment.
openai.organization = org_key
openai.api_key = api_key

In [32]:
for folder in glob("./eval/*/"):
    for file in glob(f"{folder}*.csv"):
        if "v3" in file:
            fname = file.split("/")[-1]
            print(f"Translating file {fname}")

            os.makedirs(f"{folder}chatgpt-instruct/", exist_ok=True)

            history_path = f"{folder}chatgpt-instruct/history_{fname}"
            if os.path.exists(history_path):
                print(f"Load response history from file {history_path}")
                resp_history_df = pd.read_csv(history_path, converters={'response': lambda x: ast.literal_eval(x)})
                response_history = dict(zip(resp_history_df.prompt, resp_history_df.response))
            else:
                print(f"Initialize response history")
                response_history = {}

            file_data = load_csv_data(file)
            for data in tqdm(file_data):
                prompt, response = translate_csv_data(data, model_name, response_history)
                response_history[prompt] = {"response": response}
        
                resp_history_df = pd.DataFrame({'prompt': response_history.keys(), 'response': response_history.values()})
                resp_history_df.to_csv(history_path, index=False)

                response = response.split("\n")
                data["question"] = str(response[0].split(": ")[-1])
                data["question_concept"] = str(response[1].split(": ")[-1]).lower()
                data["choices"]["text"] = response[2].split(": ")[-1].lower().split(", ")
            
            translated_file = f"{folder}chatgpt-instruct/translated_chatgpt_instruct_{fname}"
            save_data(file_data, translated_file)

Translating file v3_test_name.csv
Load response history from file ./eval/test/chatgpt-instruct/history_v3_test_name.csv


100%|██████████| 46/46 [01:02<00:00,  1.37s/it]


CSV file "./eval/test/chatgpt-instruct/translated_chatgpt_instruct_v3_test_name.csv" has been created with the data.
Translating file v3_test_both.csv
Load response history from file ./eval/test/chatgpt-instruct/history_v3_test_both.csv


100%|██████████| 8/8 [00:09<00:00,  1.14s/it]


CSV file "./eval/test/chatgpt-instruct/translated_chatgpt_instruct_v3_test_both.csv" has been created with the data.
Translating file v3_test_concept.csv
Load response history from file ./eval/test/chatgpt-instruct/history_v3_test_concept.csv


100%|██████████| 17/17 [00:19<00:00,  1.17s/it]


CSV file "./eval/test/chatgpt-instruct/translated_chatgpt_instruct_v3_test_concept.csv" has been created with the data.
Translating file v3_test_option.csv
Load response history from file ./eval/test/chatgpt-instruct/history_v3_test_option.csv


100%|██████████| 46/46 [00:49<00:00,  1.07s/it]


CSV file "./eval/test/chatgpt-instruct/translated_chatgpt_instruct_v3_test_option.csv" has been created with the data.
Translating file v3_validation_name.csv
Load response history from file ./eval/validation/chatgpt-instruct/history_v3_validation_name.csv


100%|██████████| 51/51 [01:01<00:00,  1.21s/it]


CSV file "./eval/validation/chatgpt-instruct/translated_chatgpt_instruct_v3_validation_name.csv" has been created with the data.
Translating file v3_validation_both.csv
Initialize response history


100%|██████████| 10/10 [00:11<00:00,  1.16s/it]


CSV file "./eval/validation/chatgpt-instruct/translated_chatgpt_instruct_v3_validation_both.csv" has been created with the data.
Translating file v3_validation_concept.csv
Initialize response history


100%|██████████| 23/23 [00:25<00:00,  1.10s/it]


CSV file "./eval/validation/chatgpt-instruct/translated_chatgpt_instruct_v3_validation_concept.csv" has been created with the data.
Translating file v3_validation_option.csv
Initialize response history


100%|██████████| 52/52 [01:00<00:00,  1.16s/it]


CSV file "./eval/validation/chatgpt-instruct/translated_chatgpt_instruct_v3_validation_option.csv" has been created with the data.
Translating file v3_train_name.csv
Initialize response history


100%|██████████| 90/90 [02:01<00:00,  1.35s/it]


CSV file "./eval/train/chatgpt-instruct/translated_chatgpt_instruct_v3_train_name.csv" has been created with the data.
Translating file v3_train_both.csv
Initialize response history


100%|██████████| 16/16 [00:18<00:00,  1.16s/it]


CSV file "./eval/train/chatgpt-instruct/translated_chatgpt_instruct_v3_train_both.csv" has been created with the data.
Translating file v3_train_concept.csv
Initialize response history


100%|██████████| 27/27 [00:32<00:00,  1.19s/it]


CSV file "./eval/train/chatgpt-instruct/translated_chatgpt_instruct_v3_train_concept.csv" has been created with the data.
Translating file v3_train_option.csv
Initialize response history


100%|██████████| 81/81 [01:27<00:00,  1.08s/it]

CSV file "./eval/train/chatgpt-instruct/translated_chatgpt_instruct_v3_train_option.csv" has been created with the data.



