### Import Libraries

In [1]:
import os
import ast
import csv
import time
import openai
import pandas as pd

from dotenv import load_dotenv
from tqdm import tqdm
from glob import glob

### Load Environment Variables

In [2]:
load_dotenv()

# Read the OpenAI credentials from the process environment once load_dotenv()
# has run; each value is None when its variable is not set.
api_key = os.getenv("API_KEY")
org_key = os.getenv("ORG_KEY")

### Generate Prompt

In [13]:
def generate_translate_prompt(row):
    """Build the translation prompt for one dataset row.

    Args:
        row: Mapping with a 'question' entry, a 'question_concept' entry and a
            'choices' entry whose 'text' item is an iterable of option strings.

    Returns:
        A prompt string asking for Indonesian and Sundanese translations of the
        question, the concept and the comma-separated options, in JSON format.
    """
    question = row['question']
    concept = row['question_concept']
    options = ', '.join(row['choices']['text'])

    prompt_lines = [
        "Translate all the question, concept, and options below to Indonesian and Sundanese, return all in JSON format, where Indonesian and Sundanese have different key:",
        "",
        f"Question: {question}",
        f"Concept: {concept}",
        f"Options: {options}",
    ]
    return "\n".join(prompt_lines)

### OpenAI Request Functions

In [14]:
def get_openai_chat_completion(input_prompt, model_name, temp=0.3, timeout=60):
    """Send one user message to the chat completion endpoint.

    Args:
        input_prompt: Text sent as the content of a single 'user' message.
        model_name: Model identifier forwarded as ``model``.
        temp: Sampling temperature forwarded as ``temperature``. Defaults to
            0.3, the value this function previously hard-coded.
        timeout: Value forwarded as ``request_timeout``. Defaults to 60, the
            value this function previously hard-coded.

    Returns:
        Whatever ``openai.ChatCompletion.create`` returns, unchanged.
    """
    messages = [
        {
            'role': 'user',
            'content': input_prompt,
        }
    ]
    return openai.ChatCompletion.create(
        model=model_name,
        messages=messages,
        temperature=temp,
        request_timeout=timeout,
    )

def get_openai_completion(input_prompt, model_name, max_tokens=256, temp=0.1, timeout=60):
    """Send a prompt to the (non-chat) completion endpoint.

    Args:
        input_prompt: Text forwarded as ``prompt``.
        model_name: Model identifier forwarded as ``model``.
        max_tokens: Forwarded as ``max_tokens``.
        temp: Forwarded as ``temperature``.
        timeout: Forwarded as ``request_timeout``.

    Returns:
        Whatever ``openai.Completion.create`` returns, unchanged.
    """
    request_args = {
        "model": model_name,
        "prompt": input_prompt,
        "max_tokens": max_tokens,
        "temperature": temp,
        "request_timeout": timeout,
    }
    return openai.Completion.create(**request_args)

def translate_csv_data(row, model_name, history, api_type="instruct"):
    """Translate one dataset row through the OpenAI API, reusing cached answers.

    Args:
        row: Dataset row passed to ``generate_translate_prompt``.
        model_name: Model identifier forwarded to the request helper.
        history: Mapping of prompt -> ``{"response": <text>}``. When the
            generated prompt is already a key, the stored response is returned
            and no request is made.
        api_type: ``"chat"`` to use ``get_openai_chat_completion`` or
            ``"instruct"`` to use ``get_openai_completion``.

    Returns:
        A ``(prompt, response_text)`` tuple; ``response_text`` is stripped of
        surrounding whitespace when it comes from a fresh request.

    Raises:
        ValueError: If ``api_type`` is neither ``"chat"`` nor ``"instruct"``
            and the prompt is not in ``history``. Previously this fell through
            both branches and failed with ``UnboundLocalError``.
        Exception: Whatever the second attempt raises if the retry also fails.
    """
    input_prompt = generate_translate_prompt(row)

    # A cache hit short-circuits before any API call is made.
    if input_prompt in history:
        return input_prompt, history[input_prompt]["response"]

    # Choose the request helper and how to pull the text out of its result.
    if api_type == "chat":
        send_request = get_openai_chat_completion

        def extract_text(completion):
            return completion.choices[0].message.content
    elif api_type == "instruct":
        send_request = get_openai_completion

        def extract_text(completion):
            return completion.choices[0].text
    else:
        raise ValueError(
            f"Unsupported api_type {api_type!r}; expected 'chat' or 'instruct'"
        )

    try:
        completion = send_request(input_prompt, model_name)
    except Exception:
        # Single retry after a one-minute pause; a second failure propagates.
        print('Caught exception, wait for 1 min...')
        time.sleep(60)
        completion = send_request(input_prompt, model_name)

    return input_prompt, extract_text(completion).strip()

### CSV-Related Functions

In [15]:
def load_csv_data(file_path, encoding="utf-8"):
    """Read a CSV file into a list of row dicts, parsing the 'choices' column.

    Args:
        file_path: Path of the CSV file to read; it must have a header row
            that includes a 'choices' column.
        encoding: Text encoding used to open the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        A list with one dict per data row. Every value is a string except
        'choices', which is replaced by the Python literal it spells out
        (parsed with ``ast.literal_eval``).

    Raises:
        KeyError: If a row has no 'choices' column.
        ValueError / SyntaxError: If a 'choices' cell is not a valid literal.
    """
    data_list = []

    # newline="" lets the csv module handle line endings inside quoted fields.
    with open(file_path, newline="", encoding=encoding) as csvfile:
        for row in csv.DictReader(csvfile):
            row["choices"] = ast.literal_eval(row["choices"])
            data_list.append(row)

    return data_list


def save_data(samples, file_path, encoding="utf-8"):
    """Write a list of row dicts to a CSV file and print a confirmation.

    Args:
        samples: Non-empty sequence of dicts. The keys of the first dict
            become the CSV header and column order.
        file_path: Destination path; an existing file is overwritten.
        encoding: Text encoding used to write the file. Defaults to UTF-8 so
            non-ASCII text is written the same way on every platform.

    Raises:
        IndexError: If ``samples`` is empty (there is no first row to take the
            header from).
        ValueError: From ``csv.DictWriter`` if a later row has a key that is
            not in the header.
    """
    header = samples[0].keys()

    # newline="" stops the text layer from translating the csv module's own
    # line terminators.
    with open(file_path, "w", newline="", encoding=encoding) as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=header)
        writer.writeheader()
        writer.writerows(samples)

    print(f'CSV file "{file_path}" has been created with the data.')

### Run Translation

In [16]:
# Configure the openai client with the credentials read from the environment.
openai.api_key = api_key
openai.organization = org_key

# Model name passed to translate_csv_data for each request in the run below.
model_name = "gpt-3.5-turbo-instruct"

In [19]:
# Smoke-test run: translate the first 5 rows of the first "v3" CSV found in
# the first folder under ./eval/, printing each raw model response.
# The persistence and parsing steps are still commented out below.
for folder in glob("./eval/*/"):
    for file in glob(f"{folder}*.csv"):
        if "v3" in file:
            # basename() strips the directory with the platform's separators.
            # Splitting on "/" left the folder in fname on Windows, which
            # produced a malformed history path.
            fname = os.path.basename(file)
            print(f"Translating file {fname}")

            output_dir = f"{folder}sample-chatgpt-instruct/"
            os.makedirs(output_dir, exist_ok=True)
            history_path = f"{output_dir}history_{fname}"

            if os.path.exists(history_path):
                print(f"Load response history from file {history_path}")
                # The 'response' column holds the text form of a dict, so it
                # is parsed back into a dict on load.
                resp_history_df = pd.read_csv(history_path, converters={'response': ast.literal_eval})
                response_history = dict(zip(resp_history_df.prompt, resp_history_df.response))
            else:
                print("Initialize response history")
                response_history = {}

            file_data = load_csv_data(file)
            # Only the first 5 rows are sent while the pipeline is being tested.
            for data in tqdm(file_data[:5]):
                prompt, response = translate_csv_data(data, model_name, response_history)
                print(response)
                # response_history[prompt] = {"response": response}

                # resp_history_df = pd.DataFrame({'prompt': response_history.keys(), 'response': response_history.values()})
                # resp_history_df.to_csv(history_path, index=False)

                # response = response.split("\n")
                # data["question"] = str(response[0].split(": ")[-1])
                # data["question_concept"] = str(response[1].split(": ")[-1]).lower()
                # data["choices"]["text"] = response[2].split(": ")[-1].lower().split(", ")
            # Stop after the first matching file in this folder.
            break
            # translated_file = f"{folder}chatgpt-instruct/translated_chatgpt_instruct_{fname}"
            # save_data(file_data, translated_file)
    # Stop after the first folder.
    break

Translating file eval\test\v3_test_name.csv
Initialize response history


  0%|          | 0/5 [00:00<?, ?it/s]

 20%|██        | 1/5 [00:02<00:09,  2.47s/it]

{
    "Indonesian": {
        "Question": "Pegangan pada alat yang dibutuhkan Budi rusak. Ini adalah pegangan logam panjang, dengan pegas di dalamnya. Alat apa yang rusak?",
        "Concept": "pegangan",
        "Options": ["panci", "kuas", "tas bawaan", "kunci", "payung"]
    },
    "Sundanese": {
        "Question": "Pegangan pada alat nu Budi butuhkeun rusak. Ieu nyaéta pegangan logam panjang, kalawan pegas di dinya. Alat naon nu rusak?",
        "Concept": "pegangan",
        "Options": ["panci", "kuas", "tas bawaan", "kunci", "payung"]
    }
}


 40%|████      | 2/5 [00:05<00:08,  2.69s/it]

{
    "Indonesian": {
        "question": "Siti takut melakukan kejahatan palsu dalam persidangan. Apa yang mungkin menyebabkan kejahatan palsu?",
        "concept": "melakukan kejahatan palsu",
        "options": [
            "waktu penjara",
            "masuk penjara",
            "penuntutan pidana",
            "persidangan yang salah",
            "ketidakadilan"
        ]
    },
    "Sundanese": {
        "question": "Siti kahiji ngarasa kawas ngalakukeun kejahatan palsu dina sidang. Naon nu bisa ngarahkeun ka kejahatan palsu?",
        "concept": "ngalakukeun kejahatan palsu",
        "options": [
            "waktu tahanan",
            "masuk tahanan",
            "pidana kriminal",
            "sidang anu salah",
            "ketidakadilan"
        ]
    }
}


 60%|██████    | 3/5 [00:07<00:04,  2.41s/it]

{
  "Indonesian": {
    "Question": "Siti membalik halaman secara acak dan melihat \"sloth.\" Apa yang mungkin sedang dilihatnya?",
    "Concept": "sloth",
    "Options": ["alam semesta", "hutan belantara", "kamus", "iklan", "buku matematika"]
  },
  "Sundanese": {
    "Question": "Siti balik kaca acak jeung ngeliat \"sloth.\" Naon nu mungkin aya dijieunna?",
    "Concept": "sloth",
    "Options": ["universe", "wilderness", "dictionary", "commercial", "math book"]
  }
}


 80%|████████  | 4/5 [00:10<00:02,  2.50s/it]

{
  "Indonesian": {
    "Question": "John bertindak dingin, tetapi sebenarnya dia sangat apa?",
    "Concept": "dingin",
    "Options": [
      "membakar",
      "mempersiapkan",
      "ramah",
      "kebalikan dari panas",
      "dingin"
    ]
  },
  "Sundanese": {
    "Question": "John ngagawe dingin, tapi asliné sanésna anu sangar naon?",
    "Concept": "dingin",
    "Options": [
      "ngabak",
      "ngarapikeun",
      "ramah",
      "kebalikan tina panas",
      "dingin"
    ]
  }
}


100%|██████████| 5/5 [00:12<00:00,  2.54s/it]

{
  "Indonesian": {
    "Question": "Budi takut untuk berlari melalui jalur karena dia takut terluka. Apa yang mungkin dia lakukan sebagai gantinya?",
    "Concept": "berlari",
    "Options": ["berjalan cepat", "berjalan", "berdiri diam", "berjalan pelan", "pergi cepat"]
  },
  "Sundanese": {
    "Question": "Budi kaget ngalari lewat jalur sabab dia kaget kena luka. Naon nu bisa dia lakukan salaku ganti?",
    "Concept": "ngalari",
    "Options": ["jalan cepet", "jalan", "ngadeg", "jalan pelan", "pergi cepet"]
  }
}



