### Import Libraries

In [27]:
import ast
import csv
import json
import os
import time

import openai
from dotenv import load_dotenv

### Load Environment Variables

In [28]:
# Load variables from a .env file (when one is found) into the process environment.
load_dotenv()

# Credentials for the OpenAI client; each is None when its variable is unset.
api_key = os.getenv("API_KEY")
org_key = os.getenv("ORG_KEY")

### Load CSV Rephrase Data

In [29]:
# Dataset splits to load; each one is read from "<split>_rephrase.csv".
split = ["train", "validation", "test"]

# Columns that store booleans serialized as the strings "True" / "False".
flag_columns = ("concept", "name", "option")
flag_values = {"True": True, "False": False}

# Maps split name -> list of parsed row dictionaries.
all_data = {}

for s in split:
    csv_file_path = f'{s}_rephrase.csv'

    # Parsed rows of the current split, in file order.
    data_list = []

    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(csv_file_path, newline='') as csvfile:
        csv_reader = csv.DictReader(csvfile)

        for row in csv_reader:
            # "choices" is stored as a Python literal; literal_eval rebuilds the
            # object without executing arbitrary code.
            row["choices"] = ast.literal_eval(row["choices"])

            # Convert every flag column with the same strict rule instead of
            # repeating the if/elif chain once per column.
            for column in flag_columns:
                raw_value = row[column]
                if raw_value not in flag_values:
                    raise TypeError(
                        f"{column} data cannot be recognized: {raw_value!r} "
                        f"({csv_file_path}, line {csv_reader.line_num})"
                    )
                row[column] = flag_values[raw_value]

            data_list.append(row)

    all_data[s] = data_list

### Generate Prompt

In [43]:
# Function to generate choice text
def generate_choices_text(choices):
    """Render the answer options as one quoted line per label.

    Args:
        choices: Mapping with parallel sequences under "label" and "text".

    Returns:
        A string containing one `<label>. "<text>"` line per label, each
        terminated by a newline; the empty string when there are no labels.
    """
    option_labels = choices["label"]
    option_texts = choices["text"]

    # Texts are looked up by position, so a "text" list shorter than "label"
    # still raises IndexError rather than being silently truncated.
    rendered_lines = [
        f'{option_label}. "{option_texts[position]}"\n'
        for position, option_label in enumerate(option_labels)
    ]
    return "".join(rendered_lines)

# Function to generate answer text
def generate_answer_text(choices, answerKey):
    """Format the option selected by `answerKey` as `<label>. "<text>"`.

    Args:
        choices: Mapping with parallel sequences under "label" and "text".
        answerKey: Label of the correct option; looked up in choices["label"].

    Returns:
        The answer label followed by its quoted text.
    """
    position = choices["label"].index(answerKey)
    matching_text = choices["text"][position]
    return f'{answerKey}. "{matching_text}"'

# Function to generate prompts based on the conditions
def generate_rephrase_name_prompt(row):
    """Build the prompt that asks the model to swap only the names in a question.

    Args:
        row: Mapping that provides the original text under "question".

    Returns:
        The instruction, a blank line, the phrase to rewrite and a trailing
        "Answer:" cue for the model to complete.
    """
    instruction = (
        "Change all names in the given phrases to Indonesian names. "
        "Change only the names. "
        "Keep all remaining phrases and keep it all in english and reply with only your answer."
    )
    return f"{instruction}\n\nPhrase: {row['question']}\nAnswer:"

def generate_rephrase_all_prompt(row):
    """Build the prompt that asks the model to localize a whole question record.

    Args:
        row: Mapping with "question", "question_concept", "choices" and
            "answerKey", plus the flags "concept" and "option". A truthy flag
            is rendered as "Keep", a falsy one as "Change".

    Returns:
        The full prompt text, ending with the "Changed data:" cue.
    """
    question = row['question']
    concept = row['question_concept']
    concept_action = 'Keep' if row['concept'] else 'Change'
    option_action = 'Keep' if row['option'] else 'Change'
    choices_block = generate_choices_text(row['choices'])

    # Rows without an answer key leave the answer slot empty.
    if row['answerKey']:
        answer_line = generate_answer_text(row['choices'], row['answerKey'])
    else:
        answer_line = ''

    return f"""Given a commonsense question, a concept, options, and the question answer, change them to become relevant to Indonesia. If an aspect is flagged to be changed, then you need to change it completely. If it's flagged as keep, then keep as it is. Make sure your changes are still in the same domain/topic with the given data, and there is only one clear answer in the options. Reply with only your changed data in a JSON format.

Data:
###
Question: {question} -> Change
Concept: {concept} -> {concept_action}
Options: -> {option_action}
{choices_block}Question Answer: {answer_line}
###

Changed data:"""

### Rephrase Function

In [44]:
def get_openai_chat_completion(input_prompt, model_name):
    """Send one user message to an OpenAI chat model and return the raw completion.

    Args:
        input_prompt: Text sent as the content of the single "user" message.
        model_name: Model identifier forwarded to the API.

    Returns:
        Whatever `openai.ChatCompletion.create` returns for the request.
    """
    user_message = {'role': 'user', 'content': input_prompt}
    return openai.ChatCompletion.create(
        model=model_name,
        messages=[user_message],
        temperature=0.3,
    )

def _parse_structured_reply(reply_text):
    """Decode a structured model reply, preferring strict JSON."""
    try:
        return json.loads(reply_text)
    except json.JSONDecodeError:
        # Fall back for replies written as Python literals (e.g. single-quoted
        # dicts), which the JSON decoder rejects.
        return ast.literal_eval(reply_text)


# Define a function to rephrase the CSV data using an OpenAI chat model
def rephrase_csv_data(row, model_name, retry_delay=60):
    """Rephrase one dataset row with the chat model and return its reply.

    Args:
        row: Mapping with the boolean flags "name", "concept" and "option"
            plus the fields consumed by the prompt builders.
        model_name: Model identifier forwarded to get_openai_chat_completion.
        retry_delay: Seconds to wait before the single retry after a failed
            request. Defaults to 60.

    Returns:
        The stripped reply text, or the decoded object when the row has
        "concept" or "option" set.

    Raises:
        Exception: Whatever the second request raises if the retry also fails.
        ValueError / SyntaxError: If a structured reply is neither valid JSON
            nor a valid Python literal.
    """
    # NOTE(review): the name-only prompt is chosen when "concept" and "option"
    # are both False, while generate_rephrase_all_prompt renders False as
    # "Change" - confirm the intended flag semantics.
    name_only = row["name"] and not row["concept"] and not row["option"]
    if name_only:
        input_prompt = generate_rephrase_name_prompt(row)
    else:
        input_prompt = generate_rephrase_all_prompt(row)

    print(input_prompt)

    try:
        completion = get_openai_chat_completion(input_prompt, model_name)
    except Exception as exc:
        # One retry after a pause; a second failure propagates to the caller.
        print(f'Caught exception ({exc!r}), wait for {retry_delay} s...')
        time.sleep(retry_delay)
        completion = get_openai_chat_completion(input_prompt, model_name)

    response = completion.choices[0].message.content.strip()

    # NOTE(review): a row with all three flags False receives the JSON prompt
    # but is returned unparsed here; kept as-is for backward compatibility.
    if row["concept"] or row["option"]:
        response = _parse_structured_reply(response)

    return response

### Run Rephrase

In [45]:
# Configure the openai module with the credentials read from the environment.
openai.api_key = api_key
openai.organization = org_key

# Chat model identifier passed to rephrase_csv_data.
model_name = "gpt-3.5-turbo"

In [46]:
# Try the pipeline on the first training row; as the cell's last expression,
# the returned reply is shown as the cell output.
data = all_data["train"][0]
rephrase_csv_data(data, model_name)

Change all names in the given phrases to Indonesian names. Change only the names. Keep all remaining phrases and keep it all in english and reply with only your answer.

Phrase: Sammy wanted to go to where the people were.  Where might he go?
Answer:


'Rudi wanted to go to where the people were. Where might he go?'