In [None]:
!pip install openai
import requests
import json
import openai

In [None]:
import os

# Take the key from the OPENAI_API_KEY environment variable so the secret never
# has to be saved inside the notebook. When the variable is unset this falls
# back to the same empty placeholder the notebook used before.
API_KEY = os.environ.get("OPENAI_API_KEY", "")
# Chat-completions URL; not referenced by any code visible in this notebook.
API_ENDPOINT = "https://api.openai.com/v1/chat/completions"
# Register the key with the openai module used by the request helpers below.
openai.api_key = API_KEY

In [None]:
# Read the whole optional-dimensions text file into a single string.
with open('/content/drive/MyDrive/MacawProject/optional_dims.txt', 'r') as optional_dims:
  optional_dims_content = optional_dims.read()

I initialized '/content/drive/MyDrive/MacawProject/dimensions_prompt.txt' with:
"You are given a SITUATION and possible answers for the situation in ANSWERS. Your task is to choose the best four dimensions that will help you find the correct answer on the SITUATION.
After you chose the dimensions, please elaborate the scene on these dimensions.
Inputs: OPTIONAL_DIMENSIONS, SITUATION, ANSWERS
Outputs: CHOSEN_DIMENSIONS, ELABORATIONS
OPTIONAL_DIMENSIONS:"
followed by the list of dimensions, exactly as they appear in optional_dims.txt.

In [None]:

def read_situation_from_json(file_path, is_train):
  """Load multiple-choice situations from a JSON Lines file.

  Each non-blank line must be a JSON object. The fields 'id', 'question',
  'mcoptions' and 'answer' are copied into the result; when `is_train` is
  true, 'chosen_dimensions' and 'elaboration' are copied as well. A field
  missing from a line is stored as None.

  Args:
    file_path: path of the .jsonl file to read.
    is_train: whether to also extract the training-only fields.

  Returns:
    A list with one dict per non-blank line, in file order.

  Raises:
    json.JSONDecodeError: if a non-blank line is not valid JSON.
  """
  keys = ['id', 'question', 'mcoptions', 'answer']
  if is_train:
    keys += ['chosen_dimensions', 'elaboration']

  situation_data = []
  # JSON is UTF-8 by specification; do not depend on the platform default.
  with open(file_path, 'r', encoding='utf-8') as file:
    for line in file:
      # Tolerate blank lines (e.g. a trailing newline at end of file), which
      # would otherwise make json.loads raise.
      if not line.strip():
        continue
      data = json.loads(line)
      situation_data.append({key: data.get(key) for key in keys})
  return situation_data


def append_dims_elaborations_to_few_shot_prompt(output_prompt_filename, situation, dims, elaborations):
  """Append one worked example to the few-shot prompt file.

  The example is written as an "Inputs:" section (SITUATION and ANSWERS taken
  from `situation['question']` and `situation['mcoptions']`) followed by an
  "Outputs:" section holding `dims` and `elaborations`. The file is opened in
  append mode, so existing content is kept.
  """
  def _chunks():
    # Produced lazily so every piece is built right before it is written.
    yield 'Inputs:\n'
    yield 'SITUATION: ' + situation['question'] + '\n'
    yield 'ANSWERS: ' + situation['mcoptions'] + '\n'
    yield 'Outputs:\n'
    # `dims` supplies its own line breaks; none is added after it here.
    yield 'CHOSEN_DIMENSIONS:\n' + dims
    yield 'ELABORATIONS:' + elaborations + '\n'

  with open(output_prompt_filename, 'a') as sink:
    for chunk in _chunks():
      sink.write(chunk)


def generate_few_shot_prompt(in_context_samples_filename, output_prompt_filename):
  """Append every in-context sample to the few-shot prompt file.

  Samples are read in training mode from `in_context_samples_filename`. For
  each one, the chosen dimensions are laid out one per line and a line break
  is inserted before every " [" in the elaboration text, then the example is
  appended to `output_prompt_filename`.
  """
  for sample in read_situation_from_json(in_context_samples_filename, True):
    dims_block = "".join(dim + '\n' for dim in sample['chosen_dimensions'])
    elaboration_block = sample['elaboration'].replace(" [", "\n[")
    append_dims_elaborations_to_few_shot_prompt(output_prompt_filename, sample, dims_block, elaboration_block)

# JSONL file holding the hand-written in-context examples.
in_context_samples_filename = "/content/drive/MyDrive/MacawProject/in_context_few_shots.jsonl"
# Prompt file that the formatted examples are appended to.
output_prompt_filename = '/content/drive/MyDrive/MacawProject/dimensions_prompt.txt'
# The call is left disabled. It appends to the prompt file, so every run adds
# the examples again. NOTE(review): presumably run once by hand; confirm.
# generate_few_shot_prompt(in_context_samples_filename, output_prompt_filename)




Call GPT-3.5 on new samples

In [None]:

import os

def gpt3(prompt):
    """Request a completion for `prompt` and return its text.

    Returns the first choice's text with surrounding whitespace stripped, or
    None when the response is empty or carries no usable first choice.

    NOTE(review): `openai.Completion` looks like the pre-1.0 SDK interface,
    and the notebook installs `openai` without a version pin; confirm the
    installed version still provides it.
    """
    completion = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0.7,
        max_tokens=300,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    # Guard clauses: bail out as soon as there is nothing to return.
    if not completion:
        return None
    choices = completion.choices
    if not (choices and choices[0]):
        return None
    return choices[0].text.strip()

def _extract_elaborations(response_text):
  """Return the text that follows the first "ELABORATIONS:" marker, on one line.

  Newlines are replaced by spaces. Returns None when `response_text` is empty
  or None, or when it does not contain the marker.
  """
  if not response_text:
    return None
  parts = response_text.split("ELABORATIONS:")
  if len(parts) < 2:
    return None
  return parts[1].replace('\n', ' ')


def write_test_sets_with_context(test_sets_dir, test_sets_generated_context_dir, output_prompt_filename):
  """Add a model-generated 'context' to every situation of every test set.

  For each `*.jsonl` file in `test_sets_dir`, every situation's question is
  appended to the few-shot prompt read from `output_prompt_filename` and sent
  to `gpt3`. The elaborations found in the reply are stored under
  situation['context'], and the situation is appended as one JSON line to
  `<name>_gpt3_generate_context.jsonl` in `test_sets_generated_context_dir`.

  A failed request or a reply without an "ELABORATIONS:" section no longer
  aborts the run: the situation is written with an empty context and a
  warning is printed, so output rows stay aligned with the input.

  Output files are opened in append mode, so running this twice duplicates
  rows.

  NOTE(review): the query appends only "SITUATION: ..." and omits the
  "Inputs:" / "ANSWERS:" / "Outputs:" scaffolding used by the few-shot
  examples. Left unchanged because it alters what the model sees; confirm
  this is intended.
  """
  # Sorted so processing order is reproducible; os.listdir order is arbitrary.
  jsonl_files = sorted(name for name in os.listdir(test_sets_dir) if name.endswith('.jsonl'))

  with open(output_prompt_filename, 'r') as prompt_few_shot:
    prompt_few_shot_content = prompt_few_shot.read()

  for jsonl_file in jsonl_files:
    jsonl_file_full_path = os.path.join(test_sets_dir, jsonl_file)
    # splitext strips only the final extension, so names with extra dots
    # ("set.v1.jsonl") are not truncated and do not collide on one output.
    output_file_full_path = os.path.join(
        test_sets_generated_context_dir,
        os.path.splitext(jsonl_file)[0] + "_gpt3_generate_context" + ".jsonl")

    situations = read_situation_from_json(jsonl_file_full_path, False)
    for situation in situations:
      prompt = prompt_few_shot_content + '\n' + "SITUATION: " + situation['question']
      elaborations = _extract_elaborations(gpt3(prompt))
      if elaborations is None:
        print('WARNING: no elaborations generated for situation', situation.get('id'), 'in', jsonl_file)
        elaborations = ''
      situation['context'] = elaborations

      # Opened per situation so finished rows are on disk if a later request fails.
      with open(output_file_full_path, 'a') as output_file:
        output_file.write(json.dumps(situation) + '\n')

# Few-shot prompt file that is prepended to every query.
output_prompt_filename = '/content/drive/MyDrive/MacawProject/dimensions_prompt.txt'
# Directory scanned for input *.jsonl test sets.
test_sets_dir = '/content/drive/MyDrive/MacawProject/test_sets'
# Directory receiving the *_gpt3_generate_context.jsonl outputs.
test_sets_generated_context_dir = '/content/drive/MyDrive/MacawProject/test_sets_generated_context'
# Issues one gpt3() request per situation and appends to the output files,
# so re-running this cell adds duplicate rows.
write_test_sets_with_context(test_sets_dir, test_sets_generated_context_dir, output_prompt_filename)


Generate COT for commonsense dataset using GPT3

In [None]:
import json
import pandas as pd
# JSONL file read by gpt3_w_context() and gpt3_no_context(); each line provides
# 'id', 'question', 'answer', 'mcoptions' and (for the with-context run) 'context'.
commonsense_generated_context_path = '/content/drive/MyDrive/MacawProject/test_sets_generated_context/commonsense_qa_gpt3_generate_context.jsonl'
# Task instruction placed at the start of every evaluation prompt.
prompt_prefix = "You are given a multiple-choice task. Given a question, and a context, you should choose the best answer to the question out of different options. Please include the answer only and not the label of the option. For example for the options (A) bank (B) library, if you want to answer bank, please reply 'bank'"


def gpt3_w_context():
  """Answer every question using its generated context and collect the results.

  Reads `commonsense_generated_context_path` line by line and builds a prompt
  from `prompt_prefix`, the question, its context and the answer options. The
  prompt is printed and sent to `gpt3`.

  Returns:
    A DataFrame with columns 'id', 'prediction' and 'gt' (the reference
    answer), one row per non-blank input line. The same frame is also saved
    to 'cot_commonsense_output.csv' in the working directory.
  """
  # Rows are gathered in a list and the frame is built once:
  # DataFrame.append was removed in pandas 2.0 and growing a frame
  # row by row is quadratic.
  records = []
  with open(commonsense_generated_context_path, 'r') as f:
    for line in f:
      if not line.strip():
        continue  # tolerate blank lines such as a trailing newline
      data = json.loads(line)
      sample_id = data['id']
      question = data['question']
      answer = data['answer']
      mc_options = data['mcoptions']
      context = data['context']
      prompt = prompt_prefix + '\n' + 'question: ' + question + '\n' + 'context: ' + context + '\n' + 'optional answers: ' + mc_options
      print(prompt)
      print("----")
      res = gpt3(prompt)
      records.append({'id': sample_id, 'prediction': res, 'gt': answer})

  cot = pd.DataFrame(records, columns=['id', 'prediction', 'gt'])
  cot.to_csv('cot_commonsense_output.csv')
  return cot


def gpt3_no_context():
  """Answer every question without any context and collect the results.

  Reads `commonsense_generated_context_path` line by line and builds a prompt
  from `prompt_prefix`, the question and the answer options only. The prompt
  is printed and sent to `gpt3`.

  Returns:
    A DataFrame with columns 'id', 'prediction' and 'gt' (the reference
    answer), one row per non-blank input line. The same frame is also saved
    to 'cot_commonsense_no_context_output.csv' in the working directory.
  """
  # Rows are gathered in a list and the frame is built once:
  # DataFrame.append was removed in pandas 2.0 and growing a frame
  # row by row is quadratic.
  records = []
  with open(commonsense_generated_context_path, 'r') as f:
    for line in f:
      if not line.strip():
        continue  # tolerate blank lines such as a trailing newline
      data = json.loads(line)
      sample_id = data['id']
      question = data['question']
      answer = data['answer']
      mc_options = data['mcoptions']
      prompt = prompt_prefix + '\n' + 'question: ' + question + '\n' + 'optional answers: ' + mc_options
      print(prompt)
      print("----")
      res = gpt3(prompt)
      records.append({'id': sample_id, 'prediction': res, 'gt': answer})

  cot = pd.DataFrame(records, columns=['id', 'prediction', 'gt'])
  cot.to_csv('cot_commonsense_no_context_output.csv')
  return cot


def print_eval(df):
  """Print the mismatching rows and the exact-match accuracy of `df`.

  'prediction' and 'gt' are compared case-insensitively. The comparison runs
  on a lower-cased copy, so the caller's DataFrame is left untouched and the
  function can be re-run safely. A missing prediction counts as a mismatch.

  Args:
    df: DataFrame with string columns 'prediction' and 'gt'.
  """
  # assign() returns a new frame; the input columns are not overwritten.
  normalized = df.assign(
      prediction=df['prediction'].str.lower(),
      gt=df['gt'].str.lower())
  is_match = normalized['prediction'] == normalized['gt']
  print(normalized[~is_match])
  print('Proportion:', is_match.mean())


# Evaluate with the generated context included in every prompt.
df_w_context = gpt3_w_context()
print_eval(df_w_context)


# Baseline: same questions and options, no context in the prompt.
df_no_context = gpt3_no_context()
print_eval(df_no_context)



