In [1]:
import os
import re
import time

import numpy as np
import openai

from datasets import load_dataset
from tqdm import tqdm

In [2]:
# Take the API key from the OPENAI_API_KEY environment variable so the secret
# never has to be saved in the notebook; the original placeholder is kept as
# the fallback value when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "sk-")

In [3]:
from tenacity import (
    retry,
    stop_after_attempt,
    wait_chain,
    wait_fixed
)

# Seconds to wait before each successive retry. Once the chain is used up,
# wait_chain keeps applying its last entry.
_RETRY_WAITS = (3, 3, 3, 5, 5, 10)

# Hard cap on attempts so a permanent failure (for example an invalid API key)
# surfaces as an error instead of retrying forever. With the waits above this
# allows a bit over a minute of retrying before giving up.
_MAX_ATTEMPTS = 12


@retry(
    wait=wait_chain(*[wait_fixed(seconds) for seconds in _RETRY_WAITS]),
    stop=stop_after_attempt(_MAX_ATTEMPTS),
    reraise=True,
)
def completion_with_backoff(**kwargs):
    """Call ``openai.ChatCompletion.create`` with retries on failure.

    Args:
        **kwargs: Forwarded unchanged to ``openai.ChatCompletion.create``.

    Returns:
        Whatever ``openai.ChatCompletion.create`` returns.

    Raises:
        The exception raised by the last attempt, once ``_MAX_ATTEMPTS``
        attempts have all failed (``reraise=True`` surfaces the original
        error rather than tenacity's ``RetryError``).
    """
    return openai.ChatCompletion.create(**kwargs)

In [4]:
def test_answer(pred_str, ans_str):
    pattern = '\d*\.?\d+'
    pred = re.findall(pattern, pred_str)
    if(len(pred) >= 1):
        # print(pred_str)
        pred = pred[-1]
        gold = re.findall(pattern, ans_str)
        # print(ans_str)
        gold = gold[-1]
        return pred == gold
    else: return False

def parse_pred_ans(filename):
    """Parse a results file and print how many predictions match the reference.

    The file is read line by line. A line starting with ``'Q: '`` opens a new
    record, ``'A_model:'`` opens its model-answer section and ``'A:'`` opens
    its reference-answer section; any other line is appended to whichever
    section is currently open. A record is kept (and scored with
    ``test_answer``) only when all three sections are present; incomplete
    records are still counted in ``num_q`` but are not returned or scored.

    Args:
        filename: Path of the results file to read.

    Returns:
        A tuple ``(questions, ans_pred, ans_gold)`` of three equally long
        lists holding the raw text of each complete record's sections.

    Raises:
        ValueError: If a continuation line appears before any section has
            been opened.
    """
    with open(filename) as fd:
        lines = fd.readlines()

    questions, ans_pred, ans_gold = [], [], []
    num_q, acc = 0, 0
    q, am, a = None, None, None
    current_mode = 'none'

    def flush():
        # Store and score the record being built, if it has all three parts.
        nonlocal acc
        if q is None or am is None or a is None:
            return
        questions.append(q)
        ans_pred.append(am)
        ans_gold.append(a)
        if test_answer(am, a):
            acc += 1

    for l in lines:
        if l.startswith('Q: '):
            flush()
            current_mode = 'q'
            # Reset both answers so a record missing a section cannot be
            # scored with the previous record's text.
            q, am, a = l, None, None
            num_q += 1
        elif l.startswith('A_model:'):
            current_mode = 'am'
            am = l
        elif l.startswith('A:'):
            current_mode = 'a'
            a = l
        elif current_mode == 'q':
            q += l
        elif current_mode == 'am':
            am += l
        elif current_mode == 'a':
            a += l
        else:
            raise ValueError(current_mode)

    # The last record has no following 'Q: ' line to trigger its flush.
    flush()

    # An empty file has no questions; report 0 instead of dividing by zero.
    ratio = acc / num_q if num_q else 0.0
    print('num_q %d correct %d ratio %.4f' % (num_q, acc, ratio))
    return questions, ans_pred, ans_gold

def test_finished(ans_model):
    if('answer is' in ans_model): return True
    else: return False

def extract_ans(ans_model):
    """Split a model reply at the first line containing 'answer is'.

    Returns a pair ``(ans, residual)``: ``ans`` is the text up to and
    including that line, ``residual`` is everything after it. When no line
    contains the phrase, ``ans`` is the whole reply and ``residual`` is ''.
    """
    reply_lines = ans_model.split('\n')
    # Index of the first marker line, or of the last line when none matches.
    cut = next(
        (idx for idx, text in enumerate(reply_lines) if 'answer is' in text),
        len(reply_lines) - 1,
    )
    head = '\n'.join(reply_lines[:cut + 1])
    tail = '\n'.join(reply_lines[cut + 1:])
    return head, tail

In [18]:
# Prompt variant 1: a short task description followed by two worked Q)/A)
# examples. The text is sent verbatim, so it must not be reformatted.
prompt_1 = """
You'll be provided with integer 'n' and a list of integers (length of m).
Your job is to sort the list of integers in ascending order and answer the list.
!! Just answer the list. Do not answer anything else than the sorted list.

<Example>
Q) n: 20, list: [4, 2, 4, 5]
A) [2, 4, 4, 5]

Q) n: 20, list: [19, 18, 19, 1]
A) [1, 18, 19, 19]
</Example>

"""

In [34]:
# Prompt variant 2: a longer, explicit specification of the inputs, the valid
# value range (0 to n inclusive) and the expected output. The out-of-range
# example states the same 0-based range as the specification above it.
prompt_2 = """
You are given two inputs:

An integer n which represents the upper limit of the range of integers.
A list of integers called number_list which includes m numbers that are selected from between 0 and n (inclusive).
Sort the list number_list in ascending order and provide the sorted output.

It is essential that all numbers in number_list are within the range of 0 to n. If any number lies outside this range, specify that an error has occurred due to out-of-range values.

Task: Provide the sorted list of numbers from the provided number_list based on the criteria mentioned. 
!! Just answer the list. Do not answer anything else than the sorted list.

Examples for Execution:

For the input n = 10 and number_list = [3, 1, 8, 7], provide the sorted result.
For the input n = 5 and number_list = [6, 2, 4], indicate that there is an error because the number 6 is not within the range 0 to 5.

"""

In [20]:
# Each test input is kept next to its expected sorted output so the two lists
# built below cannot drift out of step.
_SORT_CASES = (
    ("n=10, number_list=[3, 2, 4, 1]", "[1, 2, 3, 4]"),
    ("n=20, number_list=[6, 2, 20, 18, 5, 1]", "[1, 2, 5, 6, 18, 20]"),
    ("n=20, number_list=[0, 0, 0, 0, 0, 0]", "[0, 0, 0, 0, 0, 0]"),
    ("n=10, number_list=[1, 2, 3, 4, 5]", "[1, 2, 3, 4, 5]"),
)
questions = [case_input for case_input, _ in _SORT_CASES]
answers = [expected for _, expected in _SORT_CASES]

### 1) Evaluate `prompt_1`

In [23]:
# Create the output directory first so this cell also runs on a fresh checkout
# where ./outputs does not exist yet.
os.makedirs('./outputs', exist_ok=True)

with open('./outputs/test_8_1.txt', 'w') as fd:
    for q, a in zip(questions, answers):
        # Append the test question to the prompt variant under evaluation.
        prompt_q = prompt_1 + '\nQ): ' + q + '\n'

        response = completion_with_backoff(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Follow the given examples and answer the question."},
                {"role": "user", "content": prompt_q},
            ],
        )
        ans_model = response['choices'][0]['message']['content']
        # Keep the reply up to its first 'answer is' line (the whole reply
        # when there is none); the remainder is not used.
        ans_, _ = extract_ans(ans_model)

        # One record per question, in the 'Q: ' / 'A_model:' / 'A:' layout
        # that parse_pred_ans reads back.
        fd.write('Q: %s\nA_model:\n%s\nA:\n%s\n\n' % (q, ans_, a))

In [24]:
# Score the saved replies: parse_pred_ans prints the accuracy summary, and the
# three lists it returns are discarded here.
_, _, _ = parse_pred_ans('outputs/test_8_1.txt')

num_q 4 correct 4 ratio 1.0000


### 2) Evaluate `prompt_2`

In [35]:
# Create the output directory first so this cell also runs on a fresh checkout
# where ./outputs does not exist yet.
os.makedirs('./outputs', exist_ok=True)

with open('./outputs/test_8_2.txt', 'w') as fd:
    for q, a in zip(questions, answers):
        # Append the test question to the prompt variant under evaluation.
        prompt_q = prompt_2 + '\nQ): ' + q + '\n'

        response = completion_with_backoff(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Follow the given examples and answer the question."},
                {"role": "user", "content": prompt_q},
            ],
        )
        ans_model = response['choices'][0]['message']['content']
        # Keep the reply up to its first 'answer is' line (the whole reply
        # when there is none); the remainder is not used.
        ans_, _ = extract_ans(ans_model)

        # One record per question, in the 'Q: ' / 'A_model:' / 'A:' layout
        # that parse_pred_ans reads back.
        fd.write('Q: %s\nA_model:\n%s\nA:\n%s\n\n' % (q, ans_, a))

In [36]:
# Score the saved replies: parse_pred_ans prints the accuracy summary, and the
# three lists it returns are discarded here.
_, _, _ = parse_pred_ans('outputs/test_8_2.txt')

num_q 4 correct 4 ratio 1.0000
