# 0. 배운 내용 복습 / 실습
- CoT + Prompt Compression + Benchmark Evaluation
- OpenAI API 실행 준비 (각자 부여된 API 키 확인)
  - https://platform.openai.com/docs/quickstart
- LLMLingua-2 설치 후 실행
  - https://github.com/microsoft/LLMLingua
- CoT prompt 를 활용하여 GPT 3.5 모델을 GSM8k 데이터셋에 대해 평가한 코드 실행
  - https://github.com/FranxYao/chain-of-thought-hub/blob/main/gsm8k/gpt3.5turbo_gsm8k_complex.ipynb
- 위 GSM8k 평가에서 쓰인 CoT prompt 를 LLMLingua-2 를 사용하여 300 token target 으로 compress 한 뒤, compress 하지 않은 경우와 결과 비교


In [1]:
!pip install llmlingua



In [2]:
!pip install openai



In [3]:
!pip install datasets



In [4]:
import openai
import re
import time

import numpy as np

from tqdm import tqdm
from datasets import load_dataset

In [5]:
# Use Colab's secret-storage feature (the key-shaped icon) to fetch the API key
# stored under the name 'api_key'.
from google.colab import userdata
api_key = userdata.get('api_key')

In [6]:
# Load the train and test splits of the "main" configuration of GSM8K.
train_dataset = load_dataset("gsm8k", "main", split="train")
test_dataset  = load_dataset("gsm8k", "main", split="test")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/7.94k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.31M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/419k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7473 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1319 [00:00<?, ? examples/s]

In [7]:
train_dataset[0]['question']

'Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?'

In [8]:
!pwd

/content


In [9]:
# The prompt file is created by hand and read from the mounted Google Drive.
# A context manager closes the handle deterministically instead of leaving it
# to the garbage collector.
with open('./drive/MyDrive/LLM 교육 advanced/prompt_hardest.txt', encoding='utf-8') as prompt_file:
    prompt_complex = prompt_file.read()

In [10]:
print(prompt_complex)

Question: Angelo and Melanie want to plan how many hours over the next week they should study together for their test next week. They have 2 chapters of their textbook to study and 4 worksheets to memorize. They figure out that they should dedicate 3 hours to each chapter of their textbook and 1.5 hours for each worksheet. If they plan to study no more than 4 hours each day, how many days should they plan to study total over the next week if they take a 10-minute break every hour, include 3 10-minute snack breaks each day, and 30 minutes for lunch each day?
Let's think step by step
Angelo and Melanie think they should dedicate 3 hours to each of the 2 chapters, 3 hours x 2 chapters = 6 hours total.
For the worksheets they plan to dedicate 1.5 hours for each worksheet, 1.5 hours x 4 worksheets = 6 hours total.
Angelo and Melanie need to start with planning 12 hours to study, at 4 hours a day, 12 / 4 = 3 days.
However, they need to include time for breaks and lunch. Every hour they want 

In [11]:
from tenacity import (
    retry,
    stop_after_attempt,
    wait_chain,
    wait_fixed
)

In [12]:
# Wait schedule between attempts: 3 s three times, 5 s twice, then 10 s.
# No stop condition is passed to `retry`.
@retry(wait=wait_chain(
    *([wait_fixed(3)] * 3 + [wait_fixed(5)] * 2 + [wait_fixed(10)])
))
def completion_with_backoff(**kwargs):
    """Forward ``kwargs`` to ``openai.ChatCompletion.create`` under the retry policy above.

    NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 SDK entry point, while
    the rest of this notebook uses the ``OpenAI`` client object -- confirm which
    SDK version this helper is meant to run against.
    """
    return openai.ChatCompletion.create(**kwargs)

def test_answer(pred_str, ans_str):
    """Check whether the last number in the prediction matches the last number in the reference.

    Numbers are extracted with a regex and compared as text, so ``"3"`` and
    ``"3.0"`` are considered different.

    Args:
        pred_str: Model output text.
        ans_str: Reference answer text.

    Returns:
        True if both texts contain at least one number and their last numbers
        are textually equal; False otherwise.
    """
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    pattern = r'\d*\.?\d+'
    pred = re.findall(pattern, pred_str)
    if not pred:
        return False
    gold = re.findall(pattern, ans_str)
    if not gold:
        # A reference without any number can never match; this used to raise
        # IndexError on gold[-1].
        return False
    return pred[-1] == gold[-1]


# Despite its name this is a helper, not a test case; keep test collectors
# (e.g. pytest's test_* discovery) from picking it up.
test_answer.__test__ = False

def parse_pred_ans(filename):
    """Parse a results file of ``Q: `` / ``A_model:`` / ``A:`` records and print the accuracy.

    A line starting with ``Q: `` opens a new record, ``A_model:`` starts the
    model answer and ``A:`` starts the reference answer. Any other line is
    appended to whichever section is currently open. A record is kept (and
    scored with ``test_answer``) only when it has both a model answer and a
    reference answer; every ``Q: `` line still counts towards ``num_q``.

    Args:
        filename: Path of the text file to parse.

    Returns:
        A ``(questions, ans_pred, ans_gold)`` tuple of parallel lists of strings.

    Raises:
        ValueError: If a continuation line appears before any section marker.
    """
    with open(filename) as fd:
        lines = fd.readlines()

    q, am, a = None, None, None
    num_q, acc = 0, 0
    current_mode = 'none'
    questions = []
    ans_pred = []
    ans_gold = []
    for line in lines:
        if line.startswith('Q: '):
            # A new question closes the previous record, if it is complete.
            if am is not None and a is not None:
                questions.append(q)
                ans_pred.append(am)
                ans_gold.append(a)
                if test_answer(am, a):
                    acc += 1
            current_mode = 'q'
            q = line
            # Reset the answers so a record that lacks one is not paired with
            # the previous record's stale text.
            am, a = None, None
            num_q += 1
        elif line.startswith('A_model:'):
            current_mode = 'am'
            am = line
        elif line.startswith('A:'):
            current_mode = 'a'
            a = line
        elif current_mode == 'q':
            q += line
        elif current_mode == 'am':
            am += line
        elif current_mode == 'a':
            a += line
        else:
            raise ValueError(current_mode)

    # Flush the last record; skipped for an empty file or an unfinished record.
    if am is not None and a is not None:
        questions.append(q)
        ans_pred.append(am)
        ans_gold.append(a)
        if test_answer(am, a):
            acc += 1

    # Avoid dividing by zero when the file contains no questions.
    ratio = acc / num_q if num_q else 0.0
    print('num_q %d correct %d ratio %.4f' % (num_q, acc, ratio))
    return questions, ans_pred, ans_gold

def test_finished(ans_model):
    if('answer is' in ans_model): return True
    else: return False

def extract_ans(ans_model):
    """Split a model response at the first line containing 'answer is'.

    Args:
        ans_model: Response text, possibly spanning several lines.

    Returns:
        An ``(ans, residual)`` pair of strings: ``ans`` is every line up to and
        including the first one containing 'answer is' (the whole text if there
        is none), and ``residual`` is whatever follows, both joined with
        newlines.
    """
    rows = ans_model.split('\n')
    # Without a matching line, everything counts as the answer.
    cut = len(rows)
    for idx, row in enumerate(rows):
        if 'answer is' in row:
            cut = idx + 1
            break
    return '\n'.join(rows[:cut]), '\n'.join(rows[cut:])

In [13]:
# Uncompressed prompt: the few-shot CoT examples followed by test question index 1.
prompt_q = prompt_complex + '\nQuestion: ' + test_dataset[1]['question'] + '\n'

In [14]:
from openai import OpenAI

In [15]:
client = OpenAI(
    # The key comes from Colab secrets (api_key above), so it is passed explicitly.
    # NOTE(review): omitting it presumably only works when the OPENAI_API_KEY
    # environment variable is set -- confirm before removing.
    api_key=api_key,
)

In [16]:
# Baseline request: send the uncompressed prompt (prompt_q) to gpt-3.5-turbo.
response_before_prompt_comp = client.chat.completions.create(
  model="gpt-3.5-turbo",
  messages=[
        {"role": "system", "content": "Follow the given examples and answer the question."},
        {"role": "user", "content": prompt_q},
    ]
)

In [17]:
from llmlingua import PromptCompressor

In [None]:
# Create the prompt compressor in LLMLingua-2 mode with the model named below.
llm_lingua = PromptCompressor(
    model_name="microsoft/llmlingua-2-xlm-roberta-large-meetingbank",
    use_llmlingua2=True, # Whether to use llmlingua-2
)

In [29]:
# Compress the few-shot prompt with rate=0.12; force_tokens asks the compressor
# to keep newlines and question marks.
# NOTE(review): the exercise text asks for a 300-token target -- confirm that
# rate=0.12 lands near that size for this prompt.
compressed_prompt = llm_lingua.compress_prompt(prompt_complex, rate=0.12, force_tokens = ['\n', '?'])

In [30]:
compressed_prompt

{'compressed_prompt': 'Angelo Melanie 2 chapters 4 worksheets 3 1.5 3 breaks lunch?\n\n 3 6\n 1.5\n 12 3\n breaks lunch\n 3 breaks\n\n 15\n 4 3.75\n 4 days\n\n\n Mark 25 2 pointers 8 3 10 free throws opponents?\n\n 50\n team 3 pointers 24\n 10 free throws\n 50+24+10 84\n opponents\n half 3 12\n free\n 100+12+5=117\n 84+117=201\n 201\n\n Bella 20 2/5 60?\n\n 24\n 84\n 30\n 12\n 42\n\n\n\n 140\n\n\n4 fruit baskets 9 apples 15 oranges 14 bananas 2 fourth?\n\n 9+15=24\n=38\n\n fourth 9-2\n oranges\n+7=20\n 14-2 bananas\n=32\n=146\n 146\n\n 4 watermelon 36 orange $0.50 $66?\n\n\n\n $60\n\n\n\n\n\napple $60 $1\n 1\n\n Susy 800 Sarah 300 100 40 Sarah 50 90 three?\n\n 140\n\n 10\n 170\n 140\n 30\n 10\n 180\n\n 180\n\n Sam 30 $10 $3 $2 profit?\n\n 12 boxes $120\n 360\n\n $15\n 330\n 110\nsold $2 110 2 $220\n earned $220 $15 $235\n $120 $115\n 115\n\n 2/3 1/5 3:2 1000?\n\n\n\n\n 400\n\n\n\n\n\n',
 'compressed_prompt_list': ['Angelo Melanie 2 chapters 4 worksheets 3 1.5 3 breaks lunch?\n\n 3 6\n 

In [31]:
# Compressed prompt: the compressed few-shot text followed by test question index 1.
comp_prompt_q = compressed_prompt['compressed_prompt'] + '\nQuestion: ' + test_dataset[1]['question'] + '\n'

In [32]:
# Same request as the baseline, but with the compressed prompt (comp_prompt_q).
response_after_prompt_comp = client.chat.completions.create(
  model="gpt-3.5-turbo",
  messages=[
        {"role": "system", "content": "Follow the given examples and answer the question."},
        {"role": "user", "content": comp_prompt_q},
    ]
)

In [33]:
# Print the question, then the first choice of each response, so the answers
# with and without prompt compression can be compared.
question = test_dataset[1]['question']
print("---- Q ----")
print(question)
print()
print("---- answer before prompt compression ----")
answer_before_prompt_comp = response_before_prompt_comp.choices[0].message.content
print(answer_before_prompt_comp)
print()
print("---- answer after prompt compression ----")
answer_after_prompt_comp = response_after_prompt_comp.choices[0].message.content
print(answer_after_prompt_comp)

---- Q ----
A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?

---- answer before prompt compression ----
It takes 2 bolts of blue fiber for the robe.
Half of that amount is 2/2 = 1 bolt of white fiber.
Therefore, in total, it takes 2 (blue) + 1 (white) = 3 bolts of fiber to make the robe.

---- answer after prompt compression ----
To find out how many bolts it takes for a robe that requires 2 bolts of blue fiber and half that much white fiber, we need to calculate the total number of bolts needed.

- Blue fiber bolts: 2 bolts
- White fiber bolts: Half of 2 bolts = 2 bolts / 2 = 1 bolt

Total bolts needed for the robe = Blue fiber bolts + White fiber bolts
Total bolts needed = 2 bolts + 1 bolt = 3 bolts

Therefore, it takes a total of 3 bolts for the robe.
