# 8.1 **ChatGPT**

## OpenAI API
https://platform.openai.com/

In [2]:
!pip install -q cohere
!pip install -q openai
!pip install -q tiktoken

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m166.6/166.6 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.3/139.3 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m50.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m66.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.2/82.2 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━

In [3]:
import openai
import tiktoken
from getpass import getpass

In [4]:
# Read the key interactively (input is not echoed) so it never ends up in the
# notebook source or in a saved cell output.
api_key = getpass('Input your OpenAI API Key here: ')
openai.api_key = api_key

Input yout OpenAI API Key here: ··········



https://openai.com/pricing

|Model|Context window (K tokens)|Input (\$/1M tokens)|Output (\$/1M tokens)|
|---|---:|---:|---:|
|gpt-3.5-turbo-instruct|4|1.50|2.00|
|gpt-3.5-turbo-0125|16|0.50|1.50|
|gpt-4|8|30.00|60.00|
|gpt-4-turbo|128|10.00|30.00|
|gpt-4o|128|5.00|15.00|

In [5]:
# Model name used for the API requests in this notebook. The tokenizer is
# looked up from the same name so that local token counts use the encoding
# tiktoken associates with this model.
api_model = 'gpt-4o'
encoding = tiktoken.encoding_for_model(api_model)

In [6]:
def get_num_input_tokens(content):
    """Estimate the prompt token count of a single user message.

    Args:
        content: Message text, tokenized with the module-level `encoding`.

    Returns:
        Number of tokens in `content` plus a fixed overhead of 7 tokens.
    """
    # Fixed overhead added on top of the text itself:
    #   4 tokens for the message wrapper   <|start|>user\n{content}<|end|>
    #   3 tokens for the reply primer      <|start|>assistant<|message|>
    wrapper_overhead = 4
    primer_overhead = 3
    return len(encoding.encode(content)) + wrapper_overhead + primer_overhead

In [7]:
def calc_price(num_input_tokens, num_output_tokens, input_unit=5.0, output_unit=15.0):
    """Return the dollar cost of a request, rounded to 5 decimal places.

    Args:
        num_input_tokens: Number of prompt tokens.
        num_output_tokens: Number of completion tokens.
        input_unit: Price in dollars per 1,000,000 input tokens.
        output_unit: Price in dollars per 1,000,000 output tokens.

    Returns:
        Total cost in dollars as a float rounded to 5 decimals.
    """
    input_cost = num_input_tokens * input_unit
    output_cost = num_output_tokens * output_unit
    # Unit prices are quoted per million tokens, hence the divisor.
    return round((input_cost + output_cost) / 1000000, 5)

def estimate_price(num_input_tokens, input_to_output):
    """Estimate the cost of a request before sending it.

    Args:
        num_input_tokens: Number of prompt tokens.
        input_to_output: Multiplier applied to the input token count to
            predict the output token count.

    Returns:
        The price from calc_price, using its default unit prices, for the
        given input tokens and the predicted output tokens.
    """
    return calc_price(num_input_tokens, num_input_tokens * input_to_output)

In [8]:
def query_message(message, temperature=0.1, verbose=False):
    """Send one user message to the chat model and return the reply text.

    Args:
        message: Prompt text, sent as a single 'user' message.
        temperature: Sampling temperature forwarded to the API.
        verbose: When True, print the token usage reported by the API, the
            output/input token ratio, and the price from calc_price using
            its default unit prices.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or '' when the response carries no text content.
    """
    messages = [{'role': 'user', 'content': message}]

    response = openai.chat.completions.create(model=api_model, messages=messages, temperature=temperature)
    if verbose:
        input_tokens = response.usage.prompt_tokens
        output_tokens = response.usage.completion_tokens
        print('input tokens:', input_tokens)
        print('output tokens:', output_tokens)
        # The label says "input to output", but the value is output / input.
        print('input to output ratio:', round(output_tokens / input_tokens, 3))
        print('price: $', calc_price(input_tokens, output_tokens))

    # The API may return content=None (e.g. a refusal); avoid calling
    # .strip() on None.
    content = response.choices[0].message.content
    return (content or '').strip()

## Data

FOMC statement: https://www.federalreserve.gov/newsevents/pressreleases/monetary20240501a.htm

FOMC minutes: https://www.federalreserve.gov/monetarypolicy/fomcminutes20240501.htm

In [9]:
import re
import requests
from bs4 import BeautifulSoup

In [10]:
def get_FOMC_text(date, text_type='statement'):
    """Download an FOMC statement or minutes page and return its article text.

    Args:
        date: Meeting date string inserted into the URL (the notebook uses
            the 'YYYYMMDD' form, e.g. '20240501').
        text_type: Either 'statement' or 'minutes'.

    Returns:
        The text content of the page's <div id="article"> element.

    Raises:
        ValueError: If text_type is not 'statement' or 'minutes', or if the
            downloaded page contains no <div id="article">.
        requests.HTTPError: If the server responds with an error status.
    """
    url_templates = {
        'statement': 'https://www.federalreserve.gov/newsevents/pressreleases/monetary{}a.htm',
        'minutes': 'https://www.federalreserve.gov/monetarypolicy/fomcminutes{}.htm',
    }
    # Fail with a clear message instead of leaving `url` unbound below.
    if text_type not in url_templates:
        raise ValueError(
            "text_type must be 'statement' or 'minutes', got {!r}".format(text_type)
        )
    url = url_templates[text_type].format(date)

    # A timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status stops an error page from being parsed as the document.
    raw = requests.get(url, timeout=30)
    raw.raise_for_status()

    html = BeautifulSoup(raw.text, 'html.parser')
    article = html.find('div', id='article')
    if article is None:
        raise ValueError('No <div id="article"> found at {}'.format(url))
    return article.get_text()

In [11]:
# Meeting dates (YYYYMMDD) whose statement and minutes pages are downloaded.
fomc_dates = ['20230201', '20230322', '20230503', '20230614', '20230726', '20230920', '20231101', '20231213', '20240131', '20240320', '20240501']
# Parallel lists: index i of each list belongs to fomc_dates[i].
statement_list = []
minutes_list = []
for fomc_date in fomc_dates:
    print(fomc_date)  # progress indicator, one line per meeting
    statement = get_FOMC_text(fomc_date, 'statement')
    # Collapse runs of consecutive newlines into a single newline.
    statement = re.sub('\n+', '\n', statement)
    statement_list.append(statement)
    # The minutes text is stored as returned, without the newline collapsing.
    minutes = get_FOMC_text(fomc_date, 'minutes')
    minutes_list.append(minutes)

20230201
20230322
20230503
20230614
20230726
20230920
20231101
20231213
20240131
20240320
20240501


In [12]:
statement_list[-1]

"\nMay 01, 2024\nFederal Reserve issues FOMC statement\nFor release at 2:00 p.m. EDT                     \r\n                \r\n                \nShare\nRecent indicators suggest that economic activity has continued to expand at a solid pace. Job gains have remained strong, and the unemployment rate has remained low. Inflation has eased over the past year but remains elevated. In recent months, there has been a lack of further progress toward the Committee's 2 percent inflation objective.\nThe Committee seeks to achieve maximum employment and inflation at the rate of 2 percent over the longer run. The Committee judges that the risks to achieving its employment and inflation goals have moved toward better balance over the past year. The economic outlook is uncertain, and the Committee remains highly attentive to inflation risks.\nIn support of its goals, the Committee decided to maintain the target range for the federal funds rate at 5-1/4 to 5-1/2 percent. In considering any adjustmen

In [13]:
minutes_list[-1]

'\nMinutes of the Federal Open Market Committee\n\nApril 30-May 1, 2024\nA joint meeting of the Federal Open Market Committee and the Board of Governors of the Federal Reserve System was held in the offices of the Board of Governors on Tuesday, April 30, 2024, at 10:00 a.m. and continued on Wednesday, May 1, 2024, at 9:00 a.m.1\nAttendance\r\nJerome H. Powell, Chair\r\nJohn C. Williams, Vice Chair\r\nThomas I. Barkin\r\nMichael S. Barr\r\nRaphael W. Bostic\r\nMichelle W. Bowman\r\nLisa D. Cook\r\nMary C. Daly\r\nPhilip N. Jefferson\r\nAdriana D. Kugler\r\nLoretta J. Mester\r\nChristopher J. Waller\nSusan M. Collins, Austan D. Goolsbee, Alberto G. Musalem, Jeffrey R. Schmid, and Sushmita Shukla, Alternate Members of the Committee\nPatrick Harker, Neel Kashkari, and Lorie K. Logan, Presidents of the Federal Reserve Banks of Philadelphia, Minneapolis, and Dallas, respectively\nJoshua Gallin, Secretary\r\nMatthew M. Luecke, Deputy Secretary\r\nBrian J. Bonis, Assistant Secretary\r\nMichell

## Prompt and Price

Usage: https://platform.openai.com/account/usage

In [14]:
# Instruction prepended to every text; '{}' marks where the text is inserted.
default_template = 'Translate the following text into Korean:\n{}'

def build_prompt(text, template=default_template):
    """Return `template` with its '{}' placeholder filled in by `text`.

    Args:
        text: Text to insert into the template.
        template: Format string with one positional placeholder; defaults
            to the Korean-translation instruction.
    """
    return template.format(text)

In [15]:
prompt = build_prompt(statement_list[-1])
print(prompt)

Translate the following text into Korean:

May 01, 2024
Federal Reserve issues FOMC statement
For release at 2:00 p.m. EDT                     
                
                
Share
Recent indicators suggest that economic activity has continued to expand at a solid pace. Job gains have remained strong, and the unemployment rate has remained low. Inflation has eased over the past year but remains elevated. In recent months, there has been a lack of further progress toward the Committee's 2 percent inflation objective.
The Committee seeks to achieve maximum employment and inflation at the rate of 2 percent over the longer run. The Committee judges that the risks to achieving its employment and inflation goals have moved toward better balance over the past year. The economic outlook is uncertain, and the Committee remains highly attentive to inflation risks.
In support of its goals, the Committee decided to maintain the target range for the federal funds rate at 5-1/4 to 5-1/2 percent

In [16]:
num_input_tokens = get_num_input_tokens(prompt)
print(num_input_tokens)

581


In [17]:
answer = query_message(prompt, verbose=True)

input tokens: 581
output tokens: 777
input to output ratio: 1.337
price: $ 0.01456


In [18]:
print(answer)

2024년 5월 1일
연방준비제도이사회(Federal Reserve) FOMC 성명서 발표
발표 시간: 오후 2시 EDT

공유하기
최근 지표에 따르면 경제 활동이 견고한 속도로 계속 확장되고 있음을 시사합니다. 일자리 증가가 강세를 유지하고 있으며, 실업률은 낮은 수준을 유지하고 있습니다. 인플레이션은 지난 해 동안 완화되었지만 여전히 높은 수준을 유지하고 있습니다. 최근 몇 달 동안 위원회의 2% 인플레이션 목표를 향한 추가적인 진전은 부족했습니다.
위원회는 장기적으로 최대 고용과 2% 인플레이션 달성을 목표로 하고 있습니다. 위원회는 지난 해 동안 고용 및 인플레이션 목표 달성에 대한 위험이 더 나은 균형으로 이동했다고 판단합니다. 경제 전망은 불확실하며, 위원회는 인플레이션 위험에 대해 매우 주의 깊게 관찰하고 있습니다.
목표 달성을 지원하기 위해, 위원회는 연방기금금리 목표 범위를 5-1/4%에서 5-1/2%로 유지하기로 결정했습니다. 연방기금금리 목표 범위의 조정 여부를 고려할 때, 위원회는 들어오는 데이터, 변화하는 전망, 그리고 위험의 균형을 신중하게 평가할 것입니다. 위원회는 인플레이션이 지속 가능하게 2%를 향해 움직이고 있다는 더 큰 확신을 얻기 전까지는 목표 범위를 줄이는 것이 적절하지 않을 것이라고 예상합니다. 또한, 위원회는 국채, 기관 부채 및 기관 모기지 담보 증권 보유를 계속 줄일 것입니다. 6월부터 위원회는 국채 보유 감소 속도를 늦추기 위해 월간 상환 한도를 600억 달러에서 250억 달러로 줄일 것입니다. 위원회는 기관 부채 및 기관 모기지 담보 증권의 월간 상환 한도를 350억 달러로 유지하고, 이 한도를 초과하는 원금 상환액은 국채에 재투자할 것입니다. 위원회는 인플레이션을 2% 목표로 되돌리는 데 강력히 전념하고 있습니다.
통화 정책의 적절한 입장을 평가함에 있어, 위원회는 경제 전망에 대한 들어오는 정보의 영향을 계속 모니터링할 것입니다. 위원회는 위원회의 목표 달성을 저해할 수 있는 위험이 발생할 경우, 적절하게 통

In [19]:
prompt = build_prompt(statement_list[-2])
answer = query_message(prompt, verbose=True)

input tokens: 482
output tokens: 662
input to output ratio: 1.373
price: $ 0.01234


In [20]:
# Expected number of output tokens per input token (output / input) used for
# the estimates below; the two trial translations above measured 1.337 and 1.373.
input_to_output = 1.35

In [21]:
# Largest prompt size (in tokens) for which prompt + expected output still
# fits within max_total_length, given the input_to_output ratio:
#   input * (1 + ratio) <= max_total_length
# NOTE(review): the model may also cap output tokens separately from the
# context window, which would make the real limit smaller — confirm.
max_total_length = 128000
max_input_length = round(max_total_length / (1+input_to_output))
print(max_input_length)

54468


In [22]:
prompt = build_prompt(minutes_list[-1])
num_input_tokens = get_num_input_tokens(prompt)
print(num_input_tokens)

8850


In [23]:
prompts = [build_prompt(statement) for statement in statement_list]

In [24]:
# Sum the locally estimated token counts over all prompts, then estimate the
# total cost by predicting output tokens from the input_to_output ratio.
total_input_tokens = 0
for prompt in prompts:
    total_input_tokens += get_num_input_tokens(prompt)
print('total input tokens:', total_input_tokens)
print('estimated price: $', estimate_price(total_input_tokens, input_to_output))

total input tokens: 5561
estimated price: $ 0.14042
