## OpenAI 環境設定

- 註冊 OpenAI 帳號
- 設定 api_key

In [None]:
import os
import openai

# Take the API key from the OPENAI_API_KEY environment variable so the secret
# stays out of the notebook. (For a quick local run the "sk-..." key string
# could be assigned to openai.api_key directly instead.)
openai.api_key = os.environ.get("OPENAI_API_KEY")

### GPT3

- 如需要較完整或是較長回覆，需要設定max_tokens

In [None]:
# Send a greeting to the completion endpoint and print the full response.
prompt = "哈囉你好，我是Steve"
response = openai.Completion.create(
    prompt=prompt,
    model="text-davinci-003",
    max_tokens=256,  # raised from the default of 16 to allow a longer reply
)
print(response)

### GPT-3.5

- 需要給 messages

  - `role`：訊息的角色。user 為使用者，system 用來向系統說明目的，assistant 為聊天機器人對象
  - `content`：訊息內容
- temperature 為結果確定性（是否有創造力）

In [None]:
# Send one user message to the chat endpoint, print the full response, then
# leave the reply text as the cell's last expression so it is displayed.
prompt = "哈囉你好，我是Steve"
messages = [dict(role="user", content=prompt)]
response = openai.ChatCompletion.create(
    temperature=0,
    messages=messages,
    model="gpt-3.5-turbo",
)
print(response)
response.choices[0].message["content"]

## PTT 爬蟲

- 平行化

In [None]:
from PTTScraper import PTTScraper
import pandas as pd

# Scrape PTT and load the result into a DataFrame.
# NOTE(review): presumably "6/6" is the oldest post date to go back to and
# max_posts caps the number of posts - confirm against PTTScraper.
scraper = PTTScraper()
data = scraper.get_data_until("6/6", max_posts=200)
df = pd.DataFrame(data=data)

## Prompt 設定

In [None]:
def sentiment_prompt(chat):
    """Build the English sentiment-analysis prompt for a single chat.

    The instructions ask for a one-word sentiment plus the stock target,
    returned as JSON with the keys ``sentiment`` and ``stock_target``.
    ``chat`` is placed between angle brackets on the last content line.
    """
    # Doubled braces come out as literal braces after ``str.format``.
    template = """
    Analyze the sentiment of the following stock market chat.
    Chat text is enclosed within angle brackets.
    Your answer is a single word, either "positive" or "negative" or "neutral". 
    If stock market target is not mentioned in the chat set the sentiment answer to "neutral", 
    otherwise, please list it in chinese, stock code or code name.
    
    Provide above answers in JSON format with keys, respectively:
    sentiment, stock_target.
    And Follow the format:
    
    {{"sentiment":..., "stock_target":...}}
    
    Stock_market_chat text: <{chat}>
    """
    return template.format(chat=chat)

def sentiment_prompt_chinese(chat):
    """Build the mixed Chinese/English sentiment prompt for one post.

    The instructions describe the text as a stock-related post with its
    reply comments, ask for a one-word sentiment and for the stock target
    ("Market" for the whole market, "null" when there is none), and request
    JSON with the keys ``sentiment`` and ``stock_target``. ``chat`` is placed
    between angle brackets on the last content line.
    """
    # Doubled braces come out as literal braces after ``str.format``.
    template = """
    在一組"<>"中是一篇股票相關的發文與回覆推文，請對這篇文章進行情緒分析。
    Your answer is a single word, either "positive" or "negative" or "neutral".
    如果該情緒分析與標的有關，請列出開標的股票名稱、股票代號或是暱稱。
    如果該情緒分析是針對整個市場，顯示為"Market"，如都沒有則顯示"null"。
    
    Provide above sentiment result and stock target answers in JSON format with keys, respectively:
    sentiment, stock_target.
    And Follow the format:
    
    {{"sentiment":..., "stock_target":... }}
    
    Stock_market_chat text: <{chat}>
    """
    return template.format(chat=chat)

def get_completion(prompt, model="gpt-3.5-turbo"):
    """Run sentiment analysis on a chat text and return the model's reply text.

    Args:
        prompt: The chat text to analyse. It is wrapped with
            ``sentiment_prompt`` inside this function, so pass the raw text
            rather than an already-built prompt.
        model: ``"gpt-3.5-turbo"`` is sent to the chat endpoint with
            ``temperature=0``; any other value is sent to the completion
            endpoint under that model name.

    Returns:
        The text of the first choice in the API response.
    """
    prompt = sentiment_prompt(prompt)

    if model == "gpt-3.5-turbo":
        response = openai.ChatCompletion.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
        )
        return response.choices[0].message["content"]

    response = openai.Completion.create(
        # Honour the caller's choice instead of always using text-davinci-003.
        model=model,
        prompt=prompt,
        # The completion endpoint defaults to 16 tokens, which is too short
        # for the JSON answer the prompt asks for.
        max_tokens=256,
    )
    return response.choices[0].text

# One Chinese-language prompt per entry of the first ten rows of df.Content;
# the bare name on the last line displays the list in the notebook.
prompts = [sentiment_prompt_chinese(content) for content in df.Content[:10]]
prompts

In [None]:
# Sample financial comment used to try the prompt below.
review = "今天的台積電要起飛了"


def sentiment_prompt(review):
    """Build the Chinese free-text sentiment prompt for a financial comment.

    The prompt asks which target and sentiment the double-quoted comment
    refers to, which words led to that judgement, and to express the
    sentiment as positive, neutral or negative (in Chinese). This rebinds
    the name ``sentiment_prompt`` defined earlier in the file.
    """
    template = """
在雙引號中的金融評論，針對的'標的'以及'情緒'是什麼?
並同時說明，是哪些'文字'，來判斷最後的情緒結果。

用'正面'、'中性'或'負面'來表示情緒。

金融評論："{review}"
"""
    return template.format(review=review)

# Keep the finished prompt in `prompt` for inspection.
prompt = sentiment_prompt(review)
# get_completion() applies sentiment_prompt() to its argument itself, so it is
# given the raw review; handing it the finished prompt would nest one prompt
# inside another.
print(get_completion(review))

## 計算 tokens 與估算花費

- GPT-3.5 為 1K tokens \$0.002
- GPT-3.0 的 Davinci 為 1K tokens \$0.02
- GPT-3.0 的 Curie 為 1K tokens \$0.002

GPT-3.0 可自行訓練（fine-tune），然而 3.5 無法；不過 3.5 的結果可以逼近訓練過後的 3.0

In [None]:
import tiktoken
import requests
import json


def num_tokens_from_string(string, model="gpt-3.5-turbo"):
    """Return the number of tokens ``string`` encodes to for ``model``.

    Args:
        string: The text to tokenize.
        model: Model name passed to ``tiktoken.encoding_for_model`` to pick
            the encoding. Defaults to ``"gpt-3.5-turbo"``, the model this
            function was previously fixed to.

    Returns:
        The length of the encoded token sequence.
    """
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(string))

# Estimate the total cost in TWD of sending every prompt to GPT-3.5.

# The exchange rate is the same for every prompt, so fetch it once up front
# instead of issuing one HTTP request per loop iteration.
r = requests.get("https://api.exchangerate-api.com/v4/latest/USD", timeout=10)
r.raise_for_status()  # fail loudly rather than parse an error page
tw_us_rate = json.loads(r.text)["rates"]["TWD"]

# NOTE(review): presumably the fixed per-message overhead of the chat format
# on top of the content tokens - confirm against the OpenAI token-counting guide.
tokens_per_message = 4

paid = 0
for prompt in prompts:
    tokens = num_tokens_from_string(prompt) + tokens_per_message
    print(tokens)
    # 0.002 USD per 1K tokens (GPT-3.5 price), converted to TWD.
    paids_in_tw = tokens / 1000 * 0.002 * tw_us_rate
    paid += paids_in_tw
print(paid)

In [None]:
# Three sample chats used to try the prompt builders in this cell.
stock_market_chat = ["今天的台積電要起飛了", "0050 帶頭衝衝衝", "發哥今天是不是軟軟的"]


def sentiment_three_prompt(stock_market_chat):
    """Build one sentiment prompt covering exactly three chats.

    Elements 0, 1 and 2 of ``stock_market_chat`` are each placed between
    angle brackets on the last content line; the instructions ask for a JSON
    object with the keys ``chat1``..``chat3``, each holding ``sentiment`` and
    ``stock_target``. Indexing raises if fewer than three items are given.
    """
    first = stock_market_chat[0]
    second = stock_market_chat[1]
    third = stock_market_chat[2]
    # Doubled braces come out as literal braces after ``str.format``.
    template = """
    Analyze the sentiment of the following stock market chats.
    Each chat is enclosed within angle brackets and the entire conversation is contained within triple backticks.
    Your answer is a single word, either "positive" or "negative" or "neutral". 
    If stock market target is not mentioned in the chat set the sentiment answer to "neutral", 
    otherwise, please list it in chinese, stock code or code name.
    
    Provide above answers in JSON format with keys for three chats separated by angle brackets, respectively:
    sentiment, stock_target.
    And Follow the format:
    {{
        "chat1":{{"sentiment":..., "stock_target":...}},
        "chat2":{{"sentiment":..., "stock_target":...}},
        "chat3":{{"sentiment":..., "stock_target":...}}
    
    }}
    Stock_market_chat text: <{first}>, <{second}>, <{third}>
    """
    return template.format(first=first, second=second, third=third)

def sentiment_single_prompt(chat):
    """Build the English sentiment-analysis prompt for a single chat.

    The instructions ask for a one-word sentiment plus the stock target,
    returned as JSON with the keys ``sentiment`` and ``stock_target``.
    ``chat`` is placed between angle brackets on the last content line.
    """
    # Doubled braces come out as literal braces after ``str.format``.
    template = """
    Analyze the sentiment of the following stock market chat.
    Chat text is enclosed within angle brackets.
    Your answer is a single word, either "positive" or "negative" or "neutral". 
    If stock market target is not mentioned in the chat set the sentiment answer to "neutral", 
    otherwise, please list it in chinese, stock code or code name.
    
    Provide above answers in JSON format with keys, respectively:
    sentiment, stock_target.
    And Follow the format:
    
    {{"sentiment":..., "stock_target":...}}
    
    Stock_market_chat text: <{chat}>
    """
    return template.format(chat=chat)

# Build a prompt for the first sample chat.
# NOTE(review): this calls sentiment_prompt (last defined in an earlier cell),
# not the sentiment_single_prompt / sentiment_three_prompt builders defined
# just above - confirm which one is intended.
prompt = sentiment_prompt(stock_market_chat[0])

In [None]:
def sentiment_multiple_prompt(stock_market_chat):
    """Build one sentiment prompt covering a whole series of chats.

    ``stock_market_chat`` is interpolated as-is (its string form) after
    "Stock_market_chat text:". The instructions ask for one
    ``{"sentiment", "stock_target"}`` entry per numbered chat, in a JSON
    layout meant to be convertible to a pandas DataFrame.
    """
    # Doubled braces come out as literal braces after ``str.format``.
    template = """
    Analyze the sentiment of the following series of stock market chats.
    Each chats accompanied by a corresponding number, 
    and the entire collection of the series chats is enclosed within triple backticks.
    Your answer is a single word, either "positive" or "negative" or "neutral". 
    If stock market target is not mentioned in the chat set the sentiment answer to "neutral", 
    otherwise, please list it in chinese, stock code or code name.
    
    Provide above answers in JSON format which cloud convert to Pandas DataFrame directly with keys for the series chats separated by angle brackets, respectively:
    sentiment, stock_target.
    
    And Follow the format:
    [
        the corresponding number :{{"sentiment":..., "stock_target":...}},
        the corresponding number :{{"sentiment":..., "stock_target":...}},
        the corresponding number :{{"sentiment":..., "stock_target":...}},
        ...
    ]

    Stock_market_chat text: {stock_market_chat}
    Please note, it is essential to adhere to the rules specified by the JSON formats and values. 
    """
    return template.format(stock_market_chat=stock_market_chat)

# Build the multi-chat prompt from the Content column of the first entry in
# df.Pushes and show it. The original call named `sentiment_prompt2`, which
# is not defined anywhere in view and would raise NameError; the builder
# defined in this cell is sentiment_multiple_prompt.
prompt = sentiment_multiple_prompt(pd.DataFrame(df.Pushes[0]).Content)
print(prompt)