In [1]:
%%capture
# update or install the necessary libraries
!pip install --upgrade openai
!pip install --upgrade langchain
!pip install --upgrade python-dotenv

In [2]:
import openai
import os
import IPython
import pandas as pd
from langchain.llms import OpenAI

In [3]:
# Prefer a key that is already exported as OPENAI_API_KEY; only when none is
# set fall back to the placeholder below, which must be replaced with a real
# key before any API call is made.
os.environ.setdefault("OPENAI_API_KEY", 'your_openai_key')

# Read the value back from the environment so the environment variable and
# the openai module always hold the same key.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [4]:
def generate_response(prompt):
  """Send `prompt` to the gpt-3.5-turbo chat model and return its reply text.

  The request consists of a fixed system message (commodity price expert
  persona) followed by `prompt` as the user message. A single completion of
  at most 256 tokens is requested with temperature 0.7.

  Args:
    prompt: Text sent as the user message.

  Returns:
    The message content of the first choice in the API response.
  """
  chat_messages = [
    {"role": "system", "content": "You are an international commodity price expert."},
    {"role": "user", "content": prompt},
  ]

  # Request settings, gathered in one place and forwarded as keyword arguments.
  request_options = dict(
    model="gpt-3.5-turbo",
    messages=chat_messages,
    max_tokens=256,
    n=1,
    temperature=0.7,
    top_p=1,
    frequency_penalty=0.0,
    presence_penalty=0.0,
  )
  completion = openai.ChatCompletion.create(**request_options)

  # Only one completion is requested (n=1), so the reply is the first choice.
  first_choice = completion['choices'][0]
  return first_choice['message']['content']

In [5]:
def evaluate_headline(headline):
  """Classify a news headline's expected effect on the LNG price.

  Builds a few-shot prompt (seven labelled example headlines followed by
  `headline`) and sends it through `generate_response`. In the examples,
  headlines pointing to a price rise are labelled YES, headlines pointing to
  a price drop are labelled NO, and headlines without a price signal are
  labelled UNKNOWN.

  Args:
    headline: Headline text to classify.

  Returns:
    One of "YES", "NO" or "UNKNOWN". The model reply is normalised first
    (an echoed "A:" prefix, surrounding whitespace, quotes and full stops are
    removed and the text is upper-cased); a reply that is empty or is not one
    of the three labels is reported as "UNKNOWN".
  """

  # NOTE(review): "in the term" in the prompt looks truncated (short term?
  # long term?) - left unchanged here; confirm the intended horizon.
  prompt = f"""
  Answer “YES” if good news, “NO” if bad news, or “UNKNOWN” if uncertain.
  Is this headline good or bad for the LNG price in the term?
  Let's work this out in a step by step way to be sure we have the right answer.
  Please return the response as one of 'YES', 'NO', or 'UNKNOWN' only, and do not include any additional explanation.

  Expected increase in LNG supply, implication of a sharp rise in domestic LNG prices
  A: YES

  Global energy transition movement, a major cause of LNG price increase
  A: YES

  Concerns of oversupply in the LNG market, price decrease forecast
  A: NO

  Increase in domestic LNG imports, expected price drop due to oversupply
  A: NO

  Domestic LNG technology research contributes to environmental protection
  A: UNKNOWN

  LNG power plants, a new driving force for local economic revitalization
  A: UNKNOWN

  LNG-powered ships, the future of an eco-friendly maritime industry
  A: UNKNOWN


  {headline}
  A:
"""

  # few-shot learning and APE -> Self-Consistency
  # 7 examples

  response = generate_response(prompt)

  # Normalise the reply so that variants such as " yes", "YES." or "A: NO"
  # still match the label set that callers map to scores.
  label = (response or "").strip()
  if label.upper().startswith("A:"):
    label = label[2:]
  label = label.strip(" \t\r\n'\"“”‘’.").upper()

  return label if label in ("YES", "NO", "UNKNOWN") else "UNKNOWN"

In [8]:
# Spreadsheet of crawled news headlines, read from the mounted Drive folder.
NEWS_XLSX_PATH = r"/content/drive/MyDrive/news_crawing.xlsx"

df = pd.read_excel(NEWS_XLSX_PATH)
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,Date,title
0,"October 4, 2019",Natural Gas Prices See Widespread Losses Despi...
1,"October 4, 2019",‘Broad Diversity’ Seen in U.S. E&P Dealmaking ...
2,"October 3, 2019",November Natural Gas Futures Choppy After EIA ...
3,"October 3, 2019",NatGas Futures Slide Again Ahead of EIA Data; ...
4,"October 3, 2019",Higher Storage to Pressure ‘Very Resilient’ Na...


In [9]:
# Print a summary of the loaded frame: index range, columns, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6066 entries, 0 to 6065
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    6066 non-null   object
 1   title   6066 non-null   object
dtypes: object(2)
memory usage: 94.9+ KB


In [10]:
# Because of GPT 3.5 turbo's tokens-per-minute limit, it is better to split
# the headlines into batches of 100 rows and run them one batch at a time.
CHUNK_SIZE = 100
dfs = [df.iloc[start:start + CHUNK_SIZE] for start in range(0, len(df), CHUNK_SIZE)]

In [12]:
# Position in `dfs` of the batch of headlines scored by this run.
CHUNK_INDEX = 60

# Replace the batch with an explicit copy before adding columns: the batch is
# an `iloc` slice of the full frame, and assigning columns onto such a slice
# raises SettingWithCopyWarning. The copy is stored back into `dfs` so that
# `df` and the list entry remain the same object, as they were before.
dfs[CHUNK_INDEX] = dfs[CHUNK_INDEX].copy()
df = dfs[CHUNK_INDEX]
df['Date'] = pd.to_datetime(df['Date'])

# Apply the function to each headline (each call sends one request to the API)
df["Sentiment"] = df['title'].apply(evaluate_headline)

# Map the sentiment to ChatGPT score. A label outside the mapping would become
# NaN; count it as neutral (0) explicitly and keep the column integer-typed.
sentiment_to_score = {"YES": 1, "NO": -1, "UNKNOWN": 0}
df["ChatGPT_Score"] = df["Sentiment"].map(sentiment_to_score).fillna(0).astype(int)

# Extract the month from the date
df['Month'] = df['Date'].dt.to_period('M')

# Group by month and calculate the sum of ChatGPT scores for each month
monthly_scores = df.groupby('Month')['ChatGPT_Score'].sum().reset_index()

print(monthly_scores)

     Month  ChatGPT_Score
0  2013-08              0
1  2013-09              0
2  2013-10             -2
