In [1]:
import os
from pathlib import Path

import openai
import pandas as pd
from dotenv import load_dotenv
from utils import create_message, extract_model_answers, get_completion
from tqdm import tqdm

# Read variables from a local .env file into the process environment, then
# hand the OpenAI key to the client library. The key is never hard-coded.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

# set global params
MODEL = 'gpt-4-1106-preview'


def token_threshold(model):
    """Return the token threshold to use for ``model``.

    The result is stored in ``THRESH`` and later passed to ``create_message``
    as ``thresh=``.
    NOTE(review): the values look like context-window sizes minus a safety
    margin for the completion - confirm against ``utils.create_message``.

    Parameters
    ----------
    model : str
        Model checkpoint name.

    Returns
    -------
    int
        7_350 for the 8k GPT-4 checkpoints, 127_350 for 'gpt-4-1106-preview',
        and 3350 for every other model.
    """
    if model in ['gpt-4-0314', 'gpt-4-0613']:
        return 7_350
    if model == 'gpt-4-1106-preview':
        return 127_350
    return 3350


# Every branch must bind THRESH: the fallback used to assign a misspelled
# name, which left THRESH undefined for any model not listed above.
THRESH = token_threshold(MODEL)

# Price per single token, looked up by key.
# NOTE(review): the cost estimate further down indexes this dict with
# f'{MODEL}_input' / f'{MODEL}_output', so the un-suffixed entries
# ('gpt-3.5-turbo-0301', 'text-davinci-003') and the missing 'gpt-4-0314'
# would raise KeyError there if MODEL were switched to them.
PRICE = {'gpt-4-1106-preview_input': 0.01 / 1000,
         'gpt-4-1106-preview_output': 0.03 / 1000,
         'gpt-3.5-turbo-0301': 0.002 / 1000,
         'gpt-4-0613_input': 0.03 / 1000,
         'gpt-4-0613_output': 0.06 / 1000,
         'text-davinci-003': 0.02 / 1000}

In [2]:
# Location of the input corpus (feather file), relative to the notebook's
# parent directory; the frame read from it is the working table `news`.
PATH_CONF = Path('..') / 'input' / 'news_corpus_conf_v2.feather'
news = pd.read_feather(PATH_CONF)

### Round 1: General Assessment & Summary

In [3]:
# Prompt templates for this round: one file passed as `p_sys`, one as `p_user`.
PATH_PROMPT_SYS = Path('..') / 'input' / 'prompts' / '20231127_sys.txt'
PATH_PROMPT_USER = Path('..') / 'input' / 'prompts' / '20231130.txt'

In [4]:
def _build_message(row):
    """Call ``create_message`` for one article row of ``news``.

    The result is expected to unpack into three values, which become the
    columns 'messages', 'trimmed' and 'n_tok' below.
    """
    return create_message(
        p_sys=PATH_PROMPT_SYS,
        p_user=PATH_PROMPT_USER,
        common_names=row['common_names'],
        title=row['title'],
        publication_datetime=row['publication_datetime'],
        body=row['body'],
        model=MODEL,
        thresh=THRESH,
    )


# One row of (messages, trimmed, n_tok) per article, carrying the same index
# as `news` so the column-wise concat lines up row by row.
message_cols = pd.DataFrame(
    news.apply(_build_message, axis=1).tolist(),
    columns=['messages', 'trimmed', 'n_tok'],
    index=news.index,
)
news = pd.concat([news, message_cols], axis=1)

# Rough cost estimate before any API call is made.
# Prompt tokens come from the 'n_tok' column; output tokens are a flat
# per-article guess.
EST_OUTPUT_TOKENS_PER_ARTICLE = 120  # NOTE(review): assumed completion length - confirm
n_prompt_tok = news['n_tok'].sum()
n_output_tok = len(news) * EST_OUTPUT_TOKENS_PER_ARTICLE

# The final line is the *output* cost; it was previously mislabelled "prompt".
print(f"""
Model checkpoint:\t{MODEL}
Est. tokens (prompt):\t{n_prompt_tok}
Est. cost (prompt):\t{round((n_prompt_tok * PRICE[f'{MODEL}_input']), 2)}
Est. tokens (output):\t{n_output_tok}
Est. cost (output):\t{round((n_output_tok * PRICE[f'{MODEL}_output']), 2)}
""")


Model checkpoint:	gpt-4-1106-preview
Est. tokens (prompt):	4280262
Est. cost (prompt):	42.8
Est. tokens (output):	372240
Est. cost (prompt):	11.17



In [5]:
# Enable the tqdm-backed progress_apply / progress_map methods on pandas objects.
tqdm.pandas()

# One completion per article; the article id ('an') is passed alongside the
# messages. NOTE(review): presumably used by get_completion as a cache key -
# verify in utils.
news['response'] = news.progress_apply(
    lambda row: get_completion(row['messages'], row['an'], model=MODEL, p_user=PATH_PROMPT_USER),
    axis=1,
)

# Parse each response into (relevant, codes). The frame is given the same
# index as `news`, as is done for the message columns, because
# pd.concat(axis=1) aligns on index labels: a fresh 0..n-1 index would
# misalign rows (and add NaN rows) whenever `news` is not default-indexed.
answer_cols = pd.DataFrame(
    news['response'].progress_map(extract_model_answers).tolist(),
    columns=['relevant', 'codes'],
    index=news.index,
)
news = pd.concat([news, answer_cols], axis=1)

  0%|          | 0/3102 [00:00<?, ?it/s]

100%|██████████| 3102/3102 [00:08<00:00, 368.75it/s]
100%|██████████| 3102/3102 [00:00<00:00, 34037.67it/s]


In [6]:
# Persist the labelled corpus (all columns accumulated in `news`) as feather.
PATH_CONF_OUT = Path('..') / 'output' / 'labels_news_gpt_v3.feather'
news.to_feather(PATH_CONF_OUT)

### Face Validity

In [12]:
# Pick out the row(s) whose 'an' value equals this id, for manual inspection.
x = news.loc[news['an'].eq('LBA0000020181205eec501rnp')]

In [13]:
# Show the 'content' of the message at index 1 for the first selected row.
# NOTE(review): index 1 is presumably the user prompt (index 0 the system prompt) - confirm.
first_messages = x['messages'].iloc[0]
print(first_messages[1]['content'])

Here is a news article:

<article>
Title: PREVIEW-Danske investors bank on Maersk clan to chart course through crisis\n
Date: 2018-12-05\n
Body: * Top investor A.P. Moller called EGM after ousting chairman

* Shareholder meeting on Dec. 7 to elect new board members

* Danske should "open up" to investors - big 10 shareholder

* Danish pension fund PFA says Danske can "move forward"

* Danske hit by $227 billion money laundering scandal

By Simon Jessop, Kirstin Ridley and Teis Jensen

LONDON/COPENHAGEN, Dec 5 (Reuters) - Danske Bank's top investors are looking to Denmark's Maersk family to steer the country's largest lender through the turmoil of a 200 billion euro ($227 billion) money laundering scandal.

The Danish clan's investment firm A.P. Moller Holding, Danske's normally passive top shareholder with a stake of around 21 percent, has ousted the bank's chairman Ole Andersen and called an extraordinary shareholder meeting in Copenhagen on Friday to nominate two successors to the bo

In [14]:
# Show the 'output' field of the stored response for the first selected row.
first_response = x['response'].iloc[0]
print(first_response['output'])

{
  "explanation": "The news article discusses events related to Danske Bank, not directly related to A.P. Moller - Maersk. The article focuses on the actions taken by A.P. Moller Holding, which is the top investor in Danske Bank, in response to a money laundering scandal at Danske Bank. A.P. Moller Holding's involvement is in its capacity as a shareholder of Danske Bank, and the events described are specific to Danske Bank's governance and future direction. Therefore, while the events are significant and have a high probability of affecting Danske Bank's stock price in the short term, they are not directly related to A.P. Moller - Maersk's operations or stock price.",
  "final_answer": "no",
  "event_type": null
}
