In [1]:
import os
from dotenv import load_dotenv, find_dotenv
# Locate a .env file with find_dotenv() and load it via load_dotenv(); the
# call's return value is deliberately discarded by binding it to `_`.
_ = load_dotenv(find_dotenv())
# Read the key from the process environment. Subscript access (not .get) means
# a missing OPENAI_API_KEY raises KeyError here instead of failing later.
openai_api_key = os.environ["OPENAI_API_KEY"]

In [2]:
# Sets PYDEVD_DISABLE_FILE_VALIDATION=1 for this kernel process.
# NOTE(review): presumably silences the debugger's (pydevd) file-validation
# warning in the notebook; it has no effect on the extraction logic — confirm.
os.environ['PYDEVD_DISABLE_FILE_VALIDATION'] = '1'

In [3]:
import openai
import json

In [4]:
import pandas as pd

In [5]:
# Hand the key read from OPENAI_API_KEY to the openai module so later
# openai.* calls in this notebook are authenticated.
openai.api_key = openai_api_key

In [6]:
def get_prompt_financial():
    """Return the fixed instruction text that is placed in front of a news article.

    The text asks the model to pull company name, revenue, net income and EPS
    out of the article, to supply the matching stock symbol, and to answer with
    a JSON string shaped like the embedded Walmart example. It finishes with a
    "News Article:" header, so the article text is expected to be appended
    directly after the returned string.

    Returns:
        str: the instruction block, unchanged on every call.
    """
    # Kept as one literal: its whitespace and line breaks are part of the prompt.
    instructions = '''Please retrieve company name, revenue, net income and earnings per share (a.k.a. EPS)
    from the following news article. If you can't find the information from this article 
    then return "". Do not make things up.    
    Then retrieve a stock symbol corresponding to that company. For this you can use
    your general knowledge (it doesn't have to be from this article). Always return your
    response as a valid JSON string. The format of that string should be this, 
    {
        "Company Name": "Walmart",
        "Stock Symbol": "WMT",
        "Revenue": "12.34 million",
        "Net Income": "34.78 million",
        "EPS": "2.1 $"
    }
    News Article:
    ============

    '''
    return instructions

In [7]:
# Rows of the fallback table, in display order; these are the keys the prompt asks for.
_FINANCIAL_MEASURES = ("Company Name", "Stock Symbol", "Revenue", "Net Income", "EPS")


def _empty_financial_frame():
    """Build the fallback table: every expected measure paired with an empty string."""
    return pd.DataFrame({
        "Measure": list(_FINANCIAL_MEASURES),
        "Value": [""] * len(_FINANCIAL_MEASURES),
    })


def _parse_financial_json(content):
    """Return the JSON object contained in a model reply as a dict, or None if there is none."""
    if not isinstance(content, str):
        return None

    # Try the reply verbatim first; if that fails, retry on the outermost
    # {...} span so replies wrapped in Markdown fences or prose still parse.
    candidates = [content]
    start, end = content.find("{"), content.rfind("}")
    if 0 <= start < end:
        candidates.append(content[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Valid JSON that is not an object (e.g. a bare "" or a list) has no
        # key/value pairs to tabulate, so it is treated as "nothing found".
        if isinstance(parsed, dict):
            return parsed
    return None


def extract_financial_data(financial_article):
    """Ask the chat model to extract financial figures from an article.

    The instruction text from get_prompt_financial() is concatenated with the
    article and sent as a single user message to gpt-3.5-turbo.

    Args:
        financial_article (str): text of the news article to analyse.

    Returns:
        pandas.DataFrame: two columns, "Measure" and "Value". When the reply
        contains a JSON object there is one row per key/value pair of that
        object. When the reply has no choices, no usable content, or no JSON
        object, the frame lists the five expected measures with "" values.

    Raises:
        Errors raised by the OpenAI client call itself (network, auth, ...)
        are not caught and propagate to the caller.
    """
    prompt = get_prompt_financial() + financial_article

    # NOTE(review): openai.ChatCompletion is the pre-1.0 client interface;
    # confirm the pinned openai version before upgrading the package.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}]
    )

    # Extract the reply defensively: an empty `choices` list or an unexpected
    # message shape must lead to the fallback table, not to a crash.
    try:
        content = response.choices[0]['message']['content']
    except (IndexError, KeyError, TypeError):
        content = None

    data = _parse_financial_json(content)
    if data is None:
        return _empty_financial_frame()

    return pd.DataFrame(list(data.items()), columns=["Measure", "Value"])

In [8]:
# Hand-written sample article used as the input for the extraction call below;
# it states a profit, a revenue and an EPS figure for Tesla.
text = '''
    Tesla's Earning news in text format: Tesla's earning this 
    quarter blew all the estimates. They reported 4.5 billion $ 
    profit against a revenue of 30 billion $. Their earnings 
    per share was 2.3 $
    '''

In [9]:
# Sends one chat-completion request for the sample article and stores the
# resulting two-column ("Measure", "Value") DataFrame.
df = extract_financial_data(text)

In [11]:
# to_string() renders the whole frame as plain text, so the printed output is
# a fixed-width table rather than the notebook's rich DataFrame display.
print(df.to_string())

        Measure          Value
0  Company Name          Tesla
1  Stock Symbol           TSLA
2       Revenue   30 billion $
3    Net Income  4.5 billion $
4           EPS          2.3 $
