Stocks Rag

Author: Orvin Bellamy (https://github.com/orvinbellamy)

In [1]:
## Import libraries

import yfinance as yf
import pandas as pd
import openai
from openai import OpenAI
import os
import json
import time
from dotenv import load_dotenv
import logging as log
from filehandler import FileHandler
from yfinancehandler import YFHandler
from eventhandler import EventHandler, ThreadManager

In [2]:
def check_run_status(client, thread_id: str, run_id: str, n_tries: int, wait_time):
    """Poll a thread run until it completes, needs action, or stops for good.

    Args:
        client: Client object exposing ``beta.threads.runs.retrieve``.
        thread_id: ID of the thread the run belongs to.
        run_id: ID of the run to poll.
        n_tries: Maximum number of times the run is retrieved.
        wait_time: Seconds to sleep between two consecutive retrievals.

    Returns:
        A status message once the run is ``completed`` or ``requires_action``.

    Raises:
        ValueError: If the run is cancelled, failed, expired or incomplete.
        TimeoutError: If the run has not reached one of the states above
            after ``n_tries`` retrievals (previously this fell through and
            returned ``None`` without any signal).
    """

    run_ref = f'(run_id: {run_id}, thread_id {thread_id})'

    # Statuses that only get a progress message before the next poll
    waiting_messages = {
        'in_progress': 'Run is in progress',
        'queued': 'Run is queued',
        'cancelling': 'Run is cancelling',
    }

    # Terminal statuses that will never produce a usable result
    error_messages = {
        'cancelled': 'Error: run is cancelled',
        'failed': 'Error: run has failed',
        'expired': 'Error: run has expired',
        'incomplete': 'Error: run is incomplete',
    }

    for attempt in range(n_tries):

        # Retrieve the latest run and read its status
        run_status = client.beta.threads.runs.retrieve(
            thread_id=thread_id,
            run_id=run_id
        ).status

        if run_status == 'completed':
            print('Run is completed')
            return f'Run is completed {run_ref}'

        if run_status == 'requires_action':
            print('Action is required')
            return f'Action required {run_ref}'

        if run_status in error_messages:
            raise ValueError(f'{error_messages[run_status]} {run_ref}')

        # Unknown statuses are polled again without a message, as before
        if run_status in waiting_messages:
            print(waiting_messages[run_status])

        # Sleep only when another poll will follow; waiting after the last
        # attempt would just delay the timeout below
        if attempt < n_tries - 1:
            time.sleep(wait_time)

    raise TimeoutError(f'Error: run did not finish after {n_tries} checks {run_ref}')

In [3]:
## Configuration

# load_dotenv('config.env')
# OPEN_API_KEY = os.getenv('OPEN_API_KEY')

# List of JSON files
# json_files = [('income_stmt', 'df_income_stmt_columns.json'), ('cashflow', 'df_cashflow_columns.json'), ('stocks', 'df_stocks_columns.json')]

# Load schemas from JSON file
with open('dataframe_schemas.json', 'r') as f:
    schemas = json.load(f)

# Load the API key and the assistant id from the local config file
with open('config/config.json', 'r') as f:
    dic_config = json.load(f)

OPEN_API_KEY = dic_config['OPEN_API_KEY']
FIN_ANALYST_ID = dic_config['fin_analyst_assistant']

# Set constants for loops
NUM_TRIES = 12
TIME_SLEEP = 5 # in seconds

# Read one ticker per line from the text file.
# Blank lines (e.g. a trailing newline at the end of the file) are skipped so
# they do not end up in the list as empty-string tickers.
with open('stocks.txt', 'r') as file:
    stocks_list = [line.strip() for line in file if line.strip()]

df_portfolio = pd.read_csv('config/portfolio.csv')

# Distinct tickers present in the portfolio file
list_portfolio = list(df_portfolio['ticker'].unique())

with open('openai_files.json', 'r') as f:
    dic_files = json.load(f)

# Directory that holds the CSV files handed to FileHandler
FILE_PATH = 'openai_upload_files/'

client = OpenAI(api_key=OPEN_API_KEY)

### Stocks Data

In [4]:
# Tickers to import. This assignment replaces any earlier value of stocks_list.
# dict.fromkeys drops repeated tickers (SHOP is listed twice) while keeping the
# original order, so each ticker is passed to YFHandler only once.
stocks_list = list(dict.fromkeys(
    ['MSFT', 'SHOP', 'AMD', 'NVDA', 'AAPL', 'OKTA', 'SHOP', 'AMAT', 'TTWO', 'SNOW', 'SOXX', 'BA', 'DOCN', 'MCD', 'LULU', 'CSCO', 'ORCL', 'AMZN', 'ASML']
))

yf_handler = YFHandler(stock_list=stocks_list, schemas=schemas)

df_stocks = yf_handler.import_stocks()
df_cashflow = yf_handler.import_cashflow()
df_income_stmt = yf_handler.import_income_stmt()

# Write to CSV
# Technically this will be done by the FileHandler but just to be safe
# The directory comes from FILE_PATH so these writes and the FileHandler
# instances below always point at the same files.
df_stocks.to_csv(os.path.join(FILE_PATH, 'df_stocks.csv'), index=False)
df_cashflow.to_csv(os.path.join(FILE_PATH, 'df_cashflow.csv'), index=False)
df_income_stmt.to_csv(os.path.join(FILE_PATH, 'df_income_stmt.csv'), index=False)

# One FileHandler per dataframe / CSV pair
file_stocks = FileHandler(
    df=df_stocks,
    dic_file=dic_files,
    file_name='df_stocks.csv',
    file_path=FILE_PATH,
    client=client
)

file_cashflow = FileHandler(
    df=df_cashflow,
    dic_file=dic_files,
    file_name='df_cashflow.csv',
    file_path=FILE_PATH,
    client=client
)

file_income_stmt = FileHandler(
    df=df_income_stmt,
    dic_file=dic_files,
    file_name='df_income_stmt.csv',
    file_path=FILE_PATH,
    client=client
)

  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df = pd.concat([df, df_plc], ignore_index=True)
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _

In [5]:
# Call update_openai_file() on every handler, in the same order as before
for file_handler in (file_stocks, file_cashflow, file_income_stmt):
    file_handler.update_openai_file()

file name: df_stocks.csv, file id: file-SypaC7KPWQTRmonLIFVb1BnS doesn't exist or is already deleted


In [6]:
# Read the assistant definitions from the config folder
with open('config/assistants.json', 'r') as assistants_file:
    dic_assistants = json.loads(assistants_file.read())

In [10]:
# Assistant id asst_I4lFSi5mtU5OYoeivxOc4piG

# Look the fin_analyst entry up once instead of indexing it for every argument
fin_analyst_cfg = dic_assistants['fin_analyst']

# Only the stocks file is attached to the code interpreter
code_interpreter_resources = {'file_ids': [file_stocks.file_id]}

client.beta.assistants.update(
    assistant_id=fin_analyst_cfg['id'],
    instructions=fin_analyst_cfg['instructions'],
    model=fin_analyst_cfg['model'],
    tools=fin_analyst_cfg['tools'],
    tool_resources={'code_interpreter': code_interpreter_resources},
)
# file_stocks.id

Assistant(id='asst_Mqf9cO1sDTOd4UMYwcFfIQrA', created_at=1719539721, description=None, instructions='You are a financial assistant. Your task is to analyze the stock market and provide an analysis on the market or performance of specific stocks. The objective is to identify whether the specific stocks would make a good investment for a 6 months period. Use the income statement knowledge base you have.', metadata={}, model='gpt-4o', name='fin_analyst', object='assistant', tools=[CodeInterpreterTool(type='code_interpreter')], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=['file-RFDn3ItudTsuyQwDM8zavKiM']), file_search=None), top_p=1.0)

In [22]:
# NOTE(review): everything down to the `with` statement is commented out, so
# this cell never assigns `thread` itself. The stream below only works when a
# cell that creates `thread` has already been executed in the same kernel.
# # Set dictionary for tracking messages
# dic_messages = {}

# ## Create thread
# thread = client.beta.threads.create(
#     messages=[
#         {
#             'role': 'user',
#             'content': 'Can you explain to me what the file I have attached is.',
#             'attachments': [
#                 {
#                     'file_id': file_stocks.file_id,
#                     'tools': [{'type': 'code_interpreter'}]
#                 }
#             ]
#         }
#     ]
# )

# # Get thread id
# thread_id = thread.id

# # Record first message
# dic_messages['0'] = get_last_message(client=client, thread_id=thread_id)

# Open a streamed run of the FIN_ANALYST_ID assistant on `thread`, passing a
# fresh EventHandler, and call until_done() on the resulting stream.
with client.beta.threads.runs.stream(
    thread_id=thread.id,
    assistant_id=FIN_ANALYST_ID,
    event_handler=EventHandler(),
) as stream:
    stream.until_done()

KeyError: '0'

In [None]:
## Create thread
# First user message, with the stocks file attached for the code interpreter
first_message = {
    'role': 'user',
    'content': 'Can you explain to me what the file I have attached is.',
    'attachments': [
        {
            'file_id': file_stocks.file_id,
            'tools': [{'type': 'code_interpreter'}],
        }
    ],
}
thread = client.beta.threads.create(messages=[first_message])

# Get thread id
thread_id = thread.id

# ThreadManager is used to track the messages of this single thread
thread_fin_analyst = ThreadManager(client=client, thread_id=thread_id)

# The run is streamed; the earlier approach (runs.create followed by polling
# with check_run_status) is no longer used in this cell.
# Deleting the thread afterwards (client.beta.threads.delete) is left as a
# manual final step.
run_stream = client.beta.threads.runs.stream(
    thread_id=thread_id,
    assistant_id=FIN_ANALYST_ID,
    event_handler=EventHandler(),
)
with run_stream as stream:
    stream.until_done()

# Last expression of the cell, so its value is displayed
thread_fin_analyst.get_last_message()

In [34]:
# Display the tracked messages.
# NOTE(review): in the cells above `dic_messages` is only assigned inside
# commented-out code — confirm where it is defined before a fresh-kernel run.
dic_messages

{'0': {'message_id': 'msg_Bfqeyo0fYvm3HGRhtblsva6m',
  'assistant_id': None,
  'created_at': 1719539977,
  'file_ids': [Attachment(file_id='file-ol5IZhuwtOCZiKKMrafdDQKz', tools=[CodeInterpreterTool(type='code_interpreter')])],
  'role': 'user',
  'run_id': None,
  'message_text': 'Can you explain to me what the file I have attached is.'},
 '1': {'message_id': 'msg_TwHsVXFRaa0WmbMVLSGrybO1',
  'assistant_id': 'asst_Mqf9cO1sDTOd4UMYwcFfIQrA',
  'created_at': 1719540461,
  'file_ids': [],
  'role': 'assistant',
  'run_id': 'run_rhLE8SeKrSEr11tqkqkvXXCj',
  'message_text': 'The uploaded file appears to contain historical stock market data for various tickers. Here is an overview of the columns present in the dataset:\n\n1. **Date**: The date of the stock market data entry.\n2. **Open**: The opening price of the stock on that date.\n3. **High**: The highest price of the stock on that date.\n4. **Low**: The lowest price of the stock on that date.\n5. **Close**: The closing price of the stoc

In [None]:
SyncCursorPage[ThreadMessage](
    data=[ThreadMessage(id='msg_8LAgBdoruiq7qExZkbXyWlq4', assistant_id='asst_I4lFSi5mtU5OYoeivxOc4piG', content=[MessageContentText(text=Text(annotations=[], value="I am an AI developed by OpenAI, designed to assist you with a wide range of tasks. My capabilities include processing and analyzing text-based information, providing explanations, summaries, or insights on various topics, answering questions, and offering guidance or advice within the constraints of my programming. Specifically, in this context, I can help analyze financial documents, interpret data, and provide an analysis of the stock market or the performance of specific stocks based on the information available in documents you've provided or based on general finance principles. My goal is to support you in making informed decisions or gaining a better understanding of complex subjects."), type='text')], created_at=1711229792, file_ids=[], metadata={}, object='thread.message', role='assistant', run_id='run_TFbiUQc8B52YFXGsuQ3E7aPn', thread_id='thread_Vfn67f8h7LIwzba52KDpYFFk'), 
          ThreadMessage(id='msg_6dDKDzhRD09Ksjb0TNMHdJB9', assistant_id=None, content=[MessageContentText(text=Text(annotations=[], value='Can you tell me what you are and what do you do?'), type='text')], created_at=1711229119, file_ids=[], metadata={}, object='thread.message', role='user', run_id=None, thread_id='thread_Vfn67f8h7LIwzba52KDpYFFk')], 
          object='list', 
          first_id='msg_8LAgBdoruiq7qExZkbXyWlq4', 
          last_id='msg_6dDKDzhRD09Ksjb0TNMHdJB9', 
          has_more=False)

In [44]:
# Append a throwaway user message to the current thread
test_message_kwargs = {
    'thread_id': thread.id,
    'role': "user",
    'content': "This is another test message, please ignore",
    # file_ids=[file_income_stmt_test.id]
}
client.beta.threads.messages.create(**test_message_kwargs)

ThreadMessage(id='msg_Bp1EXNpq7dv2jAZa7TpCqS8S', assistant_id=None, content=[MessageContentText(text=Text(annotations=[], value='This is another test message, please ignore'), type='text')], created_at=1711434209, file_ids=[], metadata={}, object='thread.message', role='user', run_id=None, thread_id='thread_FGDsn9Jx8j5SjpgDKUZbo7ql')

In [137]:
# Start a run of the financial-analyst assistant on the current thread.
# The assistant id comes from FIN_ANALYST_ID (set in the configuration cell),
# as in the streaming cells: the `fin_analyst` object this cell used to
# reference is not defined by any cell above, so it failed on a fresh kernel.
run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=FIN_ANALYST_ID
)

In [None]:
# Replace `run` with a freshly retrieved copy of the same run
current_run_id = run.id
run = client.beta.threads.runs.retrieve(run_id=current_run_id, thread_id=thread.id)

In [147]:
# Fetch the message list of the current thread
current_thread_id = thread.id
messages = client.beta.threads.messages.list(thread_id=current_thread_id)

In [172]:
# Print the text of the first content part of the first listed message
first_listed_message = messages.data[0]
print(first_listed_message.content[0].text.value)

Based on the financial data provided in the document for Microsoft Corporation (MSFT) as of June 30, 2023, here is an analysis that may help in evaluating its stock for a potential investment over the next 6 months:

1. **Strong Financial Performance**: Microsoft has shown a robust financial performance. Key highlights include:
   - **Total Revenue**: $211.915 billion, indicating a strong top-line growth.
   - **Gross Profit**: $146.052 billion, demonstrating a healthy margin that reflects efficient management and strong pricing power.
   - **Net Income**: $72.361 billion, illustrating a solid bottom line that provides room for future investments and returns to shareholders.

2. **Healthy EBITDA and EPS**: Microsoft reported an EBITDA (Earnings Before Interest, Taxes, Depreciation, and Amortization) of $105.140 billion and a normalized EBITDA of $105.155 billion, suggesting strong operational effectiveness. The Diluted Earnings Per Share (EPS) is $9.68, and the Basic EPS is $9.72, both

In [112]:
# Show the column dtypes of SHOP's income statement after transposing it and
# resetting the index.
# NOTE(review): `stocks` is not assigned in any cell above — presumably a
# yfinance Tickers object; confirm before running on a fresh kernel.
# get historical market data
# hist = msft.history(period="1mo")
shop_ticker = stocks.tickers['SHOP']
shop_ticker.income_stmt.T.reset_index().dtypes
# df = stocks.tickers['SHOP'].cashflow.T.reset_index()
# df.rename(columns={'index': 'Date'},inplace=True)
# df

index                                                         datetime64[ns]
Tax Effect Of Unusual Items                                           object
Tax Rate For Calcs                                                    object
Normalized EBITDA                                                     object
Total Unusual Items                                                   object
Total Unusual Items Excluding Goodwill                                object
Net Income From Continuing Operation Net Minority Interest            object
Reconciled Depreciation                                               object
Reconciled Cost Of Revenue                                            object
EBITDA                                                                object
EBIT                                                                  object
Net Interest Income                                                   object
Interest Expense                                                      object