# Load all the data to embedding to store into the vectorestore:

* Historical price data with indicators
    * Use preprocess_data_2
    * Keep as pandas DataFrame later use DataFrameLoader
* Financial Data
    * 

In [1]:
from financial_data import FinancialDataFinnHub, FinancialDataYahoo
import os

In [2]:
# Create Finnhub object
symbol = "NVDA"
api_key = os.environ.get("FINHUB_API_KEY")
nvda = FinancialDataFinnHub(symbol, api_key)

In [3]:
# Create Yahoo finance object
nvda_yahoo = FinancialDataYahoo(symbol)

In [4]:
nvda.company_info

Unnamed: 0,Attribute,Values
0,country,US
1,currency,USD
2,estimateCurrency,USD
3,exchange,NASDAQ NMS - GLOBAL MARKET
4,finnhubIndustry,Semiconductors
5,ipo,1999-01-22
6,logo,https://static2.finnhub.io/file/publicdatany/f...
7,marketCapitalization,686167.948082
8,name,NVIDIA Corp
9,phone,14084862000.0


In [5]:
nvda_yahoo.quote_table

  data = tables[0].append(tables[1])
  data = data.append(quote_price)


Unnamed: 0,attribute,value
0,1y Target Est,282.54
1,52 Week Range,108.13 - 281.08
2,Ask,278.67 x 1300
3,Avg. Volume,49506470.0
4,Beta (5Y Monthly),1.77
5,Bid,278.72 x 1000
6,Day's Range,275.25 - 281.08
7,EPS (TTM),1.74
8,Earnings Date,"May 24, 2023"
9,Ex-Dividend Date,"Mar 07, 2023"


In [6]:
nvda_yahoo.validation

Unnamed: 0,0,1
0,Market Cap (intraday),671.07B
1,Enterprise Value,669.81B
2,Trailing P/E,156.14
3,Forward P/E,60.24
4,PEG Ratio (5 yr expected),3.38
5,Price/Sales (ttm),25.25
6,Price/Book (mrq),30.36
7,Enterprise Value/Revenue,24.83
8,Enterprise Value/EBITDA,111.88


## Load News then chat about news:
* Use `FinancialDataFinnHub`class to load news url
* langchain `UnstructuredURLLoader` to load news
* Remove unreachable news from the list
* Create `OpenAIEmbeddings` model
* Split text with `RecursiveCharacterTextSplitter`
* Store vectors in `Chroma`
* Create prompt template with `PromprTemplate`
* Create llm chain and chat with News. Look `basics.py`and `index_vectorstore_index_creation.ipynb`


In [78]:
# Use FinancialHub to load news url
news_urls = nvda.company_news['url']
urls = [news_urls.iloc[i][0] for i in range(len(news_urls))]

In [79]:
# langchain UnstructuredURLLoader to load news
from langchain.document_loaders import UnstructuredURLLoader

loader = UnstructuredURLLoader(urls=urls)
data = loader.load()

Error fetching or processing https://finnhub.io/api/news?id=69af8d0a9004b53912dfc61cf5100e5c313e7da4cfbf550471ef061c50ad13ac, exeption: URL return an error: 404
Error fetching or processing https://finnhub.io/api/news?id=ac10ffbe74f9a93d4e019463841cc7dcbec04effa443610b76d2bc1624cd3b7e, exeption: URL return an error: 404
Error fetching or processing https://finnhub.io/api/news?id=6195fe84efcd9d477a4f5c19ecf48cf4d8f7228decd2ff0f6822c11a8cbbb704, exeption: URL return an error: 500
Error fetching or processing https://finnhub.io/api/news?id=842a0c69f973e4b40922ddf2783ec3db5b7a16469319696f2a3b6ee0f3c9f440, exeption: URL return an error: 500
Error fetching or processing https://finnhub.io/api/news?id=0acbb2b2acb7bcfd5229414864a9981deb34deec0c6c48b8a44554807a282bc1, exeption: URL return an error: 500
Error fetching or processing https://finnhub.io/api/news?id=f159c759cdc5611c75ed35cdc587e2c0ad2eb79a31e5405bb269b725bd63778e, exeption: URL return an error: 500
Error fetching or processing https

In [80]:
data

[Document(page_content='Javascript is Disabled\n\nYour current browser configuration', metadata={'source': 'https://finnhub.io/api/news?id=2468f842ecf674796b71f2b6dfcbddb33c1231900ed4a7008a20ff81fb163fc1'}),
 Document(page_content='Skip Navigation\n\nwatchlive\n\nMarkets\n\nPre-Markets\n\nU.S. Markets\n\nCurrencies\n\nCryptocurrency\n\nFutures & Commodities\n\nBonds\n\nFunds & ETFs\n\nBusiness\n\nEconomy\n\nFinance\n\nHealth & Science\n\nMedia\n\nReal Estate\n\nEnergy\n\nClimate\n\nTransportation\n\nIndustrials\n\nRetail\n\nWealth\n\nLife\n\nSmall Business\n\nInvesting\n\nPersonal Finance\n\nFintech\n\nFinancial Advisors\n\nOptions Action\n\nETF Street\n\nBuffett Archive\n\nEarnings\n\nTrader Talk\n\nTech\n\nCybersecurity\n\nEnterprise\n\nInternet\n\nMedia\n\nMobile\n\nSocial Media\n\nCNBC Disruptor 50\n\nTech Guide\n\nPolitics\n\nWhite House\n\nPolicy\n\nDefense\n\nCongress\n\nEquity and Opportunity\n\nCNBC TV\n\nLive TV\n\nLive Audio\n\nBusiness Day Shows\n\nEntertainment Shows\n\nFu

In [81]:
# Remove news if the content is not reacheable. 
for i, d in enumerate(data):
    p_content = d.page_content[:22]
    if p_content == 'Javascript is Disabled':
        data.pop(i)
print(f"You got {len(data)} news to read about {symbol}. ")

You got 21 news to read about NVDA. 


In [82]:
# Create `OpenAIEmbeddings` model
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [83]:
# Split text with RecursiveCharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(data)

In [84]:
docs

[Document(page_content="Skip Navigation\n\nwatchlive\n\nMarkets\n\nPre-Markets\n\nU.S. Markets\n\nCurrencies\n\nCryptocurrency\n\nFutures & Commodities\n\nBonds\n\nFunds & ETFs\n\nBusiness\n\nEconomy\n\nFinance\n\nHealth & Science\n\nMedia\n\nReal Estate\n\nEnergy\n\nClimate\n\nTransportation\n\nIndustrials\n\nRetail\n\nWealth\n\nLife\n\nSmall Business\n\nInvesting\n\nPersonal Finance\n\nFintech\n\nFinancial Advisors\n\nOptions Action\n\nETF Street\n\nBuffett Archive\n\nEarnings\n\nTrader Talk\n\nTech\n\nCybersecurity\n\nEnterprise\n\nInternet\n\nMedia\n\nMobile\n\nSocial Media\n\nCNBC Disruptor 50\n\nTech Guide\n\nPolitics\n\nWhite House\n\nPolicy\n\nDefense\n\nCongress\n\nEquity and Opportunity\n\nCNBC TV\n\nLive TV\n\nLive Audio\n\nBusiness Day Shows\n\nEntertainment Shows\n\nFull Episodes\n\nLatest Video\n\nTop Video\n\nCEO Interviews\n\nCNBC Documentaries\n\nCNBC Podcasts\n\nCNBC World\n\nDigital Originals\n\nLive TV Schedule\n\nWatchlist\n\nInvesting Club\n\nTrust Portfolio\n\nAn

In [86]:
# Store vectors in Chroma 
from langchain.vectorstores import Chroma
vectorstore = Chroma.from_documents(docs, embeddings)

Using embedded DuckDB without persistence: data will be transient


# ConversationalRetrievalChain:
https://python.langchain.com/en/latest/modules/chains/index_examples/chat_vector_db.html?highlight=ConversationalRetrievalChain#conversationalretrievalchain-with-search-distance

https://python.langchain.com/en/latest/reference/modules/chains.html?highlight=ConversationalRetrievalChain#langchain.chains.ConversationalRetrievalChain

Check Mayo's code makechain.ts. Bottom url will show how it works in python

In [77]:
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
CONDENSE_QUESTION_PROMPT

PromptTemplate(input_variables=['chat_history', 'question'], output_parser=None, partial_variables={}, template='Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.\n\nChat History:\n{chat_history}\nFollow Up Input: {question}\nStandalone question:', template_format='f-string', validate_template=True)

In [65]:
# Ask about news - Simple QA 
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
llm = OpenAI(temperature=0)
chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())

In [71]:
chat_history = []
result = chain({"question": "Please tell me news about NVDA.", "chat_history": chat_history})
chat_history.append(result)

" NVDA shares closed 0.91% higher to $270.02 on Monday. This was the stock's second consecutive day of gains. Additionally, NVDA has developed three foundational models for generative cloud-based AI services called NeMo, Picasso, and BioNeMo. Analyst Vijay Kumar rates NVDA shares as Overweight (i.e., Buy) and has a $320 price target for the shares, implying upside of 20% from current levels."

In [74]:
result_2 = chain({"question": "Please tell me news other than NVDA. ", "chat_history": chat_history})
chat_history.append(result_2)
result_2

{'question': 'Please tell me news other than NVDA. ',
 'chat_history': [('Please tell me news about NVDA.',
   " NVDA shares closed 0.91% higher to $270.02 on Monday. This was the stock's second consecutive day of gains. Additionally, NVDA has developed three foundational models for generative cloud-based AI services called NeMo, Picasso, and BioNeMo. Analyst Vijay Kumar rates NVDA shares as Overweight (i.e., Buy) and has a $320 price target for the shares, implying upside of 20% from current levels."),
  {...}],
 'answer': ' There is news about the US, international markets, investing, tech, politics, and more.'}

In [75]:
chat_history

[('Please tell me news about NVDA.',
  " NVDA shares closed 0.91% higher to $270.02 on Monday. This was the stock's second consecutive day of gains. Additionally, NVDA has developed three foundational models for generative cloud-based AI services called NeMo, Picasso, and BioNeMo. Analyst Vijay Kumar rates NVDA shares as Overweight (i.e., Buy) and has a $320 price target for the shares, implying upside of 20% from current levels."),
 {'question': 'Please tell me news other than NVDA. ',
  'chat_history': [...],
  'answer': ' There is news about the US, international markets, investing, tech, politics, and more.'}]

In [76]:
result_3 = chain({"question": "What was the first question?", "chat_history": chat_history})
chat_history.append(result_3)
result_3

ValueError: too many values to unpack (expected 2)

In [39]:
# Create prompt template with PromprTemplate
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain

chat = ChatOpenAI(temperature=0)
prompt = PromptTemplate(template="You are rookie financial analyst. Please tell me your opinion about the company \
                              based on the news you found. I will pass you a news lists. Also, p\
                              retend you know everything like analyst speaking at bloomberg news.\
                              \
                              News: {news}\
                              Comment: Your comment about the company:", 
                       input_variables=["news"])

chain = LLMChain(llm=chat, prompt=chat_prompt_template, output_key="analysis")

second_prompt = PromptTemplate(template="You are veteran bloomberg financial analyst. Criticize the rookie's comment. \
                                        \
                                        {analysis}\
                                        Criticize: Your comment about the rookie's analysis.", 
                              input_variables=['analysis'])
chain_two = LLMChain(llm=chat, prompt=second_prompt, output_key='criticize')

from langchain.chains import SequentialChain
overall_chain = SequentialChain(chains=[chain, chain_two],
                               input_variables=["news"], 
                               output_variables=["analysis", "criticize"], verbose=True)

from langchain.chains import ChatVectorDBChain
from langchain.chains import VectorDBQAWithSourcesChain

## Before I carry on this topic, I will seek more about Output Parser: 

https://python.langchain.com/en/latest/modules/prompts/output_parsers/getting_started.html

notebook: http://localhost:8888/notebooks/PycharmProjects/tradingGPT/scripts/output_parser.ipynb