In [1]:
import os
import sys

# Get the current working directory and add the parent directory to the Python path
current_working_directory = os.getcwd()
print(os.path.join(current_working_directory, ".."))
sys.path.append(os.path.join(current_working_directory, ".."))

/Users/L024258/lilly_work/github-copilot/exploration/notebooks/..


In [2]:
from decouple import AutoConfig
config = AutoConfig(search_path='./../.env')

In [3]:
os.environ["AZURE_OPENAI_API_KEY"] = config('OPENAI_API_KEY')
os.environ["AZURE_OPENAI_ENDPOINT"] = config('AZURE_ENDPOINT')
os.environ["COHERE_API_KEY"] = config('COHERE_TOKEN')

## Loading Generative Model

In [4]:
from models.llm import LLM

model = LLM('gpt-4')
llm = model.load_model()

  from .autonotebook import tqdm as notebook_tqdm


## Loading Embedding Model

In [5]:
from models.em import EM
model = EM('ada-2')
embeddings = model.load_model()

## Data Ingestion

In [6]:
from langchain_community.document_loaders import WebBaseLoader
import bs4

# Load, chunk and index the contents of the blog.
loader = WebBaseLoader(
    web_paths=("https://www.moneycontrol.com/news/business/stocks/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer("li",
            class_=("clearfix")
        )
    ),
)

docs = loader.load()

In [7]:
docs

[Document(page_content="   February 17, 2024 11:06 AM IST  Business in the Week Ahead (February 19-23, 2024) RBI and the US Federal Reserve will release minutes of the latest monetary policy meetings. Flash PMIs, bank loan and deposits are other key data points to track \n  February 17, 2024 09:47 AM IST  Market bounces back, gains 1% amid volatility; rupee flat On the sectoral front, BSE Auto index rose 5 percent, BSE Oil & Gas index gained 3 percent, BSE Energy index rose 3 percent. \n  February 17, 2024 03:27 PM IST  Nifty closes the week above 22,000; smallcaps give double digit return despite index ending flat Nifty need to sustain above 22,150 to end the consolidation and march towards the 22,500+ zone else profit taking may resume, says expert. \n  February 17, 2024 09:42 AM IST  Wall Street slides as hot producer price data crimps rate cut bets The S&P 500 lost 24.18 points, or 0.49%, to end at 5,005.15 points, while the Nasdaq Composite lost 132.38 points, or 0.83%, to 15,775.

### Embedding and Storing Documents in Vector Store

In [8]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    separators=[" \n\n ", " \n "],
    chunk_size=400,
    chunk_overlap=0,
    length_function=len,
)

In [9]:
from utils.vectordb import VectorDB
vectorstore = VectorDB()
db = vectorstore.create_and_dump(embeddings, text_splitter=text_splitter, inp_text=docs)

In [10]:
question = "Which stocks increased despite promoters selling?"
top_k = db.similarity_search(question)
top_k

[Document(page_content='February 16, 2024 04:57 PM IST  Promoter selling no longer a red flag; these 10 stocks have doubled despite stake sale Stocks of HDFC Asset Management Company, TD Power System, Jindal Stainless and many others have doubled in the last one year despite promoters offloading anywhere between 9 percent and 24 percent', metadata={'source': 'https://www.moneycontrol.com/news/business/stocks/'}),
 Document(page_content='February 16, 2024 04:09 PM IST  Gainers and Losers: 10 stocks that moved the most on February 16 Among sectors, auto was the biggest gainer being up more than 2 percent while Nifty Energy and PSU Bank ended up in the red.', metadata={'source': 'https://www.moneycontrol.com/news/business/stocks/'}),
 Document(page_content='February 16, 2024 02:27 PM IST  Pharma stocks make strong moves on robust results, positive outlook; Natco zooms 14% Shares of Natco, Glenmark and Aurobindo Pharma are up 14, 10 and 2 percent respectively in the session.', metadata={'s

## Answer Generation

In [11]:
from utils.vectordb import VectorDB

vectorstore = VectorDB()
retriever = vectorstore.load_retriever(embeddings, top_k=10, re_ranker='cohere')

In [16]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

template = """You are a financial news assistant. Use the following pieces of news to answer the question at the end.  
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Use three sentences maximum and keep the answer as concise as possible. 
Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever, return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)


In [17]:
question = "List all the news for February 17 2024?"
result = qa_chain.invoke({"query": question})
print(result["result"])

On February 17, 2024, the news included the upcoming release of RBI and US Federal Reserve monetary policy meeting minutes, flash PMIs, bank loan, and deposit data. Wall Street experienced a slide with the S&P 500, Nasdaq Composite, and Dow Jones Industrial Average all losing points. The market bounced back with gains in BSE Auto, BSE Oil & Gas, and BSE Energy indices, while the rupee remained flat. Thanks for asking!


In [18]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.prompts import PromptTemplate

template = """You are a financial news assistant. Use the following pieces of news to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Use three sentences maximum and keep the answer as concise as possible. 
Always say "thanks for asking!" at the end of the answer. 
{summaries}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

qa_src_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm,
    retriever=retriever, return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

In [19]:
question = "List all the news latest news"
result = qa_src_chain.invoke({"question": question})
result

{'question': 'List all the news latest news',
 'answer': 'The latest news includes stocks of HDFC Asset Management Company, TD Power System, and Jindal Stainless doubling despite promoters offloading stakes; Tata Motors stock gaining 3% on the launch of new CVs in South Africa; and Sensex gains led by auto and realty stocks, with Nifty potentially hitting a new all-time high if global markets remain steady. Thanks for asking!',
 'sources': '',
 'source_documents': [Document(page_content='February 16, 2024 04:57 PM IST  Promoter selling no longer a red flag; these 10 stocks have doubled despite stake sale Stocks of HDFC Asset Management Company, TD Power System, Jindal Stainless and many others have doubled in the last one year despite promoters offloading anywhere between 9 percent and 24 percent', metadata={'source': 'https://www.moneycontrol.com/news/business/stocks/', 'relevance_score': 0.19102567}),
  Document(page_content='February 16, 2024 01:27 PM IST  Tata Motors stock gains 3%