In [1]:
import os

import streamlit as st
from dotenv import load_dotenv
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_cohere import ChatCohere
from langchain_cohere import CohereEmbeddings
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate

In [2]:
# News articles that make up the knowledge base for this notebook.
article_urls = [
    "https://www.moneycontrol.com/news/business/business-news-live-sebi-board-meeting-manba-finance-share-price-krn-heat-exchanger-allotment-status-ipo-september-30-liveblog-12831964.html#google_vignette",
    "https://www.moneycontrol.com/news/business/startup/cred-revenue-rises-66-to-rs-2473-crore-in-fy24-losses-down-by-41-to-rs-609-crore-12832446.html",
]

# Download the pages and parse them into LangChain documents.
loader = UnstructuredURLLoader(urls=article_urls)
data = loader.load()

# Number of documents that were loaded.
len(data)

2

In [3]:
# Break the loaded documents into overlapping chunks (size 1000, overlap 200)
# so each piece is small enough to embed and retrieve individually.
r_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = r_splitter.split_documents(data)

# Plain-text content of every chunk, in the same order as `chunks`.
chunks_string = [chunk.page_content for chunk in chunks]

In [4]:
# Load variables from a local .env file (if present) so the Cohere API key is
# read from the environment instead of being hardcoded in the notebook.
# NOTE(review): the key that used to be written here in plain text should be
# revoked and rotated.
load_dotenv()
embeddings_model = CohereEmbeddings(
    # Raises KeyError early if COHERE_API_KEY has not been configured.
    cohere_api_key=os.environ["COHERE_API_KEY"],
    model='embed-english-v3.0',
)

# Embed the chunks and index them in a FAISS vector store.
vectorstore = FAISS.from_documents(chunks, embeddings_model)

In [5]:
# Similarity-search retriever that returns the 6 closest chunks per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

# Smoke-test retrieval with a question about the indexed articles (the earlier
# probe query was unrelated to the loaded news content).
retrieved_docs = retriever.invoke("What is on the agenda of the SEBI board meeting?")

# Number of chunks that came back for the probe query.
len(retrieved_docs)

6

In [6]:
# Chat model used to generate answers. The API key is read from the
# environment (populated from a local .env file by load_dotenv) rather than
# being stored in the notebook; a missing key raises KeyError immediately.
load_dotenv()
llm = ChatCohere(cohere_api_key=os.environ["COHERE_API_KEY"], model="command-r-plus")

In [7]:
# System instructions for the answering model: the guidance sentences are
# joined with single spaces, followed by a blank line and the {context}
# placeholder.
system_prompt = " ".join(
    [
        "You are an assistant for question-answering tasks.",
        "Use the following pieces of retrieved context to answer the question.",
        "If you don't know the answer, say that you don't know.",
        "Use three sentences maximum and keep the answer concise.",
    ]
) + "\n\n" + "{context}"

# Two-message chat template: the system instructions plus the user's question,
# supplied through the {input} variable.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

In [8]:
# Chain that puts the retrieved documents into the prompt and asks the LLM.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: retrieve documents for the input, then answer from them.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

# Ask a question and print only the generated answer text.
user_question = "Why did sebi did a metting?"
response = rag_chain.invoke({"input": user_question})
print(response["answer"])

The Securities and Exchange Board of India (SEBI) held a board meeting to address market expectations and proposals for tightening rules on futures and options transactions.


In [28]:
# Source URL of every document the chain used as context, in retrieval order
# (duplicates are kept when several chunks come from the same article).
# Fall back to an empty list so a response without 'context' produces no
# sources instead of raising TypeError when iterating over None.
sources = [doc.metadata['source'] for doc in (response.get('context') or [])]
sources

['https://www.moneycontrol.com/news/business/business-news-live-sebi-board-meeting-manba-finance-share-price-krn-heat-exchanger-allotment-status-ipo-september-30-liveblog-12831964.html#google_vignette',
 'https://www.moneycontrol.com/news/business/business-news-live-sebi-board-meeting-manba-finance-share-price-krn-heat-exchanger-allotment-status-ipo-september-30-liveblog-12831964.html#google_vignette',
 'https://www.moneycontrol.com/news/business/business-news-live-sebi-board-meeting-manba-finance-share-price-krn-heat-exchanger-allotment-status-ipo-september-30-liveblog-12831964.html#google_vignette',
 'https://www.moneycontrol.com/news/business/business-news-live-sebi-board-meeting-manba-finance-share-price-krn-heat-exchanger-allotment-status-ipo-september-30-liveblog-12831964.html#google_vignette',
 'https://www.moneycontrol.com/news/business/business-news-live-sebi-board-meeting-manba-finance-share-price-krn-heat-exchanger-allotment-status-ipo-september-30-liveblog-12831964.html#goo