In [None]:

from transformers import pipeline

In [114]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.vectorstores import FAISS


In [115]:
loaders = UnstructuredURLLoader(urls = [
    "https://www.moneycontrol.com/news/business/markets/torrent-pharma-kkr-stake-jb-chemicals-12795245.html",
    "https://www.moneycontrol.com/news/business/markets/marico-stock-jumps-as-bangladesh-operations-resume-to-normal-levels-12795061.html",
    "https://www.moneycontrol.com/news/business/stocks/buy-lemon-tree-hotels-target-of-rs-150-anand-rathi-12793470.html"
])
data = loaders.load()

In [116]:
# Extract text from each Document object
text_data = [doc.page_content for doc in data]  # Replace 'content' with the actual attribute or method

# Combine all extracted text into a single string
combined_text = ' '.join(text_data)



In [117]:
combined_text

'English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nFREE Credit Score₹100 Cash Reward\n\nFixed Deposits\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nFREE Credit Score₹100 Cash Reward\n\nFixed Deposits\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nGo Ad-Free\n\nMy Alerts\n\n>->MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_MARKETS_AS/MC_ENG_ROS_NWS_MKTS_AS_ATF_728\n\nGo PRO @₹99 PRO\n\nAdvertisement\n\nRemove Ad\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nHomeNewsBusinessMarketsTorrent Pharma may raise up to $3 billion for KKR\'s stake in JB Chemicals: Report\n\nTrending Topics\n\nSensex TodayFirstCry share priceHDFC Bank newsOla Electric share priceUnicommerce share price\n\nTorrent Pharma may raise up to $3 billion for KK

In [118]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=0
)


docs = text_splitter.split_text(combined_text)

In [119]:
docs

['English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nFREE Credit Score₹100 Cash Reward\n\nFixed Deposits\n\nMy Alerts',
 'My Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nFREE Credit Score₹100 Cash Reward\n\nFixed Deposits\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nChat with Us\n\nDownload App',
 'Follow us on:\n\nGo Ad-Free\n\nMy Alerts\n\n>->MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_MARKETS_AS/MC_ENG_ROS_NWS_MKTS_AS_ATF_728\n\nGo PRO @₹99 PRO\n\nAdvertisement\n\nRemove Ad\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy',
 "Companies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nHomeNewsBusinessMarketsTorrent Pharma may raise up to $3 billion for KKR's stake in JB Chemicals: Report\n\nTrending Topics",
 "Sensex TodayFirstCry share priceHDFC Bank newsOla Electric share priceUnicommerce share price\n\nTorrent Pharma may raise up to $3 billion fo

In [120]:
from langchain_community.embeddings import HuggingFaceEmbeddings

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

In [121]:
def flatten_and_convert_to_string(docs):
    result = []
    for item in data:
        if isinstance(item, (list, tuple)):
            result.extend(flatten_and_convert_to_string(item))
        else:
            result.append(str(item))
    return result
 
# Flatten and convert split_text_0
split_text_0 = flatten_and_convert_to_string(docs)
 
# Now call embed_documents with a list of strings   

In [122]:
embedding=hf.embed_documents(split_text_0)

In [123]:
embedding

[[0.05134328827261925,
  0.01785227656364441,
  -0.002583057852461934,
  -0.04037502408027649,
  0.014090687967836857,
  -0.054015498608350754,
  0.00044319528387859464,
  0.06073393300175667,
  0.001292916014790535,
  0.012855934910476208,
  -0.010597378015518188,
  0.03422471508383751,
  -0.041755810379981995,
  0.11854097247123718,
  0.0008108073961921036,
  0.05602526292204857,
  0.009319957345724106,
  0.003405065508559346,
  -0.017003586515784264,
  0.013675454072654247,
  -0.0031250580213963985,
  0.016306936740875244,
  0.018431084230542183,
  0.0010116759222000837,
  0.0697438046336174,
  -0.00010557564382907003,
  0.04951626434922218,
  -0.030539700761437416,
  0.004572543781250715,
  -0.1014043316245079,
  0.015422925353050232,
  0.004174842499196529,
  0.016936425119638443,
  0.009279127232730389,
  2.518620931368787e-06,
  -0.06846481561660767,
  0.010960372164845467,
  0.015976054593920708,
  0.024039125069975853,
  0.031041236594319344,
  -0.014165408909320831,
  -0.0237

In [124]:
from langchain.schema import Document

# Convert strings into Document objects
docs = [Document(page_content=text) for text in text_data]  # `text_data` is the list of strings

# Create the vector store from documents
vector_store = FAISS.from_documents(docs, embedding=hf)


In [125]:
groq_api_key=os.environ["GROQ_API_KEY"] = "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"

In [126]:
from langchain_groq import ChatGroq

llm = ChatGroq(groq_api_key=groq_api_key,
    model="mixtral-8x7b-32768",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2
)

In [127]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


In [128]:
prompt_template = """Instruct: With this context\n\n{context}{input}\nOutput:"""

In [129]:
prompt = ChatPromptTemplate.from_template(prompt_template)
document_chain = create_stuff_documents_chain(llm, prompt)

In [130]:
from langchain.chains import create_retrieval_chain

retriever = vector_store .as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [131]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000124255627D0>), config={'run_name': 'retrieve_documents'})
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), config={'run_name': 'format_inputs'})
            | ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='Instruct: With this context\n\n{context}{input}\nOutput:'))])
            | ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000124254F7DF0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001248943D3F0>, temperature=1e-08, gro

In [136]:
# Combine the questions into a single string
question = (
    "Please answer the following questions:\n\n"
    "1. How much stake did KKR acquire?\n           "

    "2. What is the hike in Marico?\n"

    "3.TARGET PRICE OF LEMON TREE HOTELS \n"


    "we want each answer in different lines"

    "create a table of questions asked and generated answers"
)

# Pass the combined question to the LLM
response = retrieval_chain.invoke({"input": question})

# Print the response
response


{'input': 'Please answer the following questions:\n\n1. How much stake did KKR acquire?\n           2. What is the hike in Marico?\n3.TARGET PRICE OF LEMON TREE HOTELS \nwe want each answer in different linescreate a table of questions asked and generated answers',
 'context': [Document(page_content="English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nFREE Credit Score₹100 Cash Reward\n\nFixed Deposits\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nFREE Credit Score₹100 Cash Reward\n\nFixed Deposits\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nGo Ad-Free\n\nMy Alerts\n\n>->MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_STOCKS_AS/MC_ENG_ROS_NWS_STK_AS_ATF_728\n\nGo PRO @₹99 PRO\n\nAdvertisement\n\nRemove Ad\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\

In [162]:
# Pass the formatted question to the LLM



# Combine the questions into a single string
question = (
    "Please answer the following questions:\n\n\n   "
    "1. How much stake did KKR acquire?          "

    "2.   What is the hike in Marico?\n"

    "3.TARGET PRICE OF LEMON TREE HOTELS \n")



# Pass the combined question to the LLM
response = retrieval_chain.invoke({"input": question})

# Print the response
response


# Inspect the response to understand its structure





{'input': 'Please answer the following questions:\n\n\n   1. How much stake did KKR acquire?          2.   What is the hike in Marico?\n3.TARGET PRICE OF LEMON TREE HOTELS \n',
 'context': [Document(page_content="English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nFREE Credit Score₹100 Cash Reward\n\nFixed Deposits\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nFREE Credit Score₹100 Cash Reward\n\nFixed Deposits\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nGo Ad-Free\n\nMy Alerts\n\n>->MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_STOCKS_AS/MC_ENG_ROS_NWS_STK_AS_ATF_728\n\nGo PRO @₹99 PRO\n\nAdvertisement\n\nRemove Ad\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nHomeNewsBusinessStocksBuy Lemon Tree Hotels; target of Rs 150: 

NameError: name 'df' is not defined