In [3]:

import os
import streamlit as st
import pickle
import time
import langchain
from secret_key import API_KEY
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredURLLoader
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.runnables import RunnablePassthrough



In [6]:
#load openAI api key
os.environ['GROQ_API_KEY'] = API_KEY

In [7]:
from langchain_groq import ChatGroq
# Initialise LLM with required params
llm = ChatGroq(model="llama-3.1-8b-instant",temperature=0.2)

In [8]:
loaders = UnstructuredURLLoader(urls=[
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html"
])
data = loaders.load()
len(data)

2

In [9]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

# As data is of type documents we can directly use split_documents over split_text in order to get the chunks.
docs = text_splitter.split_documents(data)

In [10]:
len(docs)

18

In [11]:
docs[0]

Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html'}, page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nLoans up to â‚¹50 LAKHS\n\nFixed Deposits\n\nCredit CardsLifetime Free\n\nCredit Score\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nNetwork 18\n\nGo Ad-Free\n\nMy Alerts\n\n>->MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_MARKETS_AS/MC_ENG_ROS_NWS_MKTS_AS_ATF_728\n\nMoneycontrol\n\nGo PRO NowPRO\n\nMoneycontrol PRO\n\nAdvertisement\n\nRemove Ad\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nHomeNewsBusinessMarketsWall Street rises as Tesla soars on AI optimism\n\nTrending Top

In [12]:
# Create the embeddings of the chunks using openAIEmbeddings
embeddings = HuggingFaceEmbeddings()

# Pass the documents and embeddings inorder to create FAISS vector index
vectorindex_openai = FAISS.from_documents(docs, embeddings)

  embeddings = HuggingFaceEmbeddings()
  embeddings = HuggingFaceEmbeddings()


In [13]:
# Storing vector index create in local
file_path= "../.venv/vector_index.pkl"
with open(file_path, "wb") as f:
    pickle.dump(vectorindex_openai, f)

In [15]:
if os.path.exists(file_path):
    with open(file_path, "rb") as f:
        vectorIndex = pickle.load(f)

retriever = vectorIndex.as_retriever(search_kwargs={"k": 3})

In [16]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

prompt = ChatPromptTemplate.from_template("""
Use the context below to answer the question.
Return a short answer and list the sources.

CONTEXT:
{context}

QUESTION:
{question}

FORMAT:
Answer: <your answer>
Sources: <document sources>
""")

# Function to convert docs into readable + source format
def format_docs(docs):
    formatted = []
    for d in docs:
        source = d.metadata.get("source", "N/A")
        formatted.append(f"[Source: {source}]\n{d.page_content}")
    return "\n\n".join(formatted)

rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
)


In [17]:
query = "What is the latest news about Tesla?"
result = rag_chain.invoke(query)

print(result)

content='Answer: Tesla rallied 10% after Morgan Stanley upgraded the electric car maker to "overweight" from "equal-weight," saying its Dojo supercomputer could boost the company\'s market value by nearly $600 billion.\n\nSources:\n1. https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html\n2. https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 122, 'prompt_tokens': 861, 'total_tokens': 983, 'completion_time': 0.108674957, 'completion_tokens_details': None, 'prompt_time': 0.066442612, 'prompt_tokens_details': None, 'queue_time': 0.005265533, 'total_time': 0.175117569}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_9ca2574dca', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None, 'model_provider': 'groq'} id='lc_run--257084d5-3642-4fb9-8039-5292ed9d030d-

In [25]:
map_prompt = ChatPromptTemplate.from_template("""
You are an assistant that answers questions using ONLY the provided chunk.

CHUNK:
{chunk}

QUESTION:
{question}

Give a short answer using ONLY the above chunk.
""")

In [26]:
reduce_prompt = ChatPromptTemplate.from_template("""
Combine the partial answers below into a final answer.

PARTIAL ANSWERS:
{answers}

FINAL ANSWER:
""")

In [27]:

def map_step(inputs):
    question = inputs["question"]
    docs = inputs["docs"]

    partials = []

    for d in docs:
        chunk = d.page_content
        res = (map_prompt | llm).invoke({"chunk": chunk, "question": question})

        # FIX: Groq response handling
        try:
            text = res.content
        except:
            text = res["message"]["content"]

        partials.append(text)

    return {"partials": partials, "question": question}


In [28]:
def reduce_step(inputs):
    answers = "\n\n".join(inputs["partials"])
    res = (reduce_prompt | llm).invoke({"answers": answers})

    try:
        return res.content
    except:
        return res["message"]["content"]

In [29]:
from langchain_core.runnables import RunnableMap, RunnableLambda

rag_chain = (
    RunnableMap(
        {
            "question": RunnablePassthrough(),
            "docs": retriever
        }
    )
    | RunnableLambda(map_step)
    | RunnableLambda(reduce_step)
)

In [30]:
result = rag_chain.invoke("What is the latest news about Tesla?")
print(result)result = rag_chain.invoke("What is the latest news about Tesla?")
print(result)

There is no information about Tesla in the provided chunks, so there is no final answer to combine.
