# Web Search RAG

In [1]:
#!pip install streamlit
#!pip install openai
#!pip install tiktoken

In [2]:
import os
import langchain.text_splitter
import streamlit as st
import pickle
import langchain
import langchain_community
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS




In [None]:
#os.environ['OPENAI_API_KEY'] = 'enter your openai key'

### Creating our LLM

In [19]:
# Completion model used by the question-answering chain later in the notebook.
# NOTE(review): 0.9 is a fairly high sampling temperature for factual Q&A;
# consider lowering it if answers need to be more repeatable.
llm = OpenAI(
    temperature=0.9,
    max_tokens=500,
)

### Loading my data

In [4]:
# Moneycontrol articles that make up the knowledge base for this notebook.
article_urls = [
    "https://www.moneycontrol.com/news/business/tata-motors-mahindra-gain-certificates-for-production-linked-payouts-11281691.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html",
    "https://www.moneycontrol.com/news/business/stocks/buy-tata-motors-target-of-rs-743-kr-choksey-11080811.html",
]

# Download the pages and load them as documents.
loader = UnstructuredURLLoader(urls=article_urls)
data = loader.load()

# Sanity check: number of documents loaded.
len(data)

3

### Creating Chunks

In [5]:
# Split the loaded articles into overlapping chunks so each piece can be
# embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(data)

# Number of chunks produced.
len(docs)

20

### Creating my vector index

In [6]:
# Embedding model used both to build the index and, later, to reload it.
embeddings = OpenAIEmbeddings()

# Embed every chunk and build a FAISS vector index over them.
vectorindex_openai = FAISS.from_documents(
    documents=docs,
    embedding=embeddings,
)

  embeddings = OpenAIEmbeddings()


### Storing the created vector index locally

https://python.langchain.com/docs/integrations/vectorstores/faiss/

In [7]:
#file_path = "vector_index.pkl"
#with open(file_path, "wb") as f:
#    pickle.dump(vectorindex_openai, f)

In [8]:
#!pip install dill

In [9]:
#import dill
#file_path = "vector_index.pkl"
#with open(file_path, "wb") as f:
#    dill.dump(vectorindex_openai, f)

In [11]:
# Location on disk where the FAISS index is persisted; reused when reloading.
file_path = "faiss_store_openai"

# Write the FAISS index to that location.
vectorindex_openai.save_local(folder_path=file_path)

### Retrieving the vector index from the saved FAISS store

In [49]:
#if os.path.exists(file_path):
#    with open(file_path,"rb") as f:
#        vectorIndex = pickle.load(f)

In [12]:
#import dill

#with open(file_path, "rb") as f:
#    vectorIndex = dill.load(f)

In [20]:
# Reload the persisted index from disk, using the same embedding model that
# built it. Deserialization is explicitly allowed here because the store was
# written by this notebook itself.
news_vector_store = FAISS.load_local(
    folder_path=file_path,
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

# Quick smoke test of the reloaded index.
# NOTE(review): `docs` previously held the chunk list from the splitter cell;
# after this line it holds the similarity-search results instead.
search_query = "brocker flash"
docs = news_vector_store.similarity_search(search_query)

In [21]:
# Display the documents returned by the similarity search.
docs

[Document(id='4b7a61a3-5cd2-4049-9cc6-f16ce33848af', metadata={'source': 'https://www.moneycontrol.com/news/business/stocks/buy-tata-motors-target-of-rs-743-kr-choksey-11080811.html'}, page_content="Broker Research\n\nAugust 02, 2023 / 22:42 IST\n\n\n\nWatchlist\n\nPortfolio\n\nMessage\n\nSet Alert\n\nlive\n\nbselive\n\nnselive\n\nVolume\n\nTodays L/H\n\nMore\n\nBroker Research\n\nfirst published: Aug 2, 2023 10:39 pm\n\nDiscover the latest Business News, Budget 2025 News, Sensex, and Nifty updates. Obtain Personal Finance insights, tax queries, and expert opinions on Moneycontrol or download the Moneycontrol App to stay updated!\n\nTrending news\n\n'It broke me': French tourist's 46-hour train ordeal in India goes viral, sparks debate\n\nSam Altman gives shoutout to Bengaluru founder behind his baby’s smart crib. Here’s how much it costs\n\nJapanese envoy wins hearts with ‘Gajab Swad Ba’ post after tasting litti chokha in Bihar\n\n“It pains me...': Vir Das pays Rs 50,000 for Air India

In [22]:
# Expose the reloaded FAISS store as a retriever, then wire it together with
# the LLM into a question-answering chain that also reports its sources.
retriever = news_vector_store.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=retriever,
)

In [23]:
# Display the assembled chain object to inspect its configuration.
chain



In [24]:
# Question to ask against the indexed news articles.
query = "what is the price of Tiago iCNG ?"

# Turn on LangChain's debug tracing so every intermediate chain/LLM step is
# printed while the query runs.
langchain.debug = True

# Run the chain via .invoke(): calling the chain object directly
# (`chain({...})`) is deprecated and emits a deprecation warning.
# The input dict and return_only_outputs flag are unchanged, so the result is
# still a dict with the 'answer' and 'sources' keys.
chain.invoke({"question": query}, return_only_outputs=True)

  chain({"question":query}, return_only_outputs=True)


[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "what is the price of Tiago iCNG ?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "Trending Topics\n\nGold PriceSensex LiveIndian RupeeIndia GDP GrowthRBI Monetary Policy Highlights\n\nTata Motors launches Punch iCNG, price starts at Rs 7.1 lakh\n\nThe Punch iCNG is equipped with the company's proprietary twin-cylinder technology with enhanced safety features like a micro-switch to keep the car switched off at the time of refuelling and thermal incident protection that cuts off CNG supply to the engine and releases gas into the atmosphere, Tata Motors said in a statement

{'answer': " The price of Tata Motors' Tiago iCNG is Rs. 7.1 lakh.\n",
 'sources': 'https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html'}

### Now we see that it combines those responses and summarizes them in order to produce a final answer, because we had an answer for each chunk.