In [8]:
import os
import secret_key
# Copy the API key from the local `secret_key` module into the process
# environment, keeping the key itself out of the notebook.
os.environ["OPENAI_API_KEY"] = secret_key.OPENAI_API_KEY

In [5]:
import os
# import streamlit as st
import pickle
import time
import langchain
from langchain_openai import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [9]:
# Set up the completion model used by the rest of the notebook.
# NOTE(review): temperature=0.9 tends to give varied wording between runs;
# consider a lower value if reproducible answers are wanted - confirm intent.
llm = OpenAI(
    max_tokens=500,
    temperature=0.9,
)

### Load data

In [10]:
# News articles to ingest.
article_urls = [
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html",
]

# Fetch the pages and turn them into documents.
loaders = UnstructuredURLLoader(urls=article_urls)
data = loaders.load()

# Number of documents loaded (shown as the cell output).
len(data)

2

### Splitting data to create chunks

In [12]:
# Splitter configured with a chunk size of 1000 and an overlap of 200
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# `data` already holds document objects, so split_documents() is used
# rather than split_text().
docs = text_splitter.split_documents(data)

# Number of chunks produced (shown as the cell output).
len(docs)

16

### Create embeddings for these chunks and save them to FAISS index


In [39]:
# Build the FAISS vector index once and reuse it from disk on later runs.
#
# Inputs : `docs` (the chunked documents) and the OpenAI embeddings model.
# Outputs: `vectorindex_openai` (the FAISS store) and `retriever`.
#
# NOTE: the cached index is reused as-is. If the source URLs or the chunking
# settings change, delete the `faiss_index` folder so the index is rebuilt.

# Directory used to persist the FAISS vector index
faiss_directory = "faiss_index"
embeddings = OpenAIEmbeddings()

# save_local() writes "index.faiss" and "index.pkl" (default index_name).
# Checking for the files rather than only the directory means an empty or
# partially written folder triggers a rebuild instead of a load failure.
index_files = [
    os.path.join(faiss_directory, file_name)
    for file_name in ("index.faiss", "index.pkl")
]

if all(os.path.isfile(path) for path in index_files):
    # SECURITY: load_local() unpickles "index.pkl"; unpickling can execute
    # arbitrary code. This is only acceptable because the folder is produced
    # by this notebook - never point it at an index from an untrusted source.
    vectorindex_openai = FAISS.load_local(
        faiss_directory,
        embeddings,
        allow_dangerous_deserialization=True,
    )
else:
    # No usable cache: embed the chunks, then persist the index for next time.
    # The freshly built index is used directly; no reload from disk is needed.
    vectorindex_openai = FAISS.from_documents(docs, embeddings)
    vectorindex_openai.save_local(faiss_directory)

# Create a retriever from the vector index
retriever = vectorindex_openai.as_retriever()





### Retrieve similar embeddings for a given question and call LLM to retrieve final answer

In [42]:
# Assemble the question-answering-with-sources chain from the LLM and the
# retriever created earlier.
chain = RetrievalQAWithSourcesChain.from_llm(
    retriever=retriever,
    llm=llm,
)

# Echo the chain so its configuration is shown as the cell output.
chain



In [43]:
# Ask a question against the indexed articles.
# The retrieved chunks are combined with a map-reduce strategy, so the LLM is
# called on the individual chunks before a final call produces the answer.
query = "What is the price of Tiago iCNG?"

# Print every chain / LLM step to the console (very verbose output).
langchain.debug = True

# Calling the chain object directly (Chain.__call__) is deprecated;
# invoke() is the supported entry point and accepts the same arguments.
chain.invoke({"question": query}, return_only_outputs=True)


  chain({"question":query},return_only_outputs=True)


[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "What is the price of Tiago iCNG?"
}


Error in ConsoleCallbackHandler.on_chain_start callback: ValidationError(model='Run', errors=[{'loc': ('__root__',), 'msg': "argument of type 'NoneType' is not iterable", 'type': 'type_error'}])
Parent run 34234b12-b1c0-45d6-ab1b-d1fc07e68898 not found for run 19d06c8a-55c6-4a76-ba05-9a5d32288a2f. Treating as a root run.
Parent run 34234b12-b1c0-45d6-ab1b-d1fc07e68898 not found for run 0a95a510-bb55-46e1-93b7-b472e13967ca. Treating as a root run.
Parent run 34234b12-b1c0-45d6-ab1b-d1fc07e68898 not found for run bf413a3a-6878-4796-a88b-39e5ff4139ef. Treating as a root run.
Parent run 34234b12-b1c0-45d6-ab1b-d1fc07e68898 not found for run 0ac1732c-7465-4011-8b78-88d4d8209fd7. Treating as a root run.


[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[llm/start][0m [1m[llm:OpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Use the following portion of a long document to see if any of the text is relevant to answer the question. \nReturn any relevant text verbatim.\nThe company also said it has also introduced the twin-cylinder technology on its Tiago and Tigor models.\n\nThe Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh, while the Tigor iCNG comes at a price range of Rs 7.8 lakh to Rs 8.95 lakh.\n\nTata Motors Passenger Vehicles Ltd Head-Marketing, Vinay Pant said these introductions put together will make the company's CNG line up \"appealing, holistic, and stronger than ever\".\n\nPTI\n\nTags: #Business #Companies\n\nfirst published: Aug 4, 2023 02:17 pm\n\nTop Trends\n\nAngel TaxWiproBudget newsNew Income tax slabIPO News\n\nAdvertisement\n\nRemo

Error in ConsoleCallbackHandler.on_chain_end callback: TracerException('No indexed run ID 34234b12-b1c0-45d6-ab1b-d1fc07e68898.')


[36;1m[1;3m[llm/end][0m [1m[llm:OpenAI] [1.70s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": " The Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "Generation"
      }
    ]
  ],
  "llm_output": {
    "token_usage": {
      "total_tokens": 1329,
      "completion_tokens": 197,
      "prompt_tokens": 1132
    },
    "model_name": "gpt-3.5-turbo-instruct"
  },
  "run": null,
  "type": "LLMResult"
}
[36;1m[1;3m[llm/end][0m [1m[llm:OpenAI] [1.70s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": " The Punch iCNG is equipped with the company's proprietary twin-cylinder technology with enhanced safety features like a micro-switch to keep the car switched off at the time of refuelling and thermal incident protection that cuts off CNG supply to the engine and releases gas into the atm

{'answer': ' The price of Tiago iCNG is between Rs 6.55 lakh and Rs 8.1 lakh.\n',
 'sources': 'https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html'}