### Import Libraries

In [62]:
import os
import pickle
import time

import streamlit as st
from dotenv import load_dotenv

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_core.globals import set_debug
from langchain_core.prompts import ChatPromptTemplate

# Import from langchain_classic instead
from langchain_classic.chains import RetrievalQAWithSourcesChain

In [64]:
# Load environment variables from a local .env file, if one is present
load_dotenv()

# Get API key from environment
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    # st.error()/st.stop() only take effect inside a `streamlit run` session;
    # in a notebook they would let execution continue without a key. Raise so
    # that "Run All" halts here with a clear message instead.
    raise RuntimeError("Please set OPENAI_API_KEY in your .env file")

In [65]:
# Chat model used by the QA chain further down. No model name is passed, so the
# library default is used; each response is capped at 500 tokens.
llm = ChatOpenAI(
    max_tokens=500,
    temperature=0.9,
)

### (1) Load data

In [66]:
# News articles to index. Kept in a named constant so the list is easy to find
# and extend.
NEWS_URLS = [
    'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html',
    'https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html',
]

# Fetch and parse the pages (WebBaseLoader is imported in the top import cell).
loaders = WebBaseLoader(NEWS_URLS)
data = loaders.load()

In [67]:
# Sanity check: number of documents returned by the loader
len(data)

2

### (2) Split data to create chunks

In [68]:
# Splitter settings: maximum chunk size of 1000 with a 200 overlap between
# neighbouring chunks.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Split the loaded articles into chunks for embedding.
docs = text_splitter.split_documents(data)

In [69]:
# Number of chunks produced by the splitter
len(docs)

40

In [70]:
# Inspect the first chunk: its metadata (source URL, title, ...) and page content
docs[0]

Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html', 'title': 'Wall Street rises as Tesla soars on AI optimism', 'description': 'Tesla (TSLA.O) rallied 10% after Morgan Stanley upgraded the electric car maker to ', 'language': 'en'}, page_content='Wall Street rises as Tesla soars on AI optimism')

### (3) Create embeddings for these chunks and save them to a FAISS index

In [71]:
# Embedding client used to vectorise the chunks.
embeddings = OpenAIEmbeddings()

# Embed every chunk in `docs` and build a FAISS vector index over the result.
vectorindex_openai = FAISS.from_documents(documents=docs, embedding=embeddings)

In [72]:
# Directory the index is written to and read back from. Using one constant
# keeps the save and load paths from drifting apart.
FAISS_INDEX_DIR = "faiss_index"

# Save the vector index
vectorindex_openai.save_local(FAISS_INDEX_DIR)

# SECURITY: loading a FAISS index unpickles data from disk, and unpickling can
# execute arbitrary code. allow_dangerous_deserialization=True is acceptable
# here only because the index was written by this notebook just above; never
# enable it for an index file from an untrusted source.
vectorindex_openai = FAISS.load_local(
    FAISS_INDEX_DIR,
    embeddings,  # reuse the existing client instead of constructing a second one
    allow_dangerous_deserialization=True,
)

### (4) Retrieve the most similar chunks for a given question and call the LLM to generate the final answer

In [73]:
# Expose the FAISS index as a retriever, then combine it with the LLM in a
# question-answering chain that also reports its sources.
retriever = vectorindex_openai.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)
chain



In [74]:
query = "what is the price of Tiago iCNG?"
# Alternative question to try:
# query = "what are the main features of punch iCNG?"

# Enable LangChain's global debug mode so intermediate chain and LLM calls are
# logged. set_debug() is the supported switch; assigning `langchain.debug`
# directly is the legacy mechanism.
set_debug(True)

# Chain.__call__ is deprecated; invoke() is the supported entry point and takes
# return_only_outputs the same way, so only 'answer' and 'sources' come back.
chain.invoke({"question": query}, return_only_outputs=True)

{'answer': 'The price of Tiago iCNG is between Rs 6.55 lakh and Rs 8.1 lakh.\n',
 'sources': 'https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html'}