In [None]:
!pip install openai
!pip install -U langchain-openai
!pip install tiktoken

import getpass
import os
import pickle
import time

import faiss
import langchain
import streamlit as st
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
# NOTE: this rebinds OpenAIEmbeddings, shadowing the langchain_openai import above.
from langchain.embeddings import OpenAIEmbeddings


In [None]:
# Load the OpenAI API key.
# A key that is already exported in the environment is kept as-is; otherwise
# the key is requested interactively, so the secret is never written into the
# notebook source or its saved outputs.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')

In [None]:
# Create the LLM object, passing the temperature and the token limit explicitly
llm = OpenAI(
    max_tokens=500,
    temperature=0.9,
)

In [None]:
# Load the two news articles from their URLs as documents.
loaders = UnstructuredURLLoader(urls=[
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html"
])
data = loaders.load()

# Stop here when nothing was loaded (for example, the pages could not be
# fetched): the splitting, embedding and indexing cells below all need at
# least one document and would otherwise fail with a less helpful error.
if not data:
    raise RuntimeError("No documents were loaded from the given URLs.")

# Number of documents that were loaded
len(data)


In [None]:
# Configure the splitter: chunk size 1000 with an overlap of 200
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)

# `data` already holds documents, so split_documents is called on it directly
# (rather than split_text) to obtain the chunks.
docs = text_splitter.split_documents(data)

In [None]:
# Number of chunks produced by the splitter
len(docs)

In [None]:
# Inspect the first chunk
docs[0]

In [None]:
# Embedding object (OpenAIEmbeddings) used to embed the chunks
embeddings = OpenAIEmbeddings()

# Build the FAISS vector index from the chunks and the embedding object
vectorindex_openai = FAISS.from_documents(
    documents=docs,
    embedding=embeddings,
)



In [None]:
# Write the raw FAISS index to its own file
faiss.write_index(vectorindex_openai.index, "faiss_index.bin")

# The docstore and the index-to-docstore-id mapping are pickled into a second file
metadata = dict(
    docstore=vectorindex_openai.docstore,
    index_to_docstore_id=vectorindex_openai.index_to_docstore_id,
)

with open("faiss_metadata.pkl", "wb") as meta_file:
    pickle.dump(metadata, meta_file)

In [None]:
# Load the FAISS index and its metadata from disk and rebuild the vector store.
# Fail fast when a file is missing: skipping the load silently would leave
# `vectorIndex` undefined and the next cell would die with a NameError.
missing_files = [
    path
    for path in ("faiss_index.bin", "faiss_metadata.pkl")
    if not os.path.exists(path)
]
if missing_files:
    raise FileNotFoundError(
        "Missing FAISS store file(s): " + ", ".join(missing_files)
        + ". Run the cell that stores the FAISS index first."
    )

faiss_index = faiss.read_index("faiss_index.bin")

# SECURITY: pickle.load can execute arbitrary code while unpickling; only load
# a metadata file that was written by this notebook.
with open("faiss_metadata.pkl", "rb") as f:
    metadata = pickle.load(f)

# Reconstruct FAISS VectorStore
vectorIndex = FAISS(
    index=faiss_index,
    docstore=metadata["docstore"],
    index_to_docstore_id=metadata["index_to_docstore_id"],
    embedding_function=OpenAIEmbeddings()
)

In [None]:
# Build the RetrievalQAWithSourcesChain from the LLM and the vector store's retriever
chain = RetrievalQAWithSourcesChain.from_llm(
    retriever=vectorIndex.as_retriever(),
    llm=llm,
)
# Display the assembled chain
chain


In [None]:
# Question sent to the chain; the commented line is an alternative question to try
# query = "what are the main features of punch iCNG?"
query = "what is the price of Tiago iCNG?"

# Switch on LangChain's debug flag before running the chain
langchain.debug = True

chain(
    inputs={"question": query},
    return_only_outputs=True,
)