In [15]:
pip install --upgrade filetype

Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [2]:
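# The OpenAI clients below are created without an explicit key, so OPENAI_API_KEY
# must already be set in the environment. Prefer exporting it outside the notebook
# (shell or .env file) over hardcoding it in a cell like this:
# os.environ['OPENAI_API_KEY'] = "my_api_key"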

In [3]:
# Language model used to answer questions over the retrieved chunks.
# NOTE(review): temperature=0.9 is a high sampling temperature for answers that
# should stay close to the source articles — confirm this is intended.
llm = OpenAI(
    temperature=0.9,
    max_tokens=500,
)

In [4]:
# Source articles to index; the loader is given the URLs and load() fetches them.
article_urls = [
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html",
]
loaders = UnstructuredURLLoader(urls=article_urls)
data = loaders.load()

# Display how many documents were loaded.
len(data)

2

In [5]:
# Splitter configured with chunk_size=1000 and an overlap of 200 between chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# `data` already holds documents, so split_documents() is used instead of split_text().
docs = text_splitter.split_documents(data)

In [6]:
# Number of chunks produced by the splitter.
len(docs)

15

In [7]:
# Inspect the first chunk (page content and metadata).
docs[0]

Document(page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nLoans up to ₹15 LAKHS\n\nFixed Deposits\n\nCredit CardsLifetime Free\n\nCredit Score\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nGo Ad-Free\n\nMy Alerts\n\n>->MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_MARKETS_AS/MC_ENG_ROS_NWS_MKTS_AS_ATF_728\n\nGo PRO @₹99 PRO\n\nAdvertisement\n\nRemove Ad\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nHomeNewsBusinessMarketsWall Street rises as Tesla soars on AI optimism\n\nTrending Topics\n\nSensex TodayStock Market fall todayITC Share PriceSensex CrashJSW Energy Share PriceUnion Bank Share Price\n\nWall Street rises as Tesla soars on AI optimism', metadata={'source': 

In [8]:
# Embedding model used to vectorise each chunk.
embeddings = OpenAIEmbeddings()

# Build the FAISS vector index from the chunks and the embedding model.
vectorindex_openai = FAISS.from_documents(documents=docs, embedding=embeddings)

In [9]:
# pip install dill

In [10]:
# import dill as pickle

In [11]:
# # Store the created vector index locally
# file_path="vector_index.pkl"
# with open(file_path, "wb") as f:
#     pickle.dump(vectorindex_openai, f)

In [12]:
# # Step 3: Save only the FAISS index (excluding OpenAIEmbeddings)
# file_path = "vector_index.pkl"
# vectorindex_openai.save_local("faiss_index")  # Save FAISS index to local directory

# # Step 4: Optionally pickle the metadata separately
# metadata_path = "metadata.pkl"
# with open(metadata_path, "wb") as f:
#     pickle.dump({"docs": docs}, f)

# print(f"FAISS index saved at 'faiss_index' and metadata saved at '{metadata_path}'")

In [24]:
# Persist the vector store built earlier in two forms:
#   1. FAISS's own on-disk format via save_local() (directory "faiss_index"),
#   2. a pickle of the whole vector store object, which later cells load back.
#
# The index is deliberately NOT rebuilt here: `vectorindex_openai` already holds
# the embedded chunks, and calling FAISS.from_documents() again would embed every
# chunk a second time for the same result. The imports needed by this cell
# (pickle, FAISS, OpenAIEmbeddings) are already in the notebook's import cell.
file_path = "faiss_index.pkl"

vectorindex_openai.save_local("faiss_index")

# NOTE(review): the pickle presumably also captures the embeddings object held by
# the store — confirm it carries no credentials before sharing this file.
with open(file_path, "wb") as f:
    pickle.dump(vectorindex_openai, f)

# Report only what was actually written (no separate embeddings config is saved).
print(f"FAISS index saved to directory 'faiss_index' and pickled to '{file_path}'")


FAISS index and embeddings config saved at 'faiss_index.pkl'


In [21]:
# Serialise the whole vector store object and write the bytes to disk.
with open("faiss_index.pkl", "wb") as index_file:
    index_file.write(pickle.dumps(vectorindex_openai))

In [28]:
# Read the pickled vector store back from disk.
# Unpickling can execute arbitrary code, so only load files this notebook wrote.
with open("faiss_index.pkl", "rb") as index_file:
    faiss_ind = pickle.loads(index_file.read())

In [29]:
# Same reload as above, but only attempted when the pickle file is present;
# if it is missing, `faiss_ind` keeps whatever value it already had.
pickle_present = os.path.exists(file_path)
if pickle_present:
    with open(file_path, "rb") as index_file:
        faiss_ind = pickle.loads(index_file.read())

In [30]:
# Check what kind of object came back from the pickle file.
print(type(faiss_ind))

<class 'langchain.vectorstores.faiss.FAISS'>


In [16]:
# embeddings = OpenAIEmbeddings()

# # Reload the FAISS index from the directory written by save_local("faiss_index")
# vectorIndex = FAISS.load_local("faiss_index", embeddings)

In [17]:
# Type of the in-memory vector store, for comparison with the reloaded one.
print(type(vectorindex_openai)) 

<class 'langchain.vectorstores.faiss.FAISS'>


In [27]:
# Build the question-answering chain on top of the vector store reloaded from
# disk. The retriever comes from `faiss_ind` (the object unpickled in an earlier
# cell); the previously used name `faiss_index` is not defined anywhere in this
# notebook and would raise NameError.
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=faiss_ind.as_retriever())
chain



In [None]:
# Turn on LangChain's debug mode before running the chain.
langchain.debug = True

# Question to ask; swap in the alternative below to try a different one.
query = "what is the price of Tiago iCNG?"
# query = "what are the main features of punch iCNG?"

# Run the chain and display only its outputs.
chain(
    {"question": query},
    return_only_outputs=True,
)