In [1]:
#from sec_key import openapi_key
#import os

#os.environ["OPENAI_API_KEY"] = openapi_key

In [2]:
import streamlit as st
import pickle
import time 
import langchain

from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import SeleniumURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

import warnings
warnings.filterwarnings("ignore")

In [3]:
# Initialise the LLM with the required parameters.
#   temperature - sampling temperature handed to the model
#   max_tokens  - limit on the maximum length of the generated text
llm = OpenAI(
    max_tokens=500,
    temperature=0.9,
)

In [4]:
# Load the two Moneycontrol news articles through the Selenium-based URL loader.
loaders = SeleniumURLLoader(
    urls=[
        "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
        "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html",
    ]
)
data = loaders.load()

# Number of documents that were loaded.
len(data)

2

In [5]:
# Display the first loaded Document to inspect the raw page text that was scraped.
data[0]

Document(page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nMoneycontrol Trending Stock\n\nInfosys\xa0INE009A01021, INFY, 500209\n\nState Bank of India\xa0INE062A01020, SBIN, 500112\n\nYes Bank\xa0INE528G01027, YESBANK, 532648\n\nBank Nifty\n\nNifty 500\n\nQuotes\n\nMutual Funds\n\nCommodities\n\nFutures & Options\n\nCurrency\n\nNews\n\nCryptocurrency\n\nForum\n\nNotices\n\nVideos\n\nGlossary\n\nAll\n\nHello, LoginLog-inor Sign-UpMy AccountMy Profile My PortfolioMy WatchlistFREE Credit Score₹100 Cash RewardMy AlertsMy MessagesPrice AlertsChat with UsDownload AppFollow us on:\n\nPremium\n\nMy Alerts\n\nElections 2024MarketsHOMEINDIAN INDICESSTOCK ACTIONAll StatsTop GainersTop LosersOnly BuyersOnly Sellers52 Week High52 Week LowPrice ShockersVolume ShockersMost Active StocksGLOBAL MARKETSUS MARKETSBIG SHARK PORTFOLIOSSTOCK SCANNERECONOMIC CALENDARMARKET ACTIONDashboardF&OFII & DII ActivityCorporate ActionEARNINGSCOMMODITYPRE MARKETRESEARCHAdviceBroker ResearchTechnicalsCURRENCYBON

In [28]:
# Configure the splitter: chunk_size=1000 with chunk_overlap=200 between
# neighbouring chunks. A custom separator list was tried and left disabled:
#   separators = ["\n", "\n\n", "\n \n", ".",  " "]
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)

# `data` is already a list of Document objects, so split_documents can be
# applied to it directly to obtain the chunks.
docs = text_splitter.split_documents(data)

# Number of chunks produced.
len(docs)

36

In [29]:
# The split above produced 36 individual chunks in the recorded run.
# Display one of them (index 10) to check what a chunk looks like.

docs[10]

Document(page_content='Story continues below Advertisement\n\nRemove Ad\n\nOther megacaps also rose, with Amazon\xa0(AMZN.O)\xa0climbing 3.5% and Microsoft\xa0(MSFT.O)\xa0adding 1.1%.\n\nMeta Platforms\xa0(META.O)\xa0jumped 3.25% after a report on Sunday said the social media platform was working on a new, more powerful AI system.\n\nWalt Disney\xa0(DIS.N)\xa0added 1.2% and Charter Communications\xa0(CHTR.O)\xa0rose 3.2% after they\xa0reached a deal\xa0for Disney\'s programming, including ESPN, to return to the Spectrum cable service just hours ahead of the start of NFL "Monday Night Football."\n\nRelated stories\n\nStocks subdued, yen in focus, with inflation data on tap\n\nS&P 500 closes higher to secure strongest Q1 since 2019\n\nUS stocks close with gains, led by Dow as investors look for rate insight\n\nInvestors are looking to August consumer price index data due on Wednesday for clues about how close the Federal Reserve may be to ending its campaign of interest rate hikes. That 

In [30]:
# Create the OpenAI embeddings object.
# FAISS.from_documents accepts the documents (here: the chunks created above)
# together with the embeddings object; the resulting vector store is kept in
# `vectorindex_openai`.
embedding = OpenAIEmbeddings()
vectorindex_openai = FAISS.from_documents(documents=docs, embedding=embedding)

- Store the vector index in a local file

# Serialise the vector index to a local pickle file.
file_path = "vector_indx.pkl"
with open(file_path, mode="wb") as f:
    pickle.dump(vectorindex_openai, f)

import pickle
import threading

class MyClass:
    """Small example of an object that owns a lock but can still be pickled.

    Lock objects cannot be pickled, so the ``_lock`` attribute is left out of
    the pickled state and a fresh lock is created when the object is restored.
    """

    def __init__(self):
        # Use threading.RLock instead of _thread.RLock
        self._lock = threading.RLock()

    def __getstate__(self):
        """Return a copy of the instance attributes without the lock."""
        state = self.__dict__.copy()
        # pop() with a default so an instance that never received a lock
        # (e.g. one created via __new__) does not raise KeyError.
        state.pop('_lock', None)
        return state

    def __setstate__(self, state):
        """Restore the attributes from ``state`` and recreate the lock."""
        self.__dict__.update(state)
        self._lock = threading.RLock()

# Example usage: create a MyClass instance and pickle it to a local file.
obj = MyClass()

file_path = 'vector_index.pkl'
with open(file_path, mode='wb') as f:
    pickle.dump(obj, f)

# `os` is required for the os.path.exists check below; the only other
# `import os` in this notebook is commented out, so import it here to keep the
# cell runnable on a fresh kernel.
import os

# Write the vector index to a local pickle file.
file_path = 'vector_index.pkl'
with open(file_path, 'wb') as f:
    pickle.dump(vectorindex_openai, f)

# Read the index back from disk into `vectorindex`.
# CAUTION: pickle.load can execute arbitrary code while deserialising; only
# load pickle files that were produced by a trusted source such as this notebook.
if os.path.exists(file_path):
    with open(file_path, 'rb') as f:
        vectorindex = pickle.load(f)

In [34]:
# RetrievalQAWithSourcesChain: question-answering chain with sources over documents.
# The FAISS index is exposed as a retriever and paired with the LLM.
chain = RetrievalQAWithSourcesChain.from_llm(
    retriever=vectorindex_openai.as_retriever(),
    llm=llm,
)

# Display the assembled chain.
chain



In [35]:
# Switch on LangChain's debug mode before running the chain.
langchain.debug = True

# Sample question
query = "what is the price of Tiago iCNG?"

# Run the chain on the question and return only its outputs.
chain({"question": query}, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "what is the price of Tiago iCNG?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "The company also said it has also introduced the twin-cylinder technology on its Tiago and Tigor models.\n\nThe Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh, while the Tigor iCNG comes at a price range of Rs 7.8 lakh to Rs 8.95 lakh.\n\nTata Motors Passenger Vehicles Ltd Head-Marketing, Vinay Pant said these introductions put together will make the company's CNG line up \"appealing, holistic, and stronger than ever\".\n\nPTI\n\nTags:\n\n#Business\n\n#Companies\n\nfi

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain > 5:llm:OpenAI] [1.69s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": " The Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "Generation"
      }
    ]
  ],
  "llm_output": {
    "token_usage": {
      "total_tokens": 1269,
      "completion_tokens": 153,
      "prompt_tokens": 1116
    },
    "model_name": "gpt-3.5-turbo-instruct"
  },
  "run": null
}
[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain > 6:llm:OpenAI] [1.69s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": " \nThe Punch iCNG is equipped with the company's proprietary twin-cylinder technology with enhanced safety features like a mic

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 9:chain:LLMChain > 10:llm:OpenAI] [1.26s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": " The price of Tiago iCNG ranges from Rs 6.55 lakh to Rs 8.1 lakh. \nSOURCES: https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "Generation"
      }
    ]
  ],
  "llm_output": {
    "token_usage": {
      "total_tokens": 1738,
      "completion_tokens": 65,
      "prompt_tokens": 1673
    },
    "model_name": "gpt-3.5-turbo-instruct"
  },
  "run": null
}
[36;1m[1;3m[chain/end][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 9:chain:LLMChain] [1.26s] Exiting Chain run with output:
[0m{
  "text": " The price of Tiago iCNG ranges from Rs 6.55 l

{'answer': ' The price of Tiago iCNG ranges from Rs 6.55 lakh to Rs 8.1 lakh. \n',
 'sources': 'https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html'}

##### It uses the map-reduce method

- When the query "what is the price of Tiago iCNG?" is passed as the question:
    - Step 1: It retrieves the similar-looking chunks from the vector database; there are 4 chunks in total, and the question is the same for all of them.  
        - The answer is in the first chunk, but it still retrieves the other chunks that look similar to the query. 
    - Step 2: The question is combined with each chunk, and four individual calls are made to the LLM.  
        - The prompt for each individual LLM call looks like: [Use the following portion of a long document to see if any of the text is relevant to answer the question. \nReturn any relevant text verbatim]
    - Step 3: As a result, 4 answers are generated. It then combines the 4 answers into a single summary chunk together with the query and makes one more call to the LLM.
    
- And finally it gives "The price of Tiago iCNG ranges from Rs 6.55 lakh to Rs 8.1 lakh."