In [3]:
import os
import numpy as np
import pickle 
import pandas as pd
import streamlit as st
import langchain
from langchain_google_genai import GoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain, SequentialChain
from langchain_core.output_parsers import StrOutputParser
from langchain.agents import AgentType, initialize_agent, load_tools, Tool
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import TextLoader, UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.vectorstores import FAISS

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import warnings

# Install a blanket "ignore" filter so no warning category is shown in the
# output of the cells that follow.
warnings.simplefilter("ignore")

In [5]:
import getpass
import os

# Resolve the Google API key. The environment variable wins; the local
# secrets module is consulted only when the variable is not already set, so
# the notebook still runs on machines that export the key but have no
# secret_keys.py.
if "GOOGLE_API_KEY" not in os.environ:
    try:
        # Project-local module holding the key; needed only as a fallback.
        from secret_keys import google_api
    except ImportError:
        # No secrets module available: ask interactively (input is not echoed).
        google_api = getpass.getpass("Enter your Google API key: ")
    os.environ["GOOGLE_API_KEY"] = google_api

api_key = os.environ["GOOGLE_API_KEY"]

In [6]:
# Gemini model wrapper; each completion is capped at 250 output tokens.
llm = GoogleGenerativeAI(
    model="gemini-pro",
    max_output_tokens=250,
)

# Read the local UTF-8 text file into a list of documents.
loader = TextLoader("ondc.txt", encoding="utf-8")
data = loader.load()

# Cell result: number of documents that were loaded.
len(data)

1

In [7]:
# Split the loaded documents into small chunks (chunk_size=200, no overlap),
# using the separators in the order listed.
r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=0,
    separators=["\n\n", "\n", ".", "-", " "],
)

doc = r_splitter.split_documents(data)

# Fail here with a clear message instead of later, inside index construction.
assert doc, "text splitter produced no chunks; check that the source text is not empty"

# Preview only the first few chunks rather than dumping the entire list.
doc[:5]

[Document(metadata={'source': 'ondc.txt'}, page_content='#[1]ONDC | Open Network for Digital Commerce\n\n      [1] https://ondc.org/\n\n   IFRAME:\n   [2]https://www.googletagmanager.com/ns.html?id=GTM-MXJHTV5'),
 Document(metadata={'source': 'ondc.txt'}, page_content='[2] https://www.googletagmanager.com/ns.html?id=GTM-MXJHTV5\n\n   [3]ONDC | Open Network for Digital Commerce\n\n      [3] https://ondc.org/\n\n     Language icon [English__]'),
 Document(metadata={'source': 'ondc.txt'}, page_content='* [4]Complaints\n      https://ondc.org/complaints\n\n     * [5]Open Data\n      https://opendata.ondc.org/\n\n     * [6]Home\n      https://ondc.org/'),
 Document(metadata={'source': 'ondc.txt'}, page_content='* [7]About\n      https://ondc.org/about-ondc/\n\n     * [8]How to Shop\n      https://ondc.org/ondc-buyer-apps/\n\n     * [9]What to Buy\n      https://ondc.org/what-to-buy'),
 Document(metadata={'source': 'ondc.txt'}, page_content='* [10]Learn About ONDC\n      https://ondc.org/lea

In [8]:
# Sentence-transformers model used to embed every chunk.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

# Build the FAISS vector store from the chunked documents.
vector_index = FAISS.from_documents(documents=doc, embedding=embeddings)

In [9]:
# Destination of the serialized vector index.
file_path = "vector_index_file.pkl"

# Pickle the whole vector store to disk; the file is opened in binary mode.
with open(file_path, mode="wb") as index_file:
    pickle.dump(vector_index, index_file)

In [10]:
# Restore the vector store that was pickled to `file_path`.
# Fail loudly when the file is missing: silently skipping the load would leave
# `vectorIndex` undefined (or stale from an earlier kernel run) for later cells.
if not os.path.exists(file_path):
    raise FileNotFoundError(f"Vector index file not found: {file_path}")

# NOTE: pickle.load can execute arbitrary code. Only load a file this notebook
# wrote itself; never unpickle data from an untrusted source.
with open(file_path, "rb") as f:
    vectorIndex = pickle.load(f)

In [11]:
# Expose the restored vector store through its retriever interface.
retriever = vectorIndex.as_retriever()

# Question-answering chain that returns an answer together with its sources.
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=retriever,
)

# Cell result: show the assembled chain.
chain

RetrievalQAWithSourcesChain(combine_documents_chain=MapReduceDocumentsChain(llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['context', 'question'], template='Use the following portion of a long document to see if any of the text is relevant to answer the question. \nReturn any relevant text verbatim.\n{context}\nQuestion: {question}\nRelevant text, if any:'), llm=GoogleGenerativeAI(model='gemini-pro', max_output_tokens=250, client=genai.GenerativeModel(
    model_name='models/gemini-pro',
    generation_config={},
    safety_settings={},
    tools=None,
    system_instruction=None,
    cached_content=None
    model_name='models/gemini-pro',
    generation_config={},
    safety_settings={},
    tools=None,
    system_instruction=None,
    cached_content=None
))), document_prompt=PromptTemplate(input_variables=['page_content', 'source'], template='Content: {page_content}\nSource: {source}'), document_variable_name='summaries')), document_variable_name='context'), retriever=Vect

In [12]:
query = "what is the use of ondc"

# Turn on LangChain's debug tracing so every chain step is printed.
langchain.debug = True

# Run the chain through `invoke` instead of calling the chain object directly:
# `Chain.__call__` is deprecated in LangChain. `return_only_outputs=True` keeps
# the input question out of the returned dict, exactly as before.
chain.invoke({"question": query}, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "what is the use of ondc"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "with buyers and sellers. ONDC is merely a communication\n   protocol that allows Buyer Network Participants and Seller\n   Network Participants to interact with each other.",
      "question": "what is the use of ondc"
    },
    {
      "context": "How ONDC Works Video How ONDC Works Video\n   Watch it in English + 14 languages\n     * Assamese\n     * Bengali\n     * English\n     * Gujrati\n     * Hindi\n     * Kannada\n     * Kashmiri",
      "question": "what is the use of ondc"
    },
    {
  

{'answer': "I don't know.\n", 'sources': ''}