In [3]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain_community.utilities import GoogleSerperAPIWrapper
import os
from dotenv import load_dotenv
from langchain_google_genai import GoogleGenerativeAI
from langchain_core.prompts import PromptTemplate 
from langchain.chains import LLMChain
from langchain_community.document_loaders import UnstructuredPDFLoader

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Pull variables from the .env file into the process environment, then read
# the two API credentials this notebook needs. Either value is None when the
# corresponding variable is not set.
load_dotenv()
Secret_key = os.environ.get("Google_key")  # passed to GoogleGenerativeAI below
SERPER_API_KEY = os.environ.get("Serper_Key_LangChain")  # passed to GoogleSerperAPIWrapper below

In [None]:

# --- PDF ingestion -----------------------------------------------------------
# Ask for a PDF, extract its text (falling back to OCR when the file has no
# text layer), and split the text into chunks for embedding.
pdf_input = input("Enter the path to the PDF file: ")
pdf_Loader = PyPDFLoader(pdf_input)

# Chunks are cut on blank lines, targeting ~1000 characters with 100 of overlap.
text_splitter = CharacterTextSplitter(separator="\n\n", chunk_size=1000, chunk_overlap=100)

# The loader yields one Document per page.
response = pdf_Loader.load()

# Gather the text of EVERY page that has any. Using only response[0] would
# index just the first page and would raise IndexError on an empty result.
page_content = "\n\n".join(
    page.page_content for page in response if page.page_content.strip()
)

ocr_content = ""
if page_content:
    hasText = "Text found in the PDF."
else:
    # No page carries extractable text: run OCR once over the whole file
    # instead of once per blank page.
    hasText = "No text found in the PDF."
    print(hasText)
    print("Moving to OCR process or alternative handling.")
    unstructured_loader = UnstructuredPDFLoader(pdf_input, strategy="ocr_only")
    ocr_response = unstructured_loader.load()
    ocr_content = "\n\n".join(ocr_doc.page_content for ocr_doc in ocr_response)
    print(ocr_content)

document_text = page_content or ocr_content
if not document_text.strip():
    # Fail here with a clear message rather than later, when an empty chunk
    # list reaches the vector store.
    raise ValueError(f"No text could be extracted from {pdf_input!r}, even with OCR.")

splitted_text = text_splitter.split_text(document_text)



# --- Retrieval ---------------------------------------------------------------
# Sentence-transformer model that turns each chunk into a vector.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Index the chunks in a Chroma collection.
db = Chroma.from_texts(
    texts=splitted_text,
    embedding=embeddings,
    collection_name="my_collection",
)

# Ask the user for a question and fetch the three closest chunks.
query = input("Enter your question: ")
response_query = db.similarity_search(query, k=3)

# Serper-backed Google search client.
web_search = GoogleSerperAPIWrapper(serper_api_key=SERPER_API_KEY)

# Assemble the context: retrieved passages first, then the web search output.
retrieved_passages = [match.page_content for match in response_query]
context = "\n".join(retrieved_passages)
web_search_results = web_search.run(query)
combined_context = "{}\n\n and Web Search Results:\n{}".format(context, web_search_results)

# print(response_query)

# --- Prompt and generation ---------------------------------------------------
# Assistant instructions. They are prepended to the prompt template below so
# the model actually receives them. The text contains no braces, so it adds
# no template variables.
sys_template = """You are a helpful assistant that Read and Analyse PDF Files. 
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know"""

template = PromptTemplate(
    input_variables=["content", "query"],
    template=(
        sys_template
        + "\n\n"
        + "Take the context of the given pdf: {content} and must answer the following question based on it: {query}"
    ),
)

# LLM model
llm = GoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=Secret_key)

# Compose prompt and model into one runnable and call it exactly once. This
# replaces the earlier pair of calls (llm.invoke plus the deprecated
# LLMChain.run), which hit the API twice and discarded the second answer.
chain = template | llm

# Pass combined_context so the web search results fetched above reach the
# model together with the retrieved PDF passages.
response_llm = chain.invoke({"content": combined_context, "query": query})

print(response_llm)
#print(db)




Based on the provided context, it is **not possible to determine if an 80% ROI can be attained.**

Here's why:

1.  **Redshift App Context:** This section details the technical configuration for SAML 2.0 sign-on methods, Okta integration, and application settings. It contains no financial information, costs, benefits, or metrics related to return on investment.
2.  **Report on Recent Workshops and Conferences:** This report outlines the necessity, impact, and beneficiaries of two workshops aimed at strengthening India's public library system. While it discusses the positive impact on literacy, access to knowledge, and the potential for policy changes, it does **not** provide any financial data, project costs, or quantifiable monetary benefits that would allow for an ROI calculation. The benefits described are qualitative and societal.

To calculate ROI, you would need information on:
*   The **cost** of the "plan" (which isn't clearly defined in financial terms, but refers to the works