In [1]:
import os
import time
import textwrap
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
load_dotenv()

# Load the GROQ API key
groq_api_key = os.getenv('GROQ_API_KEY')

# Initialize LLM
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="Llama3-8b-8192"
)

In [3]:
# Define the prompt
prompt = ChatPromptTemplate.from_template(
    """
    You are a document assistant that helps users to find information in a context.
    Please provide the most accurate response based on the context and inputs
    only give information that is in the context not in general
    <context>
    {context}
    <context>
    Questions:{input}
    """
)


In [4]:
def vector_embedding(uploaded_file_path):
    if "vectors" not in globals():
        # Load and process the PDF file
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        loader = PyPDFLoader(uploaded_file_path)  # Load the PDF file
        docs = loader.load()  # Document Loading
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)  # Chunk Creation
        final_documents = text_splitter.split_documents(docs[:20])  # Splitting
        vectors = FAISS.from_documents(final_documents, embeddings)  # Vector embeddings
        return vectors, final_documents
    else:
        print("Document already embedded.")
        return vectors, final_documents

# Get the path of the PDF file from user
uploaded_file_path = "../Data/sample.pdf"
# Process the file to create the vector embeddings
vectors, final_documents = vector_embedding(uploaded_file_path)
print("Vector Store DB is ready")

Vector Store DB is ready


In [5]:
# Ask the user to enter their question
prompt1 = "what is this about?"

if prompt1:
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)
    start = time.process_time()
    response = retrieval_chain.invoke({'input': prompt1})
    print(f"Response time: {time.process_time() - start}")
    # print(f"Answer: {response['answer']}")
    wrapped_text = textwrap.fill(response['answer'], width=80)
    print(wrapped_text)
else:
    print("Please enter a valid question.")

Response time: 0.06782583899999972
Based on the provided context, it appears that this is a sample PDF file
containing a block of text in a decorative font. The text does not seem to have
any specific topic or theme, but rather appears to be a collection of Latin
phrases and sentences that are not grammatically correct.  It is difficult to
determine what this text is about without more context or information. If you
could provide more context or clarify what you are looking for, I may be able to
provide a more accurate answer.
