In [None]:
import getpass
import os

from langchain.chains import RetrievalQA, StuffDocumentsChain
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_ollama import OllamaEmbeddings
from langchain_openai import AzureChatOpenAI

In [6]:
# Azure OpenAI connection settings, published as environment variables so that
# later cells (and the Azure client itself) can read them.
#
# SECURITY: the API key must never be stored in the notebook source. It is
# taken from an already-exported AZURE_OPENAI_API_KEY when present; otherwise
# the user is prompted for it with hidden input. Any key that was previously
# committed in this cell should be treated as leaked and rotated.
if not os.environ.get("AZURE_OPENAI_API_KEY"):
    os.environ["AZURE_OPENAI_API_KEY"] = getpass.getpass("Azure OpenAI API key: ")

# Non-secret settings: endpoint, deployment name and REST API version.
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://wdc-chat-llm.openai.azure.com/"
os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"] = "gpt-4o"
os.environ["OPENAI_API_VERSION"] = "2024-06-01"

In [None]:
# Load documents from a local folder
def load_documents_from_folder(folder_path):
    """Load the contents of ``folder_path`` through ``PyPDFDirectoryLoader``.

    Args:
        folder_path: Directory path handed unchanged to the loader.

    Returns:
        Whatever ``PyPDFDirectoryLoader(folder_path).load()`` returns.
    """
    return PyPDFDirectoryLoader(folder_path).load()

In [8]:
# Embed and vectorize documents using FAISS
def embed_and_vectorize_documents(documents, model="llama3.2"):
    """Embed ``documents`` with Ollama and index them in a FAISS vector store.

    Args:
        documents: Documents to embed, passed to ``FAISS.from_documents``.
        model: Name of the Ollama model used to compute embeddings. Defaults
            to ``"llama3.2"``, the value this notebook originally hard-coded.

    Returns:
        The vector store produced by ``FAISS.from_documents``.

    Raises:
        ValueError: If ``documents`` is empty.
    """
    # Fail fast with an actionable message: an index cannot be built from zero
    # documents, and letting the empty input reach FAISS surfaces as an
    # unrelated-looking internal error instead.
    if not documents:
        raise ValueError("Cannot build a vector store from an empty document list.")

    embeddings = OllamaEmbeddings(model=model)
    return FAISS.from_documents(documents, embeddings)

In [17]:
# Implement RAG
def implement_rag(vector_store, query):
    """Answer ``query`` using retrieval-augmented generation over ``vector_store``.

    An Azure OpenAI chat model is configured from the ``AZURE_OPENAI_ENDPOINT``,
    ``AZURE_OPENAI_DEPLOYMENT_NAME`` and ``OPENAI_API_VERSION`` environment
    variables. Documents retrieved from ``vector_store`` are stuffed into the
    prompt's ``{context}`` slot and the question into ``{question}``.

    Args:
        vector_store: Object exposing ``as_retriever()`` (e.g. the FAISS store
            built earlier in this notebook).
        query: The question to answer.

    Returns:
        The chain's answer, i.e. the ``"result"`` entry of the chain output.

    Raises:
        KeyError: If any of the required environment variables is not set.
    """
    llm = AzureChatOpenAI(
        azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
        azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
        api_version=os.environ["OPENAI_API_VERSION"],
        temperature=0.3,
    )

    prompt = PromptTemplate(
        template="Answer the question based on the context: {context}\n\nQuestion: {question}\nAnswer:",
        input_variables=["context", "question"],
    )

    # Build the chain through the factory so the chat model is wrapped in the
    # LLM chain that the "stuff" combiner requires, and so the custom prompt is
    # actually used. Handing the raw model to StuffDocumentsChain(llm_chain=...)
    # is rejected because that field expects an LLMChain, not a model.
    rag_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
        chain_type_kwargs={"prompt": prompt},
    )

    # `.run()` is deprecated; `.invoke()` takes the question under the chain's
    # "query" input key and returns a dict with the answer under "result".
    return rag_chain.invoke({"query": query})["result"]

In [18]:
#if __name__ == "__main__":
def run_vector_AI(folder_path=None):
    """Load the documents in a folder and build a vector store from them.

    Args:
        folder_path: Directory to load documents from. When omitted, the
            location this notebook was originally written against is used, so
            existing zero-argument calls keep working.

    Returns:
        The vector store returned by ``embed_and_vectorize_documents``.

    Raises:
        ValueError: If no documents were loaded from ``folder_path``.
    """
    if folder_path is None:
        # Machine-specific default kept only for backward compatibility;
        # pass `folder_path` explicitly to run this anywhere else.
        folder_path = r"D:\OneDrive - Wipro\Desktop\Temp2\Resume\\"

    documents = load_documents_from_folder(folder_path)
    if not documents:
        # Stop here instead of printing a warning and then handing an empty
        # list to the embedding step, which cannot build an index from it.
        raise ValueError(f"No documents found in the specified folder: {folder_path}")
    print(f"Loaded {len(documents)} documents.")

    vector_store = embed_and_vectorize_documents(documents)
    print("Vector Store created.")
    return vector_store


In [None]:
# Build the vector store first; this step does not depend on the question.
vector_store = run_vector_AI()

# Question to ask about the indexed documents.
query = "What is the summary of the documents?"

# Run retrieval-augmented generation and show the answer.
result = implement_rag(vector_store,query)
print(result)

Loaded 26 documents.
