In [1]:
from langchain_openai import AzureOpenAIEmbeddings
import os
from dotenv import load_dotenv

# Populate os.environ (python-dotenv) before the credentials are read below.
load_dotenv()

# Either value is None when the corresponding variable is not set.
api_key = os.environ.get("AZURE_OPENAI_API_KEY")
api_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")

# Embedding client used later to vectorise the document chunks.
embedding_model = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-ada-002",
    api_version="2024-05-01-preview",
    azure_endpoint=api_endpoint,
    api_key=api_key,
)



In [2]:
from langchain_community.document_loaders import PyPDFLoader
import getpass
import os
from dotenv import load_dotenv
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import AzureOpenAIEmbeddings, OpenAI, AzureChatOpenAI
from langchain_community.vectorstores import FAISS, Chroma
from langchain_community.llms import AzureOpenAI
from langchain.chains import RetrievalQA
import tiktoken
from langchain_community.document_loaders import AzureAIDocumentIntelligenceLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
import numpy as np
from openai import AzureOpenAI

In [5]:
from langchain_openai import AzureChatOpenAI
import os
from dotenv import load_dotenv

# Re-read the environment so this cell can also be run on its own.
load_dotenv()

# Either value is None when the corresponding variable is not set.
api_key = os.environ.get("AZURE_OPENAI_API_KEY")
api_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")

# Chat model shared by the retrieval chain and the direct-conversation paths.
llm = AzureChatOpenAI(
    openai_api_key=api_key,
    azure_endpoint=api_endpoint,
    api_version="2024-05-01-preview",
    model_name="gpt-35-turbo-0613",
    request_timeout=60,
)



In [6]:
# Build the path from its components instead of hard-coding Windows
# backslashes, so the same cell also resolves the file on Linux/macOS.
# On Windows the resulting string is identical to the previous literal.
path = os.path.join("static", "uploads", "CertificateOfCompletion_5.pdf")

# Load the PDF via PyPDFLoader; `documents` feeds the text splitter.
loader = PyPDFLoader(path)
documents = loader.load()

In [7]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Chunking configuration: sizes are measured with len(), i.e. in characters.
re_char_splitter = RecursiveCharacterTextSplitter(
    is_separator_regex=False,
    length_function=len,
    chunk_overlap=100,
    chunk_size=2000,
)

# Split the loaded PDF pages into chunks for embedding.
chunks = re_char_splitter.split_documents(documents)

In [8]:

# Embed every chunk and index the vectors in a FAISS store.
doc_search = FAISS.from_documents(chunks, embedding_model)

# Retrieval-augmented QA chain: retrieved chunks are "stuffed" into one prompt.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=doc_search.as_retriever(),
    chain_type="stuff",
)

In [9]:
# Typo in the query fixed ("docuemtn" -> "document") so the text that gets
# embedded for retrieval matches what is actually being asked.
question = "What is the document about?"

# Chain.run is deprecated (it emits a LangChainDeprecationWarning).
# RetrievalQA.invoke takes the question under the "query" key and returns a
# dict whose "result" entry holds the answer string.
result = chain.invoke({"query": question})["result"]
print(result)

  result = chain.run(question)


The document is a certificate of completion for Mohadeseh Yousofy who successfully completed the course "Information Security Awareness Refresher" on June 5th, 2024.


In [10]:
# Plain conversational prompt sent straight to the chat model (no retrieval).
# Typo fixed: "Hoes" -> "How's".
prompt = "How's it going?"

response = llm.invoke([{"role": "user", "content": prompt}])
# Last expression of the cell: the reply text is shown as the cell output.
response.content.strip()



'As an AI language model, I do not have emotions or feelings. But I am functioning well and ready to assist you with any task or question you may have. How can I assist you today?'

In [11]:
def handle_user_input(user_input):
    """Route a user message to the document QA chain or to the bare chat model.

    Routing is a case-insensitive keyword match: inputs mentioning "document"
    or "pdf" go through the module-level ``chain``; everything else is sent
    directly to the module-level ``llm``.

    Args:
        user_input (str): The raw text typed by the user.

    Returns:
        str: ``"Document Answer: <answer>"`` for document queries, the stripped
        model reply for conversational queries, or an ``"Error processing ..."``
        message if the underlying call raised.
    """
    # Determine intent (simple keyword matching here; can replace with ML-based intent classifier)
    lowered = user_input.lower()  # lowercase once, reuse for both keyword checks
    if "document" in lowered or "pdf" in lowered:
        print("Processing document-related query...")
        try:
            # Chain.run is deprecated; RetrievalQA.invoke expects the question
            # under "query" and returns the answer under "result".
            result = chain.invoke({"query": user_input})["result"]
            return f"Document Answer: {result}"
        except Exception as e:
            # Best-effort: report the failure as text instead of raising.
            return f"Error processing document query: {e}"
    else:
        print("Processing general conversational query...")
        try:
            response = llm.invoke([{"role": "user", "content": user_input}])
            return response.content.strip()
        except Exception as e:
            # Best-effort: report the failure as text instead of raising.
            return f"Error processing conversational query: {e}"


# Exercise both routing branches: one document question, one small-talk message.
user_input_1 = "What is the document about?"
user_input_2 = "How's it going?"

response_1 = handle_user_input(user_input_1)
print(response_1)

response_2 = handle_user_input(user_input_2)
print(response_2)

Processing document-related query...
Document Answer: The document is a certificate of completion for a course on Information Security Awareness Refresher taken by Mohadeseh Yousofy on June 5, 2024.
Processing general conversational query...
As an AI language model, I don't have emotions. But I'm functioning properly and ready to assist you in any way I can. How can I help you today?


In [12]:
# Prompt shared by document-grounded and general questions; the {context} and
# {question} placeholders are filled in by rag_pipeline.
template = """You are an assistant that can answer questions based on a document or provide general assistance.
When the context is provided, answer using the context; otherwise, respond directly to the user's query.

Context: {context}
Question: {question}
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

# Retriever over the FAISS index, used to fetch context chunks.
retriever = doc_search.as_retriever()


def rag_pipeline(user_input):
    """Answer ``user_input`` with the chat model, grounded on retrieved chunks.

    Uses the module-level ``retriever``, ``prompt`` and ``llm``.

    Args:
        user_input (str): The user's question or message.

    Returns:
        str: The model's reply with surrounding whitespace stripped.
    """
    # Retriever.invoke replaces the deprecated get_relevant_documents().
    retrieved_docs = retriever.invoke(user_input)
    if retrieved_docs:
        context = "\n".join(doc.page_content for doc in retrieved_docs)
    else:
        context = "No relevant context found."

    # format_messages() keeps the chat-message structure. The previous
    # prompt.format() flattened the template into one string prefixed with
    # "Human: ", and that prefix was sent to the model as part of the content.
    messages = prompt.format_messages(context=context, question=user_input)

    response = llm.invoke(messages)

    return response.content.strip()


In [13]:
# Run the same two sample inputs through the retrieval-backed pipeline.
user_input_1 = "What is the document about?"
user_input_2 = "How's it going?"

response_1 = rag_pipeline(user_input_1)
print(response_1)

response_2 = rag_pipeline(user_input_2)
print(response_2)

  retrieved_docs = retriever.get_relevant_documents(user_input)


The document is a Certificate of Completion for Mohadeseh Yousofy who successfully completed the course on Information Security Awareness Refresher on 5 June 2024.
As an AI language model, I do not have feelings or emotions. However, I am ready and available to assist you with any questions or tasks you may have. Is there anything specific you need help with?


In [14]:
# Retriever over the FAISS index (doc_search); passed explicitly to
# rag_pipeline for the document question below.
retriever = doc_search.as_retriever()
def rag_pipeline(user_input, retriever=None):
    """
    Unified pipeline for both retrieval-grounded answers and direct LLM interaction.

    Args:
        user_input (str): The query or input from the user.
        retriever (Retriever, optional): The retriever object for document search.
            When omitted (None), no retrieval is performed. Defaults to None.

    Returns:
        str: Response generated by the module-level ``llm``, stripped of
        surrounding whitespace.
    """
    context = ""

    # Retrieval only if a retriever is provided. Compare against None rather
    # than relying on truthiness, so a valid retriever that happens to evaluate
    # as falsy is still used.
    if retriever is not None:
        # Retriever.invoke replaces the deprecated get_relevant_documents().
        retrieved_docs = retriever.invoke(user_input)

        if retrieved_docs:
            # Combining retrieved document chunks into a single context
            context = "\n".join(doc.page_content for doc in retrieved_docs)

    # No retriever, no hits, or only empty chunks: tell the model explicitly.
    if not context:
        context = "No relevant context found."

    prompt_text = f"""
    You are an assistant that can answer questions based on a document or provide general assistance.
    When the context is provided, answer using the context; otherwise, respond directly to the user's query.

    Context: {context}
    Question: {user_input}
    Answer:
    """

    response = llm.invoke([{"role": "user", "content": prompt_text}])

    return response.content.strip()



# Document question goes through retrieval; small talk skips it (no retriever).
user_input_1 = "What is the document about?"
user_input_2 = "How's it going?"

response_1 = rag_pipeline(user_input_1, retriever=retriever)
print(response_1)

response_2 = rag_pipeline(user_input_2)
print(response_2)


The document is a Certificate of Completion for the course "Information Security Awareness Refresher" completed by Mohadeseh Yousofy on 5 June 2024.
As an AI language model, I do not have emotions, but I am here to assist you with any questions or concerns you may have. How can I assist you today?
