# Welcome to the Document Chatter AI App

In [1]:
import os
from dotenv import load_dotenv

from typing import List, Dict, Any, Tuple, Optional

from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
import gradio as gr

# Environment Setup
This cell loads environment variables, such as the OpenAI API key, from a `.env` file. The required libraries are imported in the first code cell; ensure the necessary packages are installed before running this notebook.


In [2]:
# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Fail fast with an actionable message when the key is missing. Assigning the
# result of os.getenv() directly to os.environ raises an opaque
# "TypeError: str expected, not NoneType" if the variable is unset.
openai_api_key = os.getenv('OPENAI_API_KEY')
if not openai_api_key:
    raise EnvironmentError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in your shell before running this notebook."
    )

# Re-export the key explicitly; the OpenAI clients below are constructed
# without an explicit key, so they presumably read it from the environment.
os.environ['OPENAI_API_KEY'] = openai_api_key


# Load Documents
This function handles loading and processing documents. It supports `.pdf` and `.txt` file formats and extracts their content for further processing. For unsupported file formats, it returns an error message instead of documents.


In [3]:
def load_document(file_path: str) -> Tuple[Optional[List], Optional[str]]:
    """
    Load and process a document from the given file path.

    The loader is chosen from the file extension, compared case-insensitively
    so that names such as ``REPORT.PDF`` are accepted. Failures while opening
    or parsing the file are reported through the error slot of the returned
    tuple instead of being raised, matching how unsupported formats are
    reported.

    Parameters:
        file_path (str): The path to the document file.

    Returns:
        tuple: ``(documents, None)`` on success, or ``(None, error_message)``
        when the format is unsupported or the file could not be loaded.
    """
    # Only the comparison is lower-cased; the loader still receives the
    # original path so case-sensitive file systems resolve it correctly.
    normalized_path = file_path.lower()
    if not normalized_path.endswith(('.pdf', '.txt')):
        return None, "Unsupported file format"

    try:
        if normalized_path.endswith('.pdf'):
            loader = PyPDFLoader(file_path)
        else:
            loader = TextLoader(file_path)
        documents = loader.load()
    except Exception as exc:
        # Loaders can fail in several ways (missing file, corrupt PDF, bad
        # encoding); surface all of them as a message the caller can display.
        return None, f"Failed to load document: {exc}"

    return documents, None

# Create a Vectorstore
This function creates a FAISS vectorstore from the provided documents using OpenAI embeddings. The vectorstore is used for efficient retrieval during the question-answering process.


In [4]:
def create_vectorstore(documents: List) -> FAISS:
    """
    Build a FAISS vectorstore over the given documents.

    Parameters:
        documents (list): Documents to embed and index.

    Returns:
        FAISS: The vectorstore built from the embedded documents.
    """
    # Default-configured OpenAI embedding client, handed to FAISS as the
    # embedding backend for the index.
    embedding_model = OpenAIEmbeddings()
    return FAISS.from_documents(documents, embedding_model)


# Upload and Process Documents
This function handles file uploads, processes the document to extract its content, and initializes the QA chain for interaction with the document.


In [5]:
# Module-level state shared by the upload handler and the chat handler.
# Both stay None until a document has been processed successfully.
vectorstore: Optional[FAISS] = None
qa_chain: Optional[ConversationalRetrievalChain] = None

def upload_and_process(file: gr.File) -> str:
    """
    Handle file upload, process the document, and initialize the QA chain.

    On success the module-level ``vectorstore`` and ``qa_chain`` are replaced
    together. On any failure they are left untouched, so a previously
    processed document stays usable.

    Parameters:
        file (gr.File): The uploaded file to process. ``None`` (nothing
            uploaded) is reported as an error.

    Returns:
        str: Status message indicating success or failure.
    """
    global vectorstore, qa_chain

    # The handler is called with None when the button is clicked before a
    # file has been chosen.
    if file is None:
        return "Error: No file uploaded"

    # NOTE(review): depending on the Gradio version/configuration the upload
    # may arrive as an object exposing `.name` or as a plain path string;
    # accept both.
    file_path = getattr(file, "name", file)

    documents, error = load_document(file_path)
    if error:
        return "Error: " + error
    if not documents:
        # Nothing to embed, e.g. an empty text file.
        return "Error: No content could be extracted from the document"

    try:
        new_vectorstore = create_vectorstore(documents)
        llm = OpenAI(temperature=0.5)
        new_qa_chain = ConversationalRetrievalChain.from_llm(llm, new_vectorstore.as_retriever())
    except Exception as exc:
        # Embedding and chain construction can fail for reasons outside this
        # code (network, API errors); report them in the status box.
        return f"Error: Failed to index document: {exc}"

    # Swap both globals only after everything succeeded so the chat handler
    # never sees a vectorstore and chain built from different documents.
    vectorstore, qa_chain = new_vectorstore, new_qa_chain

    return "Document processed and ready for chat!"


# Chat with Document
This function enables users to query the uploaded document interactively. It maintains chat history to provide contextual responses.


In [6]:
def _pair_chat_turns(history: List[Dict[str, str]]) -> List[Tuple[str, str]]:
    """Collapse role/content messages into (question, answer) tuples, dropping unpaired entries."""
    turns = []
    pending_question = None
    for entry in history:
        role = entry.get("role")
        content = entry.get("content")
        if not isinstance(content, str):
            # Non-text content cannot be rendered into the chain's prompt.
            continue
        if role == "user":
            pending_question = content
        elif role == "assistant" and pending_question is not None:
            turns.append((pending_question, content))
            pending_question = None
        # Assistant messages with no preceding question (e.g. the
        # "please upload" notice) are not conversation turns and are skipped.
    return turns


def chat_with_document(query: str, history: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """
    Handle user queries by interacting with the uploaded document.

    Parameters:
        query (str): The user's query. Blank queries are ignored.
        history (list): The chat history to maintain context, as a list of
            ``{"role": ..., "content": ...}`` messages. ``None`` is treated
            as an empty history.

    Returns:
        list: Updated chat history including the assistant's response. The
        list passed in is extended in place and returned.
    """
    if history is None:
        history = []

    if vectorstore is None or qa_chain is None:
        history.append({"role": "assistant", "content": "Please upload and process a document first."})
        return history

    # Do not spend an LLM call on an empty question.
    if not query or not query.strip():
        return history

    # ConversationalRetrievalChain reads tuple history as (human, ai) pairs,
    # so the role/content messages must be paired up. Passing (role, content)
    # tuples would make it see "Human: user / Assistant: <text>".
    formatted_history = _pair_chat_turns(history)

    inputs = {"question": query, "chat_history": formatted_history}
    result = qa_chain.invoke(inputs)

    # Append the query and the response to the history
    history.append({"role": "user", "content": query})
    history.append({"role": "assistant", "content": result['answer']})

    return history


# Gradio UI Setup
This cell sets up the Gradio interface for the application. It includes:
- File upload for document processing.
- A text input field for user queries.
- A chatbot interface to display chat history and responses.


In [7]:
# Build the Gradio interface. `demo` is the top-level Blocks app that the
# launch cell starts.
with gr.Blocks() as demo:
    gr.Markdown("# DocuChat: Interact with Your Documents")

    # Upload controls: file picker plus the button that triggers processing.
    with gr.Row():
        file_input = gr.File(label="Upload Document")
        upload_button = gr.Button("Process Document")

    # Read-only box showing the status string returned by upload_and_process.
    status_output = gr.Textbox(label="Status", interactive=False)

    # Question controls: free-text query plus the button that submits it.
    with gr.Row():
        query_input = gr.Textbox(label="Your Question")
        submit_button = gr.Button("Ask")

    # type="messages" matches the list of {"role", "content"} dicts that
    # chat_with_document appends to and returns.
    chat_history = gr.Chatbot(label="Chat History", type="messages")

    # File upload and processing: the uploaded file goes in, the status
    # message comes out.
    upload_button.click(upload_and_process, inputs=file_input, outputs=status_output)

    # Chat functionality: the current chatbot value is passed in as `history`
    # and replaced by the updated list the handler returns.
    submit_button.click(chat_with_document, inputs=[query_input, chat_history], outputs=[chat_history])

# Launch the Application
This cell launches the Gradio application, allowing users to upload documents and interact with them through a chat interface.


In [8]:
# Run the app: starts the Gradio server for `demo`. The recorded output of
# this cell shows it serving on a local URL (http://127.0.0.1:7860); per that
# output, pass `share=True` to `launch()` to create a public link.
demo.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


