## [Jump to Gradio UI ↓](#Gradio-UI)

In [1]:
import gradio as gr
from IPython.display import display, HTML
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings, OllamaEmbeddings

import ollama
import os
import logging
import re
import shutil
import sys

# NOTE(review): this call runs before logging is configured in the next cell, and no
# output is recorded for this cell. With the standard library's default root level
# (WARNING) an INFO record is dropped — confirm whether this message is still wanted.
logging.info('Imports complete.')

In [2]:
# Reset root logger: Important if running notebook cells multiple times.
# Without this, every re-run of the cell would stack another pair of handlers
# and each record would be emitted once per stacked handler.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)
    # Detaching alone is not enough: a FileHandler keeps its file open until it
    # is closed, so close each old handler to release the previous './app.log' handle.
    handler.close()

# Set up Logger.
logging.basicConfig(
    # level = logging.DEBUG,
    level = logging.INFO,
    format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers = [
        logging.FileHandler('./app.log', mode = 'w'), # Log to file + Overwrite log file each session.
        logging.StreamHandler(sys.stdout) # Log to console/notebook
    ]
)

# Named logger used by every function in this notebook.
logger = logging.getLogger('deepseek_pdf_rag')

# Set up key variables.
# Hugging Face model id used for chunk embeddings; also the default shown in the UI.
embedding_model_name = 'BAAI/bge-small-en-v1.5'
logger.info(f'Embedding Model Name: {embedding_model_name}')

logger.info('Setup complete.')

2025-03-16 14:31:49,865 - deepseek_pdf_rag - INFO - Embedding Model Name: BAAI/bge-small-en-v1.5
2025-03-16 14:31:49,865 - deepseek_pdf_rag - INFO - Setup complete.


In [3]:
def _clear_vector_store(chroma_persist_directory_path):
    """Delete the persisted Chroma directory, if present, so the next store starts empty."""
    if os.path.exists(chroma_persist_directory_path):
        logger.info(f'Clearing existing vector store at {chroma_persist_directory_path}')
        shutil.rmtree(chroma_persist_directory_path)
        logger.info('Successfully cleared existing vector store.')


def _build_retriever(chunks, embeddings, chroma_persist_directory_path, model_label):
    """Index `chunks` into a new persisted Chroma store and return (vectorstore, retriever).

    `model_label` is only used in log messages to say which model produced the embeddings.
    """
    logger.info(f'Creating vector store in {chroma_persist_directory_path}')
    vectorstore = Chroma.from_documents(
        documents = chunks,
        embedding = embeddings,
        persist_directory = chroma_persist_directory_path)
    logger.info(f'Vector store created successfully with {model_label}.')

    # as_retriever(): LangChain method that returns a retriever bound to this vector store.
    retriever = vectorstore.as_retriever()
    logger.info(f'Retriever created successfully with {model_label}.')

    return vectorstore, retriever


def process_pdf(pdf_file_path, generation_model_name, embedding_model_name, chroma_persist_directory_path,
                chunk_size = 500, chunk_overlap = 100):
    """Load a PDF, split it into chunks and index the chunks in a fresh Chroma store.

    Embeddings are first created with HuggingFaceEmbeddings(`embedding_model_name`).
    If anything in that path fails, the store directory is cleared again and the
    chunks are re-indexed with OllamaEmbeddings(`generation_model_name`).

    Args:
        pdf_file_path: Path of the PDF to load. None or "" means no file was given.
        generation_model_name: Ollama model name, used only for the fallback embeddings.
        embedding_model_name: Model name passed to HuggingFaceEmbeddings.
        chroma_persist_directory_path: Directory for the Chroma store. Any existing
            directory at this path is deleted first.
        chunk_size: Maximum chunk length passed to RecursiveCharacterTextSplitter.
            For this splitter the unit is characters, not tokens.
        chunk_overlap: Overlap between consecutive chunks, in the same unit.

    Returns:
        (text_splitter, vectorstore, retriever) on success, or (None, None, None)
        when no file was given, no chunks could be extracted, or processing failed.
        Failures are logged with a traceback rather than raised.
    """
    # Treat "" like None, matching the check ask_question() performs.
    if not pdf_file_path:
        logger.warning('No PDF file provided: Upload PDF file.')
        return None, None, None

    try:

        _clear_vector_store(chroma_persist_directory_path)

        logger.info(f'Attempting to load PDF from: {pdf_file_path}')
        data = PyMuPDFLoader(pdf_file_path).load()
        logger.info(f'Successfully loaded PDF with {len(data)} pages.')

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        chunks = text_splitter.split_documents(data)
        logger.info(f'Split PDF into {len(chunks)} chunks.')

        # Nothing to index: stop here instead of running an empty document list
        # through both embedding backends.
        if not chunks:
            logger.warning('No text chunks extracted from PDF: nothing to index.')
            return None, None, None

        try:

            logger.info(f'Initializing embeddings with model: {embedding_model_name}')
            embeddings = HuggingFaceEmbeddings(model_name = embedding_model_name)
            logger.info('Embeddings initialized successfully.')

            vectorstore, retriever = _build_retriever(
                chunks, embeddings, chroma_persist_directory_path, embedding_model_name)

        except Exception as e:

            logger.error(f'Error with embeddings, vector store or retriever: {e}', exc_info=True) # exc_info=True includes traceback
            # Fall back to Ollama embeddings
            logger.info(f"Falling back to OllamaEmbeddings with {generation_model_name}.")

            # The failed attempt may have left a partial store on disk; start clean.
            _clear_vector_store(chroma_persist_directory_path)

            embeddings = OllamaEmbeddings(model = generation_model_name)
            vectorstore, retriever = _build_retriever(
                chunks, embeddings, chroma_persist_directory_path, generation_model_name)

        return text_splitter, vectorstore, retriever

    except Exception as e:

        logger.error(f"Error processing PDF: {e}", exc_info=True)  # exc_info=True includes traceback
        return None, None, None

In [4]:
def combine_docs(docs):
    """Join the `page_content` of every document in `docs`, separated by a blank line."""
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

In [5]:
def ollama_llm(question, context, generation_model_name, include_thinking = True):
    """Send the question and its context to an Ollama chat model and return the reply text.

    The prompt is a single user message of the form
    "Question: <question>\\n\\nContext: <context>".

    When `include_thinking` is true the reply is returned unchanged. Otherwise every
    `<think>...</think>` span is removed and the remaining text is stripped of
    leading and trailing whitespace.
    """
    user_message = {
        "role": "user",
        "content": f"Question: {question}\n\nContext: {context}",
    }
    reply = ollama.chat(model=generation_model_name, messages=[user_message])
    answer_text = reply["message"]["content"]

    if include_thinking:
        return answer_text

    # `.*?` is lazy, so each match ends at the nearest closing tag;
    # DOTALL lets `.` cross newlines, since the reasoning usually spans several lines.
    think_block = re.compile(r"<think>.*?</think>", flags=re.DOTALL)
    return think_block.sub("", answer_text).strip()

In [6]:
def rag_chain(question, text_splitter, vectorstore, retriever, generation_model_name):
    """Answer `question` from the documents that `retriever` returns for it.

    The retrieved documents are merged into one context string and passed to the
    generation model with the model's <think> section removed from the answer.
    `text_splitter` and `vectorstore` are accepted but not used here.
    """
    context = combine_docs(retriever.invoke(question))
    return ollama_llm(question, context, generation_model_name, include_thinking = False)

In [7]:
def ask_question(pdf_file_path, question, generation_model_name, embedding_model_name, chroma_persist_directory_path):
    """Gradio callback: index the uploaded PDF, then answer `question` from it.

    Returns the answer produced by rag_chain(), or None when no PDF was supplied
    or when process_pdf() could not build the pipeline (both cases are logged).
    The PDF is processed again on every call.
    """
    no_pdf = pdf_file_path is None or pdf_file_path == ""
    if no_pdf:
        logger.info('No PDF uploaded. Please upload a PDF file.')
        return None

    # (text_splitter, vectorstore, retriever); all three are None on failure.
    pipeline = process_pdf(pdf_file_path, generation_model_name, embedding_model_name, chroma_persist_directory_path)

    if pipeline[0] is None:
        logger.error('Error processing PDF. Check console for details.')
        return None

    return rag_chain(question, *pipeline, generation_model_name)

In [8]:
# # generation_model_name = 'deepseek-r1'
# # generation_model_name = 'deepseek-r1:8b'
# generation_model_name = 'deepseek-r1:1.5b'

# response = ollama.chat(
	
# 	model = generation_model_name,
    
# 	messages=[
# 				{"role": "user", "content": "List the 9 planets."},
#     			],

# )

In [9]:
# Default generation model pre-selected in the dropdown below.
# Alternatives offered there:
# generation_model_name = 'deepseek-r1'
# generation_model_name = 'deepseek-r1:8b'
generation_model_name_default = 'deepseek-r1:1.5b'

# Default persist directory is derived from the embedding model name;
# '/' is replaced so the model id becomes part of a single directory name.
chroma_persist_directory_default = f"./chroma_db_{embedding_model_name.replace('/', '_')}"

# # Test the processing directly.
# import traceback
# test_pdf_path = "./Hadoop Migration Custoner Pitch Enablement - For Partner AEs and SAs.pdf"
# try:
#     text_splitter, vectorstore, retriever = process_pdf(test_pdf_path, generation_model_name_default, embedding_model_name, chroma_persist_directory_default)
#     print("Success!")
# except Exception as e:
#     print(f"Error: {e}")
#     traceback.print_exc()

# Gradio passes the input components to `fn` positionally, so their order must
# match ask_question(pdf_file_path, question, generation_model_name,
# embedding_model_name, chroma_persist_directory_path).
interface = gr.Interface(

    fn=ask_question,

    inputs=[

        # A PDF is required: ask_question() returns nothing without one,
        # so the label must not describe the upload as optional.
        gr.File(label="Upload PDF", type="filepath"),

        gr.Textbox(label="Ask a question"),

        gr.Dropdown(choices=["deepseek-r1", "deepseek-r1:8b", "deepseek-r1:1.5b"],
                    value=generation_model_name_default,
                    label="Generation Model"),

        gr.Textbox(value=embedding_model_name,
                   label="Embedding Model"),

        gr.Textbox(value=chroma_persist_directory_default,
                   label="Chroma DB Directory")
    ],

    outputs="text",

    title="Ask questions about your PDF",

    description="Use DeepSeek-R1 for answers and BGE for semantic search on your PDF document.",
)


## Gradio UI

In [10]:
# Start the Gradio server for `interface`; the recorded output below shows it serving on a local URL.
# Per the hint in that output, pass `share=True` to `launch()` to create a public link.
interface.launch()

2025-03-16 14:31:50,175 - httpx - INFO - HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
Running on local URL:  http://127.0.0.1:7860
2025-03-16 14:31:50,259 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
2025-03-16 14:31:50,273 - httpx - INFO - HTTP Request: GET http://127.0.0.1:7860/startup-events "HTTP/1.1 200 OK"
2025-03-16 14:31:50,495 - botocore.credentials - INFO - Found credentials in shared credentials file: ~/.aws/credentials
2025-03-16 14:31:50,861 - httpx - INFO - HTTP Request: HEAD http://127.0.0.1:7860/ "HTTP/1.1 200 OK"

To create a public link, set `share=True` in `launch()`.


