# Ollama PDF RAG Notebook

## Import Libraries


In [None]:
# Imports
from langchain_community.document_loaders import UnstructuredPDFLoader
# from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

# Jupyter-specific imports
from IPython.display import display, Markdown

# Set environment variable for protobuf
import os
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

In [None]:
# Install the unstructured_inference package with pip (IPython shell escape).
# NOTE(review): presumably needed by UnstructuredPDFLoader when parsing the PDF — confirm.
!pip install unstructured_inference

## Load PDF

In [None]:
# Read the source PDF into LangChain documents.
local_path = "scammer-agent.pdf"
if not local_path:
    # No path configured: nothing is loaded, only a hint is printed.
    print("Upload a PDF file")
else:
    loader = UnstructuredPDFLoader(file_path=local_path)
    data = loader.load()
    print(f"PDF loaded successfully: {local_path}")

## Split text into chunks

In [None]:
# Split the loaded documents into overlapping chunks.
# `data` comes from `loader.load()`, i.e. a list of Document objects, so the
# splitter's `split_documents` is used: `split_text` expects a single string,
# and the Chroma cell below needs Document chunks for `Chroma.from_documents`.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(data)
print(f"Text split into {len(chunks)} chunks")

## Create vector database

In [None]:
# Build the embedding model and index the chunks in a Chroma collection.
import torch
from langchain_huggingface import HuggingFaceEmbeddings

# Use the GPU when torch reports one, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Relative path to the embedding model directory.
embeddings_path = 'shared-nvme/embedding_models/text2vec-base-chinese'
embeddings = HuggingFaceEmbeddings(
    model_name=embeddings_path,
    # `local_files_only` tells the loader to use the local model.
    model_kwargs=dict(device=device, local_files_only=True),
    encode_kwargs=dict(normalize_embeddings=True, batch_size=32),
)

vector_db = Chroma.from_documents(
    collection_name="local-rag",
    embedding=embeddings,
    documents=chunks,
)
print("Vector database created successfully")

## Set up LLM and Retrieval

In [None]:
# Set up the LLM: load a local model with Unsloth and expose it to LangChain
# as `llm`, the name the retriever and RAG chain cells below refer to.
import torch
from langchain_core.runnables import RunnableLambda
from unsloth import FastLanguageModel

max_seq_length = 2048  # Maximum sequence length handed to the Unsloth loader.
dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.
max_new_tokens = 512  # Upper bound on the number of tokens generated per call.

llm_path = 'shared-nvme/llm_models/Qwen2.5-0.5B-Instruct'
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=llm_path,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

# No `model.to(...)` here: the model is used on whatever device the loader put
# it on, and moving a bitsandbytes-quantized model can be rejected by
# transformers.
# `for_inference` is called for its in-place effect, as in Unsloth's documented
# usage; its return value is deliberately not assigned back to `model`.
FastLanguageModel.for_inference(model)
print("model ok")

# LangChain message types mapped to the role names used by chat templates.
_CHAT_ROLES = {"human": "user", "ai": "assistant", "system": "system"}


def _generate(prompt_value):
    """Generate a reply for a LangChain prompt value with the local model.

    Args:
        prompt_value: Output of a LangChain prompt template (string or chat
            prompt value); it is converted to chat messages first.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        stripped from the output).
    """
    messages = [
        {"role": _CHAT_ROLES.get(message.type, "user"), "content": message.content}
        for message in prompt_value.to_messages()
    ]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.inference_mode():
        output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # `generate` returns prompt + completion; keep only the completion tokens.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(output_ids[0][prompt_length:], skip_special_tokens=True)


# Runnable wrapper so the model can be piped after prompts (`prompt | llm | parser`).
llm = RunnableLambda(_generate)

In [None]:
# Prompt asking the LLM to rephrase the user's question into two alternatives,
# one per line, for use as additional retrieval queries.
QUERY_PROMPT = PromptTemplate(
    template="""You are an AI language model assistant. Your task is to generate 2
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
    input_variables=["question"],
)

# Multi-query retriever built on the Chroma store, driven by `llm` and QUERY_PROMPT.
retriever = MultiQueryRetriever.from_llm(
    retriever=vector_db.as_retriever(),
    llm=llm,
    prompt=QUERY_PROMPT,
)

## Create chain

In [None]:
# Prompt for the answering step: the model is told to rely only on the
# retrieved context when answering the question.
template = (
    "Answer the question based ONLY on the following context:\n"
    "{context}\n"
    "Question: {question}\n"
)

prompt = ChatPromptTemplate.from_template(template)

In [None]:
# Assemble the RAG chain in two steps.
# Step 1: feed the question to the retriever (as "context") and pass it through
# unchanged (as "question"), then fill the RAG prompt with both.
chain = {"context": retriever, "question": RunnablePassthrough()} | prompt
# Step 2: send the filled prompt to the LLM and parse the reply as a string.
chain = chain | llm | StrOutputParser()

## Chat with PDF

In [None]:
def chat_with_pdf(question):
    """
    Ask the RAG chain a question about the PDF and render the answer.

    The chain's output for `question` is wrapped in Markdown and shown with
    IPython's `display`; the result of that `display` call is returned.
    """
    answer = chain.invoke(question)
    rendered = Markdown(answer)
    return display(rendered)

In [None]:
# Example 1: ask for the document's main idea.
chat_with_pdf(question="What is the main idea of this document?")

In [None]:
# Example 2: ask what the scammer agent is for.
chat_with_pdf(question="What is the purpose of the scammer agent?")

In [None]:
# Example 3: ask for an explanation of the document's case study.
chat_with_pdf(question="Can you explain the case study highlighted in the document?")

## Clean up (optional)

In [None]:
# Optional: clean up when done.
# Deletes the collection behind `vector_db`; the retriever and chain built on
# top of it should not be used afterwards.
vector_db.delete_collection()
print("Vector database deleted successfully")