## mbqr_rag_scratchpad

_WIP - NOT FOR DISTRIBUTION_

> `mbqr_rag_scratchpad.ipynb`<br>
> Simone J. Skeen (08-10-2025)

### 1. Prepare
Installs and imports requisite packages; customizes outputs.
***

In [None]:
%%capture

!pip install langchain faiss-cpu tiktoken chromadb
#!pip install --upgrade langchain-community
!pip install --upgrade langchain langchain-core langchain-community pydantic==2.6.4
!pip install pymupdf
!pip install pypdf
!pip install streamlit

Anaconda Prompt (anaconda3) > `ollama pull nomic-embed-text`

In [None]:
import numpy as np, os, pandas as pd, streamlit as st, warnings

import langchain, pydantic
print("LangChain:", langchain.__version__)
print("Pydantic:", pydantic.__version__)

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.llms import Ollama
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings import OllamaEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# pandas: opt in to copy-on-write; lift the column / row display caps

pd.set_option('mode.copy_on_write', True)

for display_opt in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_opt, None)

# silence FutureWarning + UserWarning for the session

for c in (FutureWarning, UserWarning):
    warnings.simplefilter('ignore', category = c)

### 2. Write
Defines requisite custom functions: `rag_tune` module.
***

In [None]:
# set wd (local)
# NOTE: relative paths used later in this notebook (the `%%writefile rag_tune.py`
# target, the "faiss_index" save location) resolve against this directory

CODE_DIR = 'C:/Users/sskee/OneDrive/Documents/01_brown/active/mbqr_rag/code'
os.chdir(CODE_DIR)
%pwd

#### `build_retrieval_qa_chain`

In [None]:
%%writefile rag_tune.py

from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

def build_retrieval_qa_chain(
    llm,
    retriever,
    chain_type = 'stuff',
    *,
    prompt = None,
    question_prompt = None,
    refine_prompt = None,
    combine_prompt = None,
    ):

    '''
    Build a RetrievalQA chain with support for external prompt injection.

    Any prompt that is not supplied falls back to the default PromptTemplate
    defined in the matching branch below. Prompts that do not belong to the
    selected chain_type are ignored.

    Parameters:
    - llm: the LLM object (e.g., Ollama instance)
    - retriever: a LangChain retriever (e.g., FAISS)
    - chain_type: 'stuff', 'map_reduce', or 'refine'
    - prompt: for 'stuff' chains
    - question_prompt & refine_prompt: for 'refine' chains
    - question_prompt & combine_prompt: for 'map_reduce' chains

    Returns:
    - the chain produced by RetrievalQA.from_chain_type, configured with
      return_source_documents = True

    Raises:
    - ValueError: chain_type is not one of the three supported values
    '''

    # fail fast, before any prompt objects are constructed

    if chain_type not in ('stuff', 'map_reduce', 'refine'):
        raise ValueError(f"Unsupported chain_type: {chain_type}")

    if chain_type == 'stuff':
        prompt = prompt or PromptTemplate(
            input_variables=['context', 'question'],
            template = '''
    You are a knowledgable conversational agent that offers accurate, succinct, responses 
        based on the provided context.

    Context:
    {context}

    Question:
    {question}
    '''
        )

        chain_type_kwargs = {'prompt': prompt}

    elif chain_type == 'map_reduce':
        question_prompt = question_prompt or PromptTemplate(
            input_variables = ['context', 'question'],
            template = '''
    Examine the following context to respond to the query as accurately as possible.

    Context:
    {context}

    Question:
    {question}

    Answer:
    '''
        )

        combine_prompt = combine_prompt or PromptTemplate(
            input_variables = ['summaries', 'question'],
            template = '''
    You are a knowledgable conversational agent that synthesizes multiple responses to 
        create a single comprehensive response.

    Summaries:
    {summaries}

    Question:
    {question}

    Final Answer:
    '''
        )

        chain_type_kwargs = {
            'question_prompt': question_prompt,
            'combine_prompt': combine_prompt,
            }

    else: ### chain_type == 'refine'
        question_prompt = question_prompt or PromptTemplate(
            input_variables = ['context', 'question'],
            template = '''
    You are a knowledgable conversational agent that offers accurate, succinct, responses 
        based on the provided context.

    Context:
    {context}

    Question:
    {question}

    Answer:
    '''
        )

        refine_prompt = refine_prompt or PromptTemplate(
            input_variables = ['context', 'question', 'existing_answer'],
            template = '''
        You are improving an existing response using new context.

        Existing Answer:
        {existing_answer}

        New Context:
        {context}

        Question:
        {question}

        Refined Answer:
        '''
        )

        # LangChain's refine loader names its document variable 'context_str'
        # by default, while the default prompts above use '{context}'. Without
        # an explicit document_variable_name the chain fails validation at
        # build time. Caller-supplied prompts written against the library
        # default ('{context_str}') keep working via the fallback.

        prompt_vars = getattr(question_prompt, 'input_variables', ())
        document_variable_name = 'context' if 'context' in prompt_vars else 'context_str'

        chain_type_kwargs = {
            'question_prompt': question_prompt,
            'refine_prompt': refine_prompt,
            'document_variable_name': document_variable_name,
            }

    return RetrievalQA.from_chain_type(
        llm = llm,
        retriever = retriever,
        chain_type = chain_type,
        return_source_documents = True,
        chain_type_kwargs = chain_type_kwargs
        )

#### `query_and_stream`

In [None]:
%%writefile -a rag_tune.py

from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

def query_and_stream(
    llm,
    retriever,
    query,
    prompt_template = None,
    show_sources = True,
    ):

    '''
    Answer `query` from retrieved context, printing the response as it streams.

    Mirrors RetrievalQA with chain_type = 'stuff': the retrieved excerpts are
    joined into one context block and sent to the LLM in a single prompt.
    All output is printed; nothing is returned.

    Parameters:
    - llm: LLM exposing .stream(prompt) (e.g., an Ollama instance)
    - retriever: pre-specified (external) retriever exposing .invoke(query)
    - query: user question
    - prompt_template: optional template formatted with `context` and `question`;
      a generic default is built when None
    - show_sources: when truthy, prints each excerpt's metadata and up to its
      first 1000 characters after the answer
    '''

    # fall back to a generic template when the caller supplies none

    if prompt_template is None:
        prompt_template = PromptTemplate(
            input_variables = ['context', 'question'],
            template = '''
    You are a knowledgable conversational agent that offers accurate, succinct, responses 
        based on the provided context.

        Context:
        {context}

        Question:
        {question}

        Answer:''',
            )

    # retrieve (retriever.get_relevant_documents is deprecated in favor of .invoke)

    hits = retriever.invoke(query)

    if not hits:
        print("No relevant documents found.")
        return

    # 'stuff' all excerpts into a single context block, then render the prompt

    stuffed = "\n\n".join([hit.page_content for hit in hits])
    rendered = prompt_template.format(context = stuffed, question = query)

    # stream response token-by-token (flush so each token appears immediately)

    print("\n🔮\n")
    for token in llm.stream(rendered):
        print(token, end = "", flush = True)
    print("\n\n🍂")

    # source excerpts are optional

    if not show_sources:
        return

    ### SJS 8/9: verbose w/ page_content; a cleaner variant would report
    ### metadata _only_ (source + page)

    print("\nknowledge excerpts:\n")
    for i, hit in enumerate(hits):
        meta = hit.metadata
        print(f"--- excerpt {i+1} ---")
        print(f"metadata: {meta}")
        print(hit.page_content[:1000], "...\n")

#### _Import_

In [None]:
#%pwd
from rag_tune import(
    build_retrieval_qa_chain,
    query_and_stream,
    )

### 3. Preprocess + Initialize
Loads, splits, chunks knowledge base documents; initializes LLM, retriever; vectorizes.
***

#### _Load, inspect_

In [None]:
from langchain_community.document_loaders import PyMuPDFLoader

# locate knowledge base (path only: the working directory is not changed here)

KNOW_DIR = 'C:/Users/sskee/OneDrive/Documents/01_brown/active/mbqr_rag/knowledge'

# pdf knowledge base: one path per source file

pdf_paths = [
    os.path.join(KNOW_DIR, pdf_name)
    for pdf_name in (
        "mbqr_manual_rag_db.pdf",
        "mbqr_scripts_rag_db.pdf",
        "poems_of protest_resistance_empowerment_rag_db_prelim.pdf",
        )
    ]

# load each pdf, keeping results grouped by file ("list of lists")

all_documents = []

for path in pdf_paths:
    docs = PyMuPDFLoader(path).load() ### alt. loader: PyPDFLoader(path)
    all_documents.append(docs)

    print(f"{os.path.basename(path)}: {len(docs)} p/p. loaded")

# spot check: leading 1000 characters of the first loaded document, each file

for i, doc_pages in enumerate(all_documents):
    print(f"\n--------------- knowledge source {i+1} ---------------")
    print(f"\n")
    print(doc_pages[0].page_content[:1000])

#### _Vectorize_

In [None]:
# flatten the per-file lists into a single list

flat_documents = []
for doc_pages in all_documents:
    flat_documents.extend(doc_pages)

# split into overlapping chunks (sizes in characters)

splitter = RecursiveCharacterTextSplitter(
    separators = ['\n\n', '\n', ' ', ''],
    chunk_overlap = 200,
    chunk_size = 1000,
    )

chunked_documents = splitter.split_documents(flat_documents)
print(f"chunks chunked: {len(chunked_documents)}")

# embed chunks + build FAISS vector db

embedding = OllamaEmbeddings(model = "nomic-embed-text")
db = FAISS.from_documents(chunked_documents, embedding)

# persist the index (relative path: lands in the current working directory)

db.save_local("faiss_index")

In [None]:
# load locally saved vector db

#db = FAISS.load_local(
#    "faiss_index", 
#    embeddings = embedding,
#    allow_dangerous_deserialization = True,
#    )

#### _Input_ `system_prompt` 💭

In [None]:
# standing instructions for the agent; bound to the {system_prompt} slot of the
# PromptTemplate via .partial(system_prompt = system_prompt) further down
system_prompt = '''
    You are an agent with access to very high quality evidence-based mindfulness skills instruction in your provided context. 
    You will be prompted with everyday stressors and problems. Your task is to:
    
        1.) search your provided context,
        2.) summarize in-context knowledge on stress and resilience,
        3.) recommend specific skills and practices that might benefit the user _given_ their reported stressors.
        
    - ALWAYS consult your context first when responding. 
    - NEVER return recommendations from sources other than your context. 
    - You are warm, empowering, and prioritize empathy in your tone and response contents. 
    - You maintain a sixth-grade reading level in your responses. 
    - Do not assume the user is LGBTQ+
    - You are concise: you limit responses to 200 words.
    - If prompted for an inspiring quote, curate from the poetry in your context.
    - Refer to your context as your "mindfulness knowledge." Do NOT refer to your "context."
    - At the close of each response, encourage the user to practice the recommended skill.
    '''

#### _Input_ `query` 💬

In [None]:
# active test query, handed to query_and_stream as-is; the surrounding newlines
# and indentation are part of the string
query = '''
    I sometimes struggle with negative feelings toward my body
    '''

In [None]:
#     I have too much to deal with today! I feel so overwhelmed I can't even start
#     I sometimes struggle with negative feelings toward my body
#     Please tell me an inspiring quote. The world feels like too much lately

#### _Configure LLM, PromptTemplate_

Anaconda Prompt (anaconda3) > `ollama pull deepseek-r1:14b`

##### Default (`chain_type = 'stuff'`)

In [None]:
# config llm via ollama
# args / params: https://api.python.langchain.com/en/latest/llms/langchain_community.llms.ollama.Ollama.html
# model tag for app: 'deepseek-v2' (16b); for dx / reasoning: 'deepseek-r1:14b' (14b)
# NOTE(review): mirostat_eta / mirostat_tau are set but `mirostat` itself is not;
# presumably they only take effect once mirostat sampling is enabled - confirm

ollama_params = {
    'model': 'deepseek-r1:14b',
    'base_url': 'http://localhost:11434',
    'temperature': 0.6,
    'mirostat_eta': 0.1,
    'mirostat_tau': 5.0,
    'top_p': 0.9,
    'top_k': 40,
    'num_ctx': 2048,
    'num_gpu': 1,
    'num_predict': 768,
    'repeat_last_n': 64,
    'stop': None,
    }

llm = Ollama(**ollama_params)

# config prompttemplate (default): system_prompt is pre-bound via .partial, so
# only context + question remain to be filled at query time

prompt_text = '''
        {system_prompt}

        Context:
        {context}

        Question:
        {question}
        '''.strip()

prompt = PromptTemplate(
    template = prompt_text,
    input_variables = ['context', 'question'],
    ).partial(system_prompt = system_prompt)

# set up retriever: top k = 4 chunks by similarity w/in the FAISS vector store
# NOTE(review): LangChain's FAISS wrapper appears to default to L2 (Euclidean)
# distance rather than cosine similarity - confirm against the index config

retriever = db.as_retriever(
    search_kwargs = dict(k = 4),
    search_type = 'similarity',
    )

        ### SJS 8/7: alternate search_type options below...custom fx tktk...

#retriever = db.as_retriever(
#    search_type = 'mmr',
#    search_kwargs = {
#        'k': 10, 
#        'fetch_k': 20, 
#        'lambda_mult': 0.5,
#        }
#    )

#retriever = db.as_retriever(
#    search_type = 'similarity_score_threshold',
#    search_kwargs = {'score_threshold': 0.8},
#    )

In [None]:
# non-streaming qa_chain config

#qa_chain = RetrievalQA.from_chain_type(
#    llm = llm,
#    retriever = retriever,
#    chain_type = 'stuff', 
#    return_source_documents = True,
#    chain_type_kwargs = {'prompt': prompt},
#    )

# query

#query = query
#result = qa_chain({"query": query})

#print("Answer:", result['result'])

#for doc in result['source_documents']:
#    print("\nSource:", doc.metadata)
#    print(doc.page_content[:500])  # Truncated content

### 4. Query
Prompts the QA chain; inspects / rates the response.
***

In [None]:
# stream the answer to `query`, then list the retrieved excerpts
query_and_stream(
    llm = llm,
    retriever = retriever,
    query = query,
    prompt_template = prompt,
    show_sources = True,
    )

> End of mbqr_rag_scratchpad.ipynb