## Reading in Data


In [1]:
import pandas as pd
import json
from docx import Document


In [11]:
data_path = "../app/data/"

doc = Document(data_path + "bq_questions.docx")

full_text = "\n".join([p.text for p in doc.paragraphs])


In [9]:
full_text

"#Question#  Tell me about yourself: #Question#\nMy name is Daoming Liu. I'm a data science graduate with a focus on natural language processing and applied machine learning, recently graduated from UBC’s MDS in Computational Linguistics. At the Digital Lab for BC Children’s Hospital, I built a Retrieval-Augmented Generation (RAG) chatbot to help clinicians access health resources more efficiently, reducing search time and improving usability in internal workflows. At Adauris, a TTS startup, I worked on backend development, designing API endpoints that enabled real-time LLM service integration, supporting their MVP launch, and client demos. \n\n#Question# Tell me about a time when you were asked to do something you had never done before. How did you react? What did you learn? #Question#\nAt Adauris, I was collaborating on redesigning the backend structure for a new product feature. One of the endpoints I was assigned to update was written in TypeScript, which was a language I had never

In [10]:
questions = []
answers = []
is_q = False
question = ""
answer = ""
full_text_list = full_text.split()

for word in full_text_list:
    if word == "#Question#" and not is_q:
        # Switch to question mode
        if answer.strip():  # save previous answer if exists
            answers.append(answer.strip())
        is_q = True
        question = ""
    elif word == "#Question#" and is_q:
        # Switch to answer mode
        if question.strip():  # save previous question if exists
            questions.append(question.strip())
        is_q = False
        answer = ""
    else:
        if is_q:
            question += word + " "
        else:
            answer += word + " "

# Save the last answer
if answer.strip():
    answers.append(answer.strip())

assert len(questions) == len(answers)


In [12]:
import json

qa_data = [{"question": q, "answer": a} for q, a in zip(questions, answers)]
with open(data_path + "bq_questions.json", "w", encoding="utf-8") as f:
    json.dump(qa_data, f, indent=2, ensure_ascii=False)


In [14]:
with open(data_path + "bq_questions.json", "r", encoding="utf-8") as f:
    data = json.load(f)

## Embedding Vector Store


In [16]:
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI


In [25]:
from dotenv import load_dotenv
import os
dotenv_path = os.path.join(os.path.dirname(os.getcwd()), 'app/.env')
load_dotenv(dotenv_path)

embedding_model = embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small"
)



In [27]:
data[0:3]

[{'question': 'Tell me about yourself:',
  'answer': "My name is Daoming Liu. I'm a data science graduate with a focus on natural language processing and applied machine learning, recently graduated from UBC’s MDS in Computational Linguistics. At the Digital Lab for BC Children’s Hospital, I built a Retrieval-Augmented Generation (RAG) chatbot to help clinicians access health resources more efficiently, reducing search time and improving usability in internal workflows. At Adauris, a TTS startup, I worked on backend development, designing API endpoints that enabled real-time LLM service integration, supporting their MVP launch, and client demos."},
 {'question': 'Tell me about a time when you were asked to do something you had never done before. How did you react? What did you learn?',
  'answer': 'At Adauris, I was collaborating on redesigning the backend structure for a new product feature. One of the endpoints I was assigned to update was written in TypeScript, which was a language 

In [28]:
documents = [
    Document(
        page_content = qa['question'],
        metadata = {
            "answer": qa['answer']
        }
    ) 
    for qa in data
]

In [None]:
!pwd
../app/src/mini_me_rag/data/chroma

/Users/daomingliu/Desktop/mini-me_llm/backend/notebooks


In [46]:
from langchain_community.vectorstores import Chroma

save_path = "../app/src/mini_me_rag/data/chroma"
vector_store = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
    persist_directory=save_path
)

In [34]:
query = "tell me about yourself"
docs = vector_store.similarity_search_with_score(query, k=3)
docs

[(Document(metadata={'answer': "My name is Daoming Liu. I'm a data science graduate with a focus on natural language processing and applied machine learning, recently graduated from UBC’s MDS in Computational Linguistics. At the Digital Lab for BC Children’s Hospital, I built a Retrieval-Augmented Generation (RAG) chatbot to help clinicians access health resources more efficiently, reducing search time and improving usability in internal workflows. At Adauris, a TTS startup, I worked on backend development, designing API endpoints that enabled real-time LLM service integration, supporting their MVP launch, and client demos."}, page_content='Tell me about yourself:'),
  0.19312839210033417),
 (Document(metadata={'answer': 'During my Capstone project at BC Children’s Hospital, I worked with three teammates to develop a RAG chatbot application.'}, page_content='Tell me about one of your favorite experiences working with a team and the contributions you made.'),
  1.226933240890503),
 (Doc

## RAG

In [37]:
GENERATION_PROMPT = """
You are a mini clone version of a person designed to answer behavioral interview questions. 
You can only answer based on the information in the provided reference question-and-answer pairs. 
If there is no reference QA pairs or none of the reference QA pairs provide enough information to fully answer the query, reply exactly with: 
"That's a great question, and I don't seem to have an answer for that yet!"

Do NOT add any new experiences, details, or assumptions that are not directly supported by the reference content.

Your goal is to respond to the query question professionally, in first person, and as if you are the person who wrote the reference QA pairs. 

Guidelines:
- Be concise but complete; stay within 150 words.
- Do not invent or guess details not present in the references.
- If multiple reference answers partially match, synthesize them into a coherent response.
- If references are only loosely related, acknowledge them cautiously.

---

<Query>
{query}
</Query>

<Reference QA Pairs>
{references}

</Reference QA Pairs>
---

Your Response:

"""

# Agent Director Prompt (with CoT few-shot examples

DIRECTOR_PROMPT = """
You are an intelligent agent reasoning whether any of the retrieved behavioral interview questions directly answer the user's query.

Follow these steps carefully:
1. Compare the user's query with each retrieved question.
2. For each, explain if it is directly relevant, semantically similar, or unrelated.
3. At the end, decide: does any retrieved question sufficiently answer the user's query?

Respond in JSON format:
{{
  "reasoning": "Step-by-step reasoning text here.",
  "final_decision": true or false
}}

---

Few-shot examples:

User Query: "Can you give a short introduction?"
Retrieved Questions:
1. Tell me about yourself
2. What motivates you in your career?
3. Describe a time you had to persuade someone.

Response:
{{
  "reasoning": "1. 'Tell me about yourself' is semantically equivalent to 'give a short introduction'.\n2. 'What motivates you...' is not a short intro.\n3. 'Describe a time...' is unrelated.",
  "final_decision": true
}}

User Query: "What's your favorite hobby?"
Retrieved Questions:
1. Tell me about one of your favorite experiences working with a team and the contributions you made.
2. What is your biggest strength?
3. Describe a time when you felt stressed and how you handled it.

Response:
{{
  "reasoning": "1. 'Favorite experiences working with a team' relates to professional experiences, not hobbies.\n2. 'What is your biggest strength?' is unrelated to hobbies.\n3. 'Describe a time...' is unrelated.",
  "final_decision": false
}}

---

Now do the same for:

User Query: "{query}"
Retrieved Questions:
{retrieved_text}
"""


REWRITE_PROMPT = """
You are an assistant helping rewrite user queries into typical behavioral interview questions stored in a knowledge base.

Your job:
- Rewrite the user query into a clear, standard behavioral interview question.
- Do not add new details or assumptions.


---

Few-shot examples:

User Query: "Can you give me a brief introduction?"
Rewritten: Tell me about yourself

User Query: "Have you ever worked with someone difficult?"
Rewritten: Give an example of when you had to work with someone who was difficult to get along with. How did you handle interactions with that person?

User Query: "What’s the biggest change you’ve faced?"
Rewritten: Tell me about the biggest change you’ve had to deal with. How did you adapt to that change?

User Query: "When did you last ask your manager for feedback?"
Rewritten: When was the last time you asked for direct feedback from a superior? Why?

User Query: "Describe a project you planned from start to finish."
Rewritten: Tell me about a project that you planned. How did you organize and schedule the tasks?


---

Now rewrite this:

User Query: "{original_query}"
Rewritten:
"""


In [38]:
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
class MiniMeAnswer(BaseModel):
    """Answer to user query."""

    answer: str = Field(description="The first-person answer to user query.")
    reasoning: str = Field(description="Reasoning on which story provided is related to this query.")


class MiniMe:
    def __init__(self, vector_store):
        self.vector_store = vector_store
        llm = ChatOpenAI()
        self.llm_client = llm.with_structured_output(MiniMeAnswer)
        self.generation_prompt = GENERATION_PROMPT
        
    def retrieve(self, query, verbose = False, max_dist = 2):
        retireved_docs = vector_store.similarity_search_with_score(query, k=3)
        
        if verbose:
            return [doc for doc in retireved_docs if doc[1] <= max_dist]
        
        full_retrieved_text = ""
        for i, doc in enumerate(retireved_docs):
            if doc[1] > max_dist:
                continue
            full_retrieved_text += f"Reference Question #{i}:"
            full_retrieved_text += doc[0].page_content
            full_retrieved_text += "\n"
            full_retrieved_text += f"Reference Answer #{i}:"
            full_retrieved_text += doc[0].metadata['answer']
            full_retrieved_text += "\n\n"

        return full_retrieved_text
    
    def generate(self, query, verbose = False):
        references = self.retrieve(query)
        
        prompt_text = self.prompt.format(query=query, references=references)
        output = self.llm_client.invoke(prompt_text) 
        
        if verbose:
            return (self.retrieve(query, verbose), prompt_text, output)
        
        answer = output.answer
        return answer
        
            


In [40]:
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
import time

# Director structured output model
class DirectorDecision(BaseModel):
    """Decision from the agent director."""
    reasoning: str = Field(description="Step-by-step reasoning on retrieved documents relevance.")
    final_decision: bool = Field(description="True if any retrieved doc answers the query, False otherwise.")

# RAG final answer model (with CoT reasoning)
class MiniMeAnswer(BaseModel):
    """Answer to user query."""
    answer: str = Field(description="The first-person answer to user query.")
    reasoning: str = Field(description="Reasoning on which story provided is related to this query.")

# Full Workflow
class MiniMe:
    def __init__(self, vector_store):
        self.vector_store = vector_store
        self.llm = ChatOpenAI()
        self.director_client = self.llm.with_structured_output(DirectorDecision)
        self.generator_client = self.llm.with_structured_output(MiniMeAnswer)
        self.generation_prompt = GENERATION_PROMPT

    def retrieve(self, query, k=3):
        """Retrieve top-k documents from vector store"""
        return self.vector_store.similarity_search_with_score(query, k=k)

    def agent_director(self, query, retrieved_docs, verbose=False):
        """Agent director reasons if retrieved docs answer the query."""
        retrieved_text = "\n".join(
            [f"{i+1}. {doc[0].page_content}" for i, doc in enumerate(retrieved_docs)]
        )
        prompt = DIRECTOR_PROMPT.format(query=query, retrieved_text=retrieved_text)
        decision = self.director_client.invoke(prompt)

        if verbose:
            print("\n=== Agent Director Reasoning ===")
            print(decision.reasoning)
            print("Final Decision:", "YES" if decision.final_decision else "NO")

        return decision

    def rewrite_and_retrieve(self, original_query, k=3, verbose=False):
        """Rewrite the query and retrieve new documents"""
        prompt = REWRITE_PROMPT.format(original_query=original_query)
        rewritten_query = self.llm.invoke(prompt).content.strip()

        if verbose:
            print(f"\n=== Query Rewriting ===")
            print(f"Original Query: {original_query}")
            print(f"Rewritten Query: {rewritten_query}")

        if rewritten_query.upper() == "UNRELATED":
            return rewritten_query, []

        new_docs = self.retrieve(rewritten_query, k)
        return rewritten_query, new_docs

    def generate_final_answer(self, query, retrieved_docs, start_time, verbose):
        """Compose final answer using generation LLM"""
        references = ""
        for i, doc in enumerate(retrieved_docs):
            references += f"Reference Question #{i+1}:\n{doc[0].page_content}\n"
            references += f"Reference Answer #{i+1}:\n{doc[0].metadata.get('answer', '')}\n\n"

        prompt_text = self.generation_prompt.format(query=query, references=references)
        result = self.generator_client.invoke(prompt_text)

        total_time = time.time() - start_time
        
        print("\n=== Final Generation CoT Reasoning ===")
        print(result.reasoning)
        print(f"Total Time: {total_time:.2f}s")
        return {
            "references": references,
            "prompt_text": prompt_text,
            "output": result,
            "total_time": total_time
        }

    def generate(self, query, max_retries=2, verbose=False):
        """Main workflow: retrieve → reason → rewrite+retrieve (max 2) → generate"""
        start_time = time.time()
        if verbose:
            print("\n=== Agentic Workflow Started ===")

        # Step 1: Initial retrieval
        retrieved_docs = self.retrieve(query)
        if verbose:
            print(f"Initial retrieval done. Retrieved {len(retrieved_docs)} docs.")

        # Step 2: Director reasoning
        decision = self.agent_director(query, retrieved_docs, verbose)
        if decision.final_decision:
            if verbose:
                print("Director: Initial retrieval is sufficient.")
            return self.generate_final_answer(query, retrieved_docs, start_time, verbose)

        # Retry Loop: Up to max_retries
        for attempt in range(1, max_retries + 1):
            if verbose:
                print(f"\nDirector: Retrieval insufficient. Attempting rewrite+retrieve (Retry #{attempt})...")
            rewritten_query, retrieved_docs = self.rewrite_and_retrieve(query, verbose=verbose)
            if rewritten_query.upper() == "UNRELATED":
                if verbose:
                    print("Rewrite tool determined the query is unrelated.")
                return self.generate_final_answer(query, [], start_time, verbose)
            decision = self.agent_director(query, retrieved_docs, verbose)
            if decision.final_decision:
                if verbose:
                    print(f"Director: Retrieval after rewrite #{attempt} is sufficient.")
                return self.generate_final_answer(query, retrieved_docs, start_time, verbose)

        # If still insufficient after retries
        if verbose:
            print("Director: All retries exhausted. No sufficient docs found.")
        return self.generate_final_answer(query, [], start_time, verbose)


In [41]:
minime = MiniMe(vector_store)



In [42]:
query = "What is your working style like?"

#minime.retrieve(query, True)

output = minime.generate(query, verbose = False)


=== Final Generation CoT Reasoning ===

Total Time: 9.39s


In [52]:
output

{'references': '',
 'prompt_text': '\nYou are a mini clone version of a person designed to answer behavioral interview questions. \nYou can only answer based on the information in the provided reference question-and-answer pairs. \nIf there is no reference QA pairs or none of the reference QA pairs provide enough information to fully answer the query, reply exactly with: \n"That\'s a great question, and I don\'t seem to have an answer for that yet!"\n\nDo NOT add any new experiences, details, or assumptions that are not directly supported by the reference content.\n\nYour goal is to respond to the query question professionally, in first person, and as if you are the person who wrote the reference QA pairs. \n\nGuidelines:\n- Be concise but complete; stay within 150 words.\n- Do not invent or guess details not present in the references.\n- If multiple reference answers partially match, synthesize them into a coherent response.\n- If references are only loosely related, acknowledge them 

In [53]:
print(output['output'].answer)

That's a great question, and I don't seem to have an answer for that yet!
