In [31]:
import os
import glob
import gradio as gr
import google.generativeai as genai
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
model=genai.GenerativeModel('gemini-2.0-flash')

In [32]:
glob.glob("knowledge-base/employees/*")


['knowledge-base/employees\\Alex Chen.md',
 'knowledge-base/employees\\Alex Harper.md',
 'knowledge-base/employees\\Alex Thomson.md',
 'knowledge-base/employees\\Avery Lancaster.md',
 'knowledge-base/employees\\Emily Carter.md',
 'knowledge-base/employees\\Emily Tran.md',
 'knowledge-base/employees\\Jordan Blake.md',
 'knowledge-base/employees\\Jordan K. Bishop.md',
 'knowledge-base/employees\\Maxine Thompson.md',
 'knowledge-base/employees\\Oliver Spencer.md',
 'knowledge-base/employees\\Samantha Greene.md',
 'knowledge-base/employees\\Samuel Trenton.md']

In [33]:
context = {}

employees = glob.glob("knowledge-base/employees/*")

for employee in employees:
    name = employee.split(' ')[-1][:-3]
    doc = ""
    with open(employee, "r", encoding="utf-8") as f:
        doc = f.read()
    context[name]=doc

In [34]:
context['Chen']


'# HR Record\n\n# Alex Chen\n\n## Summary\n- **Date of Birth:** March 15, 1990  \n- **Job Title:** Backend Software Engineer  \n- **Location:** San Francisco, California  \n\n## Insurellm Career Progression\n- **April 2020:** Joined Insurellm as a Junior Backend Developer. Focused on building APIs to enhance customer data security.\n- **October 2021:** Promoted to Backend Software Engineer. Took on leadership for a key project developing a microservices architecture to support the company\'s growing platform.\n- **March 2023:** Awarded the title of Senior Backend Software Engineer due to exemplary performance in scaling backend services, reducing downtime by 30% over six months.\n\n## Annual Performance History\n- **2020:**  \n  - Completed onboarding successfully.  \n  - Met expectations in delivering project milestones.  \n  - Received positive feedback from the team leads.\n\n- **2021:**  \n  - Achieved a 95% success rate in project delivery timelines.  \n  - Awarded "Rising Star" a

In [35]:
context["Lancaster"]


"# Avery Lancaster\n\n## Summary\n- **Date of Birth**: March 15, 1985  \n- **Job Title**: Co-Founder & Chief Executive Officer (CEO)  \n- **Location**: San Francisco, California  \n\n## Insurellm Career Progression\n- **2015 - Present**: Co-Founder & CEO  \n  Avery Lancaster co-founded Insurellm in 2015 and has since guided the company to its current position as a leading Insurance Tech provider. Avery is known for her innovative leadership strategies and risk management expertise that have catapulted the company into the mainstream insurance market.  \n\n- **2013 - 2015**: Senior Product Manager at Innovate Insurance Solutions  \n  Before launching Insurellm, Avery was a leading Senior Product Manager at Innovate Insurance Solutions, where she developed groundbreaking insurance products aimed at the tech sector.  \n\n- **2010 - 2013**: Business Analyst at Edge Analytics  \n  Prior to joining Innovate, Avery worked as a Business Analyst, focusing on market trends and consumer preferenc

In [36]:
products = glob.glob("knowledge-base/products/*")

for product in products:
    name = product.split(os.sep)[-1][:-3]
    doc = ""
    with open(product, "r", encoding="utf-8") as f:
        doc = f.read()
    context[name]=doc

In [37]:
context.keys()

dict_keys(['Chen', 'Harper', 'Thomson', 'Lancaster', 'Carter', 'Tran', 'Blake', 'Bishop', 'Thompson', 'Spencer', 'Greene', 'Trenton', 'Carllm', 'Homellm', 'Markellm', 'Rellm'])

In [38]:
system_message = "You are an expert in answering accurate questions about Insurellm, the Insurance Tech company. Give brief, accurate answers. If you don't know the answer, say so. Do not make anything up if you haven't been provided with relevant context."

In [39]:
def get_relevant_context(message):
    relevant_context = []
    for context_title, context_details in context.items():
        if context_title.lower() in message.lower():
            relevant_context.append(context_details)
    return relevant_context   

In [40]:
get_relevant_context("Who is lancaster?")

["# Avery Lancaster\n\n## Summary\n- **Date of Birth**: March 15, 1985  \n- **Job Title**: Co-Founder & Chief Executive Officer (CEO)  \n- **Location**: San Francisco, California  \n\n## Insurellm Career Progression\n- **2015 - Present**: Co-Founder & CEO  \n  Avery Lancaster co-founded Insurellm in 2015 and has since guided the company to its current position as a leading Insurance Tech provider. Avery is known for her innovative leadership strategies and risk management expertise that have catapulted the company into the mainstream insurance market.  \n\n- **2013 - 2015**: Senior Product Manager at Innovate Insurance Solutions  \n  Before launching Insurellm, Avery was a leading Senior Product Manager at Innovate Insurance Solutions, where she developed groundbreaking insurance products aimed at the tech sector.  \n\n- **2010 - 2013**: Business Analyst at Edge Analytics  \n  Prior to joining Innovate, Avery worked as a Business Analyst, focusing on market trends and consumer preferen

In [41]:
get_relevant_context("Who is Avery and what is carllm?")

['# Product Summary\n\n# Carllm\n\n## Summary\n\nCarllm is an innovative auto insurance product developed by Insurellm, designed to streamline the way insurance companies offer coverage to their customers. Powered by cutting-edge artificial intelligence, Carllm utilizes advanced algorithms to deliver personalized auto insurance solutions, ensuring optimal coverage while minimizing costs. With a robust infrastructure that supports both B2B and B2C customers, Carllm redefines the auto insurance landscape and empowers insurance providers to enhance customer satisfaction and retention.\n\n## Features\n\n- **AI-Powered Risk Assessment**: Carllm leverages artificial intelligence to analyze driver behavior, vehicle conditions, and historical claims data. This enables insurers to make informed decisions and set competitive premiums that reflect true risk profiles.\n\n- **Instant Quoting**: With Carllm, insurance companies can offer near-instant quotes to customers, enhancing the customer exper

In [None]:
class GeminiEmbeddingFunction(EmbeddingFunction):
    def __call__(self, input: Documents) -> Embeddings:
        model = "models/embedding-001"
        title = "embedding-doc"

        embeddings = []
        for doc in input:
            response = genai.embed_content(
                model=model,
                content=doc,
                task_type="retrieval_document",
                title=title
            )
            embeddings.append(response["embedding"])

        return embeddings

In [45]:
import chromadb

def create_chroma_db(documents, name):
    chroma_client = chromadb.Client()
    db = chroma_client.create_collection(
        name=name,
        embedding_function=GeminiEmbeddingFunction()
    )

    for i, doc in enumerate(documents):
        db.add(documents=[doc], ids=[str(i)])
    
    return db

md_files = glob.glob("knowledge-base/employees/*.md")

# Read the content from each markdown file
documents = []
for file_path in md_files:
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()
        documents.append(content)


In [48]:
import chromadb

# Create ChromaDB client
chroma_client = chromadb.Client()

# Create collection with Gemini embedding function
db = chroma_client.create_collection(
    name="employee-knowledge-db",
    embedding_function=GeminiEmbeddingFunction()
)

# Add documents to ChromaDB collection
for i, doc in enumerate(documents):
    db.add(
        documents=[doc],
        ids=[f"doc_{i}"]
    )


  embedding_function=GeminiEmbeddingFunction()


InternalError: Collection [employee-knowledge-db] already exists

In [None]:
def get_relevant_passage(query, db):
    passage = db.query(query_texts=[query], n_results=1)['documents'][0][0]
    return passage


passage = get_relevant_passage(query, db)
passage


'# HR Record\n\n# Emily Tran\n\n## Summary\n- **Date of Birth:** March 18, 1991  \n- **Job Title:** Digital Marketing Specialist  \n- **Location:** San Francisco, CA  \n\n---\n\n## Insurellm Career Progression\n- **February 2020 - Present**: Digital Marketing Specialist  \n   - Emily Tran has been pivotal in enhancing Insurellm\'s online presence through targeted social media campaigns and SEO strategies.\n   - Successfully managed a team of interns for the \'Spring Into Safety\' initiative, increasing customer engagement by 35%.\n\n- **June 2018 - January 2020**: Marketing Coordinator  \n  - Assisted in the development and execution of marketing campaigns to promote Insurellm\'s products.\n  - Collected and analyzed data on customer demographics to inform Insurellm’s marketing strategies.\n\n- **January 2017 - May 2018**: Marketing Intern  \n  - Supported the Marketing team by collaborating on content creation and digital advertising projects.\n  - Gained hands-on experience with mark

In [None]:
def make_prompt(query, relevant_passage):
    passage = relevant_passage.replace('"', '').replace("'", "").replace("\n", " ")
    return f"""
You are a helpful assistant. Use the passage to answer clearly.
QUESTION: {query}
PASSAGE: {passage}
ANSWER:
"""

In [None]:
query = "How do you use the touchscreen in the Google car?"
prompt = make_prompt(query, passage)


In [None]:
def chat(message, history):
    try:
        context = get_relevant_passage(message)
        prompt = make_prompt(message, context)
        response = model.generate_content(prompt)
        return f"**Prompt Used:**\n{prompt}\n\n**Gemini Answer:**\n{response.text}"
    except Exception as e:
        return f" **Error:** {e}"

# 4. Launch chatbot
gr.ChatInterface(fn=chat, title="Gemini RAG Chatbot").launch()

  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


