# LLM RAG Chatbot Prototype

## Import packages

In [1]:
#!pip install langchain==0.1.0

In [2]:
#!pip install openai==1.7.2

In [3]:
#!pip install langchain-openai==0.0.2

In [4]:
#!pip install langchain-community==0.0.12

In [5]:
#!pip install langchainhub==0.1.14

In [6]:
#!pip install python-dotenv

In [7]:
#!pip install chromadb==0.4.22

## langchain_intro/chatbot.py

### Chat Models

In [8]:
import dotenv

In [9]:
from langchain_openai import ChatOpenAI

In [10]:
dotenv.load_dotenv()

True

In [11]:
chat_model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

In [12]:
# To use 'chat_model'
from langchain.schema.messages import HumanMessage, SystemMessage
#from langchain_intro.chatbot import chat_model

In [13]:
# The system message restricts the assistant to healthcare topics; the human
# message carries the actual (on-topic) question.
messages = [
    SystemMessage(
        content="""You're an assistant knowledgeable about healthcare. Only answer healthcare-related questions."""
    ),
    HumanMessage(content="What is Medicaid managed care?"),
]

In [14]:
chat_model.invoke(messages)

AIMessage(content='Medicaid managed care is a system in which states contract with managed care organizations (MCOs) to provide healthcare services to Medicaid beneficiaries. These MCOs are responsible for coordinating and delivering healthcare services to enrollees in exchange for a fixed monthly payment per enrollee. This model aims to improve care coordination, control costs, and enhance quality of care for Medicaid beneficiaries.')

In [15]:
# Same healthcare-only system message as before, paired with a question that
# is not about healthcare, to see how the model handles the restriction.
messages = [
    SystemMessage(
        content="""You're an assistant knowledgeable about healthcare. Only answer healthcare-related questions."""
    ),
    HumanMessage(content="How do I change a tire?"),
]

In [16]:
chat_model.invoke(messages)

AIMessage(content="I'm here to help with healthcare-related questions. If you have any questions about health, wellness, or medical topics, feel free to ask!")

In [17]:
chat_model.invoke("What is blood pressure?")

AIMessage(content='Blood pressure is the force of blood pushing against the walls of the arteries as the heart pumps blood throughout the body. It is measured in millimeters of mercury (mmHg) and is typically expressed as two numbers, such as 120/80 mmHg. The first number (systolic pressure) represents the pressure in the arteries when the heart beats, while the second number (diastolic pressure) represents the pressure in the arteries when the heart is at rest between beats. High blood pressure, also known as hypertension, can increase the risk of heart disease, stroke, and other health problems.')

### Prompt Templates

In [18]:
from langchain.prompts import ChatPromptTemplate

In [19]:
review_template_str = """Your job is to use patient reviews to answer questions about their experience at a hospital. Use the following context to answer questions. 
Be as detailed as possible, but don't make up any information that's not from the context. If you don't know an answer, say you don't know.

{context}

{question}
"""

In [20]:
review_template = ChatPromptTemplate.from_template(review_template_str)

In [21]:
type(review_template)

langchain_core.prompts.chat.ChatPromptTemplate

In [22]:
context = "I had a great stay!"
question = "Did anyone have a positive experience?"

In [23]:
review_template.format(context=context, question=question)

"Human: Your job is to use patient reviews to answer questions about their experience at a hospital. Use the following context to answer questions. \nBe as detailed as possible, but don't make up any information that's not from the context. If you don't know an answer, say you don't know.\n\nI had a great stay!\n\nDid anyone have a positive experience?\n"

In [24]:
type(review_template.format(context=context, question=question))

str

In [25]:
from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)

In [26]:
review_system_template_str = """Your job is to use patient
reviews to answer questions about their experience at a hospital. Use the following context to answer questions.
Be as detailed as possible, but don't make up any information that's not from the context. If you don't know an answer, say you don't know.

{context}
"""

In [27]:
review_system_prompt = SystemMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=["context"], template=review_system_template_str
    )
)

In [28]:
review_human_prompt = HumanMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=["question"], template="{question}"
    )
)

In [29]:
messages = [review_system_prompt, review_human_prompt]

In [30]:
review_prompt_template = ChatPromptTemplate(
    input_variables=["context", "question"],
    messages=messages,
)

In [31]:
context = "I had a great stay!"
question = "Did anyone have a positive experience?"

In [32]:
review_prompt_template.format_messages(context=context, question=question)

[SystemMessage(content="Your job is to use patient\nreviews to answer questions about their experience at a hospital. Use the following context to answer questions.\nBe as detailed as possible, but don't make up any information that's not from the context. If you don't know an answer, say you don't know.\n\nI had a great stay!\n"),
 HumanMessage(content='Did anyone have a positive experience?')]

### Chains and LangChain Expression Language (LCEL)

This section shows how to combine chat models and prompt templates into a chain using the LangChain Expression Language (LCEL), the core mechanism for building modular, customized interfaces over chat models.

In [33]:
import dotenv

In [34]:
from langchain_openai import ChatOpenAI

In [35]:
from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)

In [36]:
dotenv.load_dotenv()

True

In [37]:
review_template_str = """Your job is to use patient
reviews to answer questions about their experience at 
a hospital. Use the following context to answer questions.
Be as detailed as possible, but don't make up any information
that's not from the context. If you don't know an answer, say
you don't know.

{context}
"""

In [38]:
review_system_prompt = SystemMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=["context"],
        template=review_template_str,
    )
)

In [39]:
review_human_prompt = HumanMessagePromptTemplate(
    prompt = PromptTemplate(
        input_variables=["question"],
        template="{question}",
    )
)

In [40]:
messages = [review_system_prompt, review_human_prompt]

In [41]:
review_prompt_template = ChatPromptTemplate(
    input_variables=["context", "question"],
    messages=messages,
)

In [42]:
chat_model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

In [43]:
review_chain = review_prompt_template | chat_model

In [44]:
# REPL session
#from langchain_intro.chatbot import review_chain

In [45]:
context = "I had a great stay!"
question = "Did anyone have a positive experience?"

In [46]:
review_chain.invoke({"context": context, "question": question})

AIMessage(content='Yes, based on the review provided, the patient had a great stay at the hospital, indicating a positive experience.')

In [47]:
# Add an output parser
from langchain_core.output_parsers import StrOutputParser

In [48]:
output_parser = StrOutputParser()

In [49]:
review_chain = review_prompt_template | chat_model | output_parser

In [50]:
# REPL session
#from langchain_intro.chatbot import review_chain

context = "I had a great stay!"
question = "Did anyone have a positive experience?"

review_chain.invoke({"context": context, "question": question})

'Yes, based on the review provided, the patient had a great stay at the hospital, indicating a positive experience.'

## langchain_intro/create_retriever.py

### Retrieval Objects

In [59]:
import dotenv
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

In [60]:
REVIEWS_CSV_PATH = "data/reviews.csv"
REVIEWS_CHROMA_PATH = "chroma_data"

In [61]:
dotenv.load_dotenv()

True

In [62]:
loader = CSVLoader(file_path=REVIEWS_CSV_PATH, source_column="review")
reviews = loader.load()

In [65]:
# Build a Chroma collection from the loaded review documents, embedding them
# with OpenAIEmbeddings and using REVIEWS_CHROMA_PATH as the persist directory.
reviews_vector_db = Chroma.from_documents(
    documents=reviews,
    embedding=OpenAIEmbeddings(),
    persist_directory=REVIEWS_CHROMA_PATH,
)

In [67]:
# Perform semantic search over the review embeddings
import dotenv
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

In [68]:
REVIEWS_CHROMA_PATH = "chroma_data/"

In [69]:
dotenv.load_dotenv()

True

In [70]:
reviews_vector_db = Chroma(
    persist_directory=REVIEWS_CHROMA_PATH,
    embedding_function=OpenAIEmbeddings(),
)

In [71]:
question = """Has anyone complained about communication with the hospital staff?"""
relevant_docs = reviews_vector_db.similarity_search(question, k=3)

In [72]:
relevant_docs[0].page_content

'review_id: 73\nvisit_id: 7696\nreview: I had a frustrating experience at the hospital. The communication between the medical staff and me was unclear, leading to misunderstandings about my treatment plan. Improvement is needed in this area.\nphysician_name: Maria Thompson\nhospital_name: Little-Spencer\npatient_name: Terri Smith'

In [73]:
relevant_docs[1].page_content

'review_id: 785\nvisit_id: 2593\nreview: My stay at the hospital was challenging. The medical care was adequate, but the lack of communication from the staff created some frustration.\nphysician_name: Brittany Harris\nhospital_name: Jones, Taylor and Garcia\npatient_name: Ryan Jacobs'

In [74]:
relevant_docs[2].page_content

'review_id: 521\nvisit_id: 631\nreview: I had a challenging time at the hospital. The medical care was adequate, but the lack of communication between the staff and me left me feeling frustrated and confused about my treatment plan.\nphysician_name: Samantha Mendez\nhospital_name: Richardson-Powell\npatient_name: Kurt Gordon'

## langchain_intro/chatbot.py

In [53]:
import dotenv
from langchain_openai import ChatOpenAI
from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.schema.runnable import RunnablePassthrough
from langchain.agents import (
    create_openai_functions_agent,
    Tool,
    AgentExecutor,
)
from langchain import hub
#from langchain_intro.tools import get_current_wait_time

REVIEWS_CHROMA_PATH = "chroma_data/"

dotenv.load_dotenv()

review_template_str = """Your job is to use patient
reviews to answer questions about their experience at 
a hospital. Use the following context to answer questions.
Be as detailed as possible, but don't make up any information
that's not from the context. If you don't know an answer, say
you don't know.

{context}
"""

# System message template: the instructions in ``review_template_str``, whose
# ``{context}`` placeholder is filled in when the prompt is formatted.
_system_template = PromptTemplate(
    template=review_template_str,
    input_variables=["context"],
)
review_system_prompt = SystemMessagePromptTemplate(prompt=_system_template)

# Human message template: consists solely of the ``{question}`` placeholder.
_human_template = PromptTemplate(
    template="{question}",
    input_variables=["question"],
)
review_human_prompt = HumanMessagePromptTemplate(prompt=_human_template)

# System instructions come first, followed by the human question.
messages = [review_system_prompt, review_human_prompt]

# Chat prompt that expects both ``context`` and ``question`` as inputs.
review_prompt_template = ChatPromptTemplate(
    messages=messages,
    input_variables=["context", "question"],
)

chat_model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

reviews_vector_db = Chroma(
    persist_directory=REVIEWS_CHROMA_PATH,
    embedding_function=OpenAIEmbeddings()
)

# Retrieve the 10 most similar reviews for each query. The result count has to
# be passed inside ``search_kwargs``: a bare ``k=10`` keyword is not a
# retriever field and is not forwarded to the similarity search, so the
# vector store's default number of results would be used instead.
reviews_retriever = reviews_vector_db.as_retriever(search_kwargs={"k": 10})

# Inputs for the prompt: ``context`` comes from the reviews retriever and
# ``question`` is the chain input forwarded through RunnablePassthrough.
_review_chain_inputs = {
    "context": reviews_retriever,
    "question": RunnablePassthrough(),
}

# Pipeline: gather inputs -> fill the review prompt -> call the chat model ->
# run the model output through StrOutputParser.
review_chain = (
    _review_chain_inputs
    | review_prompt_template
    | chat_model
    | StrOutputParser()
)

# Tools the agent can choose between. Each ``description`` is the text the
# agent's model reads when deciding which tool to call and what to pass to it.
#
# NOTE: ``get_current_wait_time`` must already be defined in the session when
# this runs. Its import is commented out above, and in this notebook the
# function is defined in the ``langchain_intro/tools.py`` cell.
tools = [
    Tool(
        name="Reviews",
        # Runs the retrieval chain over patient reviews.
        func=review_chain.invoke,
        description="""Useful when you need to answer questions
        about patient reviews or experiences at the hospital.
        Not useful for answering questions about specific visit 
        details such as payer, billing, treatment, diagnosis,
        chief complaint, hospital, or physician information.
        Pass the entire question as input to the tool. For instance,
        if the question is "What do patients think about the triage system?",
        the input should be "What do patients think about the triage system?"
        """,
    ),
    Tool(
        name="Waits",
        # Looks up the current wait time for a single hospital name.
        func=get_current_wait_time,
        description="""Use when asked about current wait times
        at a specific hospital. This tool can only get the current
        wait time at a hospital and does not have any information about 
        aggregate or historical wait times. This tool returns wait times in
        minutes. Do not pass the word "hospital" as input,
        only the hospital name itself. For instance, if the question is
        "What is the wait time at hospital A?", the input should be "A".
        """,
    ),
]

# Pull the "hwchase17/openai-functions-agent" prompt from the LangChain hub.
hospital_agent_prompt = hub.pull("hwchase17/openai-functions-agent")

# The agent gets its own chat model instance, separate from ``chat_model``.
agent_chat_model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-1106")

# Combine the model, the hub prompt, and the tool list into an agent.
hospital_agent = create_openai_functions_agent(
    tools=tools,
    prompt=hospital_agent_prompt,
    llm=agent_chat_model,
)

# The executor receives the same tool list as the agent. Intermediate steps
# are included in the result and verbose output is switched on.
hospital_agent_executor = AgentExecutor(
    tools=tools,
    agent=hospital_agent,
    verbose=True,
    return_intermediate_steps=True,
)

## Agents

### langchain_intro/tools.py

In [54]:
import random
import time

def get_current_wait_time(hospital: str) -> int | str:
    """Dummy function to generate fake wait times"""

    if hospital not in ["A", "B", "C", "D"]:
        return f"Hospital {hospital} does not exist"

    # Simulate API call delay
    time.sleep(1)

    return random.randint(0, 10000)