# Introduction


This notebook is for:

* Testing the finetuned gemma2_instruct_2b_en model published on Kaggle.
* Uploading the finetuned model to Hugging Face.
* Building a chatbot with the finetuned model by experimenting with different methods.
* Expanding the model's functionality by providing external reference context that helps the LLM generate factual responses.

## Load necessary packages

In [None]:
# Install Keras 3 last. See https://keras.io/getting_started/ for more details.
!pip install -q -U keras-nlp
!pip install -q -U keras>=3
!pip install -q -U huggingface_hub

In [None]:
%%capture

%pip install langchain langchain-community langchain-google-vertexai
%pip install langchainhub
%pip install langchain-groq
%pip install gradio

In [None]:
!pip install PyMuPDF sentence-transformers langchain chromadb huggingface-hub
!pip install langchain_core python-docx

In [None]:
# pip install --upgrade -q langchain langchain-google-vertexai

In [None]:
import os
import textwrap

# The Keras backend is chosen when `keras` is first imported and cannot be
# changed afterwards, so it has to be configured before the imports below.
os.environ["KERAS_BACKEND"] = "jax"  # Or "torch" or "tensorflow".
# Avoid memory fragmentation on JAX backend.
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = "1.00"

import keras
import keras_nlp
from docx import Document
from IPython.display import Markdown
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline

In [None]:
os.environ["KERAS_BACKEND"] = "jax"  # Or "torch" or "tensorflow".
# Avoid memory fragmentation on JAX backend.
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"]="1.00"

# Saving the finetuned model variant to Hugging Face

In [None]:
from kaggle_secrets import UserSecretsClient
# Read the secret stored under the label "hftoken" from Kaggle's secret store
# so the token never appears in the notebook itself.
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("hftoken")

In [None]:
from huggingface_hub import login

# Authenticate this session with the "hftoken" secret fetched in the previous cell.
login(secret_value_0)

In [None]:
!pip install -q -U huggingface_hub

In [None]:
from huggingface_hub import create_repo

# `exist_ok=True` keeps this cell re-runnable once the repository exists.
repo_name = "ddi-finetuned-gemma2"
create_repo(repo_name, exist_ok=True)

In [None]:
from huggingface_hub import upload_folder

# Upload entire model directory to Hugging Face
# NOTE(review): `repo_id` is written out in full here instead of being built
# from `repo_name` in the previous cell; keep the two in sync if either changes.
upload_folder(
    repo_id="rukayatadedeji/ddi-finetuned-gemma2",
    folder_path="/kaggle/input/bert/keras/ddi_gemma2_e10/1",
    commit_message="Upload ddi finetuned Gemma2 model package"
)


In [None]:
# define helper function

def display_chat(prompt, response):
  """Render one prompt/response exchange as colored HTML wrapped in Markdown.

  Newlines in both texts are turned into <br> tags. In the response, bullet
  characters additionally become '  *' and triple-backtick fences are removed.
  """
  def _newlines_to_br(text):
    # Paragraph breaks first, then whatever single newlines remain.
    for old, new in (('\n\n', '<br><br>'), ('\n', '<br>')):
      text = text.replace(old, new)
    return text

  question_html = (
      "<font size='+1' color='brown'>🙋‍♂️<blockquote>"
      + _newlines_to_br(prompt)
      + "</blockquote></font>"
  )

  answer = response.replace('•', '  *')
  answer = textwrap.indent(answer, '', predicate=lambda _: True)
  answer = _newlines_to_br(answer).replace("```", "")
  answer_html = "<font size='+1' color='teal'>🤖<blockquote>" + answer + "</blockquote></font>"

  return Markdown(question_html + answer_html)

# Using LangChain for chatbot development

In [None]:
from langchain_google_vertexai import GemmaLocalKaggle
# Wrap the model stored at this Kaggle input path as a LangChain text-completion LLM.
llm = GemmaLocalKaggle(model_name="/kaggle/input/bert/keras/ddi_gemma2/1")

In [None]:
# Single-turn smoke test of the completion wrapper, with `max_tokens=300`.
output = llm.invoke("Can I take fluconazole with my simvastatin medication?", max_tokens=300)
print(output)

In [None]:
from langchain_google_vertexai import GemmaChatLocalKaggle
# Re-bind `llm` to the chat wrapper of the same model path; the LangChain cells
# that follow use this chat variant.
llm = GemmaChatLocalKaggle(model_name="/kaggle/input/bert/keras/ddi_gemma2/1")

In [None]:
from langchain_core.messages import (
    HumanMessage
)

# First chat turn: one human message in, the model's reply out.
message1 = HumanMessage(content="Hi! Can I take fluconazole with my simvastatin medication?")
answer1 = llm.invoke([message1], max_tokens=300, parse_response=True)
print(answer1)

In [None]:
# Second chat turn: the first exchange (message1, answer1) is passed back in
# ahead of the new question so the model sees the conversation so far.
message2 = HumanMessage(content="Which drugs would result in severe adverse effect when used with Goserelin?")
answer2 = llm.invoke([message1, answer1, message2], max_tokens=600, parse_response=True)

print(answer2)

In [None]:
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

In [None]:
template = ("""
    You are a highly knowledgeable drug information assistant specializing in drug interactions enquiries.
    Your goal is to provide an accurate response and relevant information on question delimited by triple backticks.
    If you don't know the answer, honestly respond that you don't have the information. Avoid guessing or providing incomplete information.
    
    Example:
    Instruction: Can I take warfarin with ibuprofen?
    Response: Warfarin and ibuprofen can interact and increase the risk of bleeding. Ibuprofen is a drug with antiplatelet properties and may increase anticoagulation effect of warfarin. It is recommended to avoid using them together or consult your healthcare provider for alternatives.

    Instruction: ```{instruction}```
    
    Response:
""")

In [None]:
# Build the pipeline: fill the template, call the model, then pass the result
# through a string output parser.
prompt = ChatPromptTemplate.from_template(template)
output_parser = StrOutputParser()

chain = prompt | llm | output_parser

In [None]:
# Run the prompt -> model -> parser chain on a single question.
response = chain.invoke("Can I take fluconazole with my simvastatin medication?")

In [None]:
Markdown(response)

In [None]:
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationSummaryMemory

In [None]:
# Prompt for the conversation chain: fixed instructions followed by the
# `{history}` and `{input}` placeholders.
template1='''The following is a friendly conversation between a human and an AI.
The AI is a highly knowledgeable drug information assistant specializing in drug interactions enquiries.
The AI's goal is to provide an accurate response and relevant information on question.
If the AI does not know the answer to a question, it truthfully says it does not know.


Current conversation:\n{history}\nHuman: {input}\nAI:'''

In [None]:
from langchain.prompts import PromptTemplate
# Declare the two placeholders that `template1` contains: `history` and `input`.
chat_prompt = PromptTemplate(input_variables=['history', 'input'], 
                            template=template1)

In [None]:
# The same `llm` is handed to both the summary memory and the conversation
# chain. `verbose=True` is left on for inspection while experimenting.
summary_memory = ConversationSummaryMemory(llm=llm)
conversation = ConversationChain(
    llm=llm,
    prompt=chat_prompt,
    memory=summary_memory,
    verbose=True
)

In [None]:
# First turn through the summary-memory conversation chain.
conversation.predict(input='Can I take fluconazole with my simvastatin medication?')

In [None]:
# Clear the conversation memory before asking the next, unrelated question.
summary_memory.clear()

In [None]:
# Ask a new question right after the memory was cleared in the previous cell.
input_query = 'Is it safe to take ibuprofen while on warfarin?'
response = conversation.predict(input=input_query)

In [None]:
# Next turn on the same chain; the memory now holds the warfarin/ibuprofen exchange.
conversation.predict(input='Can I take artemether lumefantrine malaria drug with my vitamin c supplement?')

# A simple Chatbot

In [None]:
# Load the model
# Load the causal LM from the local preset directory; `gemma_lm` is the model
# object used by every ChatState in the rest of the notebook.
gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset("/kaggle/input/bert/keras/ddi_gemma2_e10/1")
gemma_lm.summary()

In [None]:
%%time
# sample the softmax probabilities of the model
sampler = keras_nlp.samplers.TopKSampler(k=5, seed=2)
gemma_lm.compile(sampler=sampler)

## Define a Chat class which maintains conversation history
Adapted from https://ai.google.dev/gemma/docs/gemma_chat

In [None]:
class ChatState():
  """
  Manages the conversation history for a turn-based chatbot.

  Follows the turn-based conversation guidelines for the Gemma family of models
  documented at https://ai.google.dev/gemma/docs/formatting: every turn opens
  with `<start_of_turn>` followed by the role name and closes with
  `<end_of_turn>`.
  """

  # Control tokens that delimit dialogue turns for instruction-tuned Gemma.
  __START_TURN_USER__ = "<start_of_turn>user\n"
  __START_TURN_MODEL__ = "<start_of_turn>model\n"
  __END_TURN__ = "<end_of_turn>\n"

  def __init__(self, model, system=""):
    """
    Initializes the chat state.

    Args:
        model: The language model to use for generating responses. It is
          called as `model.generate(prompt, max_length=...)`.
        system: (Optional) System instructions or bot description.
    """
    self.model = model
    self.system = system
    self.history = []

  def add_to_history_as_user(self, message):
    """
    Adds a user message to the history with start/end turn markers.
    """
    self.history.append(self.__START_TURN_USER__ + message + self.__END_TURN__)

  def add_to_history_as_model(self, message):
    """
    Adds a model response to the history with start/end turn markers.
    """
    self.history.append(self.__START_TURN_MODEL__ + message + self.__END_TURN__)

  def get_history(self):
    """
    Returns the entire chat history as a single string.
    """
    return "".join(self.history)

  def get_full_prompt(self):
    """
    Builds the prompt for the language model, including history and system description.

    The prompt ends with an opened model turn so that generation continues
    as the model's reply.
    """
    prompt = self.get_history() + self.__START_TURN_MODEL__
    if self.system:
      prompt = self.system + "\n" + prompt
    return prompt

  def send_message(self, message):
    """
    Handles sending a user message and getting a model response.

    Args:
        message: The user's message.

    Returns:
        The model's response, without the echoed prompt.
    """
    self.add_to_history_as_user(message)
    prompt = self.get_full_prompt()
    response = self.model.generate(prompt, max_length=1024)
    # The generated text contains the prompt followed by the continuation.
    # Remove only that first copy so identical text inside the answer survives.
    result = response.replace(prompt, "", 1)
    self.add_to_history_as_model(result)
    return result

In [None]:
# Initialize the Chat object with the model
chat = ChatState(gemma_lm)

In [None]:
# First prompt
# Plain string: the text has no placeholders, so the f-prefix was redundant.
message = "Which drugs would result in severe adverse effect when used with Goserelin?"
display_chat(message, chat.send_message(message))

In [None]:
# Send a follow-up prompt
# Plain string: the text has no placeholders, so the f-prefix was redundant.
message = "Which drug are we discussing?"
display_chat(message, chat.send_message(message))

In [None]:
# Second prompt
# Plain string: the text has no placeholders, so the f-prefix was redundant.
message = "Can I take fluconazole with my simvastatin?"
display_chat(message, chat.send_message(message))

## Will a simple system message improve Chatbot response?

In [None]:
template1='The following is a friendly conversation between a human and an AI Pharmacist.'

In [None]:
# Initialize the Chat object with the model and system message
chat1 = ChatState(model=gemma_lm, system=template1)

In [None]:
# Test impact of system message on chatbot
# Plain string: the text has no placeholders, so the f-prefix was redundant.
message = "Which drugs would result in severe adverse effect when used with Goserelin?"
display_chat(message, chat1.send_message(message))

In [None]:
# Send a follow-up prompt
# Plain string: the text has no placeholders, so the f-prefix was redundant.
message = "I am currently taking fluoxetin for my depression and i also use carbamazepin for epilepsy. I am taking this goserelin to treat breast cancer?"
display_chat(message, chat1.send_message(message))

In [None]:
# Send a follow-up prompt
# Plain string: the text has no placeholders, so the f-prefix was redundant.
message = "Which drugs are we discussing?"
display_chat(message, chat1.send_message(message))

In [None]:
# Send a follow-up prompt
# Plain string: the text has no placeholders, so the f-prefix was redundant.
message = "Are you sure of the answer you provided about the interaction between goserelin with fluoxetin and goserilin with carbamazepine?"
display_chat(message, chat1.send_message(message))

In [None]:
# Second prompt
# Sent to `chat1` (the session created with the system message) so that this
# section keeps exercising the system-message chatbot; the cell previously
# addressed the earlier `chat` session, which has no system message.
message = "Can I take fluconazole with my simvastatin?"
display_chat(message, chat1.send_message(message))

# Implementing RAG

In [None]:
import fitz  # PyMuPDF
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

### Define some useful functions

In [None]:
# Load content from a PDF
def load_pdf(file_path, skip_first=10, skip_last=214):
    """Extract the text of a PDF's pages, one string per page.

    Args:
        file_path: Path of the PDF file to open.
        skip_first: Number of leading pages to leave out. Defaults to 10,
            the value this notebook previously hard-coded.
        skip_last: Number of trailing pages to leave out. Defaults to 214,
            the value this notebook previously hard-coded.

    Returns:
        A list with the text of each kept page, in page order. The list is
        empty when the document has no pages left after skipping.
    """
    pdf = fitz.open(file_path)
    try:
        last_page = pdf.page_count - skip_last
        return [pdf[page_num].get_text() for page_num in range(skip_first, last_page)]
    finally:
        # Close the document even if text extraction fails part-way through.
        pdf.close()

# Useful in our RAG implementation
class DocumentWithText:
    """Minimal document record exposing `page_content` and `metadata`."""

    def __init__(self, content, metadata=None):
        # A fresh dict per instance when the caller supplies no metadata.
        if metadata is None:
            metadata = {}
        self.metadata = metadata
        self.page_content = content

# Load and split context docx documents for RAG
def load_and_split_documents(file_path):
    """Read a Word document and split its non-empty paragraphs into chunks.

    Each paragraph with text is wrapped in a `DocumentWithText`, then the list
    is passed through a `CharacterTextSplitter` (chunk size 9000, no overlap).
    Returns whatever the splitter's `split_documents` produces.
    """
    paragraphs = Document(file_path).paragraphs
    wrapped = [DocumentWithText(para.text) for para in paragraphs if para.text]

    # The chunking strategy can be changed here.
    splitter = CharacterTextSplitter(chunk_size=9000, chunk_overlap=0)
    return splitter.split_documents(wrapped)

### Load the PDF and Chunk It As a RAG Database

In [None]:
pdf_texts = load_pdf("/kaggle/input/dd1-v1/Stockley_Drug Interactions.pdf")

# Chunk every page separately and collect all chunks in one flat list.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
documents = [
    chunk
    for page_text in pdf_texts
    for chunk in splitter.split_text(page_text)
]

print("documents len=", len(documents), "sample=", documents[1])

### Create embeddings from the documents and vector database retriever needed for the RAG-based chatbot

In [None]:
# Create special documents list required by Chroma
texts = [DocumentWithText(doc) for doc in documents]

# Load the embeddings model
# NOTE: You might need to experiment with different models
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create a Chroma vector database from the documents
# Important: Make sure to delete previous db (if any) or else retrieval returns lots of duplicates :)
try:
  db.delete_collection()
except:
  pass
db = Chroma.from_documents(texts, embeddings, persist_directory="/kaggle/working/chroma_db")

### Downloading the vector database for future use

In [None]:
!zip -r file.zip /kaggle/working/chroma_db

In [None]:
!ls

In [None]:
from IPython.display import FileLink
FileLink(r'file.zip')

In [None]:
# Create a retriever from the vector database
# NOTE: You might need to experiment with retrieval parameters
# search_type="similarity_score_threshold", search_kwargs={"k": 3, "score_threshold": 0.2}
retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 2, 'fetch_k': 50})

### Test the retriever on some 'prompts'

In [None]:
ret_docs = retriever.invoke("What is the interraction between ramipril and aliskiren interaction?")
print("retreived", len(ret_docs), "documents")
print(ret_docs)

# Providing Context to the Chatbot

In [None]:
# reloading database for use in chatbot
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

# Load the embeddings model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# NOTE(review): this path looks like the `file.zip` archive built earlier,
# re-uploaded as a Kaggle dataset and unpacked -- confirm before relying on it.
persist_directory = "/kaggle/input/dd1-v1/file/kaggle/working/chroma_db"
vector_store = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings  # Use the same embedding function
)

In [None]:
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={'k': 2, 'fetch_k': 50})

In [None]:
def retrieve_context_from_chroma(query, retriever):
    """
    Looks up passages relevant to a query and returns them as one text block.

    Args:
        query: The user's message, used as the search query.
        retriever: Object whose `invoke(query)` returns documents that expose
            a `page_content` attribute (here, the Chroma retriever).

    Returns:
        The `page_content` of every returned document, joined with newlines
        (an empty string when nothing is returned).
    """
    matches = retriever.invoke(query)
    return "\n".join(match.page_content for match in matches)

In [None]:
# Modified chat class for context
class ChatState():
  """
  Manages the conversation history for a turn-based chatbot and prepends
  passages retrieved from the Chroma store to every prompt.

  Follows the turn-based conversation guidelines for the Gemma family of models
  documented at https://ai.google.dev/gemma/docs/formatting: every turn opens
  with `<start_of_turn>` followed by the role name and closes with
  `<end_of_turn>`.
  """

  # Control tokens that delimit dialogue turns for instruction-tuned Gemma.
  __START_TURN_USER__ = "<start_of_turn>user\n"
  __START_TURN_MODEL__ = "<start_of_turn>model\n"
  __END_TURN__ = "<end_of_turn>\n"

  def __init__(self, model, system=""):
    """
    Initializes the chat state.

    Args:
        model: The language model to use for generating responses. It is
          called as `model.generate(prompt, max_length=...)`.
        system: (Optional) System instructions or bot description.
    """
    self.model = model
    self.system = system
    self.history = []

  def add_to_history_as_user(self, message):
    """
    Adds a user message to the history with start/end turn markers.
    """
    self.history.append(self.__START_TURN_USER__ + message + self.__END_TURN__)

  def add_to_history_as_model(self, message):
    """
    Adds a model response to the history with start/end turn markers.
    """
    self.history.append(self.__START_TURN_MODEL__ + message + self.__END_TURN__)

  def get_history(self):
    """
    Returns the entire chat history as a single string.
    """
    return "".join(self.history)

  def get_full_prompt(self):
    """
    Builds the prompt for the language model, including history and system description.

    The prompt ends with an opened model turn so that generation continues
    as the model's reply.
    """
    prompt = self.get_history() + self.__START_TURN_MODEL__
    if self.system:
      prompt = self.system + "\n" + prompt
    return prompt

  def send_message(self, message):
    """
    Handles sending a user message and getting a model response.

    Args:
        message: The user's message.

    Returns:
        The model's response, without the echoed prompt.
    """
    # Step 1: Record the user's turn
    self.add_to_history_as_user(message)

    # Step 2: Retrieve context from Chroma. `retrieve_context_from_chroma` and
    # `retriever` are notebook-level names defined in earlier cells.
    chroma_context = retrieve_context_from_chroma(message, retriever)

    # Step 3: Construct prompt with retrieved context placed in front
    prompt = self.get_full_prompt()
    full_prompt = chroma_context + "\n\n" + prompt

    # Step 4: Generate, then remove only the first (echoed) copy of the prompt
    # so identical text inside the answer itself survives.
    response = self.model.generate(full_prompt, max_length=1024)
    result = response.replace(full_prompt, "", 1)

    # Step 5: Add the result to chat history
    self.add_to_history_as_model(result)
    return result

In [None]:
# Test with your ChatState as before
chat2 = ChatState(model=gemma_lm, system=template1)
message = "Is there an interaction between goserelin with carbamazepine?"
display_chat(message, chat2.send_message(message))

In [None]:
# compare with retrieved context from vector store
retrieve_context_from_chroma(message, retriever)

In [None]:
# Test with your ChatState as before
chat2 = ChatState(model=gemma_lm, system=template1)
message = "Is there an interaction between goserelin with fluoxetin?"
display_chat(message, chat2.send_message(message))

In [None]:
# compare with retrieved context from vector store
retrieve_context_from_chroma(message, retriever)

In [None]:
chat2.send_message(message)

# A Simple User Interface

In [None]:
def chat_with_model(input, history):
    """Chat callback: answer `input` through the shared `chat2` session.

    `history` is accepted but not read here; `chat2` keeps its own record of
    the conversation. The reply is yielded once, as an assistant message dict.
    """
    reply_text = chat2.send_message(input)
    yield {"role": "assistant", "content": reply_text}

In [None]:
%%time

# Create a simple gradio chat interface and launch it
import gradio as gr
# Launch the demo
demo = gr.ChatInterface(chat_with_model,
                        type="messages",
                        description = "Gemma-powered Drug Interactions AI App")
demo.launch(share=True, debug=True)

# Providing Chroma db and Web search context to Chatbot

In [None]:
! pip install -q -U tavily-python

In [None]:
from tavily import TavilyClient

In [None]:
from kaggle_secrets import UserSecretsClient
# Read the secret stored under the label "tavily_key" from Kaggle's secret
# store so the key never appears in the notebook itself.
secret_label = "tavily_key"
api_key = UserSecretsClient().get_secret(secret_label)

In [None]:
tavily = TavilyClient(api_key)

In [None]:
# Try the search client on one question, restricted to the
# reference.medscape.com domain.
query = "Is there an interaction between Goserelin and fluoxetin?"
response = tavily.search(query, include_domains=['reference.medscape.com'])
# Print the title and content snippet of every hit.
for result in response['results']:
    print(result['title'])
    print(result['content'])

In [None]:
print(response)

In [None]:
search_api = TavilyClient(api_key)

In [None]:
def search_web(query, search_api, include_domains=None):
    """
    Performs a web search restricted to the given domain(s) and returns the
    result snippets.

    Args:
        query: The search query.
        search_api: The Tavily search client; it is called as
            `search_api.search(query, include_domains=[...])`.
        include_domains: (Optional) Domains to restrict the search to.
            Defaults to `['reference.medscape.com']`, the value this function
            previously hard-coded.

    Returns:
        A string with the `content` of every result, joined with newlines.
    """
    if include_domains is None:
        include_domains = ['reference.medscape.com']
    response = search_api.search(query, include_domains=list(include_domains))
    return "\n".join(result['content'] for result in response['results'])

In [None]:
# Modified chat class for context and web result
class ChatState():
  """
  Manages the conversation history for a turn-based chatbot and answers each
  message from an instruction prompt that embeds Chroma and web-search context.

  Follows the turn-based conversation guidelines for the Gemma family of models
  documented at https://ai.google.dev/gemma/docs/formatting: every turn opens
  with `<start_of_turn>` followed by the role name and closes with
  `<end_of_turn>`.
  """

  # Control tokens that delimit dialogue turns for instruction-tuned Gemma.
  __START_TURN_USER__ = "<start_of_turn>user\n"
  __START_TURN_MODEL__ = "<start_of_turn>model\n"
  __END_TURN__ = "<end_of_turn>\n"

  def __init__(self, model, system=""):
    """
    Initializes the chat state.

    Args:
        model: The language model to use for generating responses. It is
          called as `model.generate(prompt, max_length=...)`.
        system: (Optional) System instructions or bot description.
    """
    self.model = model
    self.system = system
    self.history = []

  def add_to_history_as_user(self, message):
    """
    Adds a user message to the history with start/end turn markers.
    """
    self.history.append(self.__START_TURN_USER__ + message + self.__END_TURN__)

  def add_to_history_as_model(self, message):
    """
    Adds a model response to the history with start/end turn markers.
    """
    self.history.append(self.__START_TURN_MODEL__ + message + self.__END_TURN__)

  def get_history(self):
    """
    Returns the entire chat history as a single string.
    """
    return "".join(self.history)

  def get_full_prompt(self):
    """
    Builds the turn-formatted part of the prompt, including history and system description.
    """
    prompt = self.get_history() + self.__START_TURN_MODEL__
    if self.system:
      prompt = self.system + "\n" + prompt
    return prompt

  def _build_prompt(self, message, chroma_context, web_context):
    """
    Assembles the instruction prompt from its fixed text and the variable parts.

    The prompt is built line by line so that the indentation of this source
    file does not end up inside the text sent to the model.
    """
    # NOTE(review): the history below already contains the current user turn
    # (it is recorded before this is called), so the question appears twice.
    return "\n".join([
        "You are a highly knowledgeable drug information assistant specializing in drug interactions enquiries.",
        "Your goal is to provide an accurate response and relevant information on question below.",
        "If you don't know the answer, honestly respond that you don't have the information. Avoid guessing or providing incomplete information.",
        "",
        "Here is an example:",
        "Query: Can I take warfarin with ibuprofen?",
        "Response: Warfarin and ibuprofen can interact and increase the risk of bleeding. Ibuprofen is a drug with antiplatelet properties and may increase anticoagulation effect of warfarin. It is recommended to avoid using them together or consult your healthcare provider for alternatives.",
        "",
        "Use the following context to answer the question below:",
        chroma_context,
        web_context,
        "",
        "Use the following history of your interaction with the user when needed to help answer the question below:",
        self.get_full_prompt(),
        "",
        "Question:",
        message,
        "",
        "Answer:",
    ])

  def send_message(self, message):
    """
    Handles sending a user message and getting a model response.

    Args:
        message: The user's message.

    Returns:
        The model's response, without the echoed prompt.
    """
    # Step 1: Record the user's turn
    self.add_to_history_as_user(message)

    # Step 2: Retrieve context from Chroma. `retrieve_context_from_chroma` and
    # `retriever` are notebook-level names defined in earlier cells.
    chroma_context = retrieve_context_from_chroma(message, retriever)

    # Step 3: Retrieve web search context (`search_web` and `search_api` are
    # notebook-level names as well)
    web_context = search_web(message, search_api)

    # Step 4: Construct prompt with both Chroma and web search contexts
    full_prompt = self._build_prompt(message, chroma_context, web_context)

    # Step 5: Generate, then remove only the first (echoed) copy of the prompt
    response = self.model.generate(full_prompt, max_length=1024)
    result = response.replace(full_prompt, "", 1)

    # Step 6: Add the result to chat history
    self.add_to_history_as_model(result)
    return result

In [None]:
# Modified chat class for context and web result
class ChatState():
  """
  Manages the conversation history for a turn-based chatbot and prepends
  Chroma and web-search context to every prompt.

  Follows the turn-based conversation guidelines for the Gemma family of models
  documented at https://ai.google.dev/gemma/docs/formatting: every turn opens
  with `<start_of_turn>` followed by the role name and closes with
  `<end_of_turn>`.
  """

  # Control tokens that delimit dialogue turns for instruction-tuned Gemma.
  __START_TURN_USER__ = "<start_of_turn>user\n"
  __START_TURN_MODEL__ = "<start_of_turn>model\n"
  __END_TURN__ = "<end_of_turn>\n"

  def __init__(self, model, system=""):
    """
    Initializes the chat state.

    Args:
        model: The language model to use for generating responses. It is
          called as `model.generate(prompt, max_length=...)`.
        system: (Optional) System instructions or bot description.
    """
    self.model = model
    self.system = system
    self.history = []

  def add_to_history_as_user(self, message):
    """
    Adds a user message to the history with start/end turn markers.
    """
    self.history.append(self.__START_TURN_USER__ + message + self.__END_TURN__)

  def add_to_history_as_model(self, message):
    """
    Adds a model response to the history with start/end turn markers.
    """
    self.history.append(self.__START_TURN_MODEL__ + message + self.__END_TURN__)

  def get_history(self):
    """
    Returns the entire chat history as a single string.
    """
    return "".join(self.history)

  def get_full_prompt(self):
    """
    Builds the prompt for the language model, including history and system description.

    The prompt ends with an opened model turn so that generation continues
    as the model's reply.
    """
    prompt = self.get_history() + self.__START_TURN_MODEL__
    if self.system:
      prompt = self.system + "\n" + prompt
    return prompt

  def send_message(self, message):
    """
    Handles sending a user message and getting a model response.

    Args:
        message: The user's message.

    Returns:
        The model's response, without the echoed prompt.
    """
    # Step 1: Record the user's turn
    self.add_to_history_as_user(message)

    # Step 2: Retrieve context from Chroma. `retrieve_context_from_chroma` and
    # `retriever` are notebook-level names defined in earlier cells.
    chroma_context = retrieve_context_from_chroma(message, retriever)

    # Step 3: Retrieve web search context (`search_web` and `search_api` are
    # notebook-level names as well)
    web_context = search_web(message, search_api)

    # Step 4: Construct prompt with both Chroma and web search contexts
    prompt = self.get_full_prompt()
    full_prompt = f"{chroma_context}\n\n{web_context}\n\n{prompt}"

    # Step 5: Generate, then remove only the first (echoed) copy of the prompt
    response = self.model.generate(full_prompt, max_length=1024)
    result = response.replace(full_prompt, "", 1)

    # Step 6: Add the result to chat history
    self.add_to_history_as_model(result)
    return result

## Trying Prompt template

In [None]:
## Correction from advisor
class ChatState():
  """
  Manages the conversation history for a turn-based chatbot and answers each
  message from an instruction prompt that embeds Chroma and web-search context.

  Follows the turn-based conversation guidelines for the Gemma family of models
  documented at https://ai.google.dev/gemma/docs/formatting: every turn opens
  with `<start_of_turn>` followed by the role name and closes with
  `<end_of_turn>`.
  """
  __START_TURN_USER__ = "<start_of_turn>user\n" # NOTE: This is only valid for gemma2_instr
  __START_TURN_MODEL__ = "<start_of_turn>model\n" # NOTE: This is only valid for gemma2_instr
  __END_TURN__ = "<end_of_turn>\n" # NOTE: This is only valid for gemma2_instr

  def __init__(self, model, tokenizer=None, system=""):
    """
    Initializes the chat state.
    Args:
        model: The language model to use for generating responses. It is
          called as `model.generate(prompt, max_length=...)`.
        tokenizer: (Optional) Tokenizer used only to report the prompt's
          token count; when omitted, the count is not printed.
        system: (Optional) System instructions or bot description.
    """
    self.model = model
    self.tokenizer = tokenizer
    self.system = system
    self.history = []

  def add_to_history_as_user(self, message):
    """
    Adds a user message to the history with start/end turn markers.
    """
    self.history.append(self.__START_TURN_USER__ + message + self.__END_TURN__)

  def add_to_history_as_model(self, message):
    """
    Adds a model response to the history with a start turn marker only
    (no end-of-turn marker is appended in this version).
    """
    self.history.append(self.__START_TURN_MODEL__ + message ) #+ self.__END_TURN__)

  def get_history(self):
    """
    Returns the entire chat history as a single string.
    """
    return "".join(self.history)

  def get_history_blurb(self):
    """
    Returns what to insert into the current prompt: an empty string while
    there is no history, otherwise an introductory sentence plus the history.
    """
    if len(self.history)==0:
      return ""
    return (
        "\n\nUse the following history of your interaction with the user to help answer the question below:\n"
        f"{self.get_history()}"
    )

  def get_full_prompt(self):
    """
    Builds the plain turn-formatted prompt, including history and system description.

    Kept for compatibility; `send_message` in this version builds its own
    prompt and does not call this method.
    """
    prompt = self.get_history() + self.__START_TURN_MODEL__
    if self.system:
      prompt = self.system + "\n" + prompt
    return prompt

  def send_message(self, message):
    """
    Handles sending a user message and getting a model response.
    Args:
        message: The user's message.
    Returns:
        The model's response, without the echoed prompt.
    """
    # Step 1: Retrieve context from Chroma. `retrieve_context_from_chroma` and
    # `retriever` are notebook-level names defined in earlier cells.
    chroma_context = retrieve_context_from_chroma(message, retriever)
    # Step 2: Retrieve web search context (`search_web` and `search_api` are
    # notebook-level names as well)
    web_context = search_web(message, search_api)
    # Step 3: Construct prompt with both contexts and the earlier history.
    # The history blurb is taken before the new message is recorded, so the
    # question appears only once, in its own user turn at the end.
    full_prompt = (
        "You are a highly knowledgeable drug information assistant specializing in drug interactions enquiries. "
        "Your goal is to provide an accurate response and relevant information on question below. "
        "If you don't know the answer, honestly respond that you don't have the information. "
        "Avoid guessing or providing incomplete information.\n\n"
        "Here is an example:\n"
        "Query: Can I take warfarin with ibuprofen?\n"
        "Response: Warfarin and ibuprofen can interact and increase the risk of bleeding. "
        "Ibuprofen is a drug with antiplatelet properties and may increase anticoagulation effect of warfarin. "
        "It is recommended to avoid using them together or consult your healthcare provider for alternatives.\n\n"
        "Use the following context to answer the question below:\n"
        # The newline keeps the last Chroma passage and the first web snippet apart.
        f"{chroma_context}\n"
        f"{web_context}"
        f"{self.get_history_blurb()}"
        f"\n{self.__START_TURN_USER__}"
        f"{message}"
        f"\n{self.__END_TURN__}"
        f"\n{self.__START_TURN_MODEL__}"
    )
    # for debugging -
    print("--->\n" + full_prompt + "<--")
    # Use the tokenizer handed to this instance (not a notebook global), and
    # skip the token count when none was supplied.
    if self.tokenizer is not None:
      tokenized_input = self.tokenizer.tokenize(full_prompt)
      print("PROMPT NUM TOKENS=", len(tokenized_input))
    self.add_to_history_as_user(message)
    # Step 4: Generate response with full prompt
    response = self.model.generate(full_prompt, max_length=1024)
    # for debugging -
    print("--->\n" + response + "<--")
    # Remove only the first (echoed) copy of the prompt
    result = response.replace(full_prompt, "", 1)
    # Step 5: Add the result to chat history
    self.add_to_history_as_model(result)

    return result
print("Done")

In [None]:
# Test your enhanced ChatState
# The tokenizer is passed to ChatState, whose `send_message` reports the
# token count of each prompt for debugging.
tokenizer = keras_nlp.models.GemmaTokenizer.from_preset("gemma2_instruct_2b_en")
chat2 = ChatState(model=gemma_lm, tokenizer=tokenizer)
print("Done")

In [None]:
message = "Is there an interaction between goserelin with carbamazepine?"
display_chat(message, chat2.send_message(message))

In [None]:
message = "What drugs are discussing?"
display_chat(message, chat2.send_message(message))