# 2024-06-09 - Attempt at a simple chatbot using Llama 3

Install required libraries (likely not needed when running inside a Poetry shell)

In [None]:
!pip install transformers torch langchain

In [1]:
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import notebook_login

from langchain import LLMPipeline, LLMChain, Memory
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate


def load_model(model_name, device):
    """Load a causal language model and its tokenizer, placing the model on ``device``.

    Args:
        model_name: Identifier handed to ``from_pretrained`` for both the
            tokenizer and the model.
        device: Target passed to the model's ``to`` method.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    tok = AutoTokenizer.from_pretrained(model_name)
    causal_lm = AutoModelForCausalLM.from_pretrained(model_name)
    causal_lm.to(device)
    return causal_lm, tok

def verify_cuda():
    """Report whether CUDA is usable and return the matching ``torch.device``.

    Returns:
        ``torch.device("cuda")`` when ``torch.cuda.is_available()`` is true,
        otherwise ``torch.device("cpu")``. A one-line status message is printed
        in either case.
    """
    # Guard clause: handle the CPU fallback first, then the GPU path.
    if not torch.cuda.is_available():
        print("CUDA is not available. Using CPU.")
        return torch.device("cpu")

    print("CUDA is available. Using GPU.")
    return torch.device("cuda")

# Pick the torch device (GPU when CUDA is available, otherwise CPU)
device = verify_cuda()

# Log in to Hugging Face via huggingface_hub (for private models)
# notebook_login()

CUDA is not available. Using CPU.


In [2]:
# Deployment environment selector
environment = "dev"  # prod, dev

# Checkpoint to load for each supported environment
_model_by_environment = {
    # Development: 8B version of the OpenBioLLM model. Alternatives:
    #   "meta-llama/Meta-Llama-3-8B-Instruct"  - official 8B model (requires access)
    #   "distilgpt2"  - a much, much smaller model that isn't llama-based
    "dev": "aaditya/Llama3-OpenBioLLM-8B",
    # Production: the larger 70B model
    "prod": "aaditya/Llama3-OpenBioLLM-70B",
}

if environment not in _model_by_environment:
    raise ValueError("Invalid environment. Please choose either 'dev' or 'prod'.")
model_name = _model_by_environment[environment]

model, tokenizer = load_model(model_name, device)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
# Adapted from the Hugging Face model-card example

pipeline = transformers.pipeline(
    "text-generation",
    model=model_name,
    model_kwargs={"torch_dtype": torch.bfloat16},
    # `device` must name a concrete device; "auto" is only a valid value for
    # `device_map`. Reuse the device already selected earlier in the notebook.
    device=device,
)

# Chat history in role/content form: a system persona followed by the user's question
messages = [
    {"role": "system", "content": "You are an expert and experienced from the healthcare and biomedical domain with extensive medical knowledge and practical experience. Your name is OpenBioLLM, and you were developed by Saama AI Labs. who's willing to help answer the user's query with explanation. In your explanation, leverage your deep medical expertise such as relevant anatomical structures, physiological processes, diagnostic criteria, treatment guidelines, or other pertinent medical concepts. Use precise medical terminology while still aiming to make the explanation clear and accessible to a general audience."},
    {"role": "user", "content": "How can i split a 3mg or 4mg waefin pill so i can get a 2.5mg pill?"},
]

# Render the messages into a single prompt string using the tokenizer's chat template
prompt = pipeline.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

# Stop on either the regular EOS token or the end-of-turn token
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    # Greedy decoding: sampling with temperature=0.0 is rejected by transformers
    # (temperature must be strictly positive), so sampling is switched off
    # instead of passing a zero temperature.
    do_sample=False,
)
# The generated text starts with the prompt; print only the continuation
print(outputs[0]["generated_text"][len(prompt):])


Leave the stuff below for another day (2024/06/10 MK)

In [None]:
# Conversation memory used by the chatbot
memory = ConversationBufferMemory()

# Prompt text: a short scene description followed by the patient's message
template = """
The following is a conversation between a friendly medical chatbot and a heart-failure patient. The chatbot collects details about symptoms and medication adherence.

Patient: {input}
Chatbot:"""

# Wrap the text in a PromptTemplate whose only variable is `input`
prompt = PromptTemplate(template=template, input_variables=["input"])

# Create a function to generate responses using the model
def generate_response(model, tokenizer, device, input_text, memory):
    """Generate the chatbot's reply to ``input_text`` and record it in ``memory``.

    Args:
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer matching ``model``.
        device: Device the tokenized inputs are moved to (should match the model's).
        input_text: Full prompt text, ending where the reply should begin.
        memory: LangChain chat memory; the reply is appended to its
            ``chat_memory`` as an AI message.

    Returns:
        The newly generated reply text, without the echoed prompt.
    """
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    prompt_length = inputs["input_ids"].shape[-1]
    # Pass the attention mask together with the ids, and bound the number of
    # *new* tokens so a long history cannot use up the whole generation budget
    # (max_length counts the prompt tokens as well).
    outputs = model.generate(**inputs, max_new_tokens=256)
    # generate() returns prompt + continuation; keep only the continuation.
    new_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    # ConversationBufferMemory has no add_context(); messages are stored on
    # its chat_memory history instead.
    memory.chat_memory.add_ai_message(response)
    return response


def _format_history(memory):
    """Render the stored messages as 'Patient: ...' / 'Chatbot: ...' lines."""
    lines = []
    for message in memory.chat_memory.messages:
        speaker = "Patient" if message.type == "human" else "Chatbot"
        lines.append(f"{speaker}: {message.content}")
    return "\n".join(lines)


# Interactive chat loop backed by LangChain conversation memory
def run_chatbot(model, tokenizer, device, memory):
    """Run a console chat loop until the user types 'exit'.

    Each user turn is stored in ``memory``, the whole conversation so far is
    rendered into a prompt, and the model's reply is printed (the reply itself
    is stored by ``generate_response``).

    Args:
        model: Causal language model passed through to ``generate_response``.
        tokenizer: Tokenizer matching ``model``.
        device: Device used for the tokenized inputs.
        memory: LangChain chat memory holding the conversation history.
    """
    print("You are chatting with a friendly medical chatbot. Type 'exit' to stop.")
    while True:
        user_input = input("You: ").strip()
        if user_input.lower() == "exit":
            break
        if not user_input:
            # Nothing to answer; ask again instead of prompting the model with an empty turn.
            continue
        # Store the user turn first so the rendered history ends with it.
        memory.chat_memory.add_user_message(user_input)
        prompt_with_memory = _format_history(memory) + "\nChatbot:"
        response = generate_response(model, tokenizer, device, prompt_with_memory, memory)
        print(response)


In [None]:
def look_up_patient_medication_history(patient_id):
    """Return a placeholder medication-history line for ``patient_id``.

    No real lookup is performed; the result is a fixed sentence with the
    patient id interpolated.
    """
    history_line = f"Medication history for patient {patient_id}: [Placeholder data]"
    return history_line

def look_up_recovery_day(patient_id, discharge_date):
    """Return a placeholder recovery-day sentence for a patient.

    The day count is not computed; the literal ``X`` stands in for it. Both
    arguments are interpolated into the sentence unchanged.
    """
    recovery_line = f"Patient {patient_id} is on day X of recovery since discharge on {discharge_date}."
    return recovery_line

# Example functions to demonstrate tool usage
def run_tools_example():
    """Print the output of both placeholder lookups for one sample patient."""
    sample = {"patient_id": "12345", "discharge_date": "2024-01-01"}
    print(look_up_patient_medication_history(sample["patient_id"]))
    print(look_up_recovery_day(sample["patient_id"], sample["discharge_date"]))

# Invoke the demonstration once
run_tools_example()


In [None]:
# Start the interactive chat loop (type 'exit' to stop)
run_chatbot(model, tokenizer, device, memory)