In [76]:
from huggingface_hub import login
import os
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain.retrievers.document_compressors import CrossEncoderReranker
from sentence_transformers import CrossEncoder
from dotenv import load_dotenv
import torch

In [43]:
# Load key=value pairs from a local .env file into the process environment;
# the os.getenv(...) lookups in later cells read from it.
load_dotenv()

True

In [44]:
# Hugging Face access token, read from the environment (None when HF_TOKEN is unset).
token = os.getenv("HF_TOKEN")

# Log this session in to the Hugging Face Hub with that token.
# NOTE(review): the recorded output says HF_TOKEN is already the active token,
# so this explicit login may be redundant — confirm before removing.
login(token=token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [64]:
# Embedding model used by the vector store below (a multilingual model, judging by its name).
embedding_id = "distiluse-base-multilingual-cased-v2"

# Model files are cached under ./model_cache.
embeddings = HuggingFaceEmbeddings(model_name=embedding_id, cache_folder="./model_cache")

In [77]:
# Cross-encoder checkpoint used to rerank retrieved documents against the query.
reranker_id = "cross-encoder/ms-marco-MiniLM-L-6-v2"

# LangChain's cross-encoder wrapper is used here (not sentence_transformers.CrossEncoder directly) so it can be handed to CrossEncoderReranker; model_kwargs presumably forwards cache_dir to the underlying model — confirm.
reranker = HuggingFaceCrossEncoder(model_name=reranker_id, model_kwargs={'cache_dir' : "./model_cache"})

In [85]:
# Document compressor that reranks retrieved documents with the cross-encoder above.
compressor = CrossEncoderReranker(model=reranker)

### Chroma

In [63]:
from langchain_chroma import Chroma
from chromadb.config import Settings


In [65]:
# Address of the Chroma server this notebook connects to.
host, port = "localhost", "8000"

# Client configuration: reach the server through Chroma's FastAPI client implementation.
chroma_settings = Settings(
    chroma_api_impl="chromadb.api.fastapi.FastAPI",
    chroma_server_host=host,
    chroma_server_http_port=port,
)

# Vector store over the "drugs" collection, with `embeddings` as its embedding function.
vector_store = Chroma(
    collection_name="drugs",
    embedding_function=embeddings,
    client_settings=chroma_settings,
)

In [80]:
# Similarity-search retriever over the vector store, configured with k=10.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs=dict(k=10))

In [None]:
# Test query (Indonesian): "I have a headache, nausea, and fever. What medicine can I take?"
query = "Saya mengalami sakit kepala, mual, dan demam. Obat apa yang bisa saya konsumsi?"

In [83]:
# Fetch candidate chunks for the query.
# NOTE(review): k=10 repeats the value already set in the retriever's search_kwargs.
results = retriever.invoke(query, k=10)
results

[Document(id='Isordil Titradose_32', metadata={'drug_name': 'Isordil Titradose', 'source': 'drugs.com'}, page_content='headache, fever, confusion, severe dizziness, fast or pounding heartbeats, vision problems, nausea, vomiting, stomach pain, bloody diarrhea, trouble breathing, sweating, cold or clammy skin, fainting, and seizure (convulsions). What should I avoid while taking Isordil Titradose? This medicine may'),
 Document(id='Advil Migraine_29', metadata={'source': 'drugs.com', 'drug_name': 'Advil Migraine'}, page_content='drowsiness, black or bloody stools, coughing up blood, shallow breathing, fainting, or coma. What should I avoid while taking Advil Migraine? Ask a doctor or pharmacist before using other medicines for pain, fever, swelling, or cold/flu symptoms. They may contain ingredients similar to Advil'),
 Document(id='Dyphylline and guaifenesin_26', metadata={'drug_name': 'Dyphylline and guaifenesin', 'source': 'drugs.com'}, page_content='ears, sweating, feeling hot, slow 

In [86]:
# Rerank the retrieved candidates against the query using the cross-encoder compressor.
reranked_results = compressor.compress_documents(results, query)
reranked_results

[Document(id='Isosorbide dinitrate_33', metadata={'source': 'drugs.com', 'drug_name': 'Isosorbide dinitrate'}, page_content='throbbing headache, fever, confusion, severe dizziness, fast or pounding heartbeats, vision problems, nausea, vomiting, stomach pain, bloody diarrhea, trouble breathing, sweating, cold or clammy skin, fainting, and seizure (convulsions). What should I avoid while taking isosorbide dinitrate? This'),
 Document(id='Isordil Titradose_32', metadata={'drug_name': 'Isordil Titradose', 'source': 'drugs.com'}, page_content='headache, fever, confusion, severe dizziness, fast or pounding heartbeats, vision problems, nausea, vomiting, stomach pain, bloody diarrhea, trouble breathing, sweating, cold or clammy skin, fainting, and seizure (convulsions). What should I avoid while taking Isordil Titradose? This medicine may'),
 Document(id='Meclofenamate_26', metadata={'drug_name': 'Meclofenamate', 'source': 'drugs.com'}, page_content='medical attention or call the Poison Help l

### LLM Inference

In [45]:
import torch
from transformers import pipeline, BitsAndBytesConfig

In [46]:
# 4-bit NF4 quantization with double quantization and a float16 compute dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Alternatives tried earlier: "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
# "meta-llama/Llama-3.2-1B-Instruct".
model_id = "meta-llama/Llama-3.2-3B-Instruct"

# Arguments forwarded to the model loader: local cache directory and quantization setup.
model_load_kwargs = {
    "cache_dir": "./model_cache",
    "quantization_config": quantization_config,
}

# Text-generation pipeline; each call generates at most 512 new tokens.
pipe = pipeline(
    task="text-generation",
    model=model_id,
    device_map="auto",
    model_kwargs=model_load_kwargs,
    max_new_tokens=512,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


In [47]:
# Expose the transformers pipeline as a LangChain LLM.
llm = HuggingFacePipeline(pipeline=pipe)

#### Normal Inference

In [17]:
from langchain_core.prompts import PromptTemplate

# Single-turn prompt: ask for a one-paragraph answer and prime step-by-step reasoning.
template = (
    "\n"
    "Answer this question in one paragraph.\n"
    "Question: {question}\n"
    "\n"
    "Answer: Let's think step by step."
)
prompt = PromptTemplate.from_template(template)

# Prompt piped straight into the raw (non-chat) LLM.
chain = prompt | llm

question = "What is electroencephalography?"
chain_input = dict(question=question)
response = chain.invoke(chain_input)

print(response)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Answer this question in one paragraph.
Question: What is electroencephalography?

Answer: Let's think step by step. We are asked to describe what electroencephalography is. First, we need to break down the term "electroencephalography". The term can be broken down into three parts: "electro", "encephalo", and "graphy". "Electro" refers to electricity, "encephalo" means brain, and "graphy" means writing. Therefore, electroencephalography can be translated to "the writing of the brain's electricity". Now that we know the meaning of each part of the term, we can combine them to get the full definition of electroencephalography. This definition is the "writing" or recording of the electrical activity of the brain. This is typically done using electrodes placed on the scalp, which measure the electrical impulses produced by the brain's neurons. The resulting data is then written onto a graph, allowing researchers to study the brain's electrical activity. In simple terms, electroencephalogr

In [48]:
from langchain_huggingface import ChatHuggingFace

# Chat-model wrapper around the pipeline LLM, so it can be invoked with message lists.
chat = ChatHuggingFace(llm=llm, verbose=True)

In [49]:
from langchain_core.messages import (
    HumanMessage,
    SystemMessage,
)

# One system instruction followed by one user question.
system_message = SystemMessage(content="You're a helpful assistant")
human_message = HumanMessage(
    content="What happens when an unstoppable force meets an immovable object?"
)
messages = [system_message, human_message]

# Single chat turn; the reply is inspected in the next cell.
ai_msg = chat.invoke(messages)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [50]:
# Display the reply; in the recorded run the content still includes the echoed chat-template prompt.
ai_msg

AIMessage(content='<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 20 May 2025\n\nYou\'re a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat happens when an unstoppable force meets an immovable object?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nA classic reference to the famous cartoon paradox.\n\nThe concept of an "unstoppable force" and an "immovable object" is often used to illustrate a thought experiment in physics and philosophy. In this scenario, we have two opposing forces that cannot be stopped or moved by any means.\n\nAccording to the laws of physics, when an unstoppable force meets an immovable object, the outcome is often depicted as a paradoxical situation. Here are a few possible interpretations:\n\n1. **The laws of physics break down**: In this scenario, the fundamental laws of physics, such as Newton\'s laws of motion, no longer apply. The unstoppable force and im

#### Tool Calling

In [53]:
from langchain.tools import tool

@tool
def multiply(x: int, y: int) -> int:
    """Multiplies two numbers."""
    # The docstring is kept verbatim: @tool presumably exposes it to the model
    # as the tool description, so rewording it would change runtime behavior.
    product = x * y
    return product

# Chat model variant that is offered `multiply` as a tool.
chat_with_tools = chat.bind_tools([multiply])

In [54]:
# Ask a question the multiply tool could answer.
# NOTE(review): in the recorded output the reply is plain text with empty
# additional_kwargs — no tool call is visible; confirm tool calling actually works.
result = chat_with_tools.invoke("What is 2 multiply by 3?")
print(result)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


content='<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 20 May 2025\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is 2 multiply by 3?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n2 x 3 = 6' additional_kwargs={} response_metadata={} id='run--e76639f0-e82f-4edd-b247-a0ac9c38376e-0'


In [107]:
# The Tavily tool built in a later cell is given no key explicitly, so it relies
# on the TAVILY_API_KEY environment variable (normally loaded from .env by
# load_dotenv()).
#
# The previous version copied os.getenv(...) back into os.environ when the
# variable was missing, i.e. it assigned None and crashed with
# "TypeError: str expected, not NoneType". Fail early with an actionable
# message instead.
tavily_key = os.getenv("TAVILY_API_KEY")

if not tavily_key:
    raise RuntimeError(
        "TAVILY_API_KEY is not set. Add it to your .env file or export it "
        "before running the Tavily cells."
    )

In [108]:
from langgraph.prebuilt import create_react_agent
from langchain_tavily import TavilySearch

# Tavily web-search tool, configured for at most 5 results on the "general" topic.
tavily_search_tool = TavilySearch(
    max_results=5,
    topic="general",
)


In [None]:

# Prebuilt ReAct-style agent: the chat model plus the Tavily search tool.
agent = create_react_agent(chat, [tavily_search_tool])

user_input = "What nation hosted the Euro 2024? Include only wikipedia sources."

# Stream the agent state after each step and pretty-print the newest message.
agent_steps = agent.stream({"messages": user_input}, stream_mode="values")
for step in agent_steps:
    latest_message = step["messages"][-1]
    latest_message.pretty_print()

#### Message History

In [88]:
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory

In [89]:
from langchain_core.prompts import (
    ChatPromptTemplate,
    MessagesPlaceholder,
)

# System prompt (Indonesian). In English: the assistant is a medical assistant
# expert at recommending drugs; each recommendation must cover the drug name,
# what it is used for, dosage, and side effects; answer in at most 2 paragraphs;
# finally, tell the user to see a doctor if symptoms persist after 3 days.
# Retrieved context is injected through {context}.
# NOTE(review): the second sentence ("Berdasarkan pertanyaan yang ...") looks cut
# off and "rekomandasi" is probably a typo for "rekomendasi" — confirm the intended
# wording before editing, since this text is sent to the model verbatim.
template = """Anda adalah seorang asisten medis yang ahli dalam memberikan rekomandasi obat.
        Berdasarkan pertanyaan yang  Rekomendasi obat yang diberikan harus berisi informasi terkait nama obat, deskripsi kegunaan obat, dosis, dan efek samping obat.
        Berikan rekomendasi dengan maksimum 2 paragraf.
        Terakhir, tolong kasih tahu ke pengguna bahwa jika dalam waktu 3 hari pengguna masih mengalami gejala yang dialami, maka segera konsultasi ke dokter.

        Konteks: {context}
        """
# Disabled prompt line, translated: "If you do not know the drug information provided, simply say that you do not know."

# Chat prompt layout: system instructions, then the stored conversation history, then the new user query.
prompt = ChatPromptTemplate.from_messages([
    ("system", template),
    MessagesPlaceholder(variable_name="history"),
    ("user", "{query}")
])

In [94]:
# The MongoDB connection URI carries the database username and password, so it
# is read from the environment (e.g. via .env) instead of being hard-coded in
# the notebook, where it would be committed and shared with the file.
mongodb_uri = os.getenv("MONGODB_URI")
if not mongodb_uri:
    raise RuntimeError(
        "MONGODB_URI is not set. Add it to your .env file, e.g. "
        "MONGODB_URI=mongodb://<user>:<password>@localhost:27017"
    )

# Message history for the fixed session "test_session", stored in the
# "test_histories" collection of the "drugbot" database.
message_history = MongoDBChatMessageHistory(
    session_id="test_session",
    connection_string=mongodb_uri,
    database_name="drugbot",
    collection_name="test_histories",
)

In [109]:
# Rebind the chat model with both the multiply tool and the Tavily search tool (replaces the earlier single-tool binding).
chat_with_tools = chat.bind_tools([multiply, tavily_search_tool])

In [110]:
# Chat prompt piped into the tool-enabled chat model.
# skip_prompt=True is bound as an extra call argument — presumably so the echoed
# prompt is dropped from the generated text; confirm against the installed
# langchain_huggingface version.
chain = prompt | chat_with_tools.bind(skip_prompt=True)

In [112]:
# The MongoDB connection URI carries the database username and password, so it
# is read from the environment (e.g. via .env) instead of being hard-coded in
# the notebook, where it would be committed and shared with the file.
mongodb_uri = os.getenv("MONGODB_URI")
if not mongodb_uri:
    raise RuntimeError(
        "MONGODB_URI is not set. Add it to your .env file, e.g. "
        "MONGODB_URI=mongodb://<user>:<password>@localhost:27017"
    )


def get_session_history(session_id: str) -> MongoDBChatMessageHistory:
    """Return the MongoDB-backed message history for one chat session.

    Args:
        session_id: Identifier of the conversation whose messages are loaded
            and appended to.

    Returns:
        A history object bound to the "test_histories" collection of the
        "drugbot" database.
    """
    return MongoDBChatMessageHistory(
        session_id=session_id,
        connection_string=mongodb_uri,
        database_name="drugbot",
        collection_name="test_histories",
    )


# Wrap the chain so each call resolves the history for the session_id given in
# the config: stored messages fill the prompt's "history" placeholder and the
# "query" input is treated as the new user message.
chain_with_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="query",
    history_messages_key="history",
)

In [113]:
# Test query (Indonesian): "I have a headache, nausea, and fever. What medicine can I take?"
query = "Saya mengalami sakit kepala, mual, dan demam. Obat apa yang bisa saya konsumsi?"

In [114]:
# Run one turn under session "123".
# NOTE(review): "context" is an empty list here, so no retrieved or reranked
# documents reach the prompt — presumably a placeholder until retrieval is wired in.
response = chain_with_history.invoke(
    {"query" : query, "context" : []},
    {"configurable" : {"session_id" : "123"}}
)


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [115]:
# Show the text of the model's answer.
response.content

'Saya tidak bisa memberikan rekomendasi medis tanpa mempelajari lebih lanjut tentang kondisi kesehatan Anda. Namun, saya dapat memberikan beberapa informasi umum tentang obat-obatan yang biasanya digunakan untuk mengobati gejala-gejala yang Anda alami.\n\nUntuk sakit kepala, demam, dan mual, beberapa obat-obatan yang bisa Anda pertimbangkan adalah:\n\n- Parasetamol (dikenal juga sebagai acetaminophen) atau ibuprofen, yang dapat membantu mengurangi demam dan sakit kepala. Namun, pastikan untuk mengikuti dosis yang disarankan dan tidak melebihi dosis yang direkomendasikan.\n- Anti-mual (seperti metoclopramide atau domperidone), yang dapat membantu mengurangi mual dan muntah.\n\nNamun, perlu diingat bahwa gejala-gejala yang Anda alami dapat disebabkan oleh berbagai penyakit, dan diagnosis yang tepat harus dilakukan oleh dokter. Oleh karena itu, sangat disarankan untuk berkonsultasi dengan dokter sebelum mengonsumsi obatan apa pun.\n\nJika Anda masih mengalami gejala-gejala tersebut setela