In [111]:
%pip install pydantic_settings langchain langchain-core langchain-google-genai langchain-qdrant fastembed langchain-community qdrant-client langgraph sentence-transformers

Collecting sentence-transformers
  Obtaining dependency information for sentence-transformers from https://files.pythonhosted.org/packages/6f/ff/178f08ea5ebc1f9193d9de7f601efe78c01748347875c8438f66f5cecc19/sentence_transformers-5.0.0-py3-none-any.whl.metadata
  Downloading sentence_transformers-5.0.0-py3-none-any.whl.metadata (16 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Obtaining dependency information for transformers<5.0.0,>=4.41.0 from https://files.pythonhosted.org/packages/cf/18/eb7578f84ef5a080d4e5ca9bc4f7c68e7aa9c1e464f1b3d3001e4c642fce/transformers-4.54.1-py3-none-any.whl.metadata
  Downloading transformers-4.54.1-py3-none-any.whl.metadata (41 kB)
     ---------------------------------------- 0.0/41.7 kB ? eta -:--:--
     ------------------ ------------------- 20.5/41.7 kB 682.7 kB/s eta 0:00:01
     -------------------------------------- 41.7/41.7 kB 669.5 kB/s eta 0:00:00
Collecting torch>=1.11.0 (from sentence-transformers)
  Obtaining depen


[notice] A new release of pip is available: 23.2.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [112]:
from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    """Notebook configuration read from environment variables and a local ``.env`` file."""

    # API key for Google Gemini. Declared without a default, so it must be provided.
    GOOGLE_API_KEY: str

    # extra="ignore": a shared .env file usually holds unrelated variables, and
    # pydantic-settings otherwise rejects dotenv entries that have no matching field.
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")


env = Settings()

In [115]:
# Import from langchain_community (part of the %pip install cell): the
# `langchain.embeddings` re-export of this class is deprecated and is not
# available in newer LangChain releases.
from langchain_community.embeddings import HuggingFaceEmbeddings

# Sentence-transformers model used to embed text for similarity search.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [125]:
from qdrant_client.http.models import Distance

# Name of the Qdrant collection that stores the doctor records.
collection_name = "hackathon"
# Vector size configured on the collection; it has to match the length of the
# vectors that get upserted (presumably 384 for all-MiniLM-L6-v2 — re-check if
# the embedding model changes).
dimension = 384
# Distance metric configured on the collection.
distance = Distance.COSINE

In [126]:
# Load the doctor records from doctors_final.json.
import json

# Read explicitly as UTF-8: the platform default encoding (e.g. cp1252 on Windows)
# can fail on, or silently garble, non-ASCII characters in the records.
with open("doctors_final.json", "r", encoding="utf-8") as file:
    doctors_data = json.load(file)

# Show the first record as a sample of the schema.
print(doctors_data[0])

{'id': '5eae0017-40dd-4961-869f-79d9e45d87f2', 'name': 'Adventia Emilia Krysna Sipi Seda, M.M., M.Psi., Psikolog', 'specialization_name': 'Psikologi', 'specialization_name_en': 'Psychology', 'sub_specialization_name': 'Psikolog', 'sub_specialization_name_en': 'Psychologist', 'hospital_name': 'Siloam Hospitals Yogyakarta'}


In [127]:
from qdrant_client import QdrantClient

# ":memory:" selects qdrant-client's local in-process mode; presumably nothing is
# persisted, so the collection has to be rebuilt after a kernel restart — confirm.
client = QdrantClient(":memory:")

In [128]:
from qdrant_client.http.models import VectorParams

# Create the collection only when it is missing, so this cell can be re-run safely.
collection_present = client.collection_exists(collection_name=collection_name)
if collection_present:
    print(f"Collection '{collection_name}' already exists.")
else:
    vector_settings = VectorParams(size=dimension, distance=distance)
    client.create_collection(
        collection_name=collection_name,
        vectors_config=vector_settings,
    )
    print(f"Collection '{collection_name}' created.")

Collection 'hackathon' created.


In [129]:
from qdrant_client.models import PointStruct
from sentence_transformers import SentenceTransformer
import uuid

# Step 1: Load model from Hugging Face
model = SentenceTransformer("all-MiniLM-L6-v2")  # You can change to other HF models

# Step 2: Prepare all texts from doctor data (one searchable sentence per record)
texts = [
    f"Doctor Name: {row['name']}, Specialization: {row['specialization_name_en']}, Hospital: {row['hospital_name']}"
    for row in doctors_data
]

# Step 3: Embed all documents.
# Kept under its own name so the LangChain `embeddings` object created earlier in
# the notebook is not overwritten by a NumPy array.
doc_vectors = model.encode(texts, show_progress_bar=True, convert_to_numpy=True)

# Step 4: Construct point structs for Qdrant
points = []
for row, text, vector in zip(doctors_data, texts, doc_vectors):
    point = PointStruct(
        # Deterministic ID derived from the record's own id, so re-running this
        # cell overwrites the same points instead of inserting duplicates.
        # Assumes row["id"] is unique per doctor — verify against the dataset.
        id=str(uuid.uuid5(uuid.NAMESPACE_OID, str(row["id"]))),
        vector=vector.tolist(),  # ensure vector is list, not numpy array
        payload={
            "page_content": text,
            "metadata": {
                "id": row["id"],
                "name": row["name"],
                "specialization": row["specialization_name_en"],
                "hospital": row["hospital_name"],
            },
        },
    )
    points.append(point)

# Step 5: Upsert into Qdrant
client.upsert(
    collection_name=collection_name,
    points=points
)

print(f"✅ Successfully upserted {len(points)} doctor entries using Hugging Face embeddings.")


Batches: 100%|██████████| 32/32 [00:03<00:00,  9.98it/s]


✅ Successfully upserted 1000 doctor entries using Hugging Face embeddings.


In [148]:
from functools import lru_cache

from langchain_qdrant import QdrantVectorStore


@lru_cache(maxsize=1)
def _get_query_embedding():
    """Load the query embedding model once and reuse it for every retriever."""
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


def get_retriever():
    """Return a retriever over the doctor collection.

    The vector store is rebuilt on every call so that it always picks up the
    current ``client`` and ``collection_name``; only the embedding model, which
    is expensive to load, is cached between calls.

    Returns:
        The retriever produced by ``QdrantVectorStore.as_retriever()``.
    """
    vector_store = QdrantVectorStore(
        client=client,
        collection_name=collection_name,
        embedding=_get_query_embedding(),
    )

    return vector_store.as_retriever()

In [149]:
from langchain_core.tools import tool
from typing import Annotated, List

@tool
def search_doctor_list(query: Annotated[str, "search query must contain keywords related to doctor list"]) -> List[str]:
    """Search for doctor data by name, specialization name, or hospital name."""
    # The docstring above is left untouched on purpose: @tool reads it at runtime.
    matching_documents = get_retriever().invoke(query, k=10)
    # Hand back only the text of each hit; the metadata payload is dropped.
    contents = []
    for document in matching_documents:
        contents.append(document.page_content)
    return contents

In [150]:
# Objects created with @tool are LangChain tools: run them through .invoke().
# Calling the tool object directly is deprecated in langchain-core.
search_doctor_list.invoke("Doctors in Siloam Yogyakarta")


  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


['Doctor Name: Dokter Umum Siloam Yogyakarta, Specialization: General Practitioner, Hospital: Siloam Hospitals Yogyakarta',
 'Doctor Name: dr. Yuyun Suci Megawati, Specialization: General Practitioner, Hospital: Siloam Hospitals Yogyakarta',
 'Doctor Name: dr. Ynes Aulia Eka Damayanti, Specialization: General Practitioner, Hospital: Siloam Hospitals Yogyakarta',
 'Doctor Name: dr. Nabila Khairunisa, Specialization: General Practitioner, Hospital: Siloam Hospitals Yogyakarta',
 'Doctor Name: dr. Dirgantara Fathurrizki Harfanie, Specialization: General Practitioner, Hospital: Siloam Hospitals Yogyakarta',
 'Doctor Name: dr. Joyo Santoso, SpPD, FINASIM, Specialization: Internal Medicine, Hospital: Siloam Hospitals Yogyakarta',
 'Doctor Name: dr. Sri Haryati, MKK, Specialization: General Practitioner, Hospital: Siloam Hospitals Yogyakarta',
 'Doctor Name: dr. Hendry Kurnia Jati, Specialization: General Practitioner, Hospital: Siloam Hospitals Yogyakarta',
 'Doctor Name: dr. Rr. Parasthity 

In [151]:
# Set up the Gemini chat model and a plain question-answering chain
# (no retrieval: the model only sees the system role and the user's question).
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

# Chat model client; the API key comes from the Settings instance `env`.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    api_key=env.GOOGLE_API_KEY,
)
# Two-message template: a fixed system role plus the `{question}` placeholder.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant that provides information for helping people for finding a doctor, especially about knowing where the doctor practices."),
        ("human", "{question}"),
    ]
)

# Pipe the rendered prompt into the model.
chain = prompt | llm

In [152]:
# Baseline run: `chain` has no retrieval step, so the reply is not grounded in
# the doctor records loaded into Qdrant.
chain.invoke({"question": "Saya ingin mencari dokter penyakit dalam di Yogyakarta, ada di rumah sakit mana saja?"})

AIMessage(content='Tentu, saya bisa bantu. Dokter penyakit dalam di Yogyakarta biasanya praktik di beberapa rumah sakit besar. Berikut ini adalah beberapa rumah sakit yang mungkin memiliki dokter spesialis penyakit dalam:\n\n*   **RSUP Dr. Sardjito:** Ini adalah rumah sakit umum pusat yang besar dan memiliki banyak dokter spesialis, termasuk penyakit dalam.\n*   **RS Bethesda:** Rumah sakit swasta ini juga dikenal memiliki dokter spesialis yang kompeten di berbagai bidang.\n*   **RS Panti Rapih:** Rumah sakit swasta lainnya yang cukup besar dan memiliki berbagai dokter spesialis.\n*   **RS Siloam Yogyakarta:** Bagian dari jaringan rumah sakit Siloam, yang biasanya memiliki standar pelayanan yang baik.\n*   **RS Akademik UGM:** Rumah sakit pendidikan yang terkait dengan Universitas Gadjah Mada, yang juga memiliki dokter spesialis.\n\nUntuk mendapatkan informasi yang lebih akurat dan jadwal praktik dokter yang terkini, disarankan untuk menghubungi langsung rumah sakit yang bersangkutan. 

In [153]:
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
# Shared state passed between the LangGraph nodes of the application.
class State(TypedDict):
    # The user's original question; read by get_context and generate.
    question: str
    # Doctor descriptions written by retrieve and read by generate.
    context: List[str]
    # Search keywords written by get_context and read by retrieve.
    search: str
    # Final model reply written by generate.
    answer: str

In [154]:
def get_context(state: State):
    """Turn the user's question into search keywords.

    Sends ``state["question"]`` to ``llm`` with a system prompt that asks for
    search keywords only, and returns the reply text as a partial state update
    of the form ``{"search": <reply text>}``.
    """
    extraction_rules = """
                You are an expert in extracting doctor search queries based on the following three fields:

                1. Doctor's full name (if mentioned)
                2. Medical specialization (e.g., cardiologist, pediatrician)
                3. Practice location (e.g., Jakarta, Siloam Kelapa Dua)
                
                Your task is to return only the relevant search keywords based on the user's question.

                If the user asks a question that includes these fields, your task is to return only the relevant search keywords based on the user's question.
                Do not return any extra explanation or unrelated information.
                

                If a field is not provided in the question, return it as an empty string.
                All values must be in English.
            """
    keyword_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", extraction_rules),
            ("human", "{question}"),
        ]
    )
    keyword_chain = keyword_prompt | llm
    reply = keyword_chain.invoke({"question": state["question"]})
    return {"search": reply.content}

In [155]:
def retrieve(state: State):
    """Look up doctor entries that match the extracted search keywords.

    Args:
        state: Graph state; ``state["search"]`` holds the query text.

    Returns:
        A partial state update ``{"context": [...]}`` holding the tool's results.
    """
    # search_doctor_list is a LangChain tool, so run it through .invoke();
    # calling the tool object directly is deprecated in langchain-core.
    retrieved_docs = search_doctor_list.invoke(state["search"])
    return {"context": retrieved_docs}

In [156]:
def generate(state: State):
    """Answer the user's question using the retrieved doctor entries.

    Args:
        state: Graph state; reads ``state["question"]`` and ``state["context"]``.

    Returns:
        A partial state update ``{"answer": <reply text>}``.
    """
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", """
                You are an assistant that extracts doctor search criteria from user questions
                to help find doctors available at Siloam Hospitals.
                You will generate resposne based on the following question and context.
                The response should be concise and focused on the doctor's name, specialization, and location.

                Context (doctor name, specializations, or locations):
                {context}
                If the context is empty, you can answer based on the question alone.
                Please ensure that the response is relevant to the question asked.
            """),
            ("human", "{question}"),
        ]
    )

    # Render the hits as a bullet list instead of interpolating the Python list
    # itself: a raw list shows up in the prompt as "['...', '...']", and an empty
    # one as "[]", which contradicts the "if the context is empty" instruction.
    context_text = "\n".join(f"- {entry}" for entry in (state["context"] or []))

    chain = prompt | llm
    result = chain.invoke({"question": state["question"], "context": context_text})
    return {"answer": result.content}


In [157]:
# Wire the three nodes in order: keyword extraction -> retrieval -> answer.
pipeline_steps = [get_context, retrieve, generate]
graph_builder = StateGraph(State)
graph_builder = graph_builder.add_sequence(pipeline_steps)
# Execution enters the graph at the first node of the sequence.
graph_builder.add_edge(START, "get_context")
graph = graph_builder.compile()

In [159]:
# Run the whole graph once; every field except the question starts out empty
# and is filled in by the nodes.
response = graph.invoke(
    {
        "question": "I want to find internal medicine doctors in Yogyakarta, can you show me all the doctors?",
        "context": [],
        "search": "",
        "answer": "",
    }
)
print(response["answer"])

  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


Here are the internal medicine doctors in Yogyakarta:
- dr. Catharina Triwikatmani, M. Kes, SpPD-KGEH at Siloam Hospitals Yogyakarta
- dr. Meita Ucche, SpPD, Subsp. H. Onk. M (K), FINASIM at Siloam Hospitals Yogyakarta
