### Base chatbot

In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

from langchain_core.prompts import ChatPromptTemplate
from langchain_community.chat_models import ChatDeepInfra

template = """Question: {question}
Answer: Let's think step by step."""

DEEPINFRA_CHAT_MODEL = os.getenv("DEEPINFRA_CHAT_MODEL")
DEEPINFRA_API_TOKEN = os.getenv("DEEPINFRA_API_TOKEN")
prompt = ChatPromptTemplate.from_template(template=template)
model = ChatDeepInfra(model=DEEPINFRA_CHAT_MODEL, temperature=0, deepinfra_api_token=DEEPINFRA_API_TOKEN)

chain = prompt | model
response = chain.invoke({"question": "What is 2+2"})
print(response.content)

  from .autonotebook import tqdm as notebook_tqdm


**Step 1:** Identify the numbers you need to add.  
- The first number is **2**.  
- The second number is **2**.

**Step 2:** Perform the addition.  
- Add the first number to the second: \(2 + 2\).

**Step 3:** Calculate the result.  
- \(2 + 2 = 4\).

**Answer:** The sum of 2 and 2 is **4**.


### Retrieve agent

In [2]:
from qdrant_client import QdrantClient

if os.getenv("IS_DOCKER"):
    qdrant_host = "localhost"
else:
    qdrant_host = os.getenv("QDRANT_HOST")
    
qdrant_url = f"http://{qdrant_host}:{os.getenv('QDRANT_PORT')}"
client = QdrantClient(url=qdrant_url)

In [3]:
from langchain_community.embeddings import DeepInfraEmbeddings

user_query = "berapa harga face peeling?"
deepinfra_embedding = DeepInfraEmbeddings(
    model_id=os.getenv("DEEPINFRA_EMBEDDING_MODEL"),
    embed_instruction="",
    query_instruction=""
)

embedding = deepinfra_embedding.embed_query(user_query)

In [4]:
result = client.query_points(
    collection_name=os.getenv("QDRANT_COLLECTION"),
    query=embedding,
    limit=50
)

In [5]:
template = """Anda adalah asisten AI untuk klinik kecantikan (ERHA/Dermies).
Gunakan konteks berikut untuk menjawab pertanyaan pengguna dengan akurat dan profesional.

Aturan:
1. Jika konteks tidak memuat jawaban, katakan "Maaf, informasi tersebut tidak tersedia dalam dokumen kami."
2. Jawablah dalam Bahasa Indonesia yang sopan.
3. Usahakan jawaban singkat dan padat (maksimal 3-4 kalimat).
4. Jika menyebutkan harga, pastikan sesuai dengan yang tertulis di dokumen.

Pertanyaan: {question} 

Konteks: 
{context} 

Jawaban:
"""

prompt = ChatPromptTemplate.from_template(template=template)
chain = prompt | model
response = chain.invoke({"question": user_query, "context": result})
print(response)


content='Harga face peeling di klinik kami mulai dari **Rp525.000** per sesi.  \nHarga tersebut berlaku untuk berbagai jenis peeling wajah seperti Derma Peeling, Acne Peeling, dan Light Peeling.  \nSilakan hubungi kami untuk konsultasi dan penyesuaian harga sesuai area dan kebutuhan kulit Anda.' additional_kwargs={} response_metadata={'token_usage': {'prompt_tokens': 18376, 'total_tokens': 18579, 'completion_tokens': 203, 'estimated_cost': 0.0005797, 'prompt_tokens_details': None}, 'model': 'openai/gpt-oss-20b', 'finish_reason': 'stop'} id='lc_run--cc111916-3c25-495d-b0fa-a79420ee8415-0'


### Using reranker

In [6]:
import mlflow

tracking_uri = "http://127.0.0.1:5050"

mlflow.set_tracking_uri(tracking_uri)
client = mlflow.tracking.MlflowClient(tracking_uri=tracking_uri, registry_uri=tracking_uri)

versions = client.get_latest_versions("XGBoostReranker", stages=["Staging"])
latest_version = versions[0].version
MODEL_URI = f"models:/XGBoostReranker/{latest_version}"
reranker_model = mlflow.xgboost.load_model(MODEL_URI)

  versions = client.get_latest_versions("XGBoostReranker", stages=["Staging"])
Downloading artifacts: 100%|██████████| 8/8 [00:00<00:00, 12.71it/s]


In [7]:
import pandas as pd

data = []
for point in result.points:
    payload = point.payload
    data.append({
        "query_text": user_query,
        "doc_id": point.id,
        "full_text": payload.get('full_text', ''),
        "h1": payload.get('h1', ''),
        "qdrant_score": point.score,
        "payload": payload
    })

df_candidates = pd.DataFrame(data)
df_candidates.head()

Unnamed: 0,query_text,doc_id,full_text,h1,qdrant_score,payload
0,berapa harga face peeling?,85700d24-cdf6-4da3-9c1c-715698d22bf1,Derma Peeling for Face\n\nPeeling wajah yang b...,724126666-ERHA-Ultimate-Pricelist-24.pdf,0.563953,{'full_text': 'Derma Peeling for Face Peeling...
1,berapa harga face peeling?,7d5e9045-37ae-4260-9bc7-ba9f483c4759,ACNE CENTER\n\nSINGLE TREATMENT\n\nAcne Peelin...,724126666-ERHA-Ultimate-Pricelist-24.pdf,0.544131,{'full_text': 'ACNE CENTER SINGLE TREATMENT ...
2,berapa harga face peeling?,0d3732ec-5d84-4387-a339-3372fe9cf509,"MANFAAT\n\nKulit wajah menjadi lebih halus, se...",724126666-ERHA-Ultimate-Pricelist-24.pdf,0.542678,{'full_text': 'MANFAAT Kulit wajah menjadi le...
3,berapa harga face peeling?,e49cf3e0-28dc-4ce1-8612-1ee18ef39d0d,DERMIAS MAX\n\nSingle Advanced Prime Purifying...,661627558-Katalog-Dermies-Max-by-Erha-Hiress.pdf,0.534818,{'full_text': 'DERMIAS MAX Single Advanced Pr...
4,berapa harga face peeling?,a0991df4-63ec-4d52-b356-5471a0ff544c,e Jelly Ice Facial for Acne\n\nStart From Rp 1...,661627558-Katalog-Dermies-Max-by-Erha-Hiress.pdf,0.515347,{'full_text': 'e Jelly Ice Facial for Acne St...


In [8]:
from rapidfuzz import fuzz

features = pd.DataFrame()
        
# Normalize text
df_candidates['q_lower'] = df_candidates['query_text'].astype(str).str.lower()
df_candidates['doc_lower'] = df_candidates['full_text'].astype(str).str.lower()
df_candidates['h1_lower'] = df_candidates['h1'].astype(str).str.lower()

# 1. Vector Score
features['qdrant_score'] = df_candidates['qdrant_score']

# 2. Lengths
features['doc_len'] = df_candidates['doc_lower'].apply(len)
features['query_len'] = df_candidates['q_lower'].apply(len)

# 3. Word Overlap
def word_overlap(row):
    q_tokens = set(row['q_lower'].split())
    d_tokens = set(row['doc_lower'].split())
    if not q_tokens: return 0.0
    return len(q_tokens.intersection(d_tokens)) / len(q_tokens)
features['word_overlap'] = df_candidates.apply(word_overlap, axis=1)

# 4. Header Match
features['match_in_h1'] = df_candidates.apply(
    lambda x: fuzz.partial_ratio(x['q_lower'], x['h1_lower']), axis=1
)

# 5. Fuzzy Match
features['fuzzy_ratio'] = df_candidates.apply(
    lambda x: fuzz.ratio(x['q_lower'], x['doc_lower'][:500]), axis=1
)

# 6. Price Heuristic
def price_relevance(row):
    is_price_query = any(w in row['q_lower'] for w in ['harga', 'biaya', 'price', 'rp'])
    has_price_info = 'rp' in row['doc_lower'] or 'rp.' in row['doc_lower']
    return 1 if (is_price_query and has_price_info) else 0
features['is_price_match'] = df_candidates.apply(price_relevance, axis=1)
features.head()

Unnamed: 0,qdrant_score,doc_len,query_len,word_overlap,match_in_h1,fuzzy_ratio,is_price_match
0,0.563953,1065,26,0.25,38.461538,8.745247,1
1,0.544131,874,26,0.25,38.461538,9.505703,1
2,0.542678,843,26,0.25,38.461538,9.505703,1
3,0.534818,1000,26,0.25,36.363636,8.745247,1
4,0.515347,1182,26,0.0,36.363636,8.745247,1


In [9]:
scores = reranker_model.predict(features)
scores

array([-0.7605543 , -0.09042907, -0.05367904, -2.290413  , -1.2410139 ,
       -1.6172218 , -1.5081747 , -2.0079057 , -2.274274  , -2.3299387 ,
       -2.2524097 , -1.3685997 , -1.9656116 , -1.430646  , -1.5279803 ,
       -1.3350501 , -1.7380353 , -1.8490493 , -1.3685997 , -1.8105885 ,
       -1.7330595 , -1.430646  , -1.3887334 , -2.3299387 , -1.6099395 ,
       -1.3205595 , -2.1246567 , -1.939732  , -2.2740028 , -2.2836294 ,
       -1.9686649 , -1.939732  , -1.6648858 , -1.939732  , -1.939732  ,
       -1.6099395 , -2.1964738 , -1.549048  , -1.362472  , -2.2061005 ,
       -1.4400009 , -1.9699906 , -1.4715192 , -1.2765802 , -1.6648858 ,
       -2.170698  , -1.9686649 , -1.4400009 , -2.261765  , -2.184236  ],
      dtype=float32)

In [10]:
df_candidates['rerank_score'] = scores
df_candidates = df_candidates.sort_values(by='rerank_score', ascending=False)
df_candidates.head()
reranked_results = df_candidates.to_dict(orient='records')
top_docs = reranked_results[:5]
top_docs

[{'query_text': 'berapa harga face peeling?',
  'doc_id': '0d3732ec-5d84-4387-a339-3372fe9cf509',
  'full_text': 'MANFAAT\n\nKulit wajah menjadi lebih halus, sehat, kenyal, cerah, dan terlihat segar.\n\n&)\n\nMulai dari Rp525.000\n\nPROSEDUR\n\nCleansing - Anestesi- Treatment - Post treatment\n\na)\n\n69\n\nERHA ULTIMATE\n\nMAKE OVER CENTER\n\nSINGLE TREATMENT\n\nLight Peeling for Face\n\nPeeling wajah menggunakan cairan peeling dengan konsentrasi ringan yang dapat mengangkat sel kulit mati, memperbaiki tekstur kulit, dan menyamarkan kerut dan garis halus serta meratakan warna kulit. Kulit wajah akan menjadi lebih halus, sehat, cerah, dan segar.\n\nMANFAAT\n\n30 menitanestesi 20 menit treatment\n\n“T)\n\nMembantu mengatasi kulit kusam dan tanda-tanda penuaan. Kulit wajah akan menjadi lebih halus, sehat, cerah, dan segar.\n\n@) Rp525.000\n\nPROSEDUR\n\nCleansing - Anestesi- Treatment - Post treatment\n\n30 menit anestesi 15-30 menit treatment\n\n‘T)\n\n(tergantung luas area treatment)\n

In [11]:
template = """Anda adalah asisten AI untuk klinik kecantikan (ERHA/Dermies).
Gunakan konteks berikut untuk menjawab pertanyaan pengguna dengan akurat dan profesional.

Aturan:
1. Jika konteks tidak memuat jawaban, katakan "Maaf, informasi tersebut tidak tersedia dalam dokumen kami."
2. Jawablah dalam Bahasa Indonesia yang sopan.
3. Usahakan jawaban singkat dan padat (maksimal 3-4 kalimat).
4. Jika menyebutkan harga, pastikan sesuai dengan yang tertulis di dokumen.

Pertanyaan: {question} 

Konteks: 
{context} 

Jawaban:
"""

prompt = ChatPromptTemplate.from_template(template=template)
model = ChatDeepInfra(model=DEEPINFRA_CHAT_MODEL, temperature=0, deepinfra_api_token=DEEPINFRA_API_TOKEN)
chain = prompt | model

print("Generating Answer")
response = chain.invoke({"question": user_query, "context": top_docs})
response.content

Generating Answer


'Harga face peeling di klinik kami mulai dari **Rp\u202f525.000**.'