### Base chatbot

In [None]:
import os
from dotenv import load_dotenv

load_dotenv()

from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM

template = """Question: {question}
Answer: Let's think step by step."""

OLLAMA_DEFAULT_MODEL = os.getenv("OLLAMA_DEFAULT_MODEL")
prompt = ChatPromptTemplate.from_template(template=template)
model = OllamaLLM(model=OLLAMA_DEFAULT_MODEL, temperature=0)

chain = prompt | model
response = chain.invoke({"question": "What is 2+2"})
print(response)

Step 1: Identify the question and understand what it is asking for. In this case, the question is about performing a basic arithmetic operation - addition. The question asks us to find the sum of two numbers, which are 2 and 2.

Step 2: Recall or recall previously learned information. We know that in mathematics, when we add two numbers together, we simply need to combine them. 

Step 3: Perform the operation. Now, let's perform the addition operation on the numbers 2 and 2. 

2 + 2 = 4

So, the answer is 4.


### Retrieve agent

In [2]:
from qdrant_client import QdrantClient

if os.getenv("IS_DOCKER"):
    qdrant_host = "localhost"
else:
    qdrant_host = os.getenv("QDRANT_HOST")
    
qdrant_url = f"http://{qdrant_host}:{os.getenv('QDRANT_PORT')}"
client = QdrantClient(url=qdrant_url)


In [3]:
from langchain_ollama.embeddings import OllamaEmbeddings

user_query = "why emdee"
ollama_embedding = OllamaEmbeddings(
    model=os.getenv("OLLAMA_EMBEDDING_MODEL"),
    temperature=0
)

embedding = ollama_embedding.embed_query(user_query)

In [4]:
result = client.query_points(
    collection_name=os.getenv("QDRANT_COLLECTION"),
    query=embedding,
    limit=40
)

result.points

[ScoredPoint(id='08ce9c10-d605-4ece-a147-ba4eefe6b584', version=1, score=0.6151722, payload={'h1': 'Treatments', 'h2': 'Bagaimana jika ingin melakukan pembatalan booking?', 'h3': 'FAQ', 'source_url': 'https://emdeeclinic.com/'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id='75dfda85-3a09-4450-99ed-df5cc524b0d7', version=1, score=0.5829917, payload={'h1': 'WHY EMDEE?', 'h3': 'MISI', 'h4': 'OUR JOURNEY', 'source_url': 'https://emdeeclinic.com/'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id='5bd20c3a-f9e8-44eb-8140-1bef570b7c1f', version=1, score=0.573333, payload={'h1': 'Solusi terbaik mengatasi permasalahan kulit wajahmu', 'h3': 'Purifying Wash', 'source_url': 'https://emdeeclinic.com/'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id='fae12349-f5c9-49c4-bf07-3a43d460c321', version=1, score=0.572529, payload={'h1': 'Solusi terbaik mengatasi permasalahan kulit wajahmu', 'h3': 'Purifying Cleanser', 'source_url': 'https://emdeeclinic.

In [5]:
for point in result.points:
    print(point.payload)

{'h1': 'Treatments', 'h2': 'Bagaimana jika ingin melakukan pembatalan booking?', 'h3': 'FAQ', 'source_url': 'https://emdeeclinic.com/'}
{'h1': 'WHY EMDEE?', 'h3': 'MISI', 'h4': 'OUR JOURNEY', 'source_url': 'https://emdeeclinic.com/'}
{'h1': 'Solusi terbaik mengatasi permasalahan kulit wajahmu', 'h3': 'Purifying Wash', 'source_url': 'https://emdeeclinic.com/'}
{'h1': 'Solusi terbaik mengatasi permasalahan kulit wajahmu', 'h3': 'Purifying Cleanser', 'source_url': 'https://emdeeclinic.com/'}
{'h1': 'WHY EMDEE?', 'h3': 'MISI', 'h4': 'OUR JOURNEY', 'source_url': 'https://emdeeclinic.com/'}
{'h1': 'WHY EMDEE?', 'h3': 'MISI', 'h4': 'OUR JOURNEY', 'source_url': 'https://emdeeclinic.com/'}
{'h1': 'OPERATIONAL HOUR', 'source_url': 'https://emdeeclinic.com/'}
{'h1': 'Solusi terbaik mengatasi permasalahan kulit wajahmu', 'h3': 'Moist Oil Free', 'source_url': 'https://emdeeclinic.com/'}
{'h1': 'OPERATIONAL HOUR', 'source_url': 'https://emdeeclinic.com/'}
{'h2': 'SEE OUR RESULT', 'h3': 'YOUR FEEDBAC

In [6]:
template = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:"""

prompt = ChatPromptTemplate.from_template(template=template)
chain = prompt | model
response = chain.invoke({"question": user_query, "context": result})
print(response)


The answer is that the user has a list of ScoredPoints with various scores and corresponding payloads. The ScoredPoints are sorted by their score in descending order, meaning the points with the highest score appear first. In this case, the top three ScoredPoints have the following scores and payloads:

1. ScoredPoint(id='0835f371-60d1-49c2-9f7e-7f3fdedd343d', score=0.41610926, payload={'h1': 'OPERATIONAL HOUR', 'h2': 'MALANG', 'source_url': 'https://emdeeclinic.com/'})
2. ScoredPoint(id='7b9707da-06d9-4983-b37d-6ae7bf0235e0', score=0.38825145, payload={'h1': 'OPERATIONAL HOUR', 'h3': 'BB Sunprotect Care - White', 'source_url': 'https://emdeeclinic.com/'})
3. ScoredPoint(id='7716056d-758c-472b-94f5-de0a65408e36', score=0.39730206, payload={'h1': 'OPERATIONAL HOUR', 'source_url': 'https://emdeeclinic.com/'})

These three ScoredPoints have the highest scores and are therefore considered to be the most relevant results for the given query.


### Using reranker

In [7]:
def extract_content(payload):
    """
    Flattens the hierarchical payload into a single string 
    and keeps a dictionary of specific fields.
    """
    fields = ['h1', 'h2', 'h3', 'h4', 'h5']
    
    full_text = " ".join([str(payload.get(k, '')) for k in fields if k in payload])
    
    return {
        'full_text': full_text.strip(),
        'h1': payload.get('h1', ''),
        'h2': payload.get('h2', ''),
        'content_fields': payload 
    }

In [8]:
import pandas as pd
from rapidfuzz import fuzz

class RerankerFeatureExtractor:
    def __init__(self):
        pass

    def compute_features(self, query: str, doc_data: dict, original_score: float):
        """
        Creates a dictionary of numerical features for a (Query, Doc) pair.
        """
        q_str = query.lower()
        full_text = doc_data['full_text'].lower()
        h1_text = str(doc_data.get('h1', '')).lower()
        h2_text = str(doc_data.get('h2', '')).lower()
        
        features = {}
        
        # --- Feature Group 1: The Original Signal ---
        features['qdrant_score'] = original_score
        
        # --- Feature Group 2: Lengths ---
        features['query_len'] = len(q_str)
        features['doc_len'] = len(full_text)
        
        # --- Feature Group 3: Lexical Overlap (Exact Matches) ---
        # Does the specific query word appear in the text?
        features['word_overlap_count'] = sum(1 for word in q_str.split() if word in full_text)
        
        # --- Feature Group 4: Structural Importance (Header Matching) ---
        features['match_in_h1'] = 1 if q_str in h1_text and h1_text else 0
        features['match_in_h2'] = 1 if q_str in h2_text and h2_text else 0
        
        # --- Feature Group 5: Fuzzy Matching (Typos/Partial) ---
        features['fuzzy_ratio'] = fuzz.ratio(q_str, full_text)
        features['fuzzy_partial_ratio'] = fuzz.partial_ratio(q_str, full_text)
        features['fuzzy_token_sort'] = fuzz.token_sort_ratio(q_str, full_text)
        
        return features

    def transform(self, query: str, scored_points):
        X_list = []
        for point in scored_points:
            doc_data = extract_content(point.payload)
            feats = self.compute_features(query, doc_data, point.score)
            X_list.append(feats)
        return pd.DataFrame(X_list)

In [9]:
import numpy as np

def generate_training_data():
    print("Generating dummy training data...")
    extractor = RerankerFeatureExtractor()
    
    # We create a fake dataset: (Query, Document, Label)
    # Label 1 = Relevant, Label 0 = Irrelevant
    train_queries = ["cancel booking", "operational hour", "skin treatment"]
    
    X_train = []
    y_train = []
    groups = [] # XGBoost needs to know which rows belong to the same query
    
    for q in train_queries:
        # We assume we have 5 docs per query for training
        q_group_size = 0
        
        # Create a "Relevant" doc (high overlap)
        good_doc = {'full_text': f"Details about {q}", 'h1': q, 'h2': 'details'}
        feat_good = extractor.compute_features(q, good_doc, 0.9)
        X_train.append(feat_good)
        y_train.append(1) # Label 1
        q_group_size += 1
        
        # Create "Irrelevant" docs (random noise)
        for _ in range(4):
            bad_doc = {'full_text': "Random text unrelated", 'h1': 'Misc', 'h2': 'Info'}
            feat_bad = extractor.compute_features(q, bad_doc, 0.4)
            X_train.append(feat_bad)
            y_train.append(0) # Label 0
            q_group_size += 1
            
        groups.append(q_group_size)

    return pd.DataFrame(X_train), np.array(y_train), np.array(groups)

X_train, y_train, groups = generate_training_data()

Generating dummy training data...


In [10]:
display(X_train.head())
display(y_train[:5])
groups[:5]

Unnamed: 0,qdrant_score,query_len,doc_len,word_overlap_count,match_in_h1,match_in_h2,fuzzy_ratio,fuzzy_partial_ratio,fuzzy_token_sort
0,0.9,14,28,2,1,0,66.666667,100.0,66.666667
1,0.4,14,21,0,0,0,28.571429,35.714286,28.571429
2,0.4,14,21,0,0,0,28.571429,35.714286,28.571429
3,0.4,14,21,0,0,0,28.571429,35.714286,28.571429
4,0.4,14,21,0,0,0,28.571429,35.714286,28.571429


array([1, 0, 0, 0, 0])

array([5, 5, 5])

In [11]:
import xgboost as xgb

ranker = xgb.XGBRanker(
    tree_method="hist", 
    lambdarank_pair_method="topk",
    objective="rank:ndcg", 
    eval_metric="ndcg",
    learning_rate=0.1,
    n_estimators=100
)

ranker.fit(X_train, y_train, group=groups)

print("\nXGBoost Reranker trained successfully!")
print("Feature Importances:", ranker.feature_importances_)


XGBoost Reranker trained successfully!
Feature Importances: [1. 0. 0. 0. 0. 0. 0. 0. 0.]


In [12]:
def rerank_results(user_query: str, qdrant_results, model: xgb.XGBRanker):
    if not qdrant_results:
        return []

    # 1. Feature Extraction
    extractor = RerankerFeatureExtractor()
    features_df = extractor.transform(user_query, qdrant_results)
    
    # 2. Predict Scores
    # XGBoost outputs a raw score (logit). Higher is better.
    rerank_scores = model.predict(features_df)
    
    # 3. Attach new scores to results
    reranked_results = []
    for i, point in enumerate(qdrant_results):
        # We create a clean dict or object for the final output
        reranked_results.append({
            "original_id": point.id,
            "payload": point.payload,
            "original_score": point.score,
            "reranker_score": float(rerank_scores[i]), # The XGBoost score
            "features_used": features_df.iloc[i].to_dict() # Optional: for debugging
        })
        
    # 4. Sort by new reranker score (Descending)
    reranked_results.sort(key=lambda x: x['reranker_score'], reverse=True)
    
    return reranked_results

In [14]:
user_query = "why emdee"

# Run the reranker
final_results = rerank_results(user_query, result.points, ranker)

# Display Top 3 Results
print(f"\n--- Top 3 Results for query: '{user_query}' ---")
for i, res in enumerate(final_results[:3]):
    print(f"\nRank {i+1}:")
    print(f"  ID: {res['original_id']}")
    print(f"  Original Qdrant Score: {res['original_score']:.4f}")
    print(f"  New XGBoost Score:     {res['reranker_score']:.4f}")
    print(f"  H1: {res['payload'].get('h1')}")
    print(f"  H2: {res['payload'].get('h2')}")


--- Top 3 Results for query: 'why emdee' ---

Rank 1:
  ID: 08ce9c10-d605-4ece-a147-ba4eefe6b584
  Original Qdrant Score: 0.6152
  New XGBoost Score:     -0.9236
  H1: Treatments
  H2: Bagaimana jika ingin melakukan pembatalan booking?

Rank 2:
  ID: 75dfda85-3a09-4450-99ed-df5cc524b0d7
  Original Qdrant Score: 0.5830
  New XGBoost Score:     -0.9236
  H1: WHY EMDEE?
  H2: None

Rank 3:
  ID: 5bd20c3a-f9e8-44eb-8140-1bef570b7c1f
  Original Qdrant Score: 0.5733
  New XGBoost Score:     -0.9236
  H1: Solusi terbaik mengatasi permasalahan kulit wajahmu
  H2: None


In [15]:
import os
from dotenv import load_dotenv

load_dotenv()

from langchain_community.chat_models import ChatDeepInfra
from langchain_core.prompts import ChatPromptTemplate

judge = ChatDeepInfra(name="openai/gpt-oss-20b", temperature=0, deepinfra_api_token=os.getenv("DEEPINFRA_API_TOKEN"))
def ask_llm_to_judge(query, document_text):
    template = """
    Query: {query}
    Document: {document_text}
    
    Task: Is this document relevant to the query? 
    Reply ONLY with 'YES' or 'NO'.
    """

    prompt = ChatPromptTemplate.from_template(template=template)
    chain = prompt | judge
    
    # --- REAL LOGIC WOULD GO HERE ---
    response = chain.invoke({
        "query": query, 
        "document_text": document_text
    })

    print(response)

ask_llm_to_judge(user_query, result.points)

content='YES' additional_kwargs={} response_metadata={'token_usage': {'prompt_tokens': 3544, 'total_tokens': 3546, 'completion_tokens': 2, 'estimated_cost': 0.0014184000000000002, 'prompt_tokens_details': None}, 'model': 'meta-llama/Llama-2-70b-chat-hf', 'finish_reason': 'stop'} id='lc_run--b9f22809-e7f5-4191-8be7-c248a6d8e82e-0'
