# Demo: Medical Concept Embedding Evaluation

This notebook demonstrates a minimal, self-contained evaluation pipeline for medical concept embeddings. It shows:
- Creating a small sample dataset (queries + candidate synonyms + relevance)
- Using SentenceTransformers or OpenAI Embeddings as embedding providers (if available)
- Computing query and candidate embeddings, then ranking each query's candidates by cosine similarity
- Computing NDCG@k, MSE, and Spearman correlation per query, then averaging each metric across queries

Notes:
- If you plan to run OpenAI embeddings, set the environment variable OPENAI_API_KEY first.
- The notebook is intentionally self-contained so you can run it even if you don't have the rest of the repo.

## Install dependencies (run in a notebook cell)

If you don't have the libraries already, uncomment and run the following cell. Installation may take a few minutes; the sentence-transformers model itself is downloaded the first time it is loaded.

In [None]:
# Optional one-time setup: uncomment the next line to install the packages
# imported by the cells below.
# !pip install -q sentence-transformers openai scipy scikit-learn pandas numpy
# If you use Pinecone elsewhere: !pip install -q pinecone-client
# No-op statement; presumably here so the cell still runs while the install
# lines stay commented out.
pass

In [None]:
import os
import time
from typing import List, Any

import numpy as np
import pandas as pd
from scipy.stats import spearmanr
from sklearn.metrics import mean_squared_error

try:
    from sentence_transformers import SentenceTransformer
except Exception:
    SentenceTransformer = None

try:
    import openai
except Exception:
    openai = None

def dcg_at_k(relevances, k):
    """Return the discounted cumulative gain of the first ``k`` relevances.

    Uses the exponential gain ``2**rel - 1`` and a ``log2(rank + 1)`` discount,
    with ranks starting at 1. ``relevances`` must already be in ranked order.

    Args:
        relevances: Graded relevance labels, best-ranked item first.
        k: Cut-off rank. ``None`` means no cut-off; values ``<= 0`` select
            nothing.

    Returns:
        The DCG as a Python float; ``0.0`` when no item falls within the
        cut-off.
    """
    # A negative k would otherwise slice from the END of the ranking
    # (``[:k]``) and silently score the wrong items.
    if k is not None and k <= 0:
        return 0.0
    # Work in float so integer label arrays cannot overflow in ``2 ** rel``
    # or be rejected for negative exponents.
    top = np.asarray(relevances, dtype=float)[:k]
    if top.size == 0:
        return 0.0
    discounts = np.log2(np.arange(2, top.size + 2))
    return float(np.sum((2.0 ** top - 1.0) / discounts))

def ndcg_at_k_per_query(true_rels, pred_scores, k):
    """Return NDCG@k for a single query.

    Candidates are ranked by descending ``pred_scores``; the DCG of that
    ranking is divided by the DCG of the ideal (relevance-sorted) ranking.

    Args:
        true_rels: Ground-truth relevance label per candidate.
        pred_scores: Predicted score per candidate, aligned with ``true_rels``.
        k: Cut-off rank passed to ``dcg_at_k``.

    Returns:
        ``0.0`` when there are no candidates or the ideal DCG is zero,
        otherwise ``dcg / ideal_dcg``.
    """
    if len(true_rels) == 0:
        return 0.0

    labels = np.asarray(true_rels)
    # argsort is ascending, so reverse it to put the highest score first.
    ranking = np.argsort(pred_scores)[::-1]
    achieved_dcg = dcg_at_k(labels[ranking].tolist(), k)

    best_possible = np.sort(true_rels)[::-1]
    ideal_dcg = dcg_at_k(best_possible.tolist(), k)

    if ideal_dcg == 0.0:
        return 0.0
    return achieved_dcg / ideal_dcg

class SentenceTransformersEmbedder:
    """Embedding provider backed by a sentence-transformers model.

    ``encode`` returns L2-normalised rows, so the dot product of two rows is
    their cosine similarity.
    """

    def __init__(self, model_name_or_path: str):
        """Load the model.

        Args:
            model_name_or_path: Passed unchanged to ``SentenceTransformer``.

        Raises:
            RuntimeError: If ``SentenceTransformer`` is ``None`` (the import
                of sentence-transformers failed).
        """
        if SentenceTransformer is None:
            raise RuntimeError("sentence-transformers not installed")
        print(f"Loading sentence-transformers model: {model_name_or_path}")
        self.model = SentenceTransformer(model_name_or_path)

    def encode(self, texts: List[str], batch_size: int = 64, show_progress_bar: bool = True) -> np.ndarray:
        """Embed ``texts`` and return unit-length float32 vectors.

        Args:
            texts: Strings to embed.
            batch_size: Forwarded to the model's ``encode``.
            show_progress_bar: Forwarded to the model's ``encode``.

        Returns:
            A 2-D float32 array with one row per text. An empty input yields
            an array of shape ``(0, 0)`` without calling the model.
        """
        # An empty batch comes back 1-D, which would make the axis=1 norm
        # below raise; short-circuit instead.
        if len(texts) == 0:
            return np.zeros((0, 0), dtype=np.float32)
        emb = self.model.encode(texts, batch_size=batch_size, show_progress_bar=show_progress_bar)
        arr = np.asarray(emb, dtype=np.float32)
        norms = np.linalg.norm(arr, axis=1, keepdims=True)
        # Leave all-zero vectors untouched rather than dividing by zero.
        norms[norms == 0] = 1.0
        return arr / norms

class OpenAIEmbedder:
    """Embedding provider backed by the OpenAI embeddings endpoint.

    Works with both generations of the ``openai`` package: the client-based
    API (``openai.OpenAI``, version 1.0 and later) and the legacy module-level
    ``openai.Embedding`` API. ``encode`` returns L2-normalised rows, so the
    dot product of two rows is their cosine similarity.
    """

    def __init__(self, model_name: str = "text-embedding-3-small"):
        """Configure the API key and, when available, a v1 client.

        Args:
            model_name: Embedding model identifier sent with every request.

        Raises:
            RuntimeError: If the ``openai`` import failed or the
                ``OPENAI_API_KEY`` environment variable is unset or empty.
        """
        if openai is None:
            raise RuntimeError("openai package not installed")
        self.model = model_name
        api_key = os.environ.get("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError("OPENAI_API_KEY environment variable not set")
        openai.api_key = api_key
        # openai>=1.0 removed ``openai.Embedding`` in favour of a client
        # object; older releases only have the module-level API.
        client_cls = getattr(openai, "OpenAI", None)
        self._client = client_cls(api_key=api_key) if client_cls is not None else None

    def _embed_batch(self, batch: List[str]) -> list:
        """Request embeddings for one batch and return them as lists of floats."""
        client = getattr(self, "_client", None)
        if client is not None:
            resp = client.embeddings.create(model=self.model, input=batch)
            return [item.embedding for item in resp.data]
        # Legacy (<1.0) module-level API.
        resp = openai.Embedding.create(model=self.model, input=batch)
        return [d['embedding'] for d in resp['data']]

    def encode(self, texts: List[str], batch_size: int = 64, show_progress_bar: bool = False) -> np.ndarray:
        """Embed ``texts`` in batches and return unit-length float32 vectors.

        Args:
            texts: Strings to embed.
            batch_size: Number of texts sent per API request.
            show_progress_bar: Accepted for interface parity with the
                sentence-transformers encoder; not used.

        Returns:
            A 2-D float32 array with one row per text. An empty input yields
            an array of shape ``(0, 0)`` without calling the API.
        """
        # No requests would be made, leaving a 1-D empty array that would
        # make the axis=1 norm below raise; short-circuit instead.
        if len(texts) == 0:
            return np.zeros((0, 0), dtype=np.float32)
        embeddings = []
        for start in range(0, len(texts), batch_size):
            if start:
                # Brief pause between consecutive requests.
                time.sleep(0.1)
            embeddings.extend(self._embed_batch(texts[start : start + batch_size]))
        arr = np.asarray(embeddings, dtype=np.float32)
        norms = np.linalg.norm(arr, axis=1, keepdims=True)
        # Leave all-zero vectors untouched rather than dividing by zero.
        norms[norms == 0] = 1.0
        return arr / norms

def build_embedding_encoder(model_name: str, model_type: str):
    """Construct the embedder that matches ``model_type``.

    Args:
        model_name: Model identifier handed to the embedder's constructor.
        model_type: Provider name, compared case-insensitively. ``"openai"``
            and ``"openai-embeddings"`` select ``OpenAIEmbedder``; every other
            value (including ``None`` or an empty string) selects
            ``SentenceTransformersEmbedder``.

    Returns:
        The constructed embedder instance.
    """
    provider = model_type.lower() if model_type else ""
    openai_aliases = ("openai", "openai-embeddings")
    # Anything that is not an OpenAI alias is treated as a
    # sentence-transformers model name or path.
    embedder_cls = OpenAIEmbedder if provider in openai_aliases else SentenceTransformersEmbedder
    return embedder_cls(model_name)

def _scale_relevance(rels: np.ndarray) -> np.ndarray:
    """Min-max scale one query's relevance labels, guarding a zero range."""
    top = rels.max()
    if not top > 0:
        # No positive label: leave the values as they are.
        return rels
    low = rels.min()
    span = top - low
    if span == 0:
        # All labels equal and positive: (x - min) / (max - min) would be
        # 0/0 = NaN, so map the constant label to 1.0 instead.
        return np.ones_like(rels)
    return (rels - low) / span


def evaluate(
    df: pd.DataFrame,
    encoder: Any,
    ndcg_k: int = 10,
    batch_size: int = 64,
    query_id_col: str = "query_id",
    query_col: str = "query_text",
    candidate_col: str = "candidate_text",
    relevance_col: str = "relevance",
) -> dict:
    """Score an embedding encoder on a query/candidate relevance table.

    Each distinct query and each distinct candidate text is embedded once.
    For every query, its candidates are scored by the dot product of the
    candidate and query vectors (cosine similarity when the encoder returns
    unit-length rows), and three per-query metrics are computed:

    * NDCG@``ndcg_k`` of the ranking induced by the scores,
    * MSE between the scores and the relevance labels scaled per query,
    * Spearman correlation between labels and scores (``0.0`` when the
      labels are constant, the result is NaN, or the computation fails).

    Args:
        df: One row per (query, candidate) pair.
        encoder: Object with ``encode(texts, batch_size=...)`` returning a
            2-D array with one row per text.
        ndcg_k: Cut-off rank for NDCG.
        batch_size: Forwarded to ``encoder.encode``.
        query_id_col: Column holding the query identifier.
        query_col: Column holding the query text.
        candidate_col: Column holding the candidate text.
        relevance_col: Column holding the relevance label (cast to float).

    Returns:
        Dict with keys ``f"ndcg@{ndcg_k}"``, ``"mse"``, ``"spearman"`` (each
        the mean over evaluated queries, ``0.0`` if none) and
        ``"queries_evaluated"``.
    """
    # One text per query id; the first row seen for an id wins.
    queries = df[[query_id_col, query_col]].drop_duplicates(subset=[query_id_col]).set_index(query_id_col)[query_col].to_dict()
    candidates = df[[candidate_col]].drop_duplicates().reset_index(drop=True)[candidate_col].tolist()
    cand_idx = {text: i for i, text in enumerate(candidates)}
    query_texts = list(queries.values())
    query_ids = list(queries.keys())
    query_emb = encoder.encode(query_texts, batch_size=batch_size)
    candidate_emb = encoder.encode(candidates, batch_size=batch_size)
    ndcgs, mses, spearmans = [], [], []
    for i, qid in enumerate(query_ids):
        qvec = query_emb[i]
        subset = df[df[query_id_col] == qid]
        if subset.empty:
            continue
        cand_texts = subset[candidate_col].tolist()
        true_rels = subset[relevance_col].astype(float).tolist()
        cand_indices = [cand_idx[t] for t in cand_texts]
        cand_vecs = candidate_emb[cand_indices]
        pred_scores = (cand_vecs @ qvec).astype(float).tolist()
        true_rels_arr = np.asarray(true_rels, dtype=float)
        true_norm = _scale_relevance(true_rels_arr)
        mse = float(mean_squared_error(true_norm.tolist(), pred_scores))
        mses.append(mse)
        # Best-effort: a rank correlation that is undefined or fails to
        # compute counts as 0.0 instead of aborting the whole evaluation.
        try:
            if np.unique(true_rels_arr).size > 1:
                rho, _ = spearmanr(true_rels_arr, pred_scores)
                rho = 0.0 if np.isnan(rho) else float(rho)
            else:
                rho = 0.0
        except Exception:
            rho = 0.0
        spearmans.append(rho)
        ndcg = ndcg_at_k_per_query(true_rels, pred_scores, ndcg_k)
        ndcgs.append(ndcg)
    results = {
        f"ndcg@{ndcg_k}": float(np.mean(ndcgs)) if ndcgs else 0.0,
        "mse": float(np.mean(mses)) if mses else 0.0,
        "spearman": float(np.mean(spearmans)) if spearmans else 0.0,
        "queries_evaluated": len(ndcgs),
    }
    return results


In [None]:
# Build a small sample dataset.
# Judgments are grouped per query as (query_id, query_text, [(candidate_text, relevance 0-3), ...])
# and then flattened into one row per (query, candidate) pair.
_sample_judgments = [
    ("q1", "acute myocardial infarction", [
        ("heart attack", 3),
        ("myocardial infarct", 2),
        ("chest pain", 1),
        ("diabetes", 0),
    ]),
    ("q2", "hypertension", [
        ("high blood pressure", 3),
        ("HTN", 2),
        ("elevated BP", 2),
        ("headache", 0),
    ]),
    ("q3", "type 2 diabetes mellitus", [
        ("T2DM", 3),
        ("adult-onset diabetes", 2),
        ("insulin-dependent diabetes", 0),
    ]),
]
data = [
    (qid, qtext, cand_text, rel)
    for qid, qtext, judged in _sample_judgments
    for cand_text, rel in judged
]
df = pd.DataFrame(data, columns=["query_id", "query_text", "candidate_text", "relevance"])
df.head()

In [None]:
# Run the sample evaluation with a sentence-transformers model.
# Any failure (missing package, model load error, ...) is reported and the
# notebook carries on.
st_model_name = "all-mpnet-base-v2"
try:
    encoder = build_embedding_encoder(st_model_name, "sentence-transformers")
    print("Computing evaluation with sentence-transformers...")
    res = evaluate(df, encoder, batch_size=32, ndcg_k=3)
    print("Results (sentence-transformers):", res)
except Exception as exc:
    print("Sentence-transformers evaluation skipped or failed:", exc)


In [None]:
# Evaluate with OpenAI embeddings if OPENAI_API_KEY is set and openai is installed
try:
    if not os.environ.get("OPENAI_API_KEY") or openai is None:
        print("Skipping OpenAI evaluation: either OPENAI_API_KEY not set or openai package missing.")
    else:
        # choose a model available to your account, e.g., 'text-embedding-3-small'
        encoder_oa = build_embedding_encoder("text-embedding-3-small", "openai")
        print("Computing evaluation with OpenAI embeddings (this will call the API)...")
        res_oa = evaluate(df, encoder_oa, batch_size=8, ndcg_k=3)
        print("Results (OpenAI):", res_oa)
except Exception as exc:
    # Report API or network failures instead of stopping the notebook.
    print("OpenAI evaluation failed:", exc)


## Next steps
- Replace the sample DataFrame with your evaluation dataset (CSV/Parquet/JSONL) and ensure columns match.
- Use the same encoder building pattern to evaluate other models (HF Hub model names or OpenAI models).
- If you want to persist vectors to Pinecone, reuse the candidate embeddings and upsert (take care to use stable IDs).
- For large datasets, add batching and caching of candidate embeddings to avoid recomputing.

This completes the demo notebook. Save/download it and adapt to your dataset and environment.