In [None]:
import re
import numpy as np

# Approximate BERTScore using Ollama embeddings (qwen3)
# Note: This is an approximation because Ollama embeddings are sentence/phrase-level, not contextual token embeddings.
def _simple_tokenize(text: str):
    """Split *text* into lowercase alphanumeric tokens.

    This is a rough stand-in for word-piece tokenization: the text is
    lowercased and every maximal run of letters/digits becomes one token.
    Punctuation, whitespace and underscores act as separators.

    The pattern is Unicode-aware, so accented or non-Latin letters stay
    inside their word (e.g. "Sjögren" -> ["sjögren"]) instead of being
    dropped and splitting the word in two. Pure-ASCII input tokenizes
    exactly as it would with an ASCII-only [a-z0-9]+ pattern.

    Returns a list of token strings (empty when *text* has no letters or digits).
    """
    # [^\W_] == "word character that is not an underscore", i.e. any
    # Unicode letter or digit.
    return re.findall(r"[^\W_]+", text.lower())

def _cosine_matrix(A: np.ndarray, B: np.ndarray):
    """Return the pairwise cosine similarities between rows of A and rows of B.

    Entry (i, j) of the result compares row i of A with row j of B. A small
    constant is added to every row norm before dividing, so an all-zero row
    produces zeros rather than a division by zero.
    """
    eps = 1e-9
    # Scale each row to (approximately) unit length; the dot product of two
    # unit rows is their cosine similarity.
    unit_a, unit_b = (
        mat / (np.linalg.norm(mat, axis=1, keepdims=True) + eps)
        for mat in (A, B)
    )
    return np.dot(unit_a, unit_b.T)

def _ollama_embed_text(text: str, model: str):
    """Embed *text* with the given Ollama model and return the vector as float32."""
    # Imported lazily so the module can be loaded without the ollama client.
    import ollama

    response = ollama.embeddings(model=model, prompt=text)
    # The vector is read from the response's 'embedding' entry.
    vector = response["embedding"]
    return np.array(vector, dtype=np.float32)

def _token_embeddings(tokens, model: str, cache: dict):
    """Return one embedding row per token, stacked in token order.

    *cache* maps token -> embedding and is updated in place: a token that is
    already present is reused, a missing one is embedded via Ollama and stored.
    An empty token list yields an empty (0, 0) float32 array.
    """
    rows = []
    for token in tokens:
        # Only hit Ollama for tokens we have not embedded yet.
        if token not in cache:
            cache[token] = _ollama_embed_text(token, model)
        rows.append(cache[token])

    if not rows:
        return np.empty((0, 0), dtype=np.float32)
    return np.vstack(rows)

def approx_bertscore_ollama(candidate_text: str,
                            reference_text: str,
                            ollama_model: str = "qwen3"):
    """
    Score a candidate text against a reference with a BERTScore-style metric
    built on Ollama embeddings:
    - split both texts into tokens
    - embed every distinct token on its own through Ollama
    - for each token, take its best cosine match on the other side
      (candidate -> reference gives precision, reference -> candidate gives recall)
    Returns (precision, recall, f1) as floats.

    Raises ValueError when either text yields no tokens, and RuntimeError when
    either embedding matrix comes back empty.

    IMPORTANT: tokens are embedded in isolation, so this lacks the contextual
    token embeddings of true BERTScore and is only an approximation.
    """
    candidate_tokens = _simple_tokenize(candidate_text)
    reference_tokens = _simple_tokenize(reference_text)
    if not (candidate_tokens and reference_tokens):
        raise ValueError("One of the texts has no tokens after tokenization.")

    # One cache for both sides, so a token shared by the two texts is embedded once.
    embedding_cache = {}
    cand_matrix = _token_embeddings(candidate_tokens, ollama_model, embedding_cache)
    ref_matrix = _token_embeddings(reference_tokens, ollama_model, embedding_cache)
    if 0 in (cand_matrix.size, ref_matrix.size):
        raise RuntimeError("Failed to obtain embeddings from Ollama.")

    # Rows index candidate tokens, columns index reference tokens.
    similarity = _cosine_matrix(cand_matrix, ref_matrix)

    # Greedy matching as in BERTScore, with uniform (non-IDF) token weights.
    precision = float(similarity.max(axis=1).mean())
    recall = float(similarity.max(axis=0).mean())

    # Harmonic mean, guarded against a zero denominator.
    if (precision + recall) == 0:
        f1 = 0.0
    else:
        f1 = float(2 * precision * recall / (precision + recall))
    return precision, recall, f1

In [None]:
if __name__ == "__main__":
    # Demo: two differently worded clinical notes describing the same findings.
    candidate = (
        "The patient was started on metformin for newly diagnosed type 2 diabetes. "
        "Renal function was monitored; eGFR remained stable at 85 mL/min."
    )
    reference = (
        "A new diagnosis of type 2 diabetes led to initiation of metformin therapy. "
        "Kidney function tests showed a stable estimated GFR of 85 mL/min."
    )

    # Approximate BERTScore via Ollama embeddings.
    # Requires a local Ollama server with the embedding model below available:
    #   ollama pull qwen3-embedding:latest
    embedding_model = "qwen3-embedding:latest"
    try:
        p_ollama, r_ollama, f1_ollama = approx_bertscore_ollama(
            candidate,
            reference,
            ollama_model=embedding_model,
        )
        print("\nApproximate BERTScore via Ollama embeddings (qwen3):")
        print(f"Precision: {p_ollama:.4f}, Recall: {r_ollama:.4f}, F1: {f1_ollama:.4f}")
    except Exception as e:
        # Top-level demo boundary: report the failure instead of a traceback.
        print(f"Error computing approximate BERTScore via Ollama: {e}")