# Chunking Evaluation with Chroma

In [13]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np

from chunking_evaluation import GeneralEvaluation
from dotenv import load_dotenv
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction

from lib.evaluation.segmentation.chroma.chroma_adapter import ChromaChunker
from lib.evaluation.segmentation.chroma.chroma_setup import (
    get_db_path,
    setup_evaluation_from_medical_qas,
)
from lib.segmentation.methods.implementations.fixed_sized import FixedSizeChunker
from lib.segmentation.methods.implementations.recursive import RecursiveChunker
from lib.segmentation.methods.implementations.hierarchical import HierarchicalChunker
from lib.segmentation.methods.implementations.semantic import SemanticChunker
from lib.parsing.methods.parsers import Parsers
from lib.utils.chroma_export import export_results

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
# Chunk-size budgets (in tokens) shared by every chunking method in the grid.
MAX_TOKEN_SIZES = (128, 256, 512, 1024)

# Grid of chunker configurations, ordered by method, then size, then variant:
#   * Fixed-size / recursive: no overlap, then 26 overlap tokens per 128 tokens
#     of budget (26, 52, 104, 208).
#   * Semantic: similarity thresholds 90 and 70, min_tokens is half the budget.
#   * Hierarchical: max_parent_token_ratio fixed at 0.2.
chunkers = [
    *(
        FixedSizeChunker(max_tokens=size, overlap=overlap)
        for size in MAX_TOKEN_SIZES
        for overlap in (0, 26 * size // 128)
    ),
    *(
        RecursiveChunker(max_tokens=size, overlap=overlap)
        for size in MAX_TOKEN_SIZES
        for overlap in (0, 26 * size // 128)
    ),
    *(
        SemanticChunker(
            max_tokens=size,
            similarity_threshold=threshold,
            min_tokens=size // 2,
        )
        for size in MAX_TOKEN_SIZES
        for threshold in (90, 70)
    ),
    *(
        HierarchicalChunker(max_tokens=size, max_parent_token_ratio=0.2)
        for size in MAX_TOKEN_SIZES
    ),
]

In [15]:
# Wrap every chunker in the ChromaChunker adapter before handing it to the
# evaluation runs below.
# The cell rebinds `chunkers` to its own wrapped version, so it must be
# idempotent: objects that are already ChromaChunker instances are kept as-is,
# otherwise re-running the cell would wrap the adapters a second time.
chunkers = [
    c if isinstance(c, ChromaChunker) else ChromaChunker(c)
    for c in chunkers
]

In [16]:
# The OpenAI credentials are expected in a local .env file under the
# "OPENAI_API_KEY" entry; load_dotenv() loads that file before the embedding
# function is created.
EMBEDDING_MODEL = "text-embedding-3-small"

load_dotenv()
embedding_function = OpenAIEmbeddingFunction(model_name=EMBEDDING_MODEL)

## General Evaluation (predefined data sets)

In [5]:
evaluation = GeneralEvaluation()

# `results` collects the all-corpus mean scores, `pubmed_results` the
# PubMed-only mean/std per metric; both get one row per chunker configuration.
results, pubmed_results = [], []

# Statistics computed for every PubMed metric, in output-column order.
summary_stats = (("mean", np.mean), ("std", np.std))

for chunker in chunkers:
    method_info = chunker.get_info()
    print(method_info)

    run_scores = evaluation.run(chunker, embedding_function)

    # Keep the PubMed per-metric scores, then drop the per-corpus breakdown so
    # that only the all-corpus values remain in the row.
    pubmed_scores = run_scores["corpora_scores"]["pubmed"]
    run_scores.pop("corpora_scores")

    # Tag the all-corpus scores with the method description.
    run_scores.update(method_info)
    results.append(run_scores)

    # One "<metric>_mean" / "<metric>_std" pair per PubMed metric, with the
    # "_scores" part stripped from the metric name.
    pubmed_row = {
        metric.replace("_scores", "") + "_" + stat_name: stat_fn(values)
        for metric, values in pubmed_scores.items()
        for stat_name, stat_fn in summary_stats
    }
    pubmed_row.update(method_info)
    pubmed_results.append(pubmed_row)

2026-02-15 15:46:47,854 - INFO : Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
Token indices sequence length is longer than the specified maximum sequence length for this model (25022 > 512). Running this sequence through the model will result in indexing errors


{'Method': 'Fixed-Size', 'Param': '$O=0$', 'N': 128}


2026-02-15 15:46:50,535 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:46:52,371 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:46:53,493 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:46:54,426 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:46:55,250 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:46:56,053 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:46:57,283 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:46:58,306 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:46:59,275 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:00,213 - INFO : HTTP

{'Method': 'Fixed-Size', 'Param': '$O=26$', 'N': 128}


2026-02-15 15:47:03,225 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:05,789 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:08,203 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:09,796 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:11,313 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:12,897 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:13,943 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:14,972 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:16,329 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:17,353 - INFO : HTTP

{'Method': 'Fixed-Size', 'Param': '$O=0$', 'N': 256}


2026-02-15 15:47:23,395 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:24,606 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:25,751 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:26,731 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:27,876 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:28,584 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Fixed-Size', 'Param': '$O=52$', 'N': 256}


2026-02-15 15:47:30,666 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:31,690 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:32,919 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:34,146 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:35,376 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:36,706 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:37,525 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Fixed-Size', 'Param': '$O=0$', 'N': 512}


2026-02-15 15:47:39,779 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:42,552 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:44,447 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Fixed-Size', 'Param': '$O=104$', 'N': 512}


2026-02-15 15:47:46,950 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:48,951 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:50,433 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:51,422 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Fixed-Size', 'Param': '$O=0$', 'N': 1024}


2026-02-15 15:47:54,362 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:47:55,855 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Fixed-Size', 'Param': '$O=208$', 'N': 1024}


2026-02-15 15:48:00,772 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:02,921 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Recursive', 'Param': '$O=0$', 'N': 128}


2026-02-15 15:48:04,763 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:05,934 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:07,122 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:08,463 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:09,167 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:10,217 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:11,523 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:12,755 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:13,777 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:14,649 - INFO : HTTP

{'Method': 'Recursive', 'Param': '$O=26$', 'N': 128}


2026-02-15 15:48:22,073 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:24,322 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:25,452 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:26,783 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:28,110 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:29,491 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:30,773 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:32,105 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:33,233 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:34,596 - INFO : HTTP

{'Method': 'Recursive', 'Param': '$O=0$', 'N': 256}


2026-02-15 15:48:45,929 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:47,469 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:48,575 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:49,852 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:51,233 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:52,179 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:52,996 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:53,456 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Recursive', 'Param': '$O=52$', 'N': 256}


2026-02-15 15:48:55,660 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:56,888 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:57,825 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:58,832 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:48:59,756 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:49:00,921 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:49:01,904 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:49:02,927 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:49:04,158 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Recursive', 'Param': '$O=0$', 'N': 512}


2026-02-15 15:49:06,922 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:49:08,405 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:49:09,788 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:49:11,119 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Recursive', 'Param': '$O=104$', 'N': 512}


2026-02-15 15:49:13,579 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:49:14,706 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:49:16,241 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:49:17,677 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:49:18,698 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Recursive', 'Param': '$O=0$', 'N': 1024}


2026-02-15 15:49:21,763 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:49:23,896 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Recursive', 'Param': '$O=208$', 'N': 1024}


2026-02-15 15:49:27,094 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:49:28,939 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:49:29,668 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Semantic', 'Param': '$Q=90$', 'N': 128}


2026-02-15 15:50:51,086 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:50:52,190 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:50:52,948 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:50:53,729 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:50:54,607 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:50:55,820 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:50:57,211 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:50:59,557 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:51:01,919 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:51:02,946 - INFO : HTTP

{'Method': 'Semantic', 'Param': '$Q=70$', 'N': 128}


2026-02-15 15:52:27,406 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:52:28,980 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:52:29,938 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:52:30,706 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:52:31,825 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:52:32,657 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:52:33,665 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:52:34,694 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:52:35,513 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:52:36,382 - INFO : HTTP

{'Method': 'Semantic', 'Param': '$Q=90$', 'N': 256}


2026-02-15 15:54:03,444 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:54:04,618 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:54:05,627 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:54:06,853 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:54:07,886 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:54:09,005 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:54:10,238 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:54:11,463 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Semantic', 'Param': '$Q=70$', 'N': 256}


2026-02-15 15:55:29,342 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:55:30,622 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:55:31,745 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:55:33,181 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:55:34,306 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:55:35,534 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:55:36,969 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Semantic', 'Param': '$Q=90$', 'N': 512}


2026-02-15 15:56:51,619 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:56:53,259 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:56:54,842 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:56:55,920 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Semantic', 'Param': '$Q=70$', 'N': 512}


2026-02-15 15:58:12,311 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:58:14,361 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:58:15,893 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:58:16,814 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Semantic', 'Param': '$Q=90$', 'N': 1024}


2026-02-15 15:59:34,538 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 15:59:36,791 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Semantic', 'Param': '$Q=70$', 'N': 1024}


2026-02-15 16:00:54,105 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:00:56,253 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Hierarchical', 'Param': '$B_h=51$', 'N': 128}


2026-02-15 16:00:59,955 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:00,969 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:02,499 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:03,932 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:06,187 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:07,823 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:08,954 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:10,010 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:11,410 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:12,740 - INFO : HTTP

{'Method': 'Hierarchical', 'Param': '$B_h=102$', 'N': 256}


2026-02-15 16:01:22,059 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:23,264 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:24,520 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:25,949 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:26,974 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:28,304 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:29,431 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:30,350 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Hierarchical', 'Param': '$B_h=204$', 'N': 512}


2026-02-15 16:01:33,464 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:35,266 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:36,603 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:37,828 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Hierarchical', 'Param': '$B_h=409$', 'N': 1024}


2026-02-15 16:01:41,825 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-15 16:01:44,176 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [40]:
# Export the all-corpus scores gathered by the general-evaluation loop under
# the name "chroma_results_general".
# NOTE(review): `results` is reassigned by the synthetic-evaluation loop further
# down, so this cell must run directly after the general-evaluation loop;
# otherwise the synthetic scores would be exported under this name.
df = pd.DataFrame(results)
combined_df = export_results(df, "chroma_results_general")

2026-02-15 17:31:04,219 - INFO : Saved table content to: /Users/matteo/Uni/7.Semester/thesis/visual-chunking/thesis/figures/tables/chroma_results_general.tex


In [42]:
# Tabulate the PubMed-only mean/std rows collected in the general-evaluation
# loop and export them under the name "chroma_results_pubmed".
pubmed_df = pd.DataFrame(pubmed_results)
combined_pubmed_df = export_results(pubmed_df, "chroma_results_pubmed")

## Synthetic Evaluation

In [17]:
# Parser whose output is used as the document corpus for the synthetic run.
parser = Parsers.DOCLING

# Database location derived from the parser; passed to the evaluation loop
# below as `db_to_save_chunks`.
db_path = get_db_path(parser)

# Build the evaluation object for the "awmf" data on the medical QA pairs.
# NOTE(review): the *_exist_ok flags presumably allow reusing an already
# generated corpus / question set instead of regenerating it - confirm against
# chroma_setup.
synth_eval = setup_evaluation_from_medical_qas(
    parser,
    "awmf",
    parse_exist_ok=True,
    question_exist_ok=True,
)

2026-02-16 03:08:51,535 - INFO : Chroma document corpus exists at /Users/matteo/Uni/7.Semester/thesis/visual-chunking/data/configs/chroma/docling/awmf.txt.Skipping generation.
2026-02-16 03:08:51,607 - INFO : Finished creating evaluation on the medical QA pairs.


In [35]:
# One row of scores per chunker configuration for the synthetic evaluation.
results = []

for chunker in chunkers:
    method_info = chunker.get_info()
    print(method_info)

    # NOTE(review): retrieve=-1 presumably lets the evaluation decide how many
    # chunks to retrieve per question - confirm against chunking_evaluation.
    run_scores = synth_eval.run(
        chunker, embedding_function, retrieve=-1, db_to_save_chunks=db_path
    )

    # Drop the per-corpus breakdown, then tag the remaining scores with the
    # method description.
    run_scores.pop("corpora_scores")
    run_scores.update(method_info)
    results.append(run_scores)

{'Method': 'Fixed-Size', 'Param': '$O=0$', 'N': 128}


2026-02-16 15:22:13,150 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Fixed-Size', 'Param': '$O=26$', 'N': 128}


2026-02-16 15:22:17,144 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Fixed-Size', 'Param': '$O=0$', 'N': 256}


2026-02-16 15:22:22,457 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Fixed-Size', 'Param': '$O=52$', 'N': 256}


2026-02-16 15:22:25,279 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Fixed-Size', 'Param': '$O=0$', 'N': 512}


2026-02-16 15:22:28,692 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Fixed-Size', 'Param': '$O=104$', 'N': 512}


2026-02-16 15:22:32,833 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Fixed-Size', 'Param': '$O=0$', 'N': 1024}


2026-02-16 15:22:34,947 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Fixed-Size', 'Param': '$O=208$', 'N': 1024}


2026-02-16 15:22:36,036 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Recursive', 'Param': '$O=0$', 'N': 128}


2026-02-16 15:22:37,931 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Recursive', 'Param': '$O=26$', 'N': 128}


2026-02-16 15:22:43,888 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Recursive', 'Param': '$O=0$', 'N': 256}


2026-02-16 15:22:50,982 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Recursive', 'Param': '$O=52$', 'N': 256}


2026-02-16 15:22:54,050 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Recursive', 'Param': '$O=0$', 'N': 512}


2026-02-16 15:22:57,322 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Recursive', 'Param': '$O=104$', 'N': 512}


2026-02-16 15:22:59,273 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Recursive', 'Param': '$O=0$', 'N': 1024}


2026-02-16 15:23:01,526 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Recursive', 'Param': '$O=208$', 'N': 1024}


2026-02-16 15:23:02,845 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Semantic', 'Param': '$Q=90$', 'N': 128}


2026-02-16 15:23:04,702 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Semantic', 'Param': '$Q=70$', 'N': 128}


2026-02-16 15:23:10,800 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Semantic', 'Param': '$Q=90$', 'N': 256}


2026-02-16 15:23:16,247 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Semantic', 'Param': '$Q=70$', 'N': 256}


2026-02-16 15:23:19,319 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Semantic', 'Param': '$Q=90$', 'N': 512}


2026-02-16 15:23:22,038 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Semantic', 'Param': '$Q=70$', 'N': 512}


2026-02-16 15:23:24,236 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Semantic', 'Param': '$Q=90$', 'N': 1024}


2026-02-16 15:23:25,998 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Semantic', 'Param': '$Q=70$', 'N': 1024}


2026-02-16 15:23:27,062 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Hierarchical', 'Param': '$B_h=25$', 'N': 128}


2026-02-16 15:23:28,828 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Hierarchical', 'Param': '$B_h=51$', 'N': 256}


2026-02-16 15:23:34,770 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Hierarchical', 'Param': '$B_h=102$', 'N': 512}


2026-02-16 15:23:38,388 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'Method': 'Hierarchical', 'Param': '$B_h=204$', 'N': 1024}


2026-02-16 15:23:40,329 - INFO : HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [36]:
# Display the rows currently held in `results` (one per chunker configuration).
pd.DataFrame(results)

Unnamed: 0,iou_mean,iou_std,recall_mean,recall_std,precision_omega_mean,precision_omega_std,precision_mean,precision_std,Method,Param,N
0,0.113643,0.13315,0.238281,0.288437,0.422392,0.11136,0.195618,0.205088,Fixed-Size,$O=0$,128
1,0.101444,0.1074,0.247866,0.287191,0.38084,0.115146,0.17143,0.158985,Fixed-Size,$O=26$,128
2,0.094259,0.103778,0.297087,0.322984,0.299501,0.115872,0.13511,0.139152,Fixed-Size,$O=0$,256
3,0.107986,0.100708,0.355571,0.338757,0.265148,0.099183,0.150604,0.129877,Fixed-Size,$O=52$,256
4,0.086644,0.08281,0.413899,0.351018,0.19327,0.081948,0.105359,0.094076,Fixed-Size,$O=0$,512
5,0.08805,0.084209,0.442326,0.36987,0.165769,0.071941,0.106956,0.094375,Fixed-Size,$O=104$,512
6,0.065677,0.064047,0.486344,0.375469,0.125596,0.062408,0.073598,0.067814,Fixed-Size,$O=0$,1024
7,0.067395,0.060655,0.458185,0.360091,0.109023,0.052292,0.084942,0.097188,Fixed-Size,$O=208$,1024
8,0.123441,0.14291,0.236098,0.298804,0.591056,0.145471,0.226727,0.209323,Recursive,$O=0$,128
9,0.092823,0.135251,0.170202,0.25832,0.445364,0.124418,0.16947,0.204876,Recursive,$O=26$,128


In [38]:
# Export the synthetic-evaluation scores held in `results`.
# NOTE(review): "concology" looks like a typo for "oncology"; the name is left
# unchanged because the exported file may already be referenced elsewhere.
df = pd.DataFrame(results)
combined_df = export_results(df, "chroma_results_concology")

2026-02-16 15:29:08,360 - INFO : Saved table content to: /Users/matteo/Uni/7.Semester/thesis/visual-chunking/thesis/figures/tables/chroma_results_concology_top_4.tex


In [None]:
# Display the frame returned by the most recent export_results call.
combined_df