In [1]:
# First, change the working directory to the project root.
# Example: %cd /home/your_username/MetaHarmonizer

%cd /home/lcc/projects/MetaHarmonizer

/home/lcc/projects/MetaHarmonizer


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [None]:
# Required files:
# data/corpus/oncotree_code_to_name.csv

In [2]:
# This is required to run asyncio code in Jupyter notebooks.
# Jupyter already runs its own event loop, so calling asyncio.run() directly would raise an error.
# nest_asyncio.apply() patches the loop to allow nested usage.

import nest_asyncio

nest_asyncio.apply()

In [3]:
# Import core modules and ontology mapper implementations

import pandas as pd

# Import the engine that handles pipeline logic and integrates the mappers
from src.Engine import get_ontology_engine

OntoMapEngine = get_ontology_engine()

In [4]:
# Import the CalcStats class for calculating Top1, Top3, and Top5 accuracy

from evaluation.calc_stats import CalcStats

calc = CalcStats()

In [None]:
# Optional utility: Clean up the FAISS + SQLite vector store
# Useful after testing or re-running experiments to avoid stale data
# Not required for standard inference or training runs

from src.utils.cleanup_vector_store import cleanup_vector_store

cleanup_vector_store("st", "mt-sap-bert", "disease")  # (strategy, model_name, entity_type)

[Success] Table 'st_mt_sap_bert_disease' dropped from src/KnowledgeDb/vector_db.sqlite
[Success] Index file 'src/KnowledgeDb/faiss_indexes/st_mt-sap-bert_disease.index' deleted.


DF

In [5]:
df = pd.read_csv("data/corpus/cbio_disease/disease_query_updated.csv")
large_corpus = pd.read_csv(
    'data/corpus/cbio_disease/disease_corpus_updated.csv')

query_list = df['original_value'].tolist()
small_corpus_list = df['curated_ontology'].tolist() # Small corpus list is made of curated ontology values in the query file
large_corpus_list = (
    large_corpus["official_label"].tolist()
    if "official_label" in large_corpus.columns
    else large_corpus["label"].tolist()
)

cura_map = dict(zip(df['original_value'], df['curated_ontology']))

In [6]:
# ST Strategy: use default pooling method. LM Strategy: use CLS token pooling method.
# Arguments:
# - method
# - category
# - topk: retrieve the top k matches
# - query: list of original values to map
# - corpus: list of curated ontology values to match against
# - cura_map: dictionary mapping original values to curated ontology values
# Returns: DF with original values, curated ontology values, match levels, stage, and top k matches with scores

other_params = {"test_or_prod": "test"}
onto_engine_large = OntoMapEngine(category='disease',
                                  topk=5,
                                  query=query_list,
                                  corpus=large_corpus_list,
                                  cura_map=cura_map,
                                  s2_method="mt-sap-bert",
                                  s2_strategy="st",
                                  **other_params)
st_sapbert_disease_top5_result = onto_engine_large.run()

03/11/2025 01:16:30 PM - INFO - OntoMapEngine: Initialized OntoMap Engine
03/11/2025 01:16:30 PM - INFO - OntoMapEngine: Stage 1: Exact matching
03/11/2025 01:16:30 PM - INFO - OntoMapEngine: Stage 2: ST
03/11/2025 01:16:30 PM - INFO - OntoMapEngine: Stage 3: Disabled
03/11/2025 01:16:30 PM - INFO - OntoMapEngine: Starting Ontology Mapping
03/11/2025 01:16:30 PM - INFO - OntoMapEngine: Stage 1: Exact Matching
03/11/2025 01:16:30 PM - INFO - OntoMapEngine: Exact matches: 342
03/11/2025 01:16:30 PM - INFO - OntoMapEngine: Remaining for Stage 2: 1213
03/11/2025 01:16:30 PM - INFO - OntoMapEngine: Stage 2: ST Matching
03/11/2025 01:16:30 PM - INFO - OntoMapEngine: Replacing shortNames using rule-based name mapping
03/11/2025 01:16:30 PM - INFO - OntoMapEngine: Replaced: ACC → Adrenocortical Carcinoma
03/11/2025 01:16:30 PM - INFO - OntoMapEngine: Replaced: ACYC → Adenoid Cystic Carcinoma
03/11/2025 01:16:30 PM - INFO - OntoMapEngine: Replaced: AML → Acute Myeloid Leukemia
03/11/2025 01:16:

No sentence-transformers model found with name model_cache/mt-sap-bert. Creating a new one with mean pooling.
No sentence-transformers model found with name cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token. Creating a new one with mean pooling.
  return forward_call(*args, **kwargs)


03/11/2025 01:16:37 PM - INFO - OntoMapEngine: Stage 2 completed: 1213 queries
03/11/2025 01:16:37 PM - INFO - OntoMapEngine: Stage 3: Disabled
03/11/2025 01:16:37 PM - INFO - OntoMapEngine: FINAL SUMMARY
03/11/2025 01:16:37 PM - INFO - OntoMapEngine: Stage 1 (Exact): 342 queries
03/11/2025 01:16:37 PM - INFO - OntoMapEngine: Stage 2 (ST): 1213 queries


In [7]:
# Calculate Top1, Top3, and Top5 accuracy for the generated results

st_sapbert_accuracy_df = calc.calc_accuracy(st_sapbert_disease_top5_result)
print(st_sapbert_accuracy_df)

  Accuracy Level   Accuracy
0    Top 1 Match  75.562701
1  Top 3 Matches  84.694534
2  Top 5 Matches  87.781350


In [8]:
# Save the results to a CSV file for further analysis or reporting. Optional.

st_sapbert_disease_top5_result.to_csv(
    "data/outputs/2025/large_corpus/1024/st_sapbert_disease_top5_result.csv",
    index=False)

In [6]:
# RAG Strategy: Need corpus_df for concept retrieval.
# Example: 
other_params = {"test_or_prod": "test"}
onto_engine_large = OntoMapEngine(category='disease',
                                  topk=5,
                                  query=query_list,
                                  corpus=large_corpus_list,
                                  corpus_df=large_corpus,
                                  cura_map=cura_map,
                                  s2_method="mt-sap-bert",
                                  s2_strategy="st",
                                  s3_method="pubmed-bert",
                                  s3_strategy="rag",
                                  s3_threshold=0.9,
                                  **other_params)
st_sapbert_rag_pubmedbert_disease_top5_result = onto_engine_large.run()

03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Initialized OntoMap Engine
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Stage 1: Exact matching
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Stage 2: ST
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Stage 3: RAG (threshold=0.9)
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Starting Ontology Mapping
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Stage 1: Exact Matching
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Exact matches: 342
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Remaining for Stage 2: 1213
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Stage 2: ST Matching
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replacing shortNames using rule-based name mapping
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replaced: ACC → Adrenocortical Carcinoma
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replaced: ACYC → Adenoid Cystic Carcinoma
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replaced: AML → Acute Myeloid Leukemia
03/11/

03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replaced: CML → Chronic Myelogenous Leukemia
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replaced: CMML → Chronic Myelomonocytic Leukemia
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replaced: COAD → Colon Adenocarcinoma
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replaced: CSCC → Cutaneous Squamous Cell Carcinoma
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replaced: DIPG → Diffuse Intrinsic Pontine Glioma
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replaced: EMBT → Embryonal Tumor
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replaced: EPM → Ependymoma
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replaced: ES → Ewing Sarcoma
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replaced: ESCA → Esophageal Adenocarcinoma
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replaced: FL → Follicular Lymphoma
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replaced: GB → Glioblastoma
03/11/2025 02:10:53 PM - INFO - OntoMapEngine: Replaced

No sentence-transformers model found with name model_cache/mt-sap-bert. Creating a new one with mean pooling.
No sentence-transformers model found with name cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token. Creating a new one with mean pooling.
  return forward_call(*args, **kwargs)


03/11/2025 02:10:59 PM - INFO - OntoMapEngine: Stage 2 completed: 1213 queries
03/11/2025 02:10:59 PM - INFO - OntoMapEngine: Stage 3: RAG Matching
03/11/2025 02:10:59 PM - INFO - OntoMapEngine: S2 result columns: ['original_value', 'updated_value', 'curated_ontology', 'match_level', 'top1_match', 'top1_score', 'top2_match', 'top2_score', 'top3_match', 'top3_score', 'top4_match', 'top4_score', 'top5_match', 'top5_score', 'stage']
03/11/2025 02:10:59 PM - INFO - OntoMapEngine: S2 result top1_score dtype: object
03/11/2025 02:10:59 PM - INFO - OntoMapEngine: S2 result top1_score unique values (first 10): ['0.6203' '0.7585' '0.6178' '0.8033' '0.9037' '0.6614' '0.9614' '0.9626'
 '0.9182' '0.7317']
03/11/2025 02:10:59 PM - INFO - OntoMapEngine: Queries with top1_score < 0.9: 618
03/11/2025 02:10:59 PM - INFO - OntoMapEngine: Replaced: CML → Chronic Myelogenous Leukemia
03/11/2025 02:10:59 PM - INFO - OntoMapEngine: Replaced: GBM → Glioblastoma Multiforme
03/11/2025 02:10:59 PM - INFO - Onto

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  return forward_call(*args, **kwargs)


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

03/11/2025 02:11:19 PM - INFO - OntoMapRAG: Results Generated
03/11/2025 02:11:19 PM - INFO - OntoMapEngine: Stage 3 completed: 618 queries
03/11/2025 02:11:19 PM - INFO - OntoMapEngine: FINAL SUMMARY
03/11/2025 02:11:19 PM - INFO - OntoMapEngine: Stage 1 (Exact): 342 queries
03/11/2025 02:11:19 PM - INFO - OntoMapEngine: Stage 2 (ST): 595 queries
03/11/2025 02:11:19 PM - INFO - OntoMapEngine: Stage 3 (RAG): 618 queries


In [7]:
st_sapbert_rag_pubmedbert_disease_top5_eval = calc.calc_accuracy(
    st_sapbert_rag_pubmedbert_disease_top5_result)
print(st_sapbert_rag_pubmedbert_disease_top5_eval)

  Accuracy Level   Accuracy
0    Top 1 Match  72.990354
1  Top 3 Matches  84.565916
2  Top 5 Matches  88.038585


In [8]:
st_sapbert_rag_pubmedbert_disease_top5_result.to_csv(
    "data/outputs/2025/large_corpus/1024/st_sapbert_rag_pubmedbert_disease_top5_result.csv",
    index=False)

In [6]:
# rag_bie Strategy: Need corpus_df for concept retrieval and query_df for query enrichment.

# Example:

# Note: rag_bie is a query-enriched variant of RAG, so we have to use query with expanded fields.
query_df = pd.read_csv(
    "data/corpus/cbio_disease/query_with_selected_fields_for_bie.csv")
large_corpus = pd.read_csv(
    'data/corpus/cbio_disease/disease_corpus_updated.csv')

query_list = query_df['original_cancer_type_value'].tolist(
)  # TODO: use a common schema for all strategies.
large_corpus_list = large_corpus['official_label'].tolist()

cura_map = dict(
    zip(query_df['original_cancer_type_value'], query_df['official_label']))

# run rag_bie strategy:
other_params = {"test_or_prod": "test"}
onto_engine_large = OntoMapEngine(category='disease',
                                  topk=20,
                                  query=query_list,
                                  corpus=large_corpus_list,
                                  cura_map=cura_map,
                                  s2_method="mt-sap-bert",
                                  s2_strategy="st",
                                  s3_method="pubmed-bert",
                                  s3_strategy="rag",
                                  s3_threshold=0.9,
                                  corpus_df=large_corpus,
                                  query_df=query_df,
                                  **other_params)
pubmedbert_rag_bie_result = onto_engine_large.run()

03/11/2025 02:14:08 PM - INFO - OntoMapEngine: Initialized OntoMap Engine
03/11/2025 02:14:08 PM - INFO - OntoMapEngine: Stage 1: Exact matching
03/11/2025 02:14:08 PM - INFO - OntoMapEngine: Stage 2: ST
03/11/2025 02:14:08 PM - INFO - OntoMapEngine: Stage 3: RAG (threshold=0.9)
03/11/2025 02:14:08 PM - INFO - OntoMapEngine: Starting Ontology Mapping
03/11/2025 02:14:08 PM - INFO - OntoMapEngine: Stage 1: Exact Matching
03/11/2025 02:14:08 PM - INFO - OntoMapEngine: Exact matches: 76
03/11/2025 02:14:08 PM - INFO - OntoMapEngine: Remaining for Stage 2: 86
03/11/2025 02:14:08 PM - INFO - OntoMapEngine: Stage 2: ST Matching
03/11/2025 02:14:08 PM - INFO - OntoMapEngine: Replacing shortNames using rule-based name mapping
03/11/2025 02:14:08 PM - INFO - OntoMapST: Initialized OntoMap Sentence Transformer module


No sentence-transformers model found with name model_cache/mt-sap-bert. Creating a new one with mean pooling.
No sentence-transformers model found with name cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token. Creating a new one with mean pooling.
  return forward_call(*args, **kwargs)


03/11/2025 02:14:13 PM - INFO - OntoMapEngine: Stage 2 completed: 86 queries
03/11/2025 02:14:13 PM - INFO - OntoMapEngine: Stage 3: RAG Matching
03/11/2025 02:14:13 PM - INFO - OntoMapEngine: S2 result columns: ['original_value', 'updated_value', 'curated_ontology', 'match_level', 'top1_match', 'top1_score', 'top2_match', 'top2_score', 'top3_match', 'top3_score', 'top4_match', 'top4_score', 'top5_match', 'top5_score', 'top6_match', 'top6_score', 'top7_match', 'top7_score', 'top8_match', 'top8_score', 'top9_match', 'top9_score', 'top10_match', 'top10_score', 'top11_match', 'top11_score', 'top12_match', 'top12_score', 'top13_match', 'top13_score', 'top14_match', 'top14_score', 'top15_match', 'top15_score', 'top16_match', 'top16_score', 'top17_match', 'top17_score', 'top18_match', 'top18_score', 'top19_match', 'top19_score', 'top20_match', 'top20_score', 'stage']
03/11/2025 02:14:13 PM - INFO - OntoMapEngine: S2 result top1_score dtype: object
03/11/2025 02:14:13 PM - INFO - OntoMapEngin

Processing queries:   0%|          | 0/26 [00:00<?, ?it/s]

03/11/2025 02:14:15 PM - INFO - FAISSSQLiteSearch: All corpus terms already processed.
03/11/2025 02:14:15 PM - INFO - OntoMapRAG: True - Vector store initialized for method=pubmed-bert, category=disease, om_strategy=rag


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  return forward_call(*args, **kwargs)


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

03/11/2025 02:14:16 PM - INFO - OntoMapRAG: Results Generated
03/11/2025 02:14:16 PM - INFO - OntoMapEngine: Stage 3 completed: 26 queries
03/11/2025 02:14:16 PM - INFO - OntoMapEngine: FINAL SUMMARY
03/11/2025 02:14:16 PM - INFO - OntoMapEngine: Stage 1 (Exact): 76 queries
03/11/2025 02:14:16 PM - INFO - OntoMapEngine: Stage 2 (ST): 60 queries
03/11/2025 02:14:16 PM - INFO - OntoMapEngine: Stage 3 (RAG): 26 queries


In [7]:
pubmedbert_rag_bie_accuracy_df = calc.calc_accuracy(pubmedbert_rag_bie_result)
print(pubmedbert_rag_bie_accuracy_df)

  Accuracy Level   Accuracy
0    Top 1 Match  84.567901
1  Top 3 Matches  93.827160
2  Top 5 Matches  93.827160


Workflow Code

In [12]:
from graphviz import Digraph

dot = Digraph(comment='3-Stage Ontology Mapping Workflow')
dot.attr(rankdir='TB', fontsize='10', fontname='Helvetica')

# Input
dot.node('A', 'Input Arguments\n(query, corpus, s2_strategy, s3_strategy, s3_threshold)', shape='box', style='filled', fillcolor='lightgray')

# ========== Stage 1 ==========
with dot.subgraph(name='cluster_s1') as s1:
    s1.attr(label='Stage 1: Exact Matching', style='filled', fillcolor='lightblue', fontsize='11')
    s1.node('S1_EXACT', 'Exact Match?', shape='diamond')
    s1.node('S1_MATCHED', 'Exact Matched Terms\n(stage=1)', shape='box', style='filled', fillcolor='lightgreen')

# ========== Stage 2 ==========
with dot.subgraph(name='cluster_s2') as s2:
    s2.attr(label='Stage 2: LM/ST (Transformer-based)', style='filled', fillcolor='lightyellow', fontsize='11')
    s2.node('S2_ABBR', 'Replace Abbreviations\n(via abbreviation dict)', shape='box')
    s2.node('S2_STRATEGY', 'Choose s2_strategy:\nLM or ST', shape='box', style='filled', fillcolor='orange')
    s2.node('S2_FAISS', 'Check FAISS Index &\nSQLite Table', shape='box')
    s2.node('S2_BUILD', 'Encode Corpus Terms\n→ Store in FAISS & SQLite', shape='box')
    s2.node('S2_CHECK', 'Check Completeness\n→ Append Missing if Needed', shape='box')
    s2.node('S2_ENCODE', 'Encode Query Terms', shape='box', style='filled', fillcolor='lightyellow')
    s2.node('S2_SEARCH', 'FAISS Search\n(Get top-k matches + scores)', shape='ellipse', style='filled', fillcolor='orange')
    s2.node('S2_RESULT', 'Stage 2 Results\n(stage=2, with top1_score)', shape='box', style='filled', fillcolor='lightgreen')

# ========== Stage 3 ==========
with dot.subgraph(name='cluster_s3') as s3:
    s3.attr(label='Stage 3: RAG/RAG_BIE (Optional, for low-confidence queries)', style='filled', fillcolor='lightcoral', fontsize='11')
    s3.node('S3_CHECK', 'top1_score < s3_threshold?', shape='diamond')
    s3.node('S3_SKIP', 'Skip Stage 3', shape='box', style='dashed')
    s3.node('S3_ABBR', 'Replace Abbreviations\nfor low-confidence queries', shape='box')
    s3.node('S3_STRATEGY', 'Choose s3_strategy:\nRAG or RAG_BIE', shape='box', style='filled', fillcolor='orange')
    s3.node('S3_FAISS', 'Check FAISS Index &\nSQLite Table', shape='box')
    s3.node('S3_BUILD', 'Encode Corpus Contexts\n→ Store in FAISS & SQLite', shape='box')
    s3.node('S3_CHECK_DB', 'Check Completeness\n→ Append Missing if Needed', shape='box')
    s3.node('S3_ENCODE_RAG', 'RAG:\nEncode Query Terms', shape='box', style='filled', fillcolor='lightcoral')
    s3.node('S3_ENCODE_RAGBIE', 'RAG_BIE:\nEnrich + Encode Query', shape='box', style='filled', fillcolor='lightcoral')
    s3.node('S3_SEARCH', 'FAISS Search\n(Get top-k matches + scores)', shape='ellipse', style='filled', fillcolor='orange')
    s3.node('S3_RESULT', 'Stage 3 Results\n(stage=3, overrides low-confidence S2 results)', shape='box', style='filled', fillcolor='lightgreen')

# ========== Final Merge ==========
dot.node('MERGE', 'Merge All Stages:\nStage 1 + Stage 2 (filtered) + Stage 3', shape='box', style='filled', fillcolor='lightgreen')
dot.node('OUTPUT', 'Final Output DataFrame\n(with stage column: 1, 2, or 3)', shape='box', style='filled', fillcolor='green')

# ========== Flow Edges ==========
# Input to Stage 1
dot.edge('A', 'S1_EXACT')

# Stage 1 flow
dot.edge('S1_EXACT', 'S1_MATCHED', label='Yes')
dot.edge('S1_EXACT', 'S2_ABBR', label='No\n(unmatched queries)')

# Stage 2 flow
dot.edge('S2_ABBR', 'S2_STRATEGY')
dot.edge('S2_STRATEGY', 'S2_FAISS')
dot.edge('S2_FAISS', 'S2_BUILD', label='Not Exist')
dot.edge('S2_FAISS', 'S2_CHECK', label='Exist')
dot.edge('S2_BUILD', 'S2_ENCODE')
dot.edge('S2_CHECK', 'S2_ENCODE')
dot.edge('S2_ENCODE', 'S2_SEARCH')
dot.edge('S2_SEARCH', 'S2_RESULT')

# Stage 2 to Stage 3 decision
dot.edge('S2_RESULT', 'S3_CHECK')
dot.edge('S3_CHECK', 'S3_SKIP', label='No / s3_strategy=None')
dot.edge('S3_CHECK', 'S3_ABBR', label='Yes')

# Stage 3 flow
dot.edge('S3_ABBR', 'S3_STRATEGY')
dot.edge('S3_STRATEGY', 'S3_FAISS')
dot.edge('S3_FAISS', 'S3_BUILD', label='Not Exist')
dot.edge('S3_FAISS', 'S3_CHECK_DB', label='Exist')
dot.edge('S3_BUILD', 'S3_ENCODE_RAG', label='RAG')
dot.edge('S3_BUILD', 'S3_ENCODE_RAGBIE', label='RAG_BIE')
dot.edge('S3_CHECK_DB', 'S3_ENCODE_RAG', label='RAG')
dot.edge('S3_CHECK_DB', 'S3_ENCODE_RAGBIE', label='RAG_BIE')
dot.edge('S3_ENCODE_RAG', 'S3_SEARCH')
dot.edge('S3_ENCODE_RAGBIE', 'S3_SEARCH')
dot.edge('S3_SEARCH', 'S3_RESULT')

# Merge all stages
dot.edge('S1_MATCHED', 'MERGE')
dot.edge('S2_RESULT', 'MERGE', label='(if S3 skipped)')
dot.edge('S3_SKIP', 'MERGE')
dot.edge('S3_RESULT', 'MERGE')

# Final output
dot.edge('MERGE', 'OUTPUT')

# Note
dot.node('NOTE', 'Note:\n• Stage 1: Exact matching (always runs)\n• Stage 2: LM/ST transformer matching (always runs)\n• Stage 3: RAG/RAG_BIE context-based matching\n  (only for queries with top1_score < s3_threshold)\n• RAG_BIE enriches queries before encoding', 
         shape='note', style='dashed', fontsize='9', fontcolor='slategray')
dot.edge('OUTPUT', 'NOTE', style='invis')

# Render
dot.render('ontology_mapping_3stage_workflow', format='png', cleanup=True)
print("Flowchart saved as 'ontology_mapping_3stage_workflow.png'")

Flowchart saved as 'ontology_mapping_3stage_workflow.png'
