# Triplet Manual Test Notebook

This notebook helps you test the complete summary classification pipeline:
1. **Load document summaries** from `data/summaries.json`
2. **Extract triplets** using the project's TripletService
3. **Index triplets** in FAISS vector store with Sentence Transformers embeddings
4. **Run similarity queries** to test document classification
5. **Evaluate results** with various search scenarios

The notebook uses the project's modules and follows the complete pipeline from summaries → triplets → vector store → retrieval.

Notes:
- Run this notebook in the project root so relative paths resolve correctly.
- Cells include guards so the notebook can run without optional dependencies installed.
- This tests the Sentence Transformers integration with your actual data.


In [6]:
# Section 1: Install and import dependencies (guards)
import importlib
import sys
import logging
from pathlib import Path

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("triplet_notebook")

# Feature flags for optional libraries
HAS_PANDAS = importlib.util.find_spec("pandas") is not None
HAS_PYARROW = importlib.util.find_spec("pyarrow") is not None
HAS_SENT_TRANS = importlib.util.find_spec("sentence_transformers") is not None
HAS_FAISS = importlib.util.find_spec("faiss") is not None or importlib.util.find_spec("faiss-cpu") is not None
HAS_RAPIDFUZZ = importlib.util.find_spec("rapidfuzz") is not None

if not HAS_PANDAS:
    logger.warning("pandas is not available. Please install it to run loaders: pip install pandas")
else:
    import pandas as pd

import json
import sqlite3
import typing
import numpy as np

logger.info(f"Features: pandas={HAS_PANDAS}, sentence_transformers={HAS_SENT_TRANS}, faiss={HAS_FAISS}")


INFO:triplet_notebook:Features: pandas=True, sentence_transformers=True, faiss=True


## Section 2: Configure paths and constants
Define file paths for sample data and settings used throughout the notebook.

In [7]:
# Section 2: constants
ROOT = Path("../").resolve()
DATA_DIR = ROOT / "data"
NOTEBOOKS_DIR = ROOT / "notebooks"
SAMPLE_JSON = DATA_DIR / "summaries.json"
SAMPLE_JSONL = DATA_DIR / "summaries.jsonl"
SAMPLE_CSV = DATA_DIR / "summaries.csv"
SAMPLE_PARQUET = DATA_DIR / "summaries.parquet"
SQLITE_DB = ROOT / "data" / "triplets.db"
INDEX_DIR = ROOT / "indexes"
INDEX_DIR.mkdir(parents=True, exist_ok=True)
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
VERBOSE = True

print("Paths configured:")
print(f"DATA_DIR={DATA_DIR}")
print(f"SAMPLE_JSON={SAMPLE_JSON}")
print(f"SQLITE_DB={SQLITE_DB}")


Paths configured:
DATA_DIR=D:\Projects\Gen AI Projects\summary-classification-poc\data
SAMPLE_JSON=D:\Projects\Gen AI Projects\summary-classification-poc\data\summaries.json
SQLITE_DB=D:\Projects\Gen AI Projects\summary-classification-poc\data\triplets.db


## Section 3: Load triplets from CSV / TSV / JSONL / Parquet
Helper functions to load common file formats and return a standardized pandas.DataFrame with columns ['subject','predicate','object','source','row_id']

In [8]:
def _ensure_df_columns(df: "pd.DataFrame") -> "pd.DataFrame":
    # Normalize column names to lower and ensure required columns exist
    df = df.copy()
    df.columns = [c.lower() for c in df.columns]
    required = ["subject","predicate","object"]
    for col in required:
        if col not in df.columns:
            raise ValueError(f"Missing required column: {col}")
    # preserve provenance
    if "source" not in df.columns:
        df["source"] = None
    if "row_id" not in df.columns:
        df["row_id"] = range(len(df))
    return df[["subject","predicate","object","source","row_id"]]


def load_csv(path: "Path", sep=",") -> "pd.DataFrame":
    if not HAS_PANDAS:
        raise RuntimeError("pandas not available")
    df = pd.read_csv(path, sep=sep)
    df = _ensure_df_columns(df)
    df["source"] = str(path)
    return df


def load_jsonl(path: "Path") -> "pd.DataFrame":
    if not HAS_PANDAS:
        raise RuntimeError("pandas not available")
    rows = []
    with open(path, "r", encoding="utf-8") as f:
        for i,line in enumerate(f):
            if not line.strip():
                continue
            obj = json.loads(line)
            rows.append({
                "subject": obj.get("subject"),
                "predicate": obj.get("predicate"),
                "object": obj.get("object"),
                "source": str(path),
                "row_id": i,
            })
    df = pd.DataFrame(rows)
    return _ensure_df_columns(df)


def load_parquet(path: "Path") -> "pd.DataFrame":
    if not HAS_PANDAS or not HAS_PYARROW:
        raise RuntimeError("parquet support requires pandas + pyarrow")
    df = pd.read_parquet(path)
    df = _ensure_df_columns(df)
    df["source"] = str(path)
    return df


def load_any(path: "Path") -> "pd.DataFrame":
    path = Path(path)
    if path.suffix.lower() in (".csv",):
        return load_csv(path, sep=",")
    if path.suffix.lower() in (".tsv", ".txt"):
        return load_csv(path, sep="\t")
    if path.suffix.lower() in (".jsonl", ".ndjson"):
        return load_jsonl(path)
    if path.suffix.lower() in (".parquet", ".pq"):
        return load_parquet(path)
    if path.suffix.lower() in (".json",):
        # assume full JSON array
        if not HAS_PANDAS:
            raise RuntimeError("pandas required for json array loading")
        df = pd.read_json(path)
        df["source"] = str(path)
        return _ensure_df_columns(df)
    raise ValueError(f"Unsupported file: {path}")


# Load summaries data and convert to triplets
if SAMPLE_JSON.exists():
    try:
        # Load summaries data
        with open(SAMPLE_JSON, 'r', encoding='utf-8') as f:
            summaries_data = json.load(f)
        print(f"Loaded {len(summaries_data)} summaries from {SAMPLE_JSON}")
        
        # Import our project modules
        import sys
        sys.path.append(str(ROOT))
        from src.services.triplet_service import TripletService
        from src.utils.normalization import triplet_to_text
        
        # Initialize triplet service
        triplet_service = TripletService()
        
        # Convert summaries to triplets
        all_triplets = []
        for i, item in enumerate(summaries_data[:50]):  # Process first 50 for testing
            summary = item.get('summary', '')
            doc_type = item.get('doc_type', '')
            doc_code = item.get('doc_code', '')
            
            # Extract triplets from summary
            triplets = triplet_service.extract_and_normalize(summary)
            
            for triplet in triplets:
                triplet_text = triplet_to_text(triplet)
                all_triplets.append({
                    'subject': triplet[0],
                    'predicate': triplet[1], 
                    'object': triplet[2],
                    'triplet_text': triplet_text,
                    'doc_type': doc_type,
                    'doc_code': doc_code,
                    'summary': summary,
                    'source': str(SAMPLE_JSON),
                    'row_id': len(all_triplets)
                })
        
        # Create DataFrame from triplets
        if HAS_PANDAS:
            df = pd.DataFrame(all_triplets)
            print(f"Extracted {len(df)} triplets from {len(summaries_data[:50])} summaries")
            print(f"Document types found: {df['doc_type'].value_counts().to_dict()}")
            display(df.head())
        else:
            print(f"Extracted {len(all_triplets)} triplets (pandas not available for display)")
            
    except Exception as e:
        print("Error loading summaries:", e)
        import traceback
        traceback.print_exc()
else:
    print("Sample JSON not found at", SAMPLE_JSON)


Loaded 300 summaries from D:\Projects\Gen AI Projects\summary-classification-poc\data\summaries.json
Extracted 50 triplets from 50 summaries
Document types found: {'INVOICE': 50}


Unnamed: 0,subject,predicate,object,triplet_text,doc_type,doc_code,summary,source,row_id
0,id,identifier,invoice,id identifier invoice,INVOICE,INV001,This invoice (INV001) itemizes charges for pro...,D:\Projects\Gen AI Projects\summary-classifica...,0
1,id,identifier,inv002,id identifier inv002,INVOICE,INV002,A detailed billing notice identified as INV002...,D:\Projects\Gen AI Projects\summary-classifica...,1
2,id,identifier,invoice,id identifier invoice,INVOICE,INV003,Invoice INV003 records a transaction between t...,D:\Projects\Gen AI Projects\summary-classifica...,2
3,id,identifier,inv004,id identifier inv004,INVOICE,INV004,A detailed billing notice identified as INV004...,D:\Projects\Gen AI Projects\summary-classifica...,3
4,id,identifier,invoice,id identifier invoice,INVOICE,INV005,Invoice INV005 records a transaction between t...,D:\Projects\Gen AI Projects\summary-classifica...,4


## Section 4: Validate Triplet Schema and Types
Functions to confirm required fields are present, types are strings, and to report invalid rows.

In [9]:
def validate_triplets(df: "pd.DataFrame") -> dict:
    issues = {"missing":0, "nonstring":0, "rows":[]}
    required = ["subject","predicate","object"]
    for i,row in df.iterrows():
        row_issues = []
        for c in required:
            if pd.isna(row.get(c)) or row.get(c) is None:
                row_issues.append(f"missing_{c}")
            elif not isinstance(row.get(c), str):
                row_issues.append(f"nonstring_{c}")
        if row_issues:
            issues["rows"].append({"row_id": row.get("row_id", i), "issues": row_issues})
    issues["missing"] = sum(1 for r in issues["rows"] if any("missing" in i for i in r["issues"]))
    issues["nonstring"] = sum(1 for r in issues["rows"] if any("nonstring" in i for i in r["issues"]))
    issues["total_rows"] = len(df)
    return issues

# Example usage
if 'df' in globals():
    print(validate_triplets(df))


{'missing': 0, 'nonstring': 0, 'rows': [], 'total_rows': 50}


## Section 5.5: Test FAISS Vector Store with Sentence Transformers

Test the complete pipeline: summaries → triplets → FAISS vector store → similarity search


In [10]:
# Test FAISS Vector Store with Sentence Transformers
if 'all_triplets' in locals() and len(all_triplets) > 0:
    try:
        # Import our vector store
        from src.db.vector_store import FaissVectorStore
        
        print("Initializing FAISS Vector Store with Sentence Transformers...")
        vector_store = FaissVectorStore(model_name="all-MiniLM-L6-v2")
        
        # Prepare data for vector store
        triplet_texts = [t['triplet_text'] for t in all_triplets]
        triplet_metas = [{
            'doc_type': t['doc_type'],
            'doc_code': t['doc_code'],
            'subject': t['subject'],
            'predicate': t['predicate'],
            'object': t['object'],
            'summary': t['summary'][:100] + "..." if len(t['summary']) > 100 else t['summary']
        } for t in all_triplets]
        
        print(f"Adding {len(triplet_texts)} triplets to vector store...")
        vector_store.add(triplet_texts, triplet_metas)
        
        print("✓ Vector store initialized and populated successfully!")
        print(f"✓ Model dimension: {vector_store._dim}")
        print(f"✓ Total triplets indexed: {len(triplet_texts)}")
        
        # Test some queries
        test_queries = [
            "invoice has_amount <AMOUNT>",
            "payment date <DATE>",
            "organization issued_by",
            "bank statement account",
            "leave request employee"
        ]
        
        print("\n" + "="*60)
        print("TESTING SIMILARITY SEARCH")
        print("="*60)
        
        for query in test_queries:
            print(f"\nQuery: '{query}'")
            results = vector_store.query(query, top_k=3)
            
            if results:
                print(f"Found {len(results)} results:")
                for i, result in enumerate(results, 1):
                    score = result.get('score', 0)
                    doc_type = result.get('doc_type', 'N/A')
                    subject = result.get('subject', 'N/A')
                    predicate = result.get('predicate', 'N/A')
                    object_val = result.get('object', 'N/A')
                    print(f"  {i}. Score: {score:.4f} | {doc_type} | {subject} {predicate} {object_val}")
            else:
                print("  No results found")
        
        print("\n✓ FAISS Vector Store with Sentence Transformers is working correctly!")
        
    except Exception as e:
        print(f"Error testing FAISS vector store: {e}")
        import traceback
        traceback.print_exc()
else:
    print("No triplets available. Please run the previous cell first to extract triplets from summaries.")


INFO:faiss.loader:Loading faiss with AVX2 support.
INFO:faiss.loader:Successfully loaded faiss with AVX2 support.
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Initializing FAISS Vector Store with Sentence Transformers...
Using Sentence Transformers model 'all-MiniLM-L6-v2' with dimension 384
Adding 50 triplets to vector store...
✓ Vector store initialized and populated successfully!
✓ Model dimension: 384
✓ Total triplets indexed: 50

TESTING SIMILARITY SEARCH

Query: 'invoice has_amount <AMOUNT>'
Found 3 results:
  1. Score: 0.4868 | INVOICE | id identifier invoice
  2. Score: 0.4868 | INVOICE | id identifier invoice
  3. Score: 0.4868 | INVOICE | id identifier invoice

Query: 'payment date <DATE>'
Found 3 results:
  1. Score: 0.1923 | INVOICE | id identifier invoice
  2. Score: 0.1923 | INVOICE | id identifier invoice
  3. Score: 0.1923 | INVOICE | id identifier invoice

Query: 'organization issued_by'
Found 3 results:
  1. Score: 0.3153 | INVOICE | id identifier invoice
  2. Score: 0.3153 | INVOICE | id identifier invoice
  3. Score: 0.3153 | INVOICE | id identifier invoice

Query: 'bank statement account'
Found 3 results:
  1. Score: 0.1

## Section 5: Preprocess Triplets (normalization, deduplication)
Clean text fields, lowercase optionally, strip whitespace, normalize unicode, and drop duplicates.

In [11]:
import unicodedata

def preprocess_triplets(df: "pd.DataFrame", lowercase: bool=True, dedup: bool=True) -> "pd.DataFrame":
    df = df.copy()
    def clean_text(s):
        if pd.isna(s):
            return s
        if not isinstance(s, str):
            s = str(s)
        s = s.strip()
        s = unicodedata.normalize("NFKC", s)
        if lowercase:
            s = s.lower()
        return s

    for c in ["subject","predicate","object"]:
        df[c] = df[c].apply(clean_text)

    before = len(df)
    if dedup:
        df = df.drop_duplicates(subset=["subject","predicate","object"]).reset_index(drop=True)
    after = len(df)
    print(f"Preprocessing: rows before={before}, after={after}")
    return df

# Example: run preprocessing
if 'df' in globals():
    df_clean = preprocess_triplets(df)
    display(df_clean.head())


Preprocessing: rows before=50, after=20


Unnamed: 0,subject,predicate,object,triplet_text,doc_type,doc_code,summary,source,row_id
0,id,identifier,invoice,id identifier invoice,INVOICE,INV001,This invoice (INV001) itemizes charges for pro...,D:\Projects\Gen AI Projects\summary-classifica...,0
1,id,identifier,inv002,id identifier inv002,INVOICE,INV002,A detailed billing notice identified as INV002...,D:\Projects\Gen AI Projects\summary-classifica...,1
2,id,identifier,inv004,id identifier inv004,INVOICE,INV004,A detailed billing notice identified as INV004...,D:\Projects\Gen AI Projects\summary-classifica...,3
3,id,identifier,inv007,id identifier inv007,INVOICE,INV007,A detailed billing notice identified as INV007...,D:\Projects\Gen AI Projects\summary-classifica...,6
4,id,identifier,inv011,id identifier inv011,INVOICE,INV011,A detailed billing notice identified as INV011...,D:\Projects\Gen AI Projects\summary-classifica...,10


## Section 6: Build Index (in-memory dict + optional FAISS/Annoy)
Construct simple inverted indexes and optional vector indexes.

In [12]:
# Simple inverted index
class InvertedIndex:
    def __init__(self):
        self.by_subject = {}
        self.by_predicate = {}
        self.by_object = {}
        self.docs = {}

    def add(self, doc_id:int, subject:str, predicate:str, object_:str, source=None):
        self.docs[doc_id] = dict(subject=subject, predicate=predicate, object=object_, source=source)
        self.by_subject.setdefault(subject, set()).add(doc_id)
        self.by_predicate.setdefault(predicate, set()).add(doc_id)
        self.by_object.setdefault(object_, set()).add(doc_id)

    def query_subject(self, subject:str):
        return [self.docs[i] for i in sorted(self.by_subject.get(subject, []))]

    def query_predicate(self, predicate:str):
        return [self.docs[i] for i in sorted(self.by_predicate.get(predicate, []))]

    def query_object(self, object_:str):
        return [self.docs[i] for i in sorted(self.by_object.get(object_, []))]


# Optional: vector index wrappers (FAISS minimal)
VEC_DIM = 384

class SimpleVectorIndex:
    def __init__(self, dim:int=VEC_DIM):
        self.dim = dim
        self.embeddings = []
        self.ids = []
        self._model = None
        if HAS_SENT_TRANS:
            from sentence_transformers import SentenceTransformer
            self._model = SentenceTransformer(EMBEDDING_MODEL)
            self.dim = self._model.get_sentence_embedding_dimension()

    def add(self, texts:list, ids:list):
        if not self._model:
            raise RuntimeError("No embedding model available")
        emb = self._model.encode(texts, show_progress_bar=False, convert_to_numpy=True)
        self.embeddings.append(emb)
        self.ids.extend(ids)

    def build_faiss(self):
        if not HAS_FAISS:
            raise RuntimeError("faiss not available")
        import faiss
        import numpy as np
        mat = np.vstack(self.embeddings)
        index = faiss.IndexFlatIP(mat.shape[1])
        faiss.normalize_L2(mat)
        index.add(mat)
        self._faiss = index

    def search(self, query_text, k=5):
        if not self._model:
            raise RuntimeError("No embedding model available")
        q_emb = self._model.encode([query_text], convert_to_numpy=True)
        import numpy as np
        faiss.normalize_L2(q_emb)
        D, I = self._faiss.search(q_emb, k)
        res = []
        for score, idx in zip(D[0].tolist(), I[0].tolist()):
            if idx < len(self.ids):
                res.append((self.ids[idx], float(score)))
        return res


# Example: build inverted index from df_clean
if 'df_clean' in globals():
    inv = InvertedIndex()
    for i,row in df_clean.iterrows():
        inv.add(int(row.row_id), row.subject, row.predicate, row.object, row.source)
    print("Inverted index built, docs=", len(inv.docs))


Inverted index built, docs= 20


## Section 7: Insert Triplets into SQLite and Index
Store cleaned triplets in a SQLite table and bulk-add to the in-memory index.

In [13]:
def init_db(db_path: "Path"):
    conn = sqlite3.connect(str(db_path))
    cur = conn.cursor()
    cur.execute('''
        CREATE TABLE IF NOT EXISTS triplets (
            id INTEGER PRIMARY KEY,
            subject TEXT,
            predicate TEXT,
            object TEXT,
            source TEXT
        )
    ''')
    conn.commit()
    return conn


def bulk_insert(conn: "sqlite3.Connection", df: "pd.DataFrame"):
    cur = conn.cursor()
    cur.execute("BEGIN TRANSACTION")
    for _,row in df.iterrows():
        cur.execute("INSERT INTO triplets (id,subject,predicate,object,source) VALUES (?,?,?,?,?)",
                    (int(row.row_id), row.subject, row.predicate, row.object, row.source))
    conn.commit()

# Example: initialize DB and insert
conn = init_db(SQLITE_DB)
if 'df_clean' in globals():
    bulk_insert(conn, df_clean)
    print("Inserted rows into SQLite")


Inserted rows into SQLite


## Section 8: Manual Querying - Exact and Pattern Match
Examples for exact lookups, predicate filters, and pattern matching using SQL and pandas.

In [14]:
# Exact lookup: subject

def sql_query(conn, sql, params=()):
    cur = conn.cursor()
    cur.execute(sql, params)
    cols = [c[0] for c in cur.description]
    rows = cur.fetchall()
    import pandas as pd
    return pd.DataFrame(rows, columns=cols)

if 'inv' in globals():
    print("Subject lookup for 'acute myocardial infarction':")
    res = inv.query_subject('acute myocardial infarction')
    for r in res[:5]:
        print(r)

# SQL pattern match
if conn:
    df_sql = sql_query(conn, "SELECT id,subject,predicate,object,source FROM triplets WHERE subject LIKE ? LIMIT 10", ("%infarction%",))
    display(df_sql)

# pandas pattern match
if 'df_clean' in globals():
    mm = df_clean[df_clean.subject.str.contains("infarction", na=False)]
    display(mm.head())


Subject lookup for 'acute myocardial infarction':


Unnamed: 0,id,subject,predicate,object,source


Unnamed: 0,subject,predicate,object,triplet_text,doc_type,doc_code,summary,source,row_id


## Section 9: Manual Querying - Predicate Filter and Multi-hop Lookups
Run chained traversals across predicates to find multi-hop relationships.

In [15]:
def multi_hop(inv_index: InvertedIndex, start_subject: str, predicates: list, max_hops:int=3):
    # predicates: list of predicate strings to follow in order (or None for any)
    paths = []
    frontier = [(start_subject, [start_subject])]
    for hop in range(min(max_hops, len(predicates))):
        next_frontier = []
        pred = predicates[hop]
        for node, path in frontier:
            # get docs where subject == node
            doc_ids = inv_index.by_subject.get(node, set())
            for did in doc_ids:
                doc = inv_index.docs[did]
                if pred is None or doc['predicate'] == pred:
                    next_node = doc['object']
                    next_frontier.append((next_node, path + [doc['predicate'], next_node]))
        frontier = next_frontier
        paths.extend(frontier)
    return paths

# Example multi-hop: subject->predicate->object->predicate->object
if 'inv' in globals():
    ph = multi_hop(inv, 'acute myocardial infarction', ['has_symptom', 'related_to'], max_hops=2)
    for p in ph:
        print(p)


## Section 10: Embedding-based Semantic Search (optional)
Compute embeddings for texts and run top-k semantic similarity if sentence-transformers is installed. Falls back to rapidfuzz-based fuzzy matching otherwise.

In [16]:
def build_embeddings_for_df(df: "pd.DataFrame", text_field: str='object') -> np.ndarray:
    if not HAS_SENT_TRANS:
        raise RuntimeError("sentence_transformers not installed")
    from sentence_transformers import SentenceTransformer
    model = SentenceTransformer(EMBEDDING_MODEL)
    texts = df[text_field].astype(str).tolist()
    emb = model.encode(texts, convert_to_numpy=True, show_progress_bar=True)
    return emb

# fallback fuzzy search
if not HAS_SENT_TRANS and HAS_RAPIDFUZZ:
    from rapidfuzz import fuzz, process


# Example: build embeddings if possible
if 'df_clean' in globals() and HAS_SENT_TRANS:
    emb = build_embeddings_for_df(df_clean, 'object')
    print('Embeddings shape:', emb.shape)


INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Embeddings shape: (20, 384)


## Section 11: Run End-to-End Test Scenarios
Execute a set of example cells that run all scenarios and summarize results.

In [17]:
def scenario_exact_lookup(inv_index: InvertedIndex, subject: str):
    res = inv_index.query_subject(subject)
    print(f"Exact lookup for subject={subject}, found={len(res)}")
    return res


def scenario_pattern_search(conn, pattern: str):
    df = sql_query(conn, "SELECT id,subject,predicate,object,source FROM triplets WHERE object LIKE ? LIMIT 20", (f"%{pattern}%",))
    print(f"Pattern search for pattern={pattern}, rows={len(df)}")
    return df


def run_all_scenarios():
    results = {}
    if 'inv' in globals():
        results['exact'] = scenario_exact_lookup(inv, 'acute myocardial infarction')
    if conn:
        results['pattern'] = scenario_pattern_search(conn, 'infarction')
    if 'df_clean' in globals() and HAS_SENT_TRANS:
        # semantic example
        vid = SimpleVectorIndex()
        texts = df_clean['object'].tolist()
        ids = df_clean['row_id'].tolist()
        vid.add(texts, ids)
        try:
            vid.build_faiss()
            results['semantic'] = vid.search('heart attack symptoms', k=5)
        except Exception as e:
            print('Semantic index build failed:', e)
    return results

if __name__ == '__main__':
    out = run_all_scenarios()
    print('Scenarios completed, keys=', list(out.keys()))


INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Exact lookup for subject=acute myocardial infarction, found=0
Pattern search for pattern=infarction, rows=0


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Semantic index build failed: name 'faiss' is not defined
Scenarios completed, keys= ['exact', 'pattern']
