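To resolve a potential Python process crash — EXC_CRASH (SIGABRT) caused by an explicit call to abort() inside Intel’s OpenMP runtime `libomp.dylib`, which typically happens when several packages each load their own copy of the library into the same process: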

* Find every copy of the `libomp` library in the virtual environment:
```shell
# List every libomp*.dylib below the first site-packages directory reported by python3.
find "$(python3 -c 'import site; print(site.getsitepackages()[0])')" \
     -name 'libomp*.dylib' -exec ls -al {} \;
```

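* Deactivate them by renaming each one (adjust the project path, the Python version and the package list to match the output of the previous step):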
```shell
# Replace <PATH_TO_PROJECT> (and the Python version, if different) with your own values.
cd /Users/<PATH_TO_PROJECT>/.venv/lib/python3.12/site-packages

# Move each bundled copy out of the way; the .bak files are kept so the change can be rolled back.
mv torch/lib/libomp.dylib       torch/lib/libomp.dylib.bak
mv sklearn/.dylibs/libomp.dylib sklearn/.dylibs/libomp.dylib.bak
mv faiss/.dylibs/libomp.dylib   faiss/.dylibs/libomp.dylib.bak
```

* Install a single Homebrew build of `libomp`:
```shell
brew install libomp
```

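* Make every package use that single copy of `libomp` (run from the same `site-packages` directory as above; `/opt/homebrew` is the default Homebrew prefix on Apple Silicon — check yours with `brew --prefix`):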
```shell
# The link names are relative paths, so run this from the site-packages directory used for the `mv` step.
# -s creates a symbolic link; -f replaces a file that already exists at the link name.
ln -sf /opt/homebrew/opt/libomp/lib/libomp.dylib torch/lib/libomp.dylib
ln -sf /opt/homebrew/opt/libomp/lib/libomp.dylib sklearn/.dylibs/libomp.dylib
ln -sf /opt/homebrew/opt/libomp/lib/libomp.dylib faiss/.dylibs/libomp.dylib
```

* Restart the IDE (including any running notebook kernel) so that the replaced library is loaded.

In [1]:
from os import path

# `_dh` is looked up in the notebook's globals (IPython's directory history); its first
# entry is used as the notebook directory and the project root is that directory's parent.
PROJECT_ROOT = path.abspath(
    path.join(globals()['_dh'][0], '..')
)

# The datalake sits two levels above the project root.
DATALAKE_PATH = path.abspath(
    path.join(PROJECT_ROOT, '..', '..', 'datalake')
)
CBP_PROCESSED = path.abspath(
    path.join(DATALAKE_PATH, 'contrastivebertproj', 'processed')
)

# Locations inside the project tree.
MODEL_DIR = path.abspath(
    path.join(PROJECT_ROOT, 'contrastivebert', 'classifier')
)
EXT_INDEX_PATH = path.join(
    PROJECT_ROOT, 'contrastivebert', 'ext_index'
)

# JSON Lines file that `read()` loads.
FQFN_PROCESSED_DF = path.join(
    CBP_PROCESSED, 'flowers_dataset_with_descriptions.jsonl'
)


In [2]:
import base64
import os

import pandas as pd


def encode_text_body(text_body: str | bytes) -> str:
    """
    Returns Base64-encoded string version of a UTF-8 string or bytes representing a UTF-8 string
    """
    if isinstance(text_body, bytes):
        return base64.b64encode(text_body).decode('utf-8')
    else:
        return base64.b64encode(text_body.encode('utf-8')).decode('utf-8')


def text_as_base64(fqfn: str) -> tuple[str, str]:
    """
    Read a text file and return its content together with its Base64 encoding.

    :param fqfn: path of the file to read
    :return: ``(text, b64str)`` where ``text`` is the decoded file content and
        ``b64str`` is the Base64 encoding of that text's UTF-8 bytes
    :raises OSError: if the file cannot be opened
    :raises UnicodeDecodeError: if the file is not valid UTF-8
    """
    # Decode explicitly as UTF-8: without `encoding`, open() falls back to the
    # locale's preferred encoding, which is not guaranteed to match the UTF-8
    # that encode_text_body() uses when it turns the text back into bytes.
    with open(fqfn, 'r', encoding='utf-8') as f:
        text = f.read()
    return text, encode_text_body(text)


def read(k: int | None = 3) -> pd.DataFrame:
    """
    Load the JSON Lines file at ``FQFN_PROCESSED_DF`` into a DataFrame.

    :param k: number of rows to draw with ``DataFrame.sample``; pass ``None``
        to get every row
    :return: the sampled rows, or the full frame when ``k`` is ``None``
    """
    frame = pd.read_json(FQFN_PROCESSED_DF, orient='records', lines=True)
    if k is None:
        return frame
    # No random_state is passed, so each call draws a different sample.
    return frame.sample(n=k)


In [3]:
# Draw 100 rows from the processed dataset. `read` samples without a fixed seed,
# so the rows (and their original index labels) differ from run to run.
df = read(100)
df

Unnamed: 0,FLOWER_NAME,PETAL_COLOR,PETAL_NUMBER,STEM_LENGTH,LEAF_SHAPE,HUMAN_DESCRIPTION
80,Carnation,red,16,16.4,spatulate,"A red carnation with 16 petals, a 16.4 cm stem..."
1,Daisy,blue,12,9.6,ovate,"A blue daisy with 12 petals, a 9.6 cm stem, an..."
5,Rose,purple,14,5.1,spatulate,"A purple rose with 14 petals, a 5.1 cm stem, a..."
82,Carnation,red,17,43.7,lanceolate,"A red carnation with 17 petals, a 43.7 cm stem..."
17,Rose,orange,17,27.2,lanceolate,"A orange rose with 17 petals, a 27.2 cm stem, ..."
...,...,...,...,...,...,...
2,Daisy,orange,12,14.6,oblong,"A orange daisy with 12 petals, a 14.6 cm stem,..."
62,Chrysanthemum,pink,7,42.9,cordate,"A pink chrysanthemum with 7 petals, a 42.9 cm ..."
96,Marigold,yellow,11,48.9,spatulate,"A yellow marigold with 11 petals, a 48.9 cm st..."
57,Orchid,red,4,37.5,lanceolate,"A red orchid with 4 petals, a 37.5 cm stem, an..."


In [4]:
from contrastivebert.classifier.inf_server_driver import InferenceServerDriver

# Create the inference driver, pointing it at the external index directory.
inf_server = InferenceServerDriver(fqfn_index=EXT_INDEX_PATH)
# `.values` drops the index of the returned column, so the embeddings are assigned to `df`
# by position rather than by index label
# (assumes build_embeddings returns rows in the same order as `df` — TODO confirm).
df['pooler_output'] = inf_server.build_embeddings(inputs=df)['pooler_output'].values
# Persist the rows together with their embeddings as JSON Lines (one record per line).
df.to_json(path.join(EXT_INDEX_PATH, 'flower_embeddings.jsonl'), orient='records', lines=True)


2025-07-09 22:22:23,596 - tensorcraft - INFO - XLA Device Not Supported: No module named 'torch_xla'
2025-07-09 22:22:23,609 - tensorcraft - INFO - Pytorch version=2.6.0 preferred device=mps build with MPS support=True
2025-07-09 22:22:23,623 - tensorcraft - INFO - resolved device_name: mps compute_device: mps tensor_device: mps
2025-07-09 22:22:23,623 - tensorcraft - INFO - LM Components are being loaded from /Users/shershen/workspace/tensorcraft/ext_models/microsoft/graphcodebert-base...
2025-07-09 22:22:23,846 - tensorcraft - INFO - LM Components successfully loaded for mps compute architecture
2025-07-09 22:22:24,043 - tensorcraft - INFO - ONNX Session InferenceSession initialized with providers ['CPUExecutionProvider'].


In [5]:
import pandas as pd

# Reload the embeddings file written under EXT_INDEX_PATH; this rebinds `df` to the
# frame parsed from disk instead of the in-memory one.
df = pd.read_json(path.join(EXT_INDEX_PATH, 'flower_embeddings.jsonl'), orient='records', lines=True)
df

Unnamed: 0,FLOWER_NAME,PETAL_COLOR,PETAL_NUMBER,STEM_LENGTH,LEAF_SHAPE,HUMAN_DESCRIPTION,text_body,pooler_output
0,Carnation,red,16,16.4,spatulate,"A red carnation with 16 petals, a 16.4 cm stem...",Flower: Carnation | Structure: [PETAL_COLOR: r...,"[-0.2749658823, 0.37554156780000003, -0.197379..."
1,Daisy,blue,12,9.6,ovate,"A blue daisy with 12 petals, a 9.6 cm stem, an...",Flower: Daisy | Structure: [PETAL_COLOR: blue]...,"[-0.2742287517, 0.37130492930000003, -0.196920..."
2,Rose,purple,14,5.1,spatulate,"A purple rose with 14 petals, a 5.1 cm stem, a...",Flower: Rose | Structure: [PETAL_COLOR: purple...,"[-0.2722599804, 0.3803739846, -0.1933833212, -..."
3,Carnation,red,17,43.7,lanceolate,"A red carnation with 17 petals, a 43.7 cm stem...",Flower: Carnation | Structure: [PETAL_COLOR: r...,"[-0.275747478, 0.37138283250000004, -0.1978426..."
4,Rose,orange,17,27.2,lanceolate,"A orange rose with 17 petals, a 27.2 cm stem, ...",Flower: Rose | Structure: [PETAL_COLOR: orange...,"[-0.2757126391, 0.3742625713, -0.1966705322, -..."
...,...,...,...,...,...,...,...,...
95,Daisy,orange,12,14.6,oblong,"A orange daisy with 12 petals, a 14.6 cm stem,...",Flower: Daisy | Structure: [PETAL_COLOR: orang...,"[-0.27521273490000003, 0.3776193559, -0.196383..."
96,Chrysanthemum,pink,7,42.9,cordate,"A pink chrysanthemum with 7 petals, a 42.9 cm ...",Flower: Chrysanthemum | Structure: [PETAL_COLO...,"[-0.26950579880000003, 0.36528873440000004, -0..."
97,Marigold,yellow,11,48.9,spatulate,"A yellow marigold with 11 petals, a 48.9 cm st...",Flower: Marigold | Structure: [PETAL_COLOR: ye...,"[-0.2697029412, 0.37630996110000003, -0.198022..."
98,Orchid,red,4,37.5,lanceolate,"A red orchid with 4 petals, a 37.5 cm stem, an...",Flower: Orchid | Structure: [PETAL_COLOR: red]...,"[-0.2764808834, 0.3713231087, -0.1967390180000..."


In [6]:
import faiss
# Limit FAISS to one OpenMP thread before the index wrapper is imported and used
# (presumably part of the libomp crash mitigation described at the top — confirm).
faiss.omp_set_num_threads(1)

from contrastivebert.index.hnsw_index import HNSWIndexWrapper
from contrastivebert.classifier.contrastivebert_configuration import ModelConf

# The wrapper is sized from ModelConf.output_size (presumably the embedding dimension — verify).
indexer = HNSWIndexWrapper(ModelConf.output_size)
# Build the index from the reloaded frame
# (assumes fit_index reads the `pooler_output` column — TODO confirm).
indexer.fit_index(df)

In [7]:
# Save the fitted index as `index.hnsw` and its metadata as `metadata.json` under EXT_INDEX_PATH.
indexer.save_index(path.join(EXT_INDEX_PATH, 'index.hnsw'), path.join(EXT_INDEX_PATH, 'metadata.json'))

In [8]:
import sys
def clean_modules(query_name: str = 'hnsw_index'):
    """
    Remove from ``sys.modules`` every entry whose name contains ``query_name``.

    The list of matching names is printed before anything is removed. A name
    that has disappeared from ``sys.modules`` in the meantime only triggers a
    printed warning.
    """
    # Take a snapshot of the keys first; entries are deleted further down.
    matching: list[str] = list(
        filter(lambda loaded: query_name in loaded, tuple(sys.modules))
    )
    print(f'Unloading {matching} from sys.modules')

    absent = object()  # sentinel: distinguishes "key missing" from any stored value
    for loaded in matching:
        if sys.modules.pop(loaded, absent) is absent:
            print(f'Warning: {loaded!r} was not found in sys.modules')


# Drop the cached `hnsw_index` module (default query) so the import in the next cell
# executes the module again instead of reusing the copy already in sys.modules.
clean_modules()

Unloading ['contrastivebert.index.hnsw_index'] from sys.modules


In [9]:
import pandas as pd
from os import path
from contrastivebert.classifier.contrastivebert_configuration import ModelConf
from contrastivebert.index.hnsw_index import HNSWIndexWrapper

# Reload the stored rows and embeddings.
df = pd.read_json(path.join(EXT_INDEX_PATH, 'flower_embeddings.jsonl'), orient='records', lines=True)

# Pick 5 random rows as queries; the double brackets keep a one-column DataFrame
# (hence the printed shape (5, 1)) rather than a Series.
q_sample = df.sample(n=5)[['pooler_output']]
print(f'{q_sample.shape=}')
# `display` is not imported here; it is expected to be supplied by the notebook environment.
display(q_sample)

# Restore the index and metadata saved earlier into a fresh wrapper instance.
indexer = HNSWIndexWrapper(ModelConf.output_size)
indexer.load_index(path.join(EXT_INDEX_PATH, 'index.hnsw'), path.join(EXT_INDEX_PATH, 'metadata.json'))

# The queries are rows of the same embeddings file the index was built from, which is
# consistent with the rank-0 hits at distance 0.0 in the output below.
indexer.search(q_sample)

q_sample.shape=(5, 1)


Unnamed: 0,pooler_output
21,"[-0.2763001323, 0.3825729191, -0.1888462603, -..."
91,"[-0.2768054008, 0.3779165447, -0.1903408021, -..."
49,"[-0.268316865, 0.37197229270000004, -0.1907279..."
22,"[-0.2710065544, 0.3760875165, -0.1974271387, -..."
33,"[-0.2716973424, 0.3840194941, -0.1970253289000..."


Unnamed: 0,query_embeddings,rank,distance,FLOWER_NAME,PETAL_COLOR,PETAL_NUMBER,STEM_LENGTH,LEAF_SHAPE,HUMAN_DESCRIPTION
0,"[-0.27630013, 0.38257292, -0.18884626, -0.3628...",0,0.0,Rose,purple,17,37.5,oblong,"A purple rose with 17 petals, a 37.5 cm stem, ..."
1,"[-0.27630013, 0.38257292, -0.18884626, -0.3628...",1,0.007247,Daisy,orange,12,14.6,oblong,"A orange daisy with 12 petals, a 14.6 cm stem,..."
2,"[-0.27630013, 0.38257292, -0.18884626, -0.3628...",2,0.008482,Lily,white,12,47.1,oblong,"A white lily with 12 petals, a 47.1 cm stem, a..."
3,"[-0.27630013, 0.38257292, -0.18884626, -0.3628...",3,0.0086,Lily,yellow,14,23.9,oblong,"A yellow lily with 14 petals, a 23.9 cm stem, ..."
4,"[-0.27630013, 0.38257292, -0.18884626, -0.3628...",4,0.008709,Lily,purple,10,15.2,oblong,"A purple lily with 10 petals, a 15.2 cm stem, ..."
5,"[-0.2768054, 0.37791654, -0.1903408, -0.359137...",0,0.0,Iris,yellow,9,21.6,elliptic,"A yellow iris with 9 petals, a 21.6 cm stem, a..."
6,"[-0.2768054, 0.37791654, -0.1903408, -0.359137...",1,0.005373,Iris,pink,17,14.3,elliptic,"A pink iris with 17 petals, a 14.3 cm stem, an..."
7,"[-0.2768054, 0.37791654, -0.1903408, -0.359137...",2,0.007493,Daisy,orange,14,44.3,elliptic,"A orange daisy with 14 petals, a 44.3 cm stem,..."
8,"[-0.2768054, 0.37791654, -0.1903408, -0.359137...",3,0.007998,Daisy,blue,19,32.6,elliptic,"A blue daisy with 19 petals, a 32.6 cm stem, a..."
9,"[-0.2768054, 0.37791654, -0.1903408, -0.359137...",4,0.008717,Tulip,orange,10,21.4,elliptic,"A orange tulip with 10 petals, a 21.4 cm stem,..."
