In [1]:
from os import path

# Project root: the parent of the first entry in `_dh`
# (presumably IPython's directory history, i.e. the notebook's start directory — confirm).
PROJECT_ROOT = path.abspath(path.join(globals()['_dh'][0], path.pardir))

# Everything below that lives inside the project's `contrastivebert` package directory.
_PACKAGE_DIR = path.join(PROJECT_ROOT, 'contrastivebert')
MODEL_DIR = path.abspath(path.join(_PACKAGE_DIR, 'classifier'))
EXT_INDEX_PATH = path.join(_PACKAGE_DIR, 'ext_index')

# The data lake sits two levels above the project root; abspath collapses the `..` segments.
DATALAKE_PATH = path.abspath(path.join(PROJECT_ROOT, path.pardir, path.pardir, 'datalake'))
CBP_PROCESSED = path.abspath(path.join(DATALAKE_PATH, 'contrastivebertproj', 'processed'))

# JSON-lines dataset loaded by read().
FQFN_PROCESSED_DF = path.join(CBP_PROCESSED, 'flowers_dataset_with_descriptions.jsonl')

In [2]:
import base64
import os

import pandas as pd


def encode_text_body(text_body: str | bytes) -> str:
    """
    Returns Base64-encoded string version of a UTF-8 string or bytes representing a UTF-8 string
    """
    if isinstance(text_body, bytes):
        return base64.b64encode(text_body).decode('utf-8')
    else:
        return base64.b64encode(text_body.encode('utf-8')).decode('utf-8')


def text_as_base64(fqfn: str) -> tuple[str, str]:
    """
    Reads a text file and returns its content together with the Base64 encoding of that content

    :param fqfn: path of the text file to read; the file is decoded as UTF-8
    :return: tuple of (file text, Base64-encoded string of the text's UTF-8 bytes)
    """
    # Decode explicitly as UTF-8: encode_text_body re-encodes the text as UTF-8, so relying on
    # the platform's default encoding could mis-decode non-ASCII files before they are encoded
    with open(fqfn, 'r', encoding='utf-8') as f:
        text = f.read()
    return text, encode_text_body(text)


def read(k: int | None = 3) -> pd.DataFrame:
    df = pd.read_json(FQFN_PROCESSED_DF, orient='records', lines=True)
    if k is not None:
        return df.sample(n=k)
    else:
        return df

In [3]:
# Draw 5 random rows from the dataset. read() samples without a fixed seed,
# so a different set of rows is selected on every run.
df = read(k=5)
df

Unnamed: 0,FLOWER_NAME,PETAL_COLOR,PETAL_NUMBER,STEM_LENGTH,LEAF_SHAPE,HUMAN_DESCRIPTION
59,Orchid,red,14,46.9,spatulate,"A red orchid with 14 petals, a 46.9 cm stem, a..."
97,Carnation,white,14,11.1,linear,"A white carnation with 14 petals, a 11.1 cm st..."
78,Marigold,blue,4,42.5,elliptic,"A blue marigold with 4 petals, a 42.5 cm stem,..."
41,Lily,orange,4,42.6,spatulate,"A orange lily with 4 petals, a 42.6 cm stem, a..."
60,Chrysanthemum,purple,14,49.9,oblong,"A purple chrysanthemum with 14 petals, a 49.9 ..."


In [4]:
from contrastivebert.classifier.inf_server_driver import InferenceServerDriver

# Build the inference driver against the external index path. Judging by the log output of
# this cell, construction loads the language-model components and initializes an ONNX session.
inf_server = InferenceServerDriver(fqfn_index=EXT_INDEX_PATH)

2025-07-09 22:26:32,217 - tensorcraft - INFO - XLA Device Not Supported: No module named 'torch_xla'
2025-07-09 22:26:32,228 - tensorcraft - INFO - Pytorch version=2.6.0 preferred device=mps build with MPS support=True
2025-07-09 22:26:32,245 - tensorcraft - INFO - resolved device_name: mps compute_device: mps tensor_device: mps
2025-07-09 22:26:32,246 - tensorcraft - INFO - LM Components are being loaded from /Users/shershen/workspace/tensorcraft/ext_models/microsoft/graphcodebert-base...
2025-07-09 22:26:32,468 - tensorcraft - INFO - LM Components successfully loaded for mps compute architecture
2025-07-09 22:26:32,671 - tensorcraft - INFO - ONNX Session InferenceSession initialized with providers ['CPUExecutionProvider'].


In [None]:
# Run inference on the sampled rows.
df_outcome = inf_server.predict(inputs=df)
# Leave the result as the cell's last expression so the notebook renders its rich repr
# (an HTML table if it is a DataFrame — confirm) instead of print()'s plain-text dump.
df_outcome

In [None]:
# Pull the description column out of the prediction result.
retrieved = df_outcome['HUMAN_DESCRIPTION']

# Number the descriptions starting at 1, one per line, and print the listing.
indexed_records = "\n".join(
    f"{position}. {description}"
    for position, description in enumerate(retrieved, start=1)
)
print(indexed_records)