In [7]:
from findingmodelforge.config import settings
import lancedb
from lancedb.pydantic import LanceModel, Vector
from lancedb.embeddings import get_registry, EmbeddingFunctionRegistry, OpenAIEmbeddings

In [2]:
db_conn = lancedb.connect(settings.lancedb_uri)

In [18]:
db_conn.table_names()

['finding_models']

In [19]:
db_conn.drop_table("finding_models")

In [9]:
def get_embedding_function() -> OpenAIEmbeddings:
    registry: EmbeddingFunctionRegistry = get_registry()
    embedding: OpenAIEmbeddings = registry.get("openai")
    model = embedding.create(
        name=settings.lancedb_embeddings_model,
        api_key=settings.openai_api_key.get_secret_value(),
    )
    return model


embedding_func = get_embedding_function()

In [20]:
class FindingModel(LanceModel):
    model_id: str
    name: str
    tags: list[str]
    text: str = embedding_func.SourceField()
    vector: Vector(embedding_func.ndims()) = embedding_func.VectorField()  # type: ignore

In [21]:
fm_table = db_conn.create_table("finding_models", schema=FindingModel, mode="overwrite")

[2024-12-27T22:31:26Z WARN  lance::dataset::write::insert] No existing dataset at /Users/talkasab/repos/FindingModelForge/packages/findingmodelforge/data/semanticdb/finding_models.lance, it will be created


In [None]:
from findingmodelforge.models.finding_model_db import FindingModelDb
from motor.motor_asyncio import AsyncIOMotorClient
from beanie import init_beanie


async def init_db() -> None:
    client = AsyncIOMotorClient(settings.mongo_dsn.get_secret_value())
    db = client.get_database(settings.database_name)
    await init_beanie(db, document_models=[FindingModelDb])


await init_db()

In [22]:
await FindingModelDb.count()

2

In [23]:
data_to_load = []
async for fm in FindingModelDb.find():
    data = {
        "model_id": str(fm.id),
        "tags": fm.tags,
        "name": fm.name,
        "text": f"{fm.name}\n\nDescription: {fm.description}",
    }
    data_to_load.append(data)
data_to_load

[{'model_id': '676d72d634391d71cadb112a',
  'tags': ['knee', 'mri'],
  'name': 'ACL tear',
  'text': 'ACL tear\n\nDescription: An ACL tear refers to the rupture or complete tear of the anterior cruciate ligament, a critical stabilizing ligament in the knee, often resulting from sports-related injuries and characterized by knee instability, swelling, and pain.'},
 {'model_id': '676dc0fe02e361bc820f2e5e',
  'tags': ['knee', 'mri'],
  'name': 'medial meniscus tear',
  'text': 'medial meniscus tear\n\nDescription: A medial meniscus tear is a common knee injury characterized by a disruption of the fibrous cartilage located on the inner side of the knee joint, often resulting from trauma or degeneration, which may lead to pain, swelling, and mechanical symptoms such as locking or limited range of motion.'}]

In [12]:
fm_table.add(data_to_load)

In [None]:
results = words_table.search("acl").to_list()
for result in results:
    print(f"{result['text']} (model_id: {result['model_id']}) - {result['_distance']:0.2f}")

ACL tear (model_id: 1) - 0.94
medial meniscus tear (model_id: 2) - 1.53


In [14]:
results[0].keys()

dict_keys(['model_id', 'text', 'vector', '_distance'])