# FindingModel Index

In [1]:
from pathlib import Path
from pprint import pprint

from findingmodel import ChoiceAttributeIded, ChoiceValueIded, FindingModelFull, NumericAttributeIded
from findingmodel.index import Index, IndexEntry

`Index` is a list of basic metadata about each finding model. It is stored in a MongoDB collection, 
which is configured using environment variables: `MONGODB_URI` (defaults to a localhost connection) 
and `MONGODB_DB` (defaults to `findingmodels`). (The names of the collections for model index
entries, people, and organizations can also be configured.)

The index can be populated from a directory of `*.fm.json` files containing model definitions, 
such as at the [findingmodels repository](https://github.com/openimagingdata/findingmodels). 
There is also a method (`update_from_directory()`) that will examine a directory of model 
definitions and add new files, update changed files, and remove deleted files.

In [2]:
# Create the index object; no arguments are passed, so its own defaults apply.
index = Index()

In [3]:
# Baseline: number of entries currently in the index (the call must be awaited).
await index.count()

1949

Get an entry from the index (metadata only; use `load_model()` to actually get the finding model) using ID, name, or one of its synonyms.

In [4]:
# Look up a single entry, here by its name.
entry = await index.get("abdominal aortic aneurysm")
# Stop here if the lookup did not produce an IndexEntry.
assert isinstance(entry, IndexEntry)
# Dump the entry's metadata as indented JSON for reading.
print(entry.model_dump_json(indent=2))

{
  "oifm_id": "OIFM_MSFT_134126",
  "name": "abdominal aortic aneurysm",
  "slug_name": "abdominal_aortic_aneurysm",
  "filename": "abdominal_aortic_aneurysm.fm.json",
  "file_hash_sha256": "36fe838b81f63a5d5da6b1c1a2a30a1900444db36da3c9870f8a4f6276a5b6ec",
  "description": "An abdominal aortic aneurysm (AAA) is a localized dilation of the abdominal aorta, typically defined as a diameter greater than 3 cm, which can lead to rupture and significant morbidity or mortality.",
  "synonyms": [
    "AAA"
  ],
  "tags": null,
  "contributors": [
    "HeatherChase"
  ],
  "attributes": [
    {
      "attribute_id": "OIFMA_MSFT_898601",
      "name": "presence",
      "type": "choice"
    },
    {
      "attribute_id": "OIFMA_MSFT_783072",
      "name": "change from prior",
      "type": "choice"
    }
  ]
}


## Add/Remove Model

Note that adding a model performs a number of checks, especially for duplicate IDs, duplicate names, and duplicate synonyms.

In [5]:
# Build a small throwaway model with one choice attribute and one numeric attribute.
new_model = FindingModelFull(
    oifm_id="OIFM_TEST_123456",
    name="Test Model",
    description="A simple test finding model.",
    synonyms=["Test Synonym"],
    tags=["tag1", "tag2"],
    attributes=[
        # Choice attribute: two possible values, at most one may be selected.
        ChoiceAttributeIded(
            oifma_id="OIFMA_TEST_123456",
            name="Severity",
            description="How severe is the finding?",
            values=[
                # Each value code here is the attribute's oifma_id plus a ".<n>" suffix.
                ChoiceValueIded(value_code="OIFMA_TEST_123456.0", name="Mild"),
                ChoiceValueIded(value_code="OIFMA_TEST_123456.1", name="Severe"),
            ],
            required=True,
            max_selected=1,
        ),
        # Numeric attribute: optional, bounded to 1-10 with unit "cm".
        NumericAttributeIded(
            oifma_id="OIFMA_TEST_654321",
            name="Size",
            description="Size of the finding.",
            minimum=1,
            maximum=10,
            unit="cm",
            required=False,
        ),
    ],
)
# Write the model to disk as indented JSON so it can be added to the index from a file.
# The encoding is stated explicitly: without it, open() falls back to a
# platform-dependent default, which can produce a non-UTF-8 JSON file.
with open("test_model.fm.json", "w", encoding="utf-8") as f:
    f.write(new_model.model_dump_json(indent=2))

In [6]:
# Add the model file to the index; the displayed return value reports the outcome (ADDED here).
await index.add_or_update_entry_from_file("test_model.fm.json")

<IndexReturnType.ADDED: 'added'>

In [7]:
# Re-check the total after adding the test model.
await index.count()

1950

In [8]:
# Remove the test entry again, then re-check the total.
await index.remove_entry("Test Model")  # Can either use name or OIFM ID
await index.count()

1949

## Synchronize with a Definition Directory

You can use `update_from_directory()` to update the state of the index from the definition files in 
a directory.

In [12]:
from pathlib import Path

added, updated, removed = await index.update_from_directory(Path.cwd().parent.parent / "findingmodels" / "defs")

In [13]:
# Show the three values returned by the sync: added, updated, removed.
print(added, updated, removed)

0 0 0


In [14]:
# Re-check the total after the sync.
await index.count()

1949

## Name Search

Look for hits fuzzily matching a target string. Might be useful for finding potential duplicates before inserting, or just to quickly
search in general. Hits can come from the name or synonyms (not description).

In [15]:
# Search for "abdominal" and pretty-print the returned entries.
results = await index.search("abdominal")
pprint(results)

[IndexEntry(oifm_id='OIFM_GMTS_002860', name='large abdominal gas pocket', slug_name='large_abdominal_gas_pocket', filename='large_abdominal_gas_pocket.fm.json', file_hash_sha256='1774124e4b0d080e82c6f59dfbb1573176f843bc52f468ee26313fb2c2167274', description='A substantial amount of gas in the abdominal cavity, often secondary to perforation or surgery.', synonyms=['Abdominal pneumoperitoneum', 'Gas under diaphragm'], tags=['abdomen', 'CT', 'XR', 'abdominal', 'air-filled', 'finding'], contributors=['GMTS'], attributes=[AttributeInfo(attribute_id='OIFMA_GMTS_816071', name='presence', type='choice'), AttributeInfo(attribute_id='OIFMA_GMTS_316586', name='change from prior', type='choice')]),
 IndexEntry(oifm_id='OIFM_GMTS_005202', name='layering abdominal calcifications', slug_name='layering_abdominal_calcifications', filename='layering_abdominal_calcifications.fm.json', file_hash_sha256='d76f3142c89e1bd70a2a8270a7fa919c2be159eade4ce2d840051f526f4532b1', description='Calcifications seen w

In [16]:
# Same search with a different term; `results` is overwritten by each search cell.
results = await index.search("breast")
pprint(results)

[IndexEntry(oifm_id='OIFM_MSFT_914493', name='breast calcification cluster', slug_name='breast_calcification_cluster', filename='breast_calcification_cluster.fm.json', file_hash_sha256='b17373bc40f53923d0e82e7bcbb4f3de3d15c33209978bf213d4e47d5c425add', description='Breast calcification clusters are typically a sign of benign changes in breast tissue but can sometimes indicate malignancy.', synonyms=['breast calcifications'], tags=['breast', 'calcification'], contributors=['HeatherChase'], attributes=[AttributeInfo(attribute_id='OIFMA_MSFT_661936', name='calcification cluster presence', type='choice'), AttributeInfo(attribute_id='OIFMA_MSFT_742222', name='location', type='choice'), AttributeInfo(attribute_id='OIFMA_MSFT_661937', name='number of calcifications', type='numeric')]),
 IndexEntry(oifm_id='OIFM_MSFT_356221', name='Breast density', slug_name='breast_density', filename='breast_density.fm.json', file_hash_sha256='416ba8b8166a7fdd0278ad040f00f89066c0d2e7ad936a47846348df9be567a5',

In [17]:
# Search with a term that does not appear in the name of the one hit shown below.
# NOTE(review): that hit contains "mammogram" only in its description; presumably it
# matched fuzzily via the synonym "Mammographic density" — confirm against search().
results = await index.search("mammogram")
pprint(results)

[IndexEntry(oifm_id='OIFM_MSFT_356221', name='Breast density', slug_name='breast_density', filename='breast_density.fm.json', file_hash_sha256='416ba8b8166a7fdd0278ad040f00f89066c0d2e7ad936a47846348df9be567a5', description='Breast density refers to the proportion of fatty tissue to fibroglandular tissue in the breast as seen on a mammogram.', synonyms=['Mammographic density', 'Breast tissue density'], tags=None, contributors=['HeatherChase'], attributes=[AttributeInfo(attribute_id='OIFMA_MSFT_806368', name='density score', type='numeric'), AttributeInfo(attribute_id='OIFMA_MSFT_765716', name='density category', type='choice')])]
