In [1]:
import motor.motor_asyncio
import pymongo
from dotenv import dotenv_values
from openai import AsyncOpenAI, OpenAI
from openimagingdatamodel.ontology_tools.embedding_creator import AsyncEmbeddingCreator, EmbeddingCreator
from openimagingdatamodel.ontology_tools.snomedct_concept import SnomedCTConcept
from openimagingdatamodel.ontology_tools.snomedct_concept_repo import AsyncSnomedCTConceptRepo, SnomedCTConceptRepo

In [2]:
config = dotenv_values(".env")

## Asynchronous


### Load Env and Database


In [9]:
client = motor.motor_asyncio.AsyncIOMotorClient(config["ATLAS_DSN"])
# Ping the client to confirm
print(await client.server_info())

{'version': '7.0.11', 'gitVersion': 'f451220f0df2b9dfe073f1521837f8ec5c208a8c', 'modules': ['enterprise'], 'allocator': 'tcmalloc', 'javascriptEngine': 'mozjs', 'sysInfo': 'deprecated', 'versionArray': [7, 0, 11, 0], 'bits': 64, 'debug': False, 'maxBsonObjectSize': 16777216, 'storageEngines': ['devnull', 'inMemory', 'queryable_wt', 'wiredTiger'], 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1718372521, 33), 'signature': {'hash': b'\xad\x87\x81\xcfO\xcf\xcb\x97\xc0g\xc8\xfce\x11\x8e\x0f^?\xee\xe1', 'keyId': 7327016299177967635}}, 'operationTime': Timestamp(1718372521, 33)}


In [10]:
db = client["ontologies"]
snomedct_collection = db["snomedct"]
snomed_concept_repo = AsyncSnomedCTConceptRepo(snomedct_collection)
SNOMED_CONCEPT_COUNT = await snomed_concept_repo.get_count()
print(f"SNOMED CT COUNT: {SNOMED_CONCEPT_COUNT}")

SNOMED CT COUNT: 508540


Initialize SnomedCTConcept


In [12]:
concept: SnomedCTConcept = (await snomed_concept_repo.get_random_concepts(1))[0]
print(concept)

concept_id='41956007' effective_date=datetime.date(2017, 7, 31) modules=[<SnomedCTModule.SNOMED_CT_CORE: 'SNOMED-CT-core'>] embedding_vector=None language_code='en' preferred_term='Propylene imide' terms=[] case_significance=<CaseSignificance.INSENSITIVE: 'insensitive'> definitions=None


Asynchronous Generate Embedding Creator


In [16]:
async_openai_client = AsyncOpenAI(api_key=config["OPENAI_API_KEY"])
async_embedding_creator = AsyncEmbeddingCreator(async_openai_client)
embedding_vector = await async_embedding_creator.create_embedding_for_snomedctconcept(concept)
print(embedding_vector)

[-0.05559616908431053, -0.025808831676840782, -0.014190570451319218, 0.05611063167452812, 0.022842101752758026, 0.055973440408706665, -0.012878692708909512, 0.002632329473271966, -0.009577563032507896, 0.028844157233834267, 0.015150899067521095, 0.002630185801535845, 0.04582139477133751, -0.041534215211868286, -0.0351891852915287, -0.007271059788763523, -0.054327163845300674, -0.029410064220428467, -0.03151935711503029, -0.0077469367533922195, 0.03488050773739815, -0.06276433914899826, 0.021710285916924477, -0.050417251884937286, 0.026066062971949577, 0.0024908524937927723, 0.004574422724545002, 0.004104976076632738, -0.007412536535412073, 0.015708232298493385, -0.022036112844944, -0.0005393809988163412, 0.01932661421597004, 0.07099572569131851, -0.002565877977758646, -0.02210470661520958, -0.023047886788845062, 0.0009694388718344271, 0.037727195769548416, -0.03752141073346138, -0.0018145495560020208, 0.006417910568416119, -0.0002696904994081706, 0.030096013098955154, 0.031828034669160

Update SnomedCT Embedded Vector value


In [17]:
concept_result = await snomed_concept_repo.write_embedding_vector(concept, embedding_vector)
if concept_result:
    print(concept)
else:
    print("Failed to write concept")

concept_id='41956007' effective_date=datetime.date(2017, 7, 31) modules=[<SnomedCTModule.SNOMED_CT_CORE: 'SNOMED-CT-core'>] embedding_vector=[-0.05559616908431053, -0.025808831676840782, -0.014190570451319218, 0.05611063167452812, 0.022842101752758026, 0.055973440408706665, -0.012878692708909512, 0.002632329473271966, -0.009577563032507896, 0.028844157233834267, 0.015150899067521095, 0.002630185801535845, 0.04582139477133751, -0.041534215211868286, -0.0351891852915287, -0.007271059788763523, -0.054327163845300674, -0.029410064220428467, -0.03151935711503029, -0.0077469367533922195, 0.03488050773739815, -0.06276433914899826, 0.021710285916924477, -0.050417251884937286, 0.026066062971949577, 0.0024908524937927723, 0.004574422724545002, 0.004104976076632738, -0.007412536535412073, 0.015708232298493385, -0.022036112844944, -0.0005393809988163412, 0.01932661421597004, 0.07099572569131851, -0.002565877977758646, -0.02210470661520958, -0.023047886788845062, 0.0009694388718344271, 0.0377271957

## Synchronous


Setup Database/Repo


In [3]:
# Set up a pymongo client, not a motor client
client = pymongo.MongoClient(config["ATLAS_DSN"])
# Ping the client to confirm
print(client.server_info())

{'version': '7.0.11', 'gitVersion': 'f451220f0df2b9dfe073f1521837f8ec5c208a8c', 'modules': ['enterprise'], 'allocator': 'tcmalloc', 'javascriptEngine': 'mozjs', 'sysInfo': 'deprecated', 'versionArray': [7, 0, 11, 0], 'bits': 64, 'debug': False, 'maxBsonObjectSize': 16777216, 'storageEngines': ['devnull', 'inMemory', 'queryable_wt', 'wiredTiger'], 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1718373019, 2), 'signature': {'hash': b'w\x96\x9e\x0e@0\x0f\r\xb8?\xe7\x04\xef\x05hP\xd5\x08\x0bg', 'keyId': 7327016299177967635}}, 'operationTime': Timestamp(1718373019, 2)}


In [4]:
snomed_concept_repo = SnomedCTConceptRepo(client["ontologies"]["snomedct"])
SNOMED_CONCEPT_COUNT = snomed_concept_repo.get_count()
print(f"SNOMED CT COUNT: {SNOMED_CONCEPT_COUNT}")

SNOMED CT COUNT: 508540


In [5]:
concept = snomed_concept_repo.get_random_concepts(1)[0]
concept

SnomedCTConcept(concept_id='265499009', effective_date=datetime.date(2017, 7, 31), modules=[<SnomedCTModule.SNOMED_CT_CORE: 'SNOMED-CT-core'>], embedding_vector=None, language_code='en', preferred_term='Transluminal operations on aorta', terms=[], case_significance=<CaseSignificance.INSENSITIVE: 'insensitive'>, definitions=None)

Synchronous Embedding Creator call


In [6]:
openai_client = OpenAI(api_key=config["OPENAI_API_KEY"])
embedding_creator = EmbeddingCreator(openai_client)
embedding_vector = embedding_creator.create_embedding_for_snomedctconcept(concept)
print(embedding_vector)

[0.014896269887685776, 0.00478437589481473, -0.015226878225803375, 0.003742959350347519, -0.005903721321374178, 0.04893004149198532, -0.028829053044319153, 0.020252125337719917, -0.014754580333828926, -0.002000180771574378, 0.07092022150754929, 0.04201560467481613, -0.006347681395709515, -0.027941131964325905, -0.011731875129044056, -0.013403808698058128, -0.006338235456496477, 0.014083917252719402, 0.008591094985604286, -0.0642324909567833, -0.006579107139259577, 0.02582523785531521, -0.01904304325580597, -0.03644249215722084, -0.02437056228518486, 0.03068045899271965, 0.01842905580997467, 0.026467563584446907, 0.04915674403309822, -0.01741833984851837, 0.012619794346392155, 0.015122972428798676, 0.022141316905617714, 0.05512658879160881, 0.021215613931417465, -0.016757123172283173, 0.014518431387841702, 0.016388731077313423, -0.0003176202008035034, -0.009927697479724884, 0.006050133612006903, -0.0020615796092897654, -0.015925878658890724, 0.045718416571617126, 0.012837051413953304, -

Write Back to Database


In [7]:
concept_result = snomed_concept_repo.write_embedding_vector(concept, embedding_vector)
if concept_result:
    print(concept)
else:
    print("Failed to write concept")

concept_id='265499009' effective_date=datetime.date(2017, 7, 31) modules=[<SnomedCTModule.SNOMED_CT_CORE: 'SNOMED-CT-core'>] embedding_vector=[0.014896269887685776, 0.00478437589481473, -0.015226878225803375, 0.003742959350347519, -0.005903721321374178, 0.04893004149198532, -0.028829053044319153, 0.020252125337719917, -0.014754580333828926, -0.002000180771574378, 0.07092022150754929, 0.04201560467481613, -0.006347681395709515, -0.027941131964325905, -0.011731875129044056, -0.013403808698058128, -0.006338235456496477, 0.014083917252719402, 0.008591094985604286, -0.0642324909567833, -0.006579107139259577, 0.02582523785531521, -0.01904304325580597, -0.03644249215722084, -0.02437056228518486, 0.03068045899271965, 0.01842905580997467, 0.026467563584446907, 0.04915674403309822, -0.01741833984851837, 0.012619794346392155, 0.015122972428798676, 0.022141316905617714, 0.05512658879160881, 0.021215613931417465, -0.016757123172283173, 0.014518431387841702, 0.016388731077313423, -0.0003176202008035