In [1]:
import random
from openai import OpenAI, AsyncOpenAI
import motor.motor_asyncio
from dotenv import dotenv_values
from openimagingdatamodel.ontology_tools.snomedct_concept import SnomedCTConcept
from openimagingdatamodel.ontology_tools.snomedct_concept_repo import SnomedCTConceptRepo
from openimagingdatamodel.ontology_tools.embedding_creator import EmbeddingCreator, AsyncEmbeddingCreator

Load Env and Database


In [2]:
config = dotenv_values(".env")
client = motor.motor_asyncio.AsyncIOMotorClient(config["ATLAS_DSN"])
# Ping the client to confirm
print(await client.server_info())  #

{'version': '7.0.11', 'gitVersion': 'f451220f0df2b9dfe073f1521837f8ec5c208a8c', 'modules': ['enterprise'], 'allocator': 'tcmalloc', 'javascriptEngine': 'mozjs', 'sysInfo': 'deprecated', 'versionArray': [7, 0, 11, 0], 'bits': 64, 'debug': False, 'maxBsonObjectSize': 16777216, 'storageEngines': ['devnull', 'inMemory', 'queryable_wt', 'wiredTiger'], 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1718281065, 5), 'signature': {'hash': b'F\xe0\xc8\x1c{\x199\x02\xde`\xaa\\T,\xb2\xd9@\x89B[', 'keyId': 7327016299177967635}}, 'operationTime': Timestamp(1718281065, 5)}


In [3]:
db = client["ontologies"]
snomedct_collection = db["snomedct"]
SNOMED_CONCEPT_COUNT = await snomedct_collection.count_documents({})
print(f"SNOMED CT COUNT: {SNOMED_CONCEPT_COUNT}")

SNOMED CT COUNT: 508540


Generate Embedding


In [4]:
# Both clients authenticate with the same key, so read it from the config once.
embedding_api_key = config["OPENAI_EMBEDDING_API_KEY"]

# Blocking client for the synchronous creator, awaitable client for the async one.
openai_client = OpenAI(api_key=embedding_api_key)
async_openai_client = AsyncOpenAI(api_key=embedding_api_key)


Initialize SnomedCTConcept 

In [5]:
#raw_doc = await snomedct_collection.find_one({}, skip=random.randint(0, snomedct_concept))
one_raw_doc = await snomedct_collection.find_one({'_id': '100005'})
print(one_raw_doc)
concept = SnomedCTConcept(**one_raw_doc)

{'_id': '100005', 'conceptId': '100005', 'effectiveDate': '2008-07-31', 'modules': ['SNOMED-CT-core'], 'languageCode': 'en', 'preferredTerm': 'SNOMED RT Concept', 'terms': ['SNOMED RT Version 1.0 Concept'], 'semanticTags': ['special concept'], 'caseSignificance': 'sensitive', 'definitions': None, 'embedding_vector': [-0.011368467, 0.029763978, -0.023504255, 0.037336227, -0.03957761, -0.008107352, 0.0036700156, 0.02711874, 0.008531398, -0.023807144, 0.022575391, -0.05112781, 0.010853554, -0.018486379, -0.0065777586, 0.08064948, -0.076247476, 0.0151041085, 0.037921812, -0.030753419, -0.023100402, 0.024271576, -0.001195784, 0.0047250823, -0.013125228, 0.00061366754, 0.014064186, 0.015659407, -0.022575391, 0.0030718083, -0.024715815, -0.02841107, 0.03448906, 0.023988878, 0.028592804, 0.047493134, -0.0024067124, 0.05185475, 0.023504255, -0.03705353, 0.005126411, -0.007541958, -0.040143006, 0.009869162, 0.0066888183, 0.024513887, -0.008344616, 0.0040259114, -0.020677282, 0.03160151, 0.006017

Synchronous Embedding Creator



In [6]:
# Wrap the blocking OpenAI client and request an embedding for the concept
# loaded above; the call returns only once the result is available.
embedding_creator = EmbeddingCreator(openai_client)
embeded_text = embedding_creator.create_embedding_for_snomedctconcept(concept)

# NOTE: despite its name, `embeded_text` holds the returned embedding (printed
# below as a list of floats); the name is kept because a later cell reads it.
print(embeded_text)


[-0.011368467, 0.029763978, -0.023504255, 0.037336227, -0.03957761, -0.008107352, 0.0036700156, 0.02711874, 0.008531398, -0.023807144, 0.022575391, -0.05112781, 0.010853554, -0.018486379, -0.0065777586, 0.08064948, -0.076247476, 0.0151041085, 0.037921812, -0.030753419, -0.023100402, 0.024271576, -0.001195784, 0.0047250823, -0.013125228, 0.00061366754, 0.014064186, 0.015659407, -0.022575391, 0.0030718083, -0.024715815, -0.02841107, 0.03448906, 0.023988878, 0.028592804, 0.047493134, -0.0024067124, 0.05185475, 0.023504255, -0.03705353, 0.005126411, -0.007541958, -0.040143006, 0.009869162, 0.0066888183, 0.024513887, -0.008344616, 0.0040259114, -0.020677282, 0.03160151, 0.006017412, -0.051935516, 0.012327618, 0.0041546393, -0.0011459334, -0.018849846, 0.040526666, 0.124467544, 0.023988878, -0.008708084, 0.005547933, -0.0055580293, 0.011711742, 0.030490914, 0.013842068, -0.040607437, -0.021525374, 0.0015712412, -0.025261017, 0.011943958, -0.05641829, 0.0146396775, -0.05137012, 0.078549445, 0

Asynchronous Embedding Creator call

In [7]:
async_embedding_creator =  AsyncEmbeddingCreator(async_openai_client)
embeded_text = await async_embedding_creator.create_embedding_for_snomedctconcept(concept)
print(embeded_text)

[-0.011368467, 0.029763978, -0.023504255, 0.037336227, -0.03957761, -0.008107352, 0.0036700156, 0.02711874, 0.008531398, -0.023807144, 0.022575391, -0.05112781, 0.010853554, -0.018486379, -0.0065777586, 0.08064948, -0.076247476, 0.0151041085, 0.037921812, -0.030753419, -0.023100402, 0.024271576, -0.001195784, 0.0047250823, -0.013125228, 0.00061366754, 0.014064186, 0.015659407, -0.022575391, 0.0030718083, -0.024715815, -0.02841107, 0.03448906, 0.023988878, 0.028592804, 0.047493134, -0.0024067124, 0.05185475, 0.023504255, -0.03705353, 0.005126411, -0.007541958, -0.040143006, 0.009869162, 0.0066888183, 0.024513887, -0.008344616, 0.0040259114, -0.020677282, 0.03160151, 0.006017412, -0.051935516, 0.012327618, 0.0041546393, -0.0011459334, -0.018849846, 0.040526666, 0.124467544, 0.023988878, -0.008708084, 0.005547933, -0.0055580293, 0.011711742, 0.030490914, 0.013842068, -0.040607437, -0.021525374, 0.0015712412, -0.025261017, 0.011943958, -0.05641829, 0.0146396775, -0.05137012, 0.078549445, 0

Update SnomedCT Embedding Vector value


In [9]:
snomed_ct_repo = SnomedCTConceptRepo(snomedct_collection)
concept_result = await snomed_ct_repo.update_concept(concept, embeded_text)
if concept_result == 1:
    print(concept)
