In [None]:
import pydicom

# Report the locally installed pydicom release; this is the copy of the
# package that later cells upload alongside the UDFs.
pydicom_version = pydicom.__version__
print(pydicom_version)

In [None]:
import os

import pydicom
from snowflake.snowpark import Session

# fetch snowflake connection information
from config import connection_parameters

# build connection to Snowflake
session = Session.builder.configs(connection_parameters).create()

# Attach the locally installed pydicom package to the session so that UDFs
# registered later can import it. The package directory is derived from the
# imported module itself (the same location `pip show pydicom` reports, plus
# the package folder), so no path has to be filled in by hand.
pydicom_dir = os.path.dirname(os.path.abspath(pydicom.__file__))
session.add_import(pydicom_dir)

# test if we have a connection
session.sql("select current_warehouse() wh, current_database() db, current_schema() schema, current_version() ver").show()

In [None]:
# List the files currently present in the DICOM source stage.
stage_listing = session.sql('ls @dicom.public.dicom_st;')
stage_listing.show()

In [None]:
# Results table: one row per DICOM file, with the extracted attributes kept
# as a VARIANT. `create or replace` recreates the table on every run, so any
# rows from a previous run are discarded.
create_table_sql = 'create or replace table dicom (filename string, meta variant)'
session.sql(create_table_sql).show()

# Single DICOM file (CT_small.dcm)

In [None]:
# DICOM attribute keywords to extract from each file. Every keyword starts
# out mapped to an empty string, which is the value reported when a file
# does not carry that attribute.
dicomAttrs = {
    keyword: ""
    for keyword in (
        "PerformingPhysicianName",
        "PatientName",
        "PatientBirthDate",
        "Manufacturer",
        "PatientID",
        "PatientSex",
        "PatientWeight",
        "PatientPosition",
        "StudyID",
        "PhotometricInterpretation",
        "RequestedProcedureID",
        "ProtocolName",
        "ImagingFrequency",
        "StudyDate",
        "StudyTime",
        "ContentDate",
        "ContentTime",
        "InstanceCreationDate",
        "SpecificCharacterSet",
        "StudyDescription",
        "ReferringPhysicianName",
        "ImageType",
        "ImplementationVersionName",
        "TransferSyntaxUID",
    )
}

# Single DICOM UDF

In [None]:
# Stage that will hold the uploaded code of the permanent Python UDF.
create_udf_stage_sql = 'create stage if not exists dicom_udf_st directory = (enable = true); '
session.sql(create_udf_stage_sql).show()

# Attach the sample DICOM file to the session imports so the UDF registered
# below can read it from its import directory.
session.add_import("@dicom.public.dicom_st/CT_small.dcm")

def get_import_dir() -> str:
    """Return the directory holding the files imported into the running UDF.

    The location is looked up under the ``snowflake_import_directory`` key of
    the interpreter's ``sys._xoptions`` mapping.

    Raises:
        KeyError: if the key is not set, e.g. when called outside a UDF.
    """
    # Imported inside the function so the helper is self-contained when it is
    # serialized together with a UDF.
    from sys import _xoptions

    return _xoptions["snowflake_import_directory"]

def pydicom_attrs() -> str:
    """Extract the attributes listed in ``dicomAttrs`` from ``CT_small.dcm``.

    The file is read from the UDF import directory. Each keyword in
    ``dicomAttrs`` is looked up on the main dataset first and then in the
    file-meta header; missing keywords keep their default from ``dicomAttrs``.

    Returns:
        A JSON object string mapping each keyword to ``str(value)``.
    """
    import json
    import os
    from pydicom import dcmread

    # os.path.join works whether or not the import directory is reported with
    # a trailing separator.
    fpath = os.path.join(get_import_dir(), 'CT_small.dcm')
    ds = dcmread(fpath)

    # Group 0002 elements (e.g. TransferSyntaxUID, ImplementationVersionName)
    # are kept by pydicom in ds.file_meta rather than in the main dataset.
    file_meta = getattr(ds, "file_meta", None)

    # Fill a private copy: writing into the shared dicomAttrs template would
    # let values from one call leak into the result of a later call.
    attrs = dict(dicomAttrs)
    for elName in attrs:
        if elName in ds:
            attrs[elName] = str(ds[elName].value)
        elif file_meta is not None and elName in file_meta:
            attrs[elName] = str(file_meta[elName].value)

    return json.dumps(attrs)


# Register pydicom_attrs as a permanent UDF named "pydicom_attrs"; its code is
# uploaded to the dicom_udf_st stage and replaces any existing definition.
udf_options = dict(
    name="pydicom_attrs",
    is_permanent=True,
    stage_location="dicom_udf_st",
    replace=True,
)
extrUDF = session.udf.register(pydicom_attrs, **udf_options)

print(f"UDF '{extrUDF.name!s}' created")

In [None]:
# Call the UDF once and store its JSON result, parsed into a VARIANT, next to
# the file name; then display the table contents.
insert_single_sql = "INSERT INTO DICOM SELECT 'CT_small.dcm', parse_json(pydicom_attrs())"
session.sql(insert_single_sql).show()
session.table('DICOM').show()

# Multiple files, dynamic

In [None]:
# dynamic- for production this should be an SPROC
import os

import pydicom

# Directory of the locally installed pydicom package; it is re-attached for
# every temporary UDF because the imports are cleared on each iteration.
pydicom_dir = os.path.dirname(os.path.abspath(pydicom.__file__))

dcmFiles = session.sql('select relative_path as FPATH from directory(@dicom.public.dicom_st) where relative_path ilike \'%.dcm\'')

for row in dcmFiles.to_local_iterator():
    fName = row['FPATH']

    # Dynamically create one temporary UDF per file: start from a clean import
    # list so only pydicom and the current file are attached to it.
    session.clear_imports()
    session.add_import(pydicom_dir)
    session.add_import("@dicom.public.dicom_st/"+fName)

    def pydicom_attrs_tmp() -> str:
        """Extract the attributes listed in ``dicomAttrs`` from the current file.

        Returns a JSON object string mapping each keyword to ``str(value)``;
        keywords missing from the file keep their default from ``dicomAttrs``.
        """
        import json
        import os
        from pydicom import dcmread

        # relative_path may contain sub-directories, while a staged import is
        # read from the import directory by its file name, so only the base
        # name is used here. os.path.join tolerates a missing trailing
        # separator on the import directory.
        fpath = os.path.join(get_import_dir(), os.path.basename(fName))
        # force=True lets pydicom read files that lack the standard DICOM
        # preamble/header.
        ds = dcmread(fpath, force=True)

        # Group 0002 elements (e.g. TransferSyntaxUID) live in ds.file_meta.
        file_meta = getattr(ds, "file_meta", None)

        # Fill a private copy so the shared dicomAttrs template is never
        # modified and no value can carry over between calls.
        attrs = dict(dicomAttrs)
        for elName in attrs:
            if elName in ds:
                attrs[elName] = str(ds[elName].value)
            elif file_meta is not None and elName in file_meta:
                attrs[elName] = str(file_meta[elName].value)

        return json.dumps(attrs)

    pyditmp = session.udf.register(pydicom_attrs_tmp)

    # The file name comes from the stage listing and is embedded in a SQL
    # string literal: escape backslashes and single quotes so an unusual name
    # cannot break (or alter) the statement.
    fNameSql = fName.replace("\\", "\\\\").replace("'", "''")
    session.sql('INSERT INTO DICOM SELECT \''+fNameSql+'\',parse_json('+pyditmp.name+'())').collect()
    print (fName + ' ... OK')

print('done')

session.table('DICOM').show()

In [None]:
# Expose every extracted attribute of the DICOM table as its own string
# column. The statement is assembled from adjacent literals purely for
# readability; the SQL text sent to Snowflake is unchanged.
create_view_sql = (
    'create or replace view dicom_vw as select filename, '
    'meta:ContentDate::String as ContentDate,'
    'meta:ContentTime::String as ContentTime,'
    'meta:ImageType::String as ImageType,'
    'meta:ImagingFrequency::String as ImagingFrequency,'
    'meta:ImplementationVersionName::String as ImplementationVersionName,'
    'meta:InstanceCreationDate::String as InstanceCreationDate,'
    'meta:Manufacturer::String as Manufacturer,'
    'meta:PatientBirthDate::String as PatientDOB,'
    'meta:PatientID::String as PatientId,'
    'meta:PatientName::String as PatientName,'
    'meta:PatientPosition::String as PatientPosition,'
    'meta:PatientSex::String as PatientSex,'
    'meta:PatientWeight::String as PatientWeight,'
    'meta:PerformingPhysicianName::String as PerformingPhysicianName,'
    'meta:PhotometricInterpretation::String as PhotometricInterpretation,'
    'meta:ProtocolName::String as ProtocolName,'
    'meta:ReferringPhysicianName::String as ReferringPhysicianName,'
    'meta:RequestedProcedureID::String as RequestedProcedureID, '
    'meta:SpecificCharacterSet::String as SpecificCharacterSet, '
    'meta:StudyDate::String as StudyDate, '
    'meta:StudyDescription::String as StudyDescription, '
    'meta:StudyID::String as StudyID, '
    'meta:StudyTime::String as StudyTime, '
    'meta:TransferSyntaxUID::String as TransferSyntaxUID from DICOM'
)
session.sql(create_view_sql).show()

# Pull a handful of columns from the view into pandas for a quick look.
df = session.table("DICOM_VW")
preview_columns = ['filename', 'imagetype', 'manufacturer', 'patientsex', 'PHOTOMETRICINTERPRETATION']
df.select(*preview_columns).toPandas()
