In [None]:
import os
import numpy as np
import pandas as pd
import pydicom

**This notebook looks for VOI LUT Sequences, VOI LUT Functions and Transfer Syntaxes in the SIIM-Covid19 train dataset.**

- It took about 20 minutes to run in a GPU-enabled session.

In [None]:
# Load the study-level and image-level label tables
base_path = "/kaggle/input/siim-covid19-detection/"
studies_df = pd.read_csv(os.path.join(base_path, "train_study_level.csv"))
images_df = pd.read_csv(os.path.join(base_path, "train_image_level.csv"))

# Strip the "_study" / "_image" suffix from the IDs.
# str.rstrip() must not be used here: it treats its argument as a *set of
# characters*, so rstrip('_study') would also eat a trailing 'd', 's', 't',
# 'u' or 'y' that belongs to the ID itself (and rstrip('_image') a trailing
# 'a', 'e', 'g', 'i' or 'm'). A mangled study ID no longer matches
# StudyInstanceUID, so the inner merge below would silently drop those rows.
# An anchored regex removes exactly the suffix and nothing else.
studies_df['id'] = studies_df['id'].str.replace(r'_study$', '', regex=True)
images_df['id'] = images_df['id'].str.replace(r'_image$', '', regex=True)

# Merge the dfs together: one row per image, with the labels of its study.
# Both frames have an 'id' column, so pandas suffixes them as id_x (image ID)
# and id_y (study ID); id_y duplicates StudyInstanceUID and is dropped.
data_df = pd.merge(
    images_df,
    studies_df,
    how='inner',
    left_on='StudyInstanceUID',
    right_on='id',
).drop(columns=['id_y'])

In [None]:
# This function finds the first image in a StudyInstanceUID directory and returns its path
def get_image_by_study_id(study_id):
    """Return the path of the first file found under a study's directory.

    The directory ``<base_path>/train/<study_id>`` is walked recursively.
    Directories and file names are visited in sorted order so that "first"
    is well defined and the same file is returned on every run (os.walk
    itself makes no ordering guarantee).

    Parameters
    ----------
    study_id : str
        Name of the study directory below ``<base_path>/train``.

    Returns
    -------
    str
        Path of the first regular file found, or the sentinel string
        ``"none"`` when the directory is missing or contains no files.
    """
    study_path = os.path.join(base_path, "train", study_id)
    for subdir, dirs, files in os.walk(study_path):
        # Sorting dirs in place steers os.walk's (top-down) descent order.
        dirs.sort()
        for file in sorted(files):
            image = os.path.join(subdir, file)
            # Skip entries that are not regular files (e.g. broken symlinks).
            if os.path.isfile(image):
                return image
    return "none"

In [None]:
# Loop through the images and check how many images are not Explicit VR Little Endian transfer syntax.
# Also count images with VOI LUT Sequence / VOI LUT Function tags present.
#
# NOTE(review): get_image_by_study_id() returns the first file of the *study*,
# and it is called once per row of data_df. If a study contributes several
# rows, the same file is inspected (and counted) once per row and the study's
# other files are never read - confirm whether a per-image lookup is intended.

VOI_LUT_SEQUENCE_TAG = (0x0028, 0x3010)
VOI_LUT_FUNCTION_TAG = (0x0028, 0x1056)
EXPLICIT_VR_LE_UID = "1.2.840.10008.1.2.1"

count = 0
images_missing = 0
images_with_voi_lut = 0
images_with_voi_lut_function = 0
images_not_explicit_le = 0
deflated_syntaxes = []
voi_lut_functions = []

for index, row in data_df.iterrows():
    img_file = get_image_by_study_id(row['StudyInstanceUID'])

    # get_image_by_study_id signals "nothing found" with the string "none";
    # skip those rows instead of handing a bogus path to dcmread.
    if img_file == "none":
        images_missing += 1
        continue

    # Only header elements are inspected, so stop parsing before the pixel data.
    img = pydicom.dcmread(img_file, stop_before_pixels=True)

    # Check for a VOI LUT Sequence tag
    if VOI_LUT_SEQUENCE_TAG in img:
        images_with_voi_lut += 1

    # Check for a VOI LUT Function tag
    if VOI_LUT_FUNCTION_TAG in img:
        images_with_voi_lut_function += 1

        # Index the dataset (square brackets) to get the element; calling the
        # dataset like a function raises TypeError. Store the plain value so
        # the summary below lists the distinct function names.
        voi_lut_function = img[VOI_LUT_FUNCTION_TAG].value
        if voi_lut_function not in voi_lut_functions:
            voi_lut_functions.append(voi_lut_function)

    # Check the transfer syntax
    transfer_syntax = img.file_meta.TransferSyntaxUID
    if transfer_syntax != EXPLICIT_VR_LE_UID:
        images_not_explicit_le += 1

        if transfer_syntax not in deflated_syntaxes:
            deflated_syntaxes.append(transfer_syntax)

    count += 1

print("Done checking " + str(count) + " images")
if images_missing:
    print("Skipped " + str(images_missing) + " rows with no image file found")
print("Found " + str(images_with_voi_lut) + " images with VOI LUT")
print("Found " + str(images_with_voi_lut_function) + " images with VOI LUT Function")
print("VOI LUT Functions:")
print(voi_lut_functions)
print("Found " + str(images_not_explicit_le) + " images that are not Explicit VR LE")
print("Non Explicit VR LE Transfer Syntaxes")
print(deflated_syntaxes)