In [1]:
import os
import pydicom
import numpy as np
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
import cv2
from ipywidgets import interact, IntSlider, FloatSlider, fixed, Button, HBox, VBox, Output

# Function to load DICOM files from a directory
def load_dicom_images_from_folder(folder_path):
    dicom_files = [pydicom.dcmread(os.path.join(folder_path, f)) for f in sorted(os.listdir(folder_path)) if f.endswith(".dcm")]
    images = np.stack([f.pixel_array for f in dicom_files], axis=0)
    return images

# Function to display a single image with adjustable size
def show_image(slice_index, zoom, images):
    plt.figure(figsize=(zoom * 6, zoom * 6))  # Adjust the size of the figure based on zoom
    plt.imshow(images[slice_index], cmap="gray")
    plt.axis('off')
    plt.show()

ModuleNotFoundError: No module named 'pydicom'

In [76]:
import pylidc as pl

# ann = pl.query(pl.Annotation).first()
# print(ann.scan.patient_id)

for i in range(5):
    ann = pl.query(pl.Annotation).filter(pl.Annotation.malignancy == i)
    print(f"Malignancy {i +1}: {ann.count()}")

Malignancy 1: 0
Malignancy 2: 1020
Malignancy 3: 1580
Malignancy 4: 2606
Malignancy 5: 962


In [80]:
ann = pl.query(pl.Annotation)\
        .filter(pl.Annotation.malignancy == 1).first()

print(ann.malignancy, ann.Malignancy)
# print(ann.margin, ann.Margin)

1 Highly Unlikely
5 Sharp


In [26]:

# Data Loading function to parse XML annotations
def parse_annotation(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    malignancy_scores = []
    for nodule in root.findall('.//readingSession/uncertainNodule'):
        malignancy = nodule.find('malignancy').text
        malignancy_scores.append(int(malignancy))

    return np.mean(malignancy_scores) if malignancy_scores else None

def load_data(data_dir, annotation_dir):
    data = []

    for patient_id in os.listdir(data_dir):
        patient_path = os.path.join(data_dir, patient_id)
        for nodule_id in os.listdir(patient_path):
            nodule_path = os.path.join(patient_path, nodule_id)
            images_path = os.path.join(nodule_path, 'images')
            mask_paths = [os.path.join(nodule_path, f'mask-{i}') for i in range(4)]  # Handle multiple masks

            # Parse corresponding annotation XML
            xml_file = os.path.join(annotation_dir, f"{patient_id}_{nodule_id}.xml")
            label = parse_annotation(xml_file)

            try:
                image_slices = [cv2.imread(os.path.join(images_path, img), cv2.IMREAD_GRAYSCALE) for img in sorted(os.listdir(images_path))]
                if not image_slices:
                    continue

                mask_slices = []
                for mask_path in mask_paths:
                    if os.path.exists(mask_path):
                        mask_slices += [cv2.imread(os.path.join(mask_path, img), cv2.IMREAD_GRAYSCALE) for img in sorted(os.listdir(mask_path))]

                if not mask_slices:
                    continue

                image_slices = [cv2.resize(slice, (256, 256)) for slice in image_slices]
                mask_slices = [cv2.resize(slice, (256, 256)) for slice in mask_slices]

                # Combine masks (e.g., averaging)
                combined_mask = np.mean(mask_slices, axis=0)

                # Label assignment
                if label is not None:
                    data.append((np.array(image_slices), combined_mask, label))
            except Exception as e:
                print(f"Error processing {nodule_id}: {e}")

    return data

In [62]:
# DEBUGGING XML PARSER FUNC

xml_file = "/Users/newuser/Documents/ITU/master_thesis/data/lung_data/LIDC-XML-only/tcia-lidc-xml/188/000.xml"
tree = ET.parse(xml_file)
# Define the XML namespace and assign a prefix
namespace = {'nih': 'http://www.nih.gov'}

# # Parse the XML file
tree = ET.parse(xml_file)
root = tree.getroot()
print(f"Root: {root.tag}")

print("Second level:")
for i in root:
    print(i.tag)

print("Third level:")
for i in root[1]:
    print(i.tag)

# there is only a single RseponseHeader
response_header = root.find("nih:ResponseHeader", namespace)

reading_sessions = root.findall("nih:readingSession", namespace)

# for r in reading_sessions:
#     annotation_version = r.find('nih:annotationVersion', namespace).text
#     print(annotation_version)



# for child in root:
#     print(child.tag, child.attrib)

# maglinancy_scores = []

# for nodule in root.findall('.//nih:readingSession/nih:unblindedReadNodule', namespace):
#     characteristics = nodule.find('nih:characteristics', namespace)
#     print(characteristics)
#     break


# # Extract data from the ResponseHeader section
# response_header = root.find('nih:ResponseHeader', namespace)
# if response_header is not None:
#     version = response_header.find('nih:Version', namespace).text
#     message_id = response_header.find('nih:MessageId', namespace).text
#     series_instance_uid = response_header.find('nih:SeriesInstanceUid', namespace).text
#     study_instance_uid = response_header.find('nih:StudyInstanceUID', namespace).text

#     print(f"Version: {version}")
#     print(f"Message ID: {message_id}")
#     print(f"Series Instance UID: {series_instance_uid}")
#     print(f"Study Instance UID: {study_instance_uid}")

# # Iterate over each readingSession -> unblindedReadNodule
# for reading_session in root.findall('nih:readingSession', namespace):
#     annotation_version = reading_session.find('nih:annotationVersion', namespace).text
#     servicing_radiologist_id = reading_session.find('nih:servicingRadiologistID', namespace).text
#     print(f"Annotation Version: {annotation_version}")
#     print(f"Servicing Radiologist ID: {servicing_radiologist_id}")

#     for nodule in reading_session.findall('nih:unblindedReadNodule', namespace):
#         nodule_id = nodule.find('nih:noduleID', namespace).text
#         print(f"Nodule ID: {nodule_id}")

#         for roi in nodule.findall('nih:roi', namespace):
#             image_z_position = roi.find('nih:imageZposition', namespace).text
#             image_sop_uid = roi.find('nih:imageSOP_UID', namespace).text
#             inclusion = roi.find('nih:inclusion', namespace).text
#             print(f"Image Z Position: {image_z_position}")
#             print(f"Image SOP UID: {image_sop_uid}")
#             print(f"Inclusion: {inclusion}")

#             for edge_map in roi.findall('nih:edgeMap', namespace):
#                 x_coord = edge_map.find('nih:xCoord', namespace).text
#                 y_coord = edge_map.find('nih:yCoord', namespace).text
#                 print(f"Edge Map X Coord: {x_coord}, Y Coord: {y_coord}")

#         print("-------------------------")

Root: {http://www.nih.gov}LidcReadMessage
Second level:
{http://www.nih.gov}ResponseHeader
{http://www.nih.gov}readingSession
{http://www.nih.gov}readingSession
{http://www.nih.gov}readingSession
{http://www.nih.gov}readingSession
Third level:
{http://www.nih.gov}annotationVersion
{http://www.nih.gov}servicingRadiologistID
{http://www.nih.gov}unblindedReadNodule
{http://www.nih.gov}unblindedReadNodule
{http://www.nih.gov}unblindedReadNodule
{http://www.nih.gov}unblindedReadNodule
{http://www.nih.gov}nonNodule
{http://www.nih.gov}nonNodule
{http://www.nih.gov}nonNodule
{http://www.nih.gov}nonNodule
{http://www.nih.gov}nonNodule


# OLD CODE:

In [None]:
from glob import glob
annotation_dir_path = "/Users/newuser/Documents/ITU/master_thesis/data/lung_data/LIDC-XML-only/tcia-lidc-xml/188/"

xml_files = sorted(glob(f"{annotation_dir_path}/*.xml"))

for f in xml_files:
    print(parse_annotation(f))

# parse_annotation("/Users/newuser/Documents/ITU/master_thesis/data/lung_data/LIDC-XML-only/tcia-lidc-xml/188/000.xml")

In [None]:
data_dir = "/Users/newuser/Documents/ITU/master_thesis/data/lung_data/manifest-1725363397135/LIDC-IDRI/"
annotation_dir = "/kaggle/input/annotaion/tcia-lidc-xml/188"  # Update with correct path
data = load_data(data_dir, annotation_dir)

In [9]:
# Path to the directory containing DICOM files
folder_path = '/Users/newuser/Documents/ITU/master_thesis/data/lung_data/manifest-1725363397135/LIDC-IDRI/LIDC-IDRI-0001/01-01-2000-NA-NA-30178/3000566.000000-NA-03192/'

# Load the images
images = load_dicom_images_from_folder(folder_path)

# Create sliders for slice index and zoom
slice_slider = IntSlider(min=0, max=images.shape[0]-1, step=1, value=0, description='Slice:')
zoom_slider = FloatSlider(min=0.5, max=3.0, step=0.1, value=1.0, description='Zoom:')

# Create the interactive widget
interact(show_image, slice_index=slice_slider, zoom=zoom_slider, images=fixed(images));

interactive(children=(IntSlider(value=0, description='Slice:', max=132), FloatSlider(value=1.0, description='Z…

In [3]:
import os
import pydicom
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact, IntSlider, FloatSlider, fixed, Button, HBox, VBox, Output

# Function to load DICOM files from a directory
def load_dicom_images_from_folder(folder_path):
    dicom_files = [pydicom.dcmread(os.path.join(folder_path, f)) for f in sorted(os.listdir(folder_path)) if f.endswith(".dcm")]
    images = np.stack([f.pixel_array for f in dicom_files], axis=0)
    return images

# Function to display a single image with adjustable size
def show_image(slice_index, zoom, images):
    plt.figure(figsize=(zoom * 6, zoom * 6))  # Adjust the size of the figure based on zoom
    plt.imshow(images[slice_index], cmap="gray")
    plt.axis('off')
    plt.show()

# Function to update the image based on the slider and zoom
def update_image(slice_index, zoom, images):
    show_image(slice_index, zoom, images)

# Function to handle forward button click
def on_forward_button_clicked(b):
    global current_slice
    if current_slice < images.shape[0] - 1:
        current_slice += 1
        slice_slider.value = current_slice  # Update the slider value
        update_display(current_slice, zoom_slider.value, images)  # Update the display

# Function to handle backward button click
def on_backward_button_clicked(b):
    global current_slice
    if current_slice > 0:
        current_slice -= 1
        slice_slider.value = current_slice  # Update the slider value
        update_display(current_slice, zoom_slider.value, images)  # Update the display

# Path to the directory containing DICOM files
folder_path = '/Users/newuser/Documents/ITU/master_thesis/data/lung_data/manifest-1725363397135/LIDC-IDRI/LIDC-IDRI-0001/01-01-2000-NA-NA-30178/3000566.000000-NA-03192/'

# Load the images
images = load_dicom_images_from_folder(folder_path)

# Initialize the current slice index
current_slice = 0

# Create sliders for slice index and zoom
slice_slider = IntSlider(min=0, max=images.shape[0]-1, step=1, value=current_slice, description='Slice:')
zoom_slider = FloatSlider(min=0.5, max=3.0, step=0.1, value=1.0, description='Zoom:')

# Create buttons for forward and backward navigation
forward_button = Button(description="Forward")
backward_button = Button(description="Backward")

# Attach button click handlers
forward_button.on_click(on_forward_button_clicked)
backward_button.on_click(on_backward_button_clicked)

# Create output widget to display the images
out = Output()

# Update the image display function to use the output widget
def update_display(slice_index, zoom, images):
    with out:
        out.clear_output(wait=True)
        show_image(slice_index, zoom, images)

# Link the widgets to the display function
interact(update_display, slice_index=slice_slider, zoom=zoom_slider, images=fixed(images));

# Display the buttons
VBox([HBox([backward_button, forward_button]),out])

interactive(children=(IntSlider(value=0, description='Slice:', max=132), FloatSlider(value=1.0, description='Z…

VBox(children=(HBox(children=(Button(description='Backward', style=ButtonStyle()), Button(description='Forward…