In [None]:
# Install necessary libraries
!pip install pydicom SimpleITK matplotlib kaggle

# Set up Kaggle authentication
from google.colab import files
files.upload()  # Upload kaggle.json file

!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the RSNA dataset
!kaggle competitions download -c rsna-2024-lumbar-spine-degenerative-classification
!unzip -q rsna-2024-lumbar-spine-degenerative-classification.zip


Reading and Visualizing DICOM files

In [None]:
# Presumably needed so pydicom can decode compressed pixel data — TODO confirm which
# transfer syntaxes the dataset actually uses.
!pip install pylibjpeg pylibjpeg-libjpeg pylibjpeg-openjpeg


In [None]:
# NOTE(review): pydicom is also installed by the first setup cell, so this looks redundant.
!pip install pydicom


In [None]:
import pydicom
import matplotlib.pyplot as plt
import os

# Set the path to the unzipped data folder
data_path = "/content/train_images"

# Recursively collect the DICOM files. os.walk yields entries in arbitrary order, so the
# list is sorted to make "the first readable file" the same on every run.
dicom_files = sorted(
    os.path.join(dirpath, f)
    for dirpath, _, filenames in os.walk(data_path)
    for f in filenames
    if f.lower().endswith(".dcm")
)

# Check if files are found
print(f"Number of DICOM files found: {len(dicom_files)}")

# Reset first so a stale dataset from an earlier run is never mistaken for a fresh load
dicom_image = None

# Try reading the first valid DICOM file
if dicom_files:
    for file in dicom_files:
        try:
            candidate = pydicom.dcmread(file)
            # Decoding can fail even when the header parsed, so decode before accepting
            pixels = candidate.pixel_array
        except Exception as e:
            print(f"Error reading file {file}: {e}")
            continue

        # Only keep a dataset whose pixel data is actually usable by the later cells
        dicom_image = candidate

        # Visualize the DICOM image using grayscale
        plt.imshow(pixels, cmap=plt.cm.bone)
        plt.title(f"File: {file}")
        plt.show()

        # Stop after successfully loading one image
        break

    if dicom_image is None:
        print("None of the DICOM files could be read.")
else:
    print("No DICOM files found.")


Extracting Coordinates and Metadata

In [None]:
# Extract patient metadata and image properties from the loaded DICOM image

# Use .get() for every attribute so a missing tag produces a readable placeholder instead
# of an AttributeError (PatientID used to be read by direct attribute access).
patient_id = dicom_image.get("PatientID", "Patient ID not available")
study_date = dicom_image.get("StudyDate", "Study Date not available")
modality = dicom_image.get("Modality", "Modality not available")
image_position = dicom_image.get("ImagePositionPatient", "Image Position not available")
image_orientation = dicom_image.get("ImageOrientationPatient", "Image Orientation not available")

# Print the metadata
print(f"Patient ID: {patient_id}")
print(f"Study Date: {study_date}")
print(f"Modality: {modality}")
print(f"Image Position (Patient): {image_position}")
print(f"Image Orientation (Patient): {image_orientation}")

# Access pixel spacing (spacing between image pixels)
pixel_spacing = dicom_image.get("PixelSpacing", "Pixel Spacing information not available")
print(f"Pixel Spacing: {pixel_spacing}")


Mapping Labels to Coordinates

In [None]:
import pandas as pd

# Load the CSV file that contains the labels
labels_df = pd.read_csv('/content/train.csv')  # Replace with the actual path to your CSV file

# Ensure both PatientID and study_id are in the same format (string)
labels_df['study_id'] = labels_df['study_id'].astype(str)
patient_id = str(dicom_image.PatientID)

# Attempt to find the corresponding label row in the DataFrame
label_row = labels_df.loc[labels_df['study_id'] == patient_id]

# Always bind stenosis_label: the visualization cell below reads it, and without a default
# a non-matching study left the name undefined.
stenosis_label = None

# Check if a matching row is found
if not label_row.empty:
    # Extract the spinal canal stenosis label for L1-L2
    stenosis_label = label_row['spinal_canal_stenosis_l1_l2'].iloc[0]
    print(f"Spinal Canal Stenosis L1-L2: {stenosis_label}")
else:
    print(f"No matching label found for Patient ID: {patient_id}")


Visualizing Labeled Areas

In [None]:
# Function to overlay labels on DICOM images
def overlay_label(image, label_text, position, color="red", fontsize=12):
    """Show `image` with a text label drawn on top of it.

    Args:
        image: 2-D pixel array to display with the bone colormap.
        label_text: Text to draw.
        position: Indexable whose first two items are the x and y data coordinates
            of the text anchor.
        color: Matplotlib colour of the text (defaults to the previous fixed "red").
        fontsize: Font size of the text (defaults to the previous fixed 12).
    """
    plt.imshow(image, cmap=plt.cm.bone)
    plt.text(position[0], position[1], label_text, color=color, fontsize=fontsize)
    plt.show()

# Example: Visualize spinal canal stenosis label on the MRI
# (relies on dicom_image and stenosis_label set by the earlier cells)
image = dicom_image.pixel_array
overlay_label(image, f"Stenosis L1-L2: {stenosis_label}", (50, 50))


Iterating Over Multiple DICOM Files

In [None]:
import os
import pydicom
import matplotlib.pyplot as plt

# Function to load DICOM files, apply labels, and plot
def process_dicom_with_labels(study_id, series_id, instance_number, label_data):
    """Display one DICOM slice with every matching label point drawn on it.

    Args:
        study_id, series_id, instance_number: Identify the slice; they form the path
            /content/train_images/<study_id>/<series_id>/<instance_number>.dcm and
            select the rows of `label_data` to draw.
        label_data: DataFrame with columns study_id, series_id, instance_number,
            x, y and condition.

    Prints a "not found" message and returns when the file does not exist.
    """
    dicom_path = f'/content/train_images/{study_id}/{series_id}/{instance_number}.dcm'

    # Guard clause: nothing to draw when the slice is missing on disk
    if not os.path.exists(dicom_path):
        print(f"File {dicom_path} not found.")
        return

    slice_ds = pydicom.dcmread(dicom_path)
    plt.imshow(slice_ds.pixel_array, cmap=plt.cm.bone)

    # Keep only the label rows that belong to this exact slice
    same_study = label_data['study_id'] == study_id
    same_series = label_data['series_id'] == series_id
    same_instance = label_data['instance_number'] == instance_number
    current_labels = label_data[same_study & same_series & same_instance]

    # Mark each labelled point and write its condition next to it
    for _, point in current_labels.iterrows():
        px = point['x']
        py = point['y']
        plt.scatter(px, py, color='red', s=50)
        plt.text(px, py, point['condition'], color='red', fontsize=12)

    plt.title(f'DICOM Image for Study {study_id}, Series {series_id}')
    plt.show()

# Function to iterate over multiple studies and series
def process_multiple_dicoms(label_data, limit=10):
    """Plot the labelled slices referenced by `label_data`, up to `limit` of them.

    Args:
        label_data: DataFrame with at least study_id, series_id and instance_number
            (the plotting helper additionally reads x, y and condition).
        limit: Maximum number of distinct slices to process (an integer). None
            processes all of them; zero or a negative value processes none.

    Prints a message and returns when a required column is missing.
    """
    from itertools import islice

    # Check if the necessary columns exist in the dataset
    required_columns = ['study_id', 'series_id', 'instance_number']
    if not all(col in label_data.columns for col in required_columns):
        print("One or more required columns are missing. Please check the dataset.")
        return

    # The old counter ran the body before checking the limit, so limit=0 still
    # processed one slice; bail out up front instead.
    if limit is not None and limit <= 0:
        return

    # Group the labels by study_id, series_id, and instance_number
    grouped = label_data.groupby(['study_id', 'series_id', 'instance_number'])

    # Each group already holds exactly the rows of one slice, so hand that over
    # rather than making the helper rescan the full table for every image.
    for (study_id, series_id, instance_number), slice_labels in islice(grouped, limit):
        process_dicom_with_labels(study_id, series_id, instance_number, slice_labels)

# Call the function to process multiple DICOM files (limit to 10 for testing).
# The per-slice columns the function requires (series_id, instance_number, x, y, condition)
# are read from train_label_coordinates.csv elsewhere in this notebook; labels_df (train.csv)
# lacks them, so passing it only triggered the "required columns are missing" message.
import pandas as pd

label_coordinates_df = pd.read_csv('/content/train_label_coordinates.csv')
process_multiple_dicoms(label_coordinates_df, limit=10)


MODEL

Define the CNN Model

In [None]:
from tensorflow.keras import layers, models

# The input geometry and class count were referenced below but never defined anywhere in
# the notebook, so this cell raised NameError on a fresh kernel. The values here are
# starting points only — TODO confirm they match the arrays you actually train on.
image_height, image_width, channels = 224, 224, 1  # assumes single-channel slices resized to 224x224
num_classes = 3  # assumes three severity grades per condition — verify against the labels

# Three Conv/MaxPool stages followed by a small dense classifier head
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(image_height, image_width, channels)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
])


Compile the Model

In [None]:
# Training configuration for the CNN defined above, kept in one place for easy tweaking
compile_options = {
    'optimizer': 'adam',
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)


Train the Model

In [None]:
# NOTE(review): train_images, train_labels, val_images and val_labels are not defined in
# any cell visible in this notebook; they must be prepared before this cell can run.
model.fit(train_images, train_labels, epochs=10, validation_data=(val_images, val_labels))


Applying Grad-CAM (Class Activation Mapping)

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Function to generate Grad-CAM heatmap
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for the first sample of `img_array`.

    Args:
        img_array: Input batch accepted by `model`.
        model: Keras model to explain.
        last_conv_layer_name: Name of the layer whose output is weighted into the heatmap.
        pred_index: Index of the output unit to explain; defaults to the unit with
            the highest prediction for the first sample.

    Returns:
        NumPy array with the spatial size of the chosen layer's output, scaled so the
        maximum is 1 (all zeros when no location has a positive activation).
    """
    # Model mapping the input to both the conv activations and the predictions
    grad_model = tf.keras.models.Model(
        model.inputs,
        [model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        conv_layer_output, predictions = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(predictions[0])
        class_channel = predictions[:, pred_index]

    # Gradient of the chosen output unit with respect to the conv layer activations
    grads = tape.gradient(class_channel, conv_layer_output)

    # One importance weight per channel: mean gradient over batch and spatial axes
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weighted sum of the feature-map channels of the first sample
    conv_layer_output = conv_layer_output[0]
    heatmap = conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep positive evidence only and scale to [0, 1]; divide_no_nan returns zeros instead
    # of NaN when the map has no positive value (previously a division by zero).
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()

# Example usage: Generating heatmap for a sample input
# `dicom_image` is the dataset loaded by the earlier cells (this cell used to reference an
# undefined name `dicom`).
sample_pixels = dicom_image.pixel_array.astype("float32")

# Shape the slice like the model input: resize, replicate to the expected channel count,
# then add the batch dimension.
# NOTE(review): apply the same intensity scaling that was used for training.
_, input_height, input_width, input_channels = model.input_shape
resized_slice = tf.image.resize(sample_pixels[..., np.newaxis], (input_height, input_width))
image_input = tf.repeat(resized_slice, input_channels, axis=-1)[tf.newaxis, ...]

# Look the layer up instead of hard-coding "conv2d_2": Keras auto-numbers layer names, so
# the suffix changes whenever the model cell is re-run in the same kernel.
last_conv_layer_name = next(
    layer.name for layer in reversed(model.layers) if isinstance(layer, tf.keras.layers.Conv2D)
)

heatmap = make_gradcam_heatmap(image_input, model, last_conv_layer_name)

# Display heatmap on top of original image
def display_gradcam(heatmap, image, alpha=0.4):
    """Blend a Grad-CAM heatmap over `image` and show the result.

    Args:
        heatmap: 2-D array with values in [0, 1].
        image: 2-D grayscale array or (H, W, 3) array of any numeric range.
        alpha: Weight of the coloured heatmap added on top of the image.
    """
    # Bring the image into [0, 1] RGB so it can be added to the colormap output
    image = np.asarray(image, dtype=np.float32)
    span = image.max() - image.min()
    image = (image - image.min()) / span if span > 0 else np.zeros_like(image)
    if image.ndim == 2:
        image = np.repeat(image[..., np.newaxis], 3, axis=-1)

    # Interpolate the heatmap up to the image size. np.resize only repeats or truncates
    # the flattened data, which scrambles the spatial layout.
    heatmap = np.asarray(heatmap, dtype=np.float32)[..., np.newaxis]
    heatmap = tf.image.resize(heatmap, image.shape[:2]).numpy()[..., 0]
    heatmap = np.uint8(255 * np.clip(heatmap, 0, 1))

    # Convert to RGB format (drop the alpha channel returned by the colormap)
    heatmap = plt.cm.jet(heatmap)[:, :, :3]
    superimposed_img = np.clip(heatmap * alpha + image, 0, 1)

    # Display the image with heatmap overlay
    plt.imshow(superimposed_img)
    plt.show()

# `dicom` was never defined; the loaded dataset is called `dicom_image`
display_gradcam(heatmap, dicom_image.pixel_array)


Explainable AI (XAI) with Grad-CAM

In [None]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import numpy as np

# Assuming you have trained a CNN model
# NOTE(review): 'path_to_model.h5' is a placeholder path, and this rebinds the global
# `model` created by the earlier CNN cell.
model = load_model('path_to_model.h5')

def grad_cam(model, img_array, layer_name="conv5_block3_out"):
    """Compute a Grad-CAM heatmap for the first sample of `img_array`.

    Args:
        model: Keras model to explain.
        img_array: Input batch accepted by `model`.
        layer_name: Name of the layer whose activations are weighted into the heatmap.

    Returns:
        2-D NumPy array with the spatial size of that layer's output, scaled so the
        maximum is 1 (all zeros when nothing is positive).
    """
    # Model exposing both the chosen layer's activations and the predictions
    grad_model = tf.keras.models.Model(model.inputs, [model.get_layer(layer_name).output, model.output])
    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        top_class = tf.argmax(predictions[0])
        loss = predictions[:, top_class]

    grads = tape.gradient(loss, conv_outputs)

    # Per-channel importance weights. The previous version returned these weights
    # directly, which is a 1-D vector rather than a spatial map.
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight each feature-map channel and sum over channels to get the spatial map
    heatmap = tf.squeeze(conv_outputs[0] @ pooled_grads[..., tf.newaxis]).numpy()

    # Keep positive evidence and scale to [0, 1] without dividing by zero
    heatmap = np.maximum(heatmap, 0)
    peak = heatmap.max()
    if peak > 0:
        heatmap = heatmap / peak
    return heatmap

# Generate Grad-CAM heatmap for a sample image
# `dicom_image` is the dataset loaded by the earlier cells (this cell used to reference an
# undefined name `dicom`).
sample_pixels = dicom_image.pixel_array.astype("float32")

# Shape the slice like the model input: resize, replicate channels, add the batch axis.
# NOTE(review): apply the same intensity scaling that was used for training.
_, input_height, input_width, input_channels = model.input_shape
resized_slice = tf.image.resize(sample_pixels[..., np.newaxis], (input_height, input_width))
image_input = tf.repeat(resized_slice, input_channels, axis=-1)[tf.newaxis, ...]
heatmap = grad_cam(model, image_input)

# Display heatmap overlay
plt.imshow(sample_pixels, cmap=plt.cm.bone)
# The heatmap is much coarser than the slice; `extent` stretches it over the same axes area
slice_rows, slice_cols = sample_pixels.shape
plt.imshow(heatmap, cmap='jet', alpha=0.5,
           extent=(-0.5, slice_cols - 0.5, slice_rows - 0.5, -0.5))  # Overlay heatmap
plt.title("Grad-CAM Heatmap")
plt.show()


In [None]:
import pandas as pd

# Load the CSV file (reloaded here, so any changes earlier cells made to labels_df are discarded)
labels_df = pd.read_csv('/content/train.csv')

# Display the column names to see which label columns are available
print(labels_df.columns)


In [None]:
import os
import pydicom
import pandas as pd
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Load the CSV file containing labels
labels_df = pd.read_csv('/content/train.csv')

# study_id is used as a folder name below, so keep it as a string
labels_df['study_id'] = labels_df['study_id'].astype(str)

# Define the root directory where DICOM files are stored
dicom_root_dir = '/content/train_images/'

# Rendering every slice of every study is not practical in a notebook, so only the first
# few studies are shown. Set to None to process all of them.
max_studies = 3
studies_to_show = labels_df if max_studies is None else labels_df.head(max_studies)

# Step 2: Iterate through the DataFrame to dynamically load the DICOM files and annotate them
for _, row in studies_to_show.iterrows():
    study_id = row['study_id']
    dicom_folder = os.path.join(dicom_root_dir, study_id)

    # A study listed in the CSV may have no folder on disk; report it and move on
    if not os.path.isdir(dicom_folder):
        print(f"DICOM folder not found: {dicom_folder}")
        continue

    # Slices live in per-series subfolders (<study_id>/<series_id>/<instance>.dcm), so a
    # flat os.listdir() on the study folder never sees a .dcm file; walk the tree instead.
    dicom_paths = sorted(
        os.path.join(dirpath, f)
        for dirpath, _, filenames in os.walk(dicom_folder)
        for f in filenames
        if f.endswith('.dcm')
    )

    # Study-level labels are identical for every slice of the study
    stenosis_l1_l2 = row['spinal_canal_stenosis_l1_l2']
    stenosis_l2_l3 = row['spinal_canal_stenosis_l2_l3']

    # (More conditions can be extracted similarly...)

    for dicom_path in dicom_paths:
        dicom_image = pydicom.dcmread(dicom_path)

        # Extract patient ID from DICOM (optional, as we're using `study_id`)
        patient_id = dicom_image.PatientID

        # Step 3: Normalize to 8-bit and make a 3-channel copy that can take coloured text
        pixel_array = dicom_image.pixel_array
        normalized_image = cv2.normalize(pixel_array, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
        colored_image = cv2.cvtColor(normalized_image, cv2.COLOR_GRAY2BGR)

        # Example: Display condition on the image
        cv2.putText(colored_image, f'L1-L2 Stenosis: {stenosis_l1_l2}', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
        cv2.putText(colored_image, f'L2-L3 Stenosis: {stenosis_l2_l3}', (10, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)

        # Step 4: Display the annotated image
        plt.imshow(colored_image, cmap='gray')
        plt.title(f"Annotated DICOM Image for Study ID: {study_id}")
        plt.axis('off')
        plt.show()

        # Optionally, save the annotated image. The series folder is part of the name
        # because instance file names repeat across series of the same study.
        series_name = os.path.basename(os.path.dirname(dicom_path))
        dicom_file = os.path.basename(dicom_path)
        output_path = f'annotated_dicom_image_{study_id}_{series_name}_{dicom_file}.png'
        # matplotlib showed the array as RGB while cv2.imwrite expects BGR; convert so the
        # saved annotations have the same colours as the displayed ones.
        cv2.imwrite(output_path, cv2.cvtColor(colored_image, cv2.COLOR_RGB2BGR))


In [None]:
import os
import pydicom
import pandas as pd
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Load the CSV files
labels_df = pd.read_csv('/content/train.csv')
series_descriptions_df = pd.read_csv('/content/train_series_descriptions.csv')
label_coordinates_df = pd.read_csv('/content/train_label_coordinates.csv')

# study_id is used as a folder name below, so keep it as a string
label_coordinates_df['study_id'] = label_coordinates_df['study_id'].astype(str)

# Define the root directory where DICOM files are stored
dicom_root_dir = '/content/train_images/'

# Step 1: One annotated figure per label row (a slice with several labels is shown once per label)
for _, row in label_coordinates_df.iterrows():
    study_id = row['study_id']
    series_id = row['series_id']
    instance_number = row['instance_number']

    # Construct the DICOM file path: <root>/<study_id>/<series_id>/<instance_number>.dcm
    dicom_folder = os.path.join(dicom_root_dir, study_id, str(series_id))
    dicom_file = f"{instance_number}.dcm"
    dicom_path = os.path.join(dicom_folder, dicom_file)

    # Check if the DICOM file exists
    if os.path.exists(dicom_path):
        # Load the DICOM file
        dicom_image = pydicom.dcmread(dicom_path)

        # Extract patient ID from DICOM (optional)
        patient_id = dicom_image.PatientID

        # Extract the coordinates and condition (coordinates are truncated to whole pixels)
        x, y = int(row['x']), int(row['y'])
        condition = row['condition']
        level = row['level']

        # Step 2: Normalize to 8-bit and make a 3-channel copy that can take coloured marks
        pixel_array = dicom_image.pixel_array
        normalized_image = cv2.normalize(pixel_array, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
        colored_image = cv2.cvtColor(normalized_image, cv2.COLOR_GRAY2BGR)

        # Draw a circle around the labelled point
        cv2.circle(colored_image, (x, y), 10, (0, 255, 0), 2)

        # Annotate the condition and level
        label_text = f"{condition} {level}"
        cv2.putText(colored_image, label_text, (x + 15, y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)

        # Step 3: Display the annotated image
        plt.imshow(colored_image, cmap='gray')
        plt.title(f"Annotated DICOM Image for Study ID: {study_id}")
        plt.axis('off')
        plt.show()

        # Optionally, save the annotated image. matplotlib showed the array as RGB while
        # cv2.imwrite expects BGR; convert so the saved text colour matches the display.
        output_path = f'annotated_dicom_image_{study_id}_{series_id}_{instance_number}.png'
        cv2.imwrite(output_path, cv2.cvtColor(colored_image, cv2.COLOR_RGB2BGR))
    else:
        print(f"DICOM file not found: {dicom_path}")


Compare Coordinates with Image Dimensions

In [None]:
# Uses dicom_image, x, y and study_id left over from the last iteration of the loop above
height, width = dicom_image.pixel_array.shape
x_outside = x < 0 or x >= width
y_outside = y < 0 or y >= height
if x_outside or y_outside:
    print(f"Warning: Coordinates ({x}, {y}) are outside the image bounds for study {study_id}.")


Plot Coordinates on the Original Image

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(12, 6))

# Left: original DICOM image; right: annotated copy with the label marks
panels = [
    (dicom_image.pixel_array, f"Original Image for {study_id}"),
    (colored_image, f"Annotated Image for {study_id}"),
]
for panel_ax, (panel_image, panel_title) in zip(axes, panels):
    panel_ax.imshow(panel_image, cmap='gray')
    panel_ax.set_title(panel_title)

plt.show()


Validate Against Known Landmarks

In [None]:
# Example code to validate extracted coordinates
# (uses dicom_image, colored_image, x and y left over from the annotation loop above)
height, width = dicom_image.pixel_array.shape

# Report whether the last label point lies inside the image
inside_image = (0 <= x < width) and (0 <= y < height)
if not inside_image:
    print(f"Warning: Coordinates ({x}, {y}) are out of bounds for image of size ({width}, {height}).")
else:
    print(f"Coordinates ({x}, {y}) are valid within the image bounds.")

# Optional: draw a 100x100 reference box centred on the image (modifies colored_image in place)
center_x, center_y = width // 2, height // 2
box_top_left = (center_x - 50, center_y - 50)
box_bottom_right = (center_x + 50, center_y + 50)
cv2.rectangle(colored_image, box_top_left, box_bottom_right, (255, 0, 0), 2)

# Show the original and annotated images side by side for comparison
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
comparison = (
    (dicom_image.pixel_array, "Original Image"),
    (colored_image, "Annotated Image with Validated Coordinates"),
)
for panel_ax, (panel_image, panel_title) in zip(axes, comparison):
    panel_ax.imshow(panel_image, cmap='gray')
    panel_ax.set_title(panel_title)

plt.show()


In [None]:
import os
import pydicom
import pandas as pd
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Load the CSV files
labels_df = pd.read_csv('/content/train.csv')
series_descriptions_df = pd.read_csv('/content/train_series_descriptions.csv')
label_coordinates_df = pd.read_csv('/content/train_label_coordinates.csv')

# study_id is used as a folder name below, so keep it as a string
label_coordinates_df['study_id'] = label_coordinates_df['study_id'].astype(str)

# Define the root directory where DICOM files are stored
dicom_root_dir = '/content/train_images/'

# Define the output directory for annotated images
output_dir = '/content/annotated_images/'

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Step 1: Visit every labelled slice once (study -> series -> instance)
for study_id in label_coordinates_df['study_id'].unique():
    study_data = label_coordinates_df[label_coordinates_df['study_id'] == study_id]

    for series_id, series_data in study_data.groupby('series_id'):
        for instance_number in series_data['instance_number'].unique():
            dicom_folder = os.path.join(dicom_root_dir, study_id, str(series_id))
            dicom_file = f"{instance_number}.dcm"
            dicom_path = os.path.join(dicom_folder, dicom_file)

            # Check if the DICOM file exists
            if os.path.exists(dicom_path):
                # Load the DICOM file
                dicom_image = pydicom.dcmread(dicom_path)

                # Normalize to 8-bit and make a 3-channel copy that can take coloured marks
                pixel_array = dicom_image.pixel_array
                normalized_image = cv2.normalize(pixel_array, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
                colored_image = cv2.cvtColor(normalized_image, cv2.COLOR_GRAY2BGR)

                # Step 2: Draw all the labels that belong to this slice on the same image
                instance_data = series_data[series_data['instance_number'] == instance_number]
                for _, row in instance_data.iterrows():
                    # Extract coordinates (truncated to whole pixels) and labels
                    x, y = int(row['x']), int(row['y'])
                    condition = row['condition']
                    level = row['level']

                    # Draw a circle around the labelled point
                    cv2.circle(colored_image, (x, y), 10, (0, 255, 0), 2)

                    # Annotate the condition and level
                    label_text = f"{condition} {level}"
                    cv2.putText(colored_image, label_text, (x + 15, y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)

                # Step 3: Display the annotated image
                plt.imshow(colored_image, cmap='gray')
                plt.title(f"Annotated DICOM Image for Study ID: {study_id}, Series ID: {series_id}, Instance: {instance_number}")
                plt.axis('off')
                plt.show()

                # Step 4: Save the annotated image in the separate folder. matplotlib showed
                # the array as RGB while cv2.imwrite expects BGR; convert so the saved text
                # colour matches the display.
                output_path = os.path.join(output_dir, f'annotated_dicom_image_{study_id}_{series_id}_{instance_number}.png')
                cv2.imwrite(output_path, cv2.cvtColor(colored_image, cv2.COLOR_RGB2BGR))
            else:
                print(f"DICOM file not found: {dicom_path}")


In [None]:
import os
import pydicom
import numpy as np
import pandas as pd
import cv2
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Step 1: Load and preprocess DICOM images
def load_and_preprocess_dicom(dicom_path, target_size=(224, 224)):
    """Read a DICOM file and return it as a resized 3-channel uint8 image.

    Args:
        dicom_path: Path of the DICOM file to read.
        target_size: Size passed to cv2.resize.

    Returns:
        The min-max normalized (0-255) image, converted from grayscale to RGB and resized.
    """
    raw_pixels = pydicom.dcmread(dicom_path).pixel_array

    # Stretch the intensities to the 0-255 range and store them as 8-bit values
    as_uint8 = cv2.normalize(raw_pixels, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

    # ResNet expects 3-channel input, so replicate the gray channel
    as_rgb = cv2.cvtColor(as_uint8, cv2.COLOR_GRAY2RGB)

    return cv2.resize(as_rgb, target_size)

# Step 2: Load the CSV files containing labels
labels_df = pd.read_csv('/content/train.csv')  # Replace with actual path

# Prepare labels (you need to decide the task: classification, regression, etc.)
# For simplicity, let's assume a binary classification based on one of the conditions.
# Rows without a grade are dropped first: a missing value also satisfies
# `!= 'Normal/Mild'` and would otherwise be silently labelled as positive.
labels_df = labels_df.dropna(subset=['spinal_canal_stenosis_l1_l2']).reset_index(drop=True)
labels_df['label'] = (labels_df['spinal_canal_stenosis_l1_l2'] != 'Normal/Mild').astype(int)

# Step 3: Load ResNet50 Pre-trained model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Step 4: Add custom layers on top of the pre-trained ResNet50
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)  # Add a fully connected layer
predictions = Dense(1, activation='sigmoid')(x)  # Binary classification

# Step 5: Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

# Step 6: Freeze the layers of ResNet50 to keep the pre-trained weights
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Step 7: Create a generator to load images in batches
# NOTE(review): this ImageDataGenerator is not used by the custom generator in this cell,
# so its 1/255 rescaling is never applied to the training images.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)  # Normalizing images

def data_generator(df, batch_size, target_size):
    """Endlessly yield (images, labels) batches for the rows of `df`.

    Args:
        df: DataFrame with study_id, series_id, instance_number and label columns.
        batch_size: Number of rows per batch (the last batch of a pass may be smaller).
        target_size: Size forwarded to load_and_preprocess_dicom.

    Yields:
        Tuple of NumPy arrays (images, labels). Rows whose DICOM file is missing are
        skipped, and a batch in which every file is missing is not yielded.

    Raises:
        FileNotFoundError: A full pass over `df` produced no image at all (this also
            covers an empty `df`, which previously made the generator loop forever).
    """
    while True:
        produced_any = False
        for start in range(0, len(df), batch_size):
            batch_df = df.iloc[start:start + batch_size]

            images = []
            labels = []
            for _, row in batch_df.iterrows():
                dicom_path = f"/content/train_images/{row['study_id']}/{row['series_id']}/{row['instance_number']}.dcm"  # Adjust as necessary
                if os.path.exists(dicom_path):
                    images.append(load_and_preprocess_dicom(dicom_path, target_size))
                    labels.append(row['label'])

            # An empty batch gives the model nothing to train on
            if not images:
                continue

            produced_any = True
            yield np.array(images), np.array(labels)

        if not produced_any:
            raise FileNotFoundError(
                "data_generator found no DICOM file for any row; check the image root and the ID columns"
            )

# Step 8: Train the model using the generator
batch_size = 16

# data_generator builds every path from study_id / series_id / instance_number, but
# labels_df (train.csv) has no series_id or instance_number column, so feeding it directly
# raised KeyError. Take the slice identifiers from train_label_coordinates.csv (one row per
# distinct slice) and attach the study-level label to each.
image_index_df = (
    pd.read_csv('/content/train_label_coordinates.csv')[['study_id', 'series_id', 'instance_number']]
    .drop_duplicates()
    .merge(labels_df[['study_id', 'label']], on='study_id')
    .reset_index(drop=True)
)
train_gen = data_generator(image_index_df, batch_size=batch_size, target_size=(224, 224))

# Round up so the final partial batch belongs to the epoch and a table smaller than one
# batch does not produce zero steps.
steps_per_epoch = max(1, -(-len(image_index_df) // batch_size))

# Train the model
model.fit(train_gen, epochs=10, steps_per_epoch=steps_per_epoch)

# Step 9: Save the model
model.save("resnet50_finetuned_dicom.h5")


In [None]:
# Show the columns of labels_df as left by the training cell above
print(labels_df.columns)


In [None]:
import os
import pydicom
import pandas as pd
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Load the CSV files
labels_df = pd.read_csv('/content/train.csv')
label_coordinates_df = pd.read_csv('/content/train_label_coordinates.csv')

# study_id is used as a folder name below, so keep it as a string
label_coordinates_df['study_id'] = label_coordinates_df['study_id'].astype(str)

# Define the root directory where DICOM files are stored
dicom_root_dir = '/content/train_images/'

# Define the output directory for annotated images
output_dir = '/content/annotated_images/'

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Step 1: Visit every labelled slice once (study -> series -> instance)
for study_id in label_coordinates_df['study_id'].unique():
    study_data = label_coordinates_df[label_coordinates_df['study_id'] == study_id]

    # Slices are stored as <study_id>/<series_id>/<instance_number>.dcm. Leaving the series
    # folder out of the path meant no file was ever found, and instance numbers repeat
    # across series, so the labels must be split per series as well.
    for series_id, series_data in study_data.groupby('series_id'):
        for instance_number in series_data['instance_number'].unique():
            dicom_folder = os.path.join(dicom_root_dir, study_id, str(series_id))
            dicom_file = f"{instance_number}.dcm"
            dicom_path = os.path.join(dicom_folder, dicom_file)

            # Check if the DICOM file exists
            if os.path.exists(dicom_path):
                # Load the DICOM file
                dicom_image = pydicom.dcmread(dicom_path)

                # Normalize to 8-bit and make a 3-channel copy that can take coloured marks
                pixel_array = dicom_image.pixel_array
                normalized_image = cv2.normalize(pixel_array, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
                colored_image = cv2.cvtColor(normalized_image, cv2.COLOR_GRAY2BGR)

                # Step 2: Draw all the labels that belong to this slice on the same image
                instance_data = series_data[series_data['instance_number'] == instance_number]
                for _, row in instance_data.iterrows():
                    # Extract coordinates (truncated to whole pixels) and labels
                    x, y = int(row['x']), int(row['y'])
                    condition = row['condition']
                    level = row['level']

                    # Draw a circle around the labelled point
                    cv2.circle(colored_image, (x, y), 10, (0, 255, 0), 2)

                    # Annotate the condition and level
                    label_text = f"{condition} {level}"
                    cv2.putText(colored_image, label_text, (x + 15, y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)

                # Step 3: Display the annotated image
                plt.imshow(colored_image, cmap='gray')
                plt.title(f"Annotated DICOM Image for Study ID: {study_id}, Series ID: {series_id}, Instance: {instance_number}")
                plt.axis('off')
                plt.show()

                # Step 4: Save the annotated image. The series id is part of the file name so
                # slices from different series do not overwrite each other; the RGB->BGR
                # conversion keeps the saved colours equal to the displayed ones.
                output_path = os.path.join(output_dir, f'annotated_dicom_image_{study_id}_{series_id}_{instance_number}.png')
                cv2.imwrite(output_path, cv2.cvtColor(colored_image, cv2.COLOR_RGB2BGR))
            else:
                print(f"DICOM file not found: {dicom_path}")


New attempt: ResNet50 transfer learning on the merged label tables


In [None]:
import os
import pydicom
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split

# Step 1: Load and preprocess DICOM images
def load_dicom(dicom_path):
    """Read the DICOM file at `dicom_path` and return its pixel array."""
    return pydicom.dcmread(dicom_path).pixel_array

def preprocess_image(image, target_size=(224, 224)):
    """Scale a grayscale slice to [0, 1], resize it and replicate it to 3 channels.

    Args:
        image: 2-D array of pixel intensities.
        target_size: Size passed to cv2.resize.

    Returns:
        float32 RGB image. An all-zero slice stays all zeros instead of turning into
        NaN through a division by zero.
    """
    image = image.astype(np.float32)

    # Normalize pixel values to [0, 1]; skip the division when the slice is blank
    peak = float(np.max(image))
    if peak > 0:
        image = image / peak

    # Resize the image
    image = cv2.resize(image, target_size)
    # Convert grayscale to RGB
    image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    return image

# Load CSV data
label_coordinates_df = pd.read_csv('/content/train_label_coordinates.csv')
labels_df = pd.read_csv('/content/train.csv')
series_descriptions_df = pd.read_csv('/content/train_series_descriptions.csv')

# Merge the data if necessary (using study_id)
merged_df = pd.merge(label_coordinates_df, labels_df, on='study_id')
merged_df = pd.merge(merged_df, series_descriptions_df, on=['study_id', 'series_id'])

# The target used below is a single study-level column, so:
#  * rows without a grade are dropped (a missing value also satisfies `!= 'Normal/Mild'`
#    and was silently labelled positive), and
#  * each slice is kept once (merged_df has one row per annotated point, so the same image
#    with the same label was loaded repeatedly and could land in both train and test).
# NOTE(review): slices of one study can still be split across train and test.
target_column = 'spinal_canal_stenosis_l1_l2'
image_df = (
    merged_df
    .dropna(subset=[target_column])
    .drop_duplicates(subset=['study_id', 'series_id', 'instance_number'])
)

# Prepare X (images) and y (labels)
X = []
y = []

# Step 2: Load every referenced DICOM image, preprocess it, and record its label
# NOTE(review): all images are held in memory at once, which may not fit for the full dataset.
dicom_root_dir = '/content/train_images/'  # Define the root directory of DICOM files

for _, row in image_df.iterrows():
    study_id = row['study_id']
    series_id = row['series_id']
    instance_number = row['instance_number']
    dicom_folder = os.path.join(dicom_root_dir, str(study_id), str(series_id))
    dicom_file = f"{instance_number}.dcm"
    dicom_path = os.path.join(dicom_folder, dicom_file)

    # Load and preprocess the DICOM image
    if os.path.exists(dicom_path):
        image = load_dicom(dicom_path)
        image = preprocess_image(image)
        X.append(image)

        # Binary label: anything other than 'Normal/Mild' counts as positive
        label = 1 if row[target_column] != 'Normal/Mild' else 0
        y.append(label)

# Convert X and y to numpy arrays
X = np.array(X)
y = np.array(y)

# Fail with a clear message instead of an obscure error from train_test_split
if len(X) == 0:
    raise FileNotFoundError(f"No DICOM files referenced by the label tables were found under {dicom_root_dir}")

# Step 3: Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Load ResNet50 Pre-trained model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Add custom layers on top of the pre-trained ResNet50
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)  # Add a fully connected layer
predictions = Dense(1, activation='sigmoid')(x)  # Binary classification

# Step 5: Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the ResNet50 layers
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Step 6: Train the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=16)

# Step 7: Save the model
model.save("resnet50_finetuned_dicom.h5")


In [None]:
# NOTE(review): the first line repeats an install already done near the top of the notebook;
# pylibjpeg-rle is the only new package here.
!pip install pylibjpeg pylibjpeg-libjpeg pylibjpeg-openjpeg
!pip install pylibjpeg-rle



In [None]:
import os
import pydicom
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split

# Step 1: Load and preprocess DICOM images
# (same definition as in the previous training cell; this one replaces it when the cell runs)
def load_dicom(dicom_path):
    """Read the DICOM file at `dicom_path` and return its pixel array."""
    return pydicom.dcmread(dicom_path).pixel_array

def preprocess_image(image, target_size=(224, 224)):
    """Scale a grayscale slice to [0, 1], resize it and replicate it to 3 channels.

    Args:
        image: 2-D array of pixel intensities.
        target_size: Size passed to cv2.resize.

    Returns:
        float32 RGB image. An all-zero slice stays all zeros instead of turning into
        NaN through a division by zero.
    """
    image = image.astype(np.float32)

    # Normalize pixel values to [0, 1]; skip the division when the slice is blank
    peak = float(np.max(image))
    if peak > 0:
        image = image / peak

    # Resize the image
    image = cv2.resize(image, target_size)
    # Convert grayscale to RGB
    image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    return image

# Load CSV data
label_coordinates_df = pd.read_csv('/content/train_label_coordinates.csv')
labels_df = pd.read_csv('/content/train.csv')
series_descriptions_df = pd.read_csv('/content/train_series_descriptions.csv')

# Merge the data if necessary (using study_id)
merged_df = pd.merge(label_coordinates_df, labels_df, on='study_id')
merged_df = pd.merge(merged_df, series_descriptions_df, on=['study_id', 'series_id'])

# The target used below is a single study-level column, so rows without a grade are dropped
# (a missing value also satisfies `!= 'Normal/Mild'` and was silently labelled positive) and
# each slice is kept once (merged_df has one row per annotated point, so duplicates of the
# same image could land in both train and test).
target_column = 'spinal_canal_stenosis_l1_l2'
image_df = (
    merged_df
    .dropna(subset=[target_column])
    .drop_duplicates(subset=['study_id', 'series_id', 'instance_number'])
)

# Step 2: Sample a subset of the data
sample_size = 100  # Adjust this to use the desired number of samples
# DataFrame.sample raises when asked for more rows than exist, so cap the request
subset_df = image_df.sample(n=min(sample_size, len(image_df)), random_state=42)

# Prepare X (images) and y (labels)
X = []
y = []

# Step 3: Load every sampled DICOM image, preprocess it, and record its label
dicom_root_dir = '/content/train_images/'  # Define the root directory of DICOM files

for _, row in subset_df.iterrows():
    study_id = row['study_id']
    series_id = row['series_id']
    instance_number = row['instance_number']
    dicom_folder = os.path.join(dicom_root_dir, str(study_id), str(series_id))
    dicom_file = f"{instance_number}.dcm"
    dicom_path = os.path.join(dicom_folder, dicom_file)

    # Load and preprocess the DICOM image
    if os.path.exists(dicom_path):
        image = load_dicom(dicom_path)
        image = preprocess_image(image)
        X.append(image)

        # Binary label: anything other than 'Normal/Mild' counts as positive
        label = 1 if row[target_column] != 'Normal/Mild' else 0
        y.append(label)

# Convert X and y to numpy arrays
X = np.array(X)
y = np.array(y)

# Fail with a clear message instead of an obscure error from train_test_split
if len(X) == 0:
    raise FileNotFoundError(f"No DICOM files referenced by the sampled rows were found under {dicom_root_dir}")

# Step 4: Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Load ResNet50 Pre-trained model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Add custom layers on top of the pre-trained ResNet50
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)  # Add a fully connected layer
predictions = Dense(1, activation='sigmoid')(x)  # Binary classification

# Step 6: Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the ResNet50 layers
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Step 7: Train the model with the subset of data
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=16)

# Step 8: Save the model
model.save("resnet50_finetuned_dicom.h5")
