- Convert dicom to jpg

In [1]:
%%time

import os
from tqdm import tqdm
import pydicom
import numpy as np
from PIL import Image

# Define the parent directory containing subdirectories with DICOM files
parent_directory = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images'
jpg_output_directory = '/kaggle/working/converted_test_images'

# Create output directory if it doesn't exist
os.makedirs(jpg_output_directory, exist_ok=True)

# Iterate through all subdirectories
for subdir, _, files in os.walk(parent_directory):
    for dicom_file in tqdm(files):
        dicom_path = os.path.join(subdir, dicom_file)
        if os.path.isfile(dicom_path) and dicom_file.endswith('.dcm'):
            # Read the DICOM file
            dicom_data = pydicom.dcmread(dicom_path)
            
            # Convert the DICOM pixel array to a PIL Image
            dicom_image = Image.fromarray(
                np.uint8(dicom_data.pixel_array / np.max(dicom_data.pixel_array) * 255)
            )
            
            # Save the image as a JPEG file
            relative_path = os.path.relpath(subdir, parent_directory)
            output_subdir = os.path.join(jpg_output_directory, relative_path)
            os.makedirs(output_subdir, exist_ok=True)
            jpg_filename = os.path.splitext(dicom_file)[0] + '.jpg'
            jpg_path = os.path.join(output_subdir, jpg_filename)
            dicom_image.save(jpg_path)

0it [00:00, ?it/s]
0it [00:00, ?it/s]
100%|██████████| 25/25 [00:01<00:00, 24.27it/s]
100%|██████████| 47/47 [00:00<00:00, 95.02it/s]
100%|██████████| 25/25 [00:00<00:00, 40.72it/s]

CPU times: user 1.7 s, sys: 143 ms, total: 1.84 s
Wall time: 2.88 s





- Image classification using ResNet50

In [2]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Load the ResNet50 model pre-trained on ImageNet
model = ResNet50(weights='imagenet')

def preprocess_image(image_file_path, target_size=(224, 224)):
    """ Preprocess image for ResNet50 input.
    
    Parameters:
    -----------
    image_file_path : str
        Path to image file
    target_size : tuple
        Target size for the image (default is 224x224 for ResNet50)

    Returns:
    --------
    numpy array : Preprocessed image
    """
    try:
        # Load and preprocess the image
        image = load_img(image_file_path, target_size=target_size)
        image_array = img_to_array(image)
        image_array = np.expand_dims(image_array, axis=0)
        image_array = preprocess_input(image_array)
        return image_array
    except Exception as e:
        print(f"Error processing {image_file_path}: {str(e)}")
        return None

def classify_image_file(image_file_path):
    """ Classify image file using ResNet50.
    
    Parameters:
    -----------
    image_file_path : str
        Path to image file

    Returns:
    --------
    tuple : (predicted_class, confidence_score)
    """
    try:
        # Preprocess image
        image_array = preprocess_image(image_file_path)
        if image_array is None:
            return None, None
        # Run the model
        predictions = model.predict(image_array)
        # Decode predictions
        decoded_predictions = decode_predictions(predictions, top=1)[0][0]
        predicted_class = decoded_predictions[1]
        confidence_score = decoded_predictions[2]
        return predicted_class, confidence_score
    except Exception as e:
        print(f"Error classifying {image_file_path}: {str(e)}")
        return None, None

def process_images_in_directory(directory_path, limit=None):
    """ Process all images in a directory and its subdirectories.
    
    Parameters:
    -----------
    directory_path : str
        Path to the root directory containing image files
    limit : int, optional
        Maximum number of files to process (default is None, meaning no limit)

    Returns:
    --------
    list : List of (file_path, predicted_class, confidence_score) tuples
    """
    results = []
    count = 0
    for root, _, files in os.walk(directory_path):
        for file in files:
            if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                file_path = os.path.join(root, file)
                predicted_class, confidence_score = classify_image_file(file_path)
                if predicted_class is not None:
                    results.append((file_path, predicted_class, confidence_score))
                    print(f"Processed: {file_path} - Class: {predicted_class} - Confidence: {confidence_score:.3f}")
                count += 1
                if limit and count >= limit:
                    return results
    return results

# Example usage
if __name__ == "__main__":
    # Path to the root directory containing images
    root_directory = '/kaggle/working/converted_test_images'
    # Process images
    results = process_images_in_directory(root_directory, limit=500)

    # Print results
    for file_path, predicted_class, confidence_score in results:
        print(f"File: {file_path}, Class: {predicted_class}, Confidence: {confidence_score:.3f}")


# Save results to a CSV file
    if results:
        # Convert results to a DataFrame
        df_results = pd.DataFrame(results, columns=['file_path', 'predicted_class', 'confidence_score'])
        # Save DataFrame to a CSV file
        output_csv_path = 'image_test_rsna_2024_results.csv'
        df_results.to_csv(output_csv_path, index=False)
        print(f"Results saved to {output_csv_path}")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5
[1m102967424/102967424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/imagenet_class_index.json
[1m35363/35363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Processed: /kaggle/working/converted_test_images/44036939/3844393089/25.jpg - Class: fountain - Confidence: 0.646
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Processed: /kaggle/working/converted_test_images/44036939/3844393089/14.jpg - Class: trilobite - Confidence: 0.234
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Processed: /kaggle/working/converted_test_images/44036939/3844393089/2.jpg - Class: mask - Confidence: 0.164
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━