In [5]:
import os
import numpy as np
import cv2
import nibabel as nib
import pydicom
import pandas as pd
import multiprocessing

# Resize a single 2D image with linear interpolation
def resize_image(image, target_size=(128, 128)):
    """Resize a 2D image to ``target_size``.

    Args:
        image: Array holding one 2D image, in a form ``cv2.resize`` accepts.
        target_size: Output size, forwarded unchanged as the size argument of
            ``cv2.resize``.

    Returns:
        The resized image returned by ``cv2.resize`` using
        ``cv2.INTER_LINEAR`` interpolation.
    """
    resized = cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)
    return resized

# Fallback slice thickness and in-plane pixel spacing. They are used by
# create_3d_volume_with_keyframes when a slice filename has no matching row
# in the Parquet metadata.
# NOTE(review): units are presumably millimetres (DICOM convention) — confirm.
default_slice_thickness = 1.0
default_pixel_spacing_x = 1.0
default_pixel_spacing_y = 1.0

# Create a 3D volume by sampling evenly spaced slices from a scan
def create_3d_volume_with_keyframes(images, target_size=(128, 128, 128), parquet_data=None):
    """Build a fixed-size 3D volume from an ordered list of slices.

    ``target_size[2]`` slice indices are spread evenly over the stack with
    ``np.linspace`` (indices repeat when the stack has fewer slices than
    requested). Every selected slice is resized to ``target_size[:2]`` and the
    results are stacked along the last axis.

    Args:
        images: Sequence of dicts with ``"pixel_array"`` and ``"filename"``
            keys, already in slice order.
        target_size: ``(x, y, z)`` size of the output volume.
        parquet_data: Optional DataFrame with ``path``, ``SliceThickness``,
            ``PixelSpacingX`` and ``PixelSpacingY`` columns. When it is
            ``None``, or no row's ``path`` equals the first selected filename,
            the module-level default spacing values are used.

    Returns:
        A tuple ``(volume, voxel_spacing, selected_filenames)`` where
        ``voxel_spacing`` is the list
        ``[pixel_spacing_x, pixel_spacing_y, slice_thickness]`` of the first
        selected slice.
    """
    n_slices = len(images)
    keyframe_indices = np.linspace(0, n_slices - 1, target_size[2], dtype=int)

    selected = [images[idx] for idx in keyframe_indices]
    selected_filenames = [entry["filename"] for entry in selected]

    resized_slices = [resize_image(entry["pixel_array"], target_size[:2]) for entry in selected]
    volume = np.stack(resized_slices, axis=-1)

    # Only the first selected slice contributes to the returned spacing, so a
    # single metadata lookup is enough; scanning the whole table once per
    # selected slice would repeat an expensive filter for values never used.
    slice_thickness = default_slice_thickness
    pixel_spacing = [default_pixel_spacing_x, default_pixel_spacing_y]
    if parquet_data is not None:
        matching_row = parquet_data[parquet_data['path'] == selected_filenames[0]]
        if not matching_row.empty:
            slice_thickness = matching_row['SliceThickness'].values[0]
            pixel_spacing = [
                matching_row['PixelSpacingX'].values[0],
                matching_row['PixelSpacingY'].values[0],
            ]

    # NOTE(review): the spacing comes straight from the source slice metadata
    # and is not rescaled for the in-plane resize or the slice subsampling —
    # confirm whether consumers need the physical spacing of the new grid.
    voxel_spacing = [pixel_spacing[0], pixel_spacing[1], slice_thickness]

    return volume, voxel_spacing, selected_filenames

# Function to load DICOM files from a directory tree and sort them numerically
def load_and_sort_dicom_files(patient_dir):
    """Read every ``.dcm`` file below ``patient_dir`` and return them in order.

    The directory is walked recursively. Each file is read with
    ``pydicom.dcmread`` and turned into a dict holding a generated filename
    (``"<SeriesNumber>_<InstanceNumber:04d>.nii.gz"``) and the pixel array.

    Args:
        patient_dir: Directory to search for ``.dcm`` files.

    Returns:
        A list of ``{"filename": ..., "pixel_array": ...}`` dicts sorted by
        the integer pair ``(SeriesNumber, InstanceNumber)``. The list is empty
        when no ``.dcm`` file is found.
    """
    dicom_files = [
        os.path.join(root, file)
        for root, _dirs, files in os.walk(patient_dir)
        for file in files
        if file.endswith('.dcm')
    ]

    keyed_images = []
    for full_path in dicom_files:
        dicom_data = pydicom.dcmread(full_path)
        series_number = int(dicom_data.SeriesNumber)
        instance_number = int(dicom_data.InstanceNumber)

        dicom_image = {
            "filename": f"{series_number}_{instance_number:04d}.nii.gz",
            "pixel_array": dicom_data.pixel_array,
        }
        keyed_images.append(((series_number, instance_number), dicom_image))

    # Sort on the integers rather than the formatted filename: a string sort
    # puts series "10" before "2" and instance "10000" before "9999".
    keyed_images.sort(key=lambda pair: pair[0])

    return [image for _, image in keyed_images]

# Read the Parquet metadata table from disk
def load_parquet_metadata(parquet_file):
    """Read the Parquet file at ``parquet_file`` and return it as a DataFrame.

    Args:
        parquet_file: Path handed to ``pd.read_parquet``.

    Returns:
        The DataFrame produced by ``pd.read_parquet``.
    """
    metadata = pd.read_parquet(parquet_file)
    return metadata

# Make sure <output_dir>/<patient_id>/<scan_id> exists
def create_output_directories(output_dir, patient_id, scan_id):
    """Create the per-scan output directory and return its path.

    Args:
        output_dir: Root directory for all outputs.
        patient_id: Name of the patient-level subdirectory.
        scan_id: Name of the scan-level subdirectory.

    Returns:
        The path ``<output_dir>/<patient_id>/<scan_id>``. The directory and
        any missing parents are created; an existing directory is left as is.
    """
    scan_dir = os.path.join(output_dir, patient_id, scan_id)
    os.makedirs(scan_dir, exist_ok=True)
    return scan_dir

# Locations of the raw DICOM input, the converted output and the tag table
input_dir = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images'
output_dir = '/kaggle/working/output_directory'
parquet_file = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_dicom_tags.parquet'

# Target X, Y and Z dimensions of every generated volume
x_size, y_size, z_size = 128, 128, 128

# Read the metadata table once, before visiting any scan
print("Loading Parquet metadata...")
parquet_metadata = load_parquet_metadata(parquet_file)

# Visit every <patient>/<scan> directory below the input directory
for patient_id in os.listdir(input_dir):
    patient_dir = os.path.join(input_dir, patient_id)
    if not os.path.isdir(patient_dir):
        continue

    for scan_id in os.listdir(patient_dir):
        scan_dir = os.path.join(patient_dir, scan_id)
        if not os.path.isdir(scan_dir):
            continue

        print(f"Processing patient {patient_id}, scan {scan_id}...")

        # Load all slices of this scan in sorted order
        dicom_images = load_and_sort_dicom_files(scan_dir)
        if not dicom_images:
            print("No DICOM files found in the current scan directory.")
            continue

        print("Creating 3D volume...")
        output_volume, voxel_spacing, selected_filenames = create_3d_volume_with_keyframes(
            dicom_images, (x_size, y_size, z_size), parquet_data=parquet_metadata)

        # Mirror the <patient>/<scan> layout under the output directory
        output_scan_dir = create_output_directories(output_dir, patient_id, scan_id)

        print("Saving the 3D volume...")
        # The voxel spacing goes on the diagonal of the 4x4 affine
        nifti_image = nib.Nifti1Image(output_volume, np.diag(voxel_spacing + [1.0]))
        output_file = os.path.join(output_scan_dir, 'ct_scan.nii.gz')
        nib.save(nifti_image, output_file)

        print(f"Combined and resampled image saved as '{output_file}'.")


Loading Parquet metadata...
Processing patient 26501, scan 22032...
Creating 3D volume...
Saving the 3D volume...
Combined and resampled image saved as '/kaggle/working/output_directory/26501/22032/ct_scan.nii.gz'.
Processing patient 26501, scan 31614...
Creating 3D volume...
Saving the 3D volume...
Combined and resampled image saved as '/kaggle/working/output_directory/26501/31614/ct_scan.nii.gz'.
Processing patient 32627, scan 51693...
Creating 3D volume...
Saving the 3D volume...
Combined and resampled image saved as '/kaggle/working/output_directory/32627/51693/ct_scan.nii.gz'.
Processing patient 44515, scan 12236...
Creating 3D volume...
Saving the 3D volume...
Combined and resampled image saved as '/kaggle/working/output_directory/44515/12236/ct_scan.nii.gz'.
Processing patient 44515, scan 2942...
Creating 3D volume...
Saving the 3D volume...
Combined and resampled image saved as '/kaggle/working/output_directory/44515/2942/ct_scan.nii.gz'.
Processing patient 33834, scan 17341...

KeyboardInterrupt: 

In [1]:
import os
import numpy as np
import pandas as pd
import nibabel as nib
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split

# Define the path to the root directory containing CT scan data
# NOTE(review): load_ct_scans_and_labels only picks up files whose name ends
# in 'ct_scan.nii.gz' below <data_root>/<patient_id>. The conversion cell
# earlier in this file writes those files under
# '/kaggle/working/output_directory', not under train_images — confirm this
# path points at the converted volumes.
data_root = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images'  # Update with the actual path

# Load the train.csv file (one row per patient, read via its 'patient_id' column)
train_csv_path = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train.csv'  # Update with the actual path
train_df = pd.read_csv(train_csv_path)

# Function to load CT scans and labels
def load_ct_scans_and_labels(data_root, train_df):
    """Load every converted CT volume together with its patient's labels.

    For each row of ``train_df`` the directory ``<data_root>/<patient_id>`` is
    walked recursively and every file whose name ends in ``ct_scan.nii.gz`` is
    loaded with nibabel. Each loaded scan gets one copy of that patient's
    label row, so a patient with several scans contributes several samples.

    Args:
        data_root: Directory containing one subdirectory per patient id.
        train_df: DataFrame with a ``patient_id`` column and the 14 label
            columns listed in ``label_columns`` below.

    Returns:
        A tuple ``(ct_scans, labels)`` of NumPy arrays with the same length;
        ``labels[i]`` holds the label values for ``ct_scans[i]``. Patients
        without a directory and scans that fail to load are reported with
        ``print`` and skipped.
    """
    label_columns = ['bowel_healthy', 'bowel_injury', 'extravasation_healthy', 'extravasation_injury',
                     'kidney_healthy', 'kidney_low', 'kidney_high', 'liver_healthy', 'liver_low',
                     'liver_high', 'spleen_healthy', 'spleen_low', 'spleen_high', 'any_injury']

    ct_scans = []
    labels = []

    for _, row in train_df.iterrows():
        patient_id = str(row['patient_id'])

        # Define the path to the patient's directory
        patient_dir = os.path.join(data_root, patient_id)

        # Check if the patient's directory exists
        if not os.path.exists(patient_dir):
            print(f"Patient directory not found for patient {patient_id}")
            continue

        # Walk through all subdirectories under the patient's directory
        for subdir, _, files in os.walk(patient_dir):
            for file in files:
                if not file.endswith('ct_scan.nii.gz'):
                    continue

                ct_scan_path = os.path.join(subdir, file)

                try:
                    ct_scan = nib.load(ct_scan_path).get_fdata()
                    scan_labels = row[label_columns].values
                except Exception as e:
                    print(f"Error loading CT scan for patient {patient_id}: {e}")
                else:
                    # Append both together, only on success, so scans and
                    # labels can never get out of step.
                    ct_scans.append(ct_scan)
                    labels.append(scan_labels)

    # Labels were appended once per loaded scan, so they are already aligned
    # one-to-one with the scans; no further duplication is needed.
    return np.array(ct_scans), np.array(labels)

# Load CT scans and labels
ct_scans, labels = load_ct_scans_and_labels(data_root, train_df)

# Fail early with a clear message instead of an obscure error further down
if len(ct_scans) == 0:
    raise ValueError(f"No CT scans were loaded from '{data_root}'.")

# The loaded volumes have no channel dimension, while the model input shape
# below ends in a channel axis of size 1; add that axis when it is missing.
if ct_scans.ndim == 4:
    ct_scans = ct_scans[..., np.newaxis]

# Give Keras a plain numeric label array
labels = np.asarray(labels, dtype=np.float32)

# Define input shape based on your CT scan dimensions (e.g., 128x128x128x1 for a single-channel scan)
input_shape = (128, 128, 128, 1)

# Number of output units for multi-label classification. Taken from the label
# array itself so it always matches the columns the loader actually returns,
# whatever other columns train.csv contains.
num_classes = labels.shape[-1]

# Define training parameters
epochs = 50  # You can adjust the number of training epochs
batch_size = 16  # You can adjust the batch size based on your system's capacity

# Configure early stopping callback
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',  # You can choose 'val_loss' or another metric to monitor
    patience=5,  # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True,  # Restore model weights from the epoch with the best value of the monitored metric
)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(ct_scans, labels, test_size=0.2, random_state=42)

# Define the CNN model
# NOTE(review): with a 128^3 input, the single conv + pool stage leaves
# 63*63*63*64 (about 16 million) features for Flatten, so the first Dense
# layer alone holds roughly 2 billion weights — add more conv/pool stages or
# a global pooling layer before training on real hardware.
model = keras.Sequential([
    # Add 3D convolutional layers, pooling layers, and other layers as needed
    # Example:
    keras.layers.Conv3D(64, kernel_size=(3, 3, 3), activation='relu', input_shape=input_shape),
    keras.layers.MaxPooling3D(pool_size=(2, 2, 2)),
    # Add more layers as needed
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(num_classes, activation='sigmoid')  # Multi-label classification
])

# Compile the model
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Train the model on GPU
with tf.device('/device:GPU:0'):
    history = model.fit(X_train, y_train,
                        validation_data=(X_val, y_val),
                        epochs=epochs,
                        batch_size=batch_size,
                        callbacks=[early_stopping])


# Evaluate the model
evaluation = model.evaluate(X_val, y_val)
print(f"Validation loss: {evaluation[0]}, Validation accuracy: {evaluation[1]}")

# Make predictions on new data
# predictions = model.predict(new_data)




KeyboardInterrupt: 