# Xception CNN Model for Knee Pathology Detection
This notebook demonstrates how to create and train an Xception CNN model to detect knee pathologies using DICOM files.

In [4]:
# Import Required Libraries
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import Xception
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.model_selection import KFold

In [5]:
# Define paths and hyperparameters shared by the cells below
data_dir = 'output_folder'  # root folder that is searched for .dcm files
# (height, width) every slice is brought to before it is fed to the network.
# The training cell reads image_size, so it has to be defined here; leaving it
# commented out makes a fresh "Restart & Run All" fail with a NameError.
image_size = (368, 640)
batch_size = 32  # mini-batch size for the data generator and model.fit
num_folds = 5  # number of cross-validation folds

In [None]:
import os
import pydicom
import cv2
import numpy as np
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm  # For progress bar

# Updated preprocess_dicom_files function to handle variable image shapes
def preprocess_dicom_files(data_dir, batch_size=100):
    """Load every DICOM file found under ``data_dir`` together with its label.

    The directory tree is walked recursively. Files are processed in sorted
    path order so that the returned lists (and anything split from them
    later) are reproducible between runs and machines.

    Args:
        data_dir: Root directory that is searched for ``.dcm`` files
            (extension matched case-insensitively).
        batch_size: Number of files handled per progress-bar step. It only
            affects how progress is reported, not the result.

    Returns:
        A tuple ``(images, labels, num_classes)`` where ``images`` is a list
        of float32 arrays (kept as a list because shapes may differ),
        ``labels`` is an int32 array with one entry per image, and
        ``num_classes`` is the number of distinct label values.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if a file name
            does not follow the naming convention the label is parsed from.
    """
    if batch_size < 1:
        # range() rejects a step of 0 and silently yields nothing for a
        # negative step; fail loudly in both cases instead.
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    files_list = sorted(
        os.path.join(root, file)
        for root, _dirs, files in os.walk(data_dir)
        for file in files
        if file.lower().endswith('.dcm')
    )

    num_files = len(files_list)
    images = []  # Use a list to store images dynamically
    labels = []

    for i in tqdm(range(0, num_files, batch_size), desc="Processing DICOM Files in Batches"):
        for file_path in files_list[i:i + batch_size]:
            # The label is the integer that follows the first four characters
            # of the part of the file name before the first underscore.
            # NOTE(review): presumably that part is a 4-letter prefix plus a
            # numeric id - confirm that this id really is the class label.
            file_name = os.path.basename(file_path)
            try:
                label = int(file_name.split('_')[0][4:])
            except ValueError as exc:
                raise ValueError(
                    f"Cannot parse a label from file name {file_name!r} ({file_path})"
                ) from exc

            dicom = pydicom.dcmread(file_path)
            image = dicom.pixel_array.astype(np.float32)  # Convert to float32
            # NOTE(review): dividing by 255 only maps 8-bit data to [0, 1];
            # DICOM pixel data is often stored with more bits - confirm the
            # intended value range.
            image = image / 255.0
            images.append(image)
            labels.append(label)

    # Keep images as a list to handle variable shapes
    labels = np.array(labels, dtype=np.int32)

    num_classes = len(np.unique(labels))
    print(f"Number of classes detected: {num_classes}")

    return images, labels, num_classes

# Generate data and visualize progress
images, labels, num_classes = preprocess_dicom_files(data_dir)

if not images:
    raise ValueError(f"No .dcm files were found under {data_dir!r}")

# Summarise the dataset as {shape: count}. Printing one shape per image floods
# the cell output once the dataset has more than a handful of files.
shape_counts = {}
for img in images:
    shape_counts[img.shape] = shape_counts.get(img.shape, 0) + 1
print(f"Total images: {len(images)}, Image shapes: {shape_counts}")

# ImageDataGenerator.flow() needs one rank-4 array (samples, height, width,
# channels); a Python list of 2-D arrays with differing shapes is rejected.
# Every slice is therefore given a channel axis and resized to the most common
# shape in the dataset before stacking.
# NOTE(review): assumes every slice is a 2-D grayscale array - confirm.
target_size = max(shape_counts, key=shape_counts.get)[:2]
resized_images = []
for img in images:
    img = img[..., np.newaxis]
    if img.shape[:2] != target_size:
        img = tf.image.resize(img, target_size).numpy()
    resized_images.append(img)
image_array = np.stack(resized_images).astype(np.float32)

# Data generator that yields shuffled (image, label) batches
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# NOTE(review): validation_split only takes effect when flow() is called with
# subset='training' / subset='validation'; without it all samples are yielded.
datagen = ImageDataGenerator(validation_split=0.2)
data_generator = datagen.flow(
    image_array,
    labels,
    batch_size=batch_size,
    shuffle=True
)

print("Data generator is ready. Starting training now!")

Processing DICOM Files in Batches:  65%|██████▍   | 296/458 [03:58<02:55,  1.08s/it]

In [None]:
import os
import pydicom
import numpy as np
import pandas as pd
from tqdm import tqdm  # Progress bar

# Build (or load from cache) one {'image': ..., 'mask': ...} record per DICOM
# slice. The mask is a uint8 array of the slice's size in which every annotated
# bounding box is filled with 1.

# Define the path to the output_folder
output_folder = 'output_folder'
annotations = pd.read_csv('annotation.csv')  # Load your annotations CSV file

# Define the path for saving the .npy file
segmentation_data_file = 'segmentation_data.npy'

# Check if the .npy file already exists
if os.path.exists(segmentation_data_file):
    # The cache is an object array of dicts, so it is stored with pickle.
    # SECURITY: unpickling can execute arbitrary code - only load a cache file
    # that was produced by this notebook.
    # Converted to a list so the variable has the same type in both branches.
    segmentation_data = list(np.load(segmentation_data_file, allow_pickle=True))
    print(f"✅ Loaded segmentation data from {segmentation_data_file}")
else:
    # Fail once, up front, if the CSV lacks a column that is read below;
    # otherwise the per-file handler would report the same error for every
    # file and leave every mask empty.
    required_columns = {'file', 'slice', 'x', 'y', 'width', 'height'}
    missing_columns = required_columns - set(annotations.columns)
    if missing_columns:
        raise KeyError(f"annotation.csv is missing columns: {sorted(missing_columns)}")

    # Prepare dataset (sorted so the sample order is reproducible)
    segmentation_data = []
    files_list = sorted(f for f in os.listdir(output_folder) if f.endswith('.dcm'))
    failed_files = 0

    # Using tqdm to track progress
    for filename in tqdm(files_list, desc="Processing DICOM Segmentation"):
        filepath = os.path.join(output_folder, filename)

        # Read the DICOM file
        dicom_file = pydicom.dcmread(filepath)
        pixel_array = dicom_file.pixel_array
        height, width = pixel_array.shape  # requires a 2-D (single-frame, single-channel) slice

        # Create an empty mask
        mask = np.zeros((height, width), dtype=np.uint8)

        # Extract file ID and slice number; file names look like "<file_id>_<slice>.dcm"
        try:
            name_parts = filename.split('_')
            file_id = name_parts[0]
            slice_number = int(name_parts[1].split('.')[0])  # Ensure slice_number is an integer

            # Get corresponding annotations
            # NOTE(review): assumes the 'file' column holds the same string ids
            # as the file-name prefix - confirm against the CSV.
            file_annotations = annotations[
                (annotations['file'] == file_id) & (annotations['slice'] == slice_number)
            ]

            # Draw bounding boxes on the mask
            for _, row in file_annotations.iterrows():
                x, y = int(row['x']), int(row['y'])
                box_width, box_height = int(row['width']), int(row['height'])
                # Clip the box to the image. A negative start would otherwise
                # be read as "count from the end" and mark the wrong region.
                x_start, y_start = max(x, 0), max(y, 0)
                x_end, y_end = min(x + box_width, width), min(y + box_height, height)
                if x_end > x_start and y_end > y_start:
                    mask[y_start:y_end, x_start:x_end] = 1  # Mark region as foreground
        except Exception as e:
            # Best effort: keep the sample (with whatever was drawn so far)
            # but make the failure visible.
            failed_files += 1
            print(f"Error processing {filename}: {e}")

        # Append to segmentation data
        segmentation_data.append({
            'image': pixel_array,
            'mask': mask
        })

    if failed_files:
        print(f"{failed_files} of {len(files_list)} files could not be annotated")

    # Save the segmentation data to a .npy file (explicit object array of dicts)
    np.save(segmentation_data_file, np.array(segmentation_data, dtype=object), allow_pickle=True)
    print(f"✅ Saved segmentation data to {segmentation_data_file}")

# Print summary
print(f"✅ Total segmentation samples: {len(segmentation_data)}")

In [None]:
# Train the Xception-based classifier with K-fold cross-validation.
#
# The network ends in a single sigmoid unit, i.e. it predicts one value per
# slice. The per-pixel masks are therefore reduced to one binary target per
# slice: 1 if the mask contains any annotated region, 0 otherwise. (Feeding
# the full (H, W, 1) masks to a one-unit output fails with a shape mismatch.)
#
# `image_size` is the (height, width) every slice is resized to and must be
# defined before this cell runs.


def _prepare_image(pixel_array, target_size):
    """Return a float32 (height, width, 1) array resized and scaled to [-1, 1].

    [-1, 1] is the input range the ImageNet-pretrained Xception weights were
    trained with; a constant image is mapped to all zeros.
    """
    img = np.asarray(pixel_array, dtype=np.float32)[..., np.newaxis]
    img = tf.image.resize(img, target_size).numpy()
    lowest, highest = float(img.min()), float(img.max())
    if highest > lowest:
        return 2.0 * (img - lowest) / (highest - lowest) - 1.0
    return np.zeros_like(img)


# Preprocess once instead of once per fold.
X_all = np.stack([_prepare_image(sample['image'], image_size) for sample in segmentation_data])
y_all = np.array([float(np.any(sample['mask'])) for sample in segmentation_data], dtype=np.float32)

kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)
fold_no = 1
fold_val_accuracy = []

for train_index, val_index in kf.split(X_all):
    print(f'Training on fold {fold_no}...')

    # Release the previous fold's graph so memory does not grow per fold
    tf.keras.backend.clear_session()

    # Split the data into training and validation sets
    X_train, y_train = X_all[train_index], y_all[train_index]
    X_val, y_val = X_all[val_index], y_all[val_index]

    # Xception requires 3 input channels; the grayscale slice is repeated
    # inside the model so the data arrays can stay single-channel.
    inputs = tf.keras.Input(shape=(image_size[0], image_size[1], 1))
    x = tf.keras.layers.Concatenate(axis=-1)([inputs, inputs, inputs])

    # Load Pre-trained Xception Model and freeze it
    base_model = Xception(weights='imagenet', include_top=False, input_shape=(image_size[0], image_size[1], 3))
    base_model.trainable = False
    x = base_model(x, training=False)  # keep batch-norm layers in inference mode

    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=inputs, outputs=predictions)

    # Compile the Model
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Train the Model
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=10,
        batch_size=batch_size
    )
    fold_val_accuracy.append(history.history['val_accuracy'][-1])

    print(f'Fold {fold_no} completed.')
    fold_no += 1

print(f'Mean validation accuracy over {len(fold_val_accuracy)} folds: {np.mean(fold_val_accuracy):.4f}')

# Save the final model (this is the model trained on the last fold only)
model.save('xception_knee_segmentation_model.h5')