<a href="https://colab.research.google.com/github/shrprabh/BraTS-PolypSegmentation/blob/main/polysegmentationofbradtsdataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install nibabel for handling NIfTI files
!pip install nibabel

# Import libraries
import os
import numpy as np
import nibabel as nib
import cv2
import matplotlib.pyplot as plt
from glob import glob
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, Conv2DTranspose,
                                     BatchNormalization, Activation, Concatenate)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam


Collecting nibabel
  Downloading nibabel-5.3.2-py3-none-any.whl.metadata (9.1 kB)
Downloading nibabel-5.3.2-py3-none-any.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nibabel
Successfully installed nibabel-5.3.2


In [2]:
from google.colab import files
files.upload()


Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"shrprabh","key":"323ee59f3e2f0c11df48be77bdb30712"}'}

In [3]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d awsaf49/brats2020-training-data


Dataset URL: https://www.kaggle.com/datasets/awsaf49/brats2020-training-data
License(s): CC0-1.0
Downloading brats2020-training-data.zip to /content
100% 6.75G/6.76G [00:30<00:00, 264MB/s]
100% 6.76G/6.76G [00:31<00:00, 234MB/s]


In [4]:
!kaggle datasets download -d awsaf49/brats2020-training-data
!unzip -qq brats2020-training-data.zip -d brats2020


Dataset URL: https://www.kaggle.com/datasets/awsaf49/brats2020-training-data
License(s): CC0-1.0
brats2020-training-data.zip: Skipping, found more recently modified local copy (use --force to force download)


#Step 3: Data Preprocessing
#Load the images and masks, extract slices, and prepare them for training.#

In [5]:
import os
import h5py
import numpy as np
import cv2
from sklearn.model_selection import train_test_split

# Define your data directory
data_dir = 'brats2020/BraTS2020_training_data/content/data/'

# Validate the data directory
if not os.path.exists(data_dir):
    raise FileNotFoundError(f"Data directory not found: {data_dir}")

# Initialize lists to store image and mask slices
image_slices = []
mask_slices = []

# Process only .h5 files
h5_files = [file_name for file_name in os.listdir(data_dir) if file_name.endswith('.h5')]

if not h5_files:
    raise FileNotFoundError(f"No .h5 files found in directory: {data_dir}")

# Limit to 200 images
processed_images = 0
max_images = 200

# Loop over each .h5 file
for file_name in h5_files:
    if processed_images >= max_images:
        break

    file_path = os.path.join(data_dir, file_name)
    print(f"Processing file: {file_name}")

    # Open the .h5 file and load the data
    with h5py.File(file_path, 'r') as f:
        # Validate expected keys in the .h5 file
        if 'image' not in f or 'mask' not in f:
            print(f"Skipping file {file_name}: missing 'image' or 'mask' keys")
            continue

        img_slice = np.array(f['image'])
        mask_slice = np.array(f['mask'])

        # Normalize image slice (if needed)
        if img_slice.max() != img_slice.min():
            img_slice = (img_slice - img_slice.min()) / (img_slice.max() - img_slice.min())
        else:
            img_slice = img_slice / 255.0  # Fallback normalization

        # Resize image and mask to 256x256 for DUCKNet input
        img_resized = cv2.resize(img_slice, (256, 256))
        mask_resized = cv2.resize(mask_slice, (256, 256), interpolation=cv2.INTER_NEAREST)

        # Expand dimensions to add channel axis (for CNN input)
        img_resized = np.expand_dims(img_resized, axis=-1)
        mask_resized = np.expand_dims(mask_resized, axis=-1)

        # Append to lists
        image_slices.append(img_resized)
        mask_slices.append(mask_resized)
        processed_images += 1

# Convert lists to NumPy arrays
X = np.array(image_slices)
y = np.array(mask_slices)

# Split data into training and validation sets (80% train, 20% validation)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

print("Data loading complete.")
print(f"Total processed images: {processed_images}")
print(f"Training data shape: {X_train.shape}, Validation data shape: {X_val.shape}")


Processing file: volume_62_slice_131.h5
Processing file: volume_233_slice_82.h5
Processing file: volume_305_slice_42.h5
Processing file: volume_8_slice_126.h5
Processing file: volume_212_slice_53.h5
Processing file: volume_229_slice_109.h5
Processing file: volume_66_slice_14.h5
Processing file: volume_44_slice_20.h5
Processing file: volume_202_slice_52.h5
Processing file: volume_21_slice_148.h5
Processing file: volume_78_slice_117.h5
Processing file: volume_184_slice_92.h5
Processing file: volume_321_slice_63.h5
Processing file: volume_225_slice_14.h5
Processing file: volume_160_slice_86.h5
Processing file: volume_62_slice_34.h5
Processing file: volume_50_slice_36.h5
Processing file: volume_163_slice_142.h5
Processing file: volume_14_slice_63.h5
Processing file: volume_248_slice_133.h5
Processing file: volume_125_slice_134.h5
Processing file: volume_192_slice_29.h5
Processing file: volume_129_slice_39.h5
Processing file: volume_122_slice_52.h5
Processing file: volume_360_slice_38.h5
Pr

In [6]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, BatchNormalization, Activation, Concatenate
from tensorflow.keras.models import Model

def conv_block(x, filters):
    x = Conv2D(filters, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(filters, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    return x

def DuckNet(input_shape=(256, 256, 1)):
    inputs = Input(input_shape)

    # Encoder
    c1 = conv_block(inputs, 64)
    p1 = MaxPooling2D((2, 2))(c1)

    c2 = conv_block(p1, 128)
    p2 = MaxPooling2D((2, 2))(c2)

    c3 = conv_block(p2, 256)
    p3 = MaxPooling2D((2, 2))(c3)

    c4 = conv_block(p3, 512)
    p4 = MaxPooling2D((2, 2))(c4)

    # Bottleneck
    bn = conv_block(p4, 1024)

    # Decoder
    u1 = Conv2DTranspose(512, (2, 2), strides=(2, 2), padding='same')(bn)
    u1 = Concatenate()([u1, c4])
    c5 = conv_block(u1, 512)

    u2 = Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(c5)
    u2 = Concatenate()([u2, c3])
    c6 = conv_block(u2, 256)

    u3 = Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(c6)
    u3 = Concatenate()([u3, c2])
    c7 = conv_block(u3, 128)

    u4 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(c7)
    u4 = Concatenate()([u4, c1])
    c8 = conv_block(u4, 64)

    outputs = Conv2D(1, (1 ,1), activation='sigmoid')(c8)

    model = Model(inputs=[inputs], outputs=[outputs])

    return model

# Create the model
model = DuckNet(input_shape=(256 ,256 ,1))
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 256, 256, 1)]        0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 256, 256, 64)         640       ['input_1[0][0]']             
                                                                                                  
 batch_normalization (Batch  (None, 256, 256, 64)         256       ['conv2d[0][0]']              
 Normalization)                                                                                   
                                                                                                  
 activation (Activation)     (None, 256, 256, 64)         0         ['batch_normalization[0][0

In [18]:
print(f"X_val shape: {X_val.shape}")
print(f"y_val shape: {y_val.shape}")


X_val shape: (40, 256, 256, 1)
y_val shape: (40, 256, 256, 1)


In [19]:
# If X_val has shape (num_samples, 256, 256, 4, 1)
if X_val.ndim == 5:
    X_val = X_val.reshape(-1, 256, 256, X_val.shape[3])  # Result: (num_samples, 256, 256, 4)
    print(f"X_val shape after reshaping: {X_val.shape}")

# Since your model expects input with one channel, select one channel
if X_val.shape[-1] == 4:
    X_val = X_val[..., 0]  # Select the first channel; shape: (num_samples, 256, 256)
    print(f"X_val shape after selecting first channel: {X_val.shape}")

# Expand dimensions to add the channel axis back
X_val = np.expand_dims(X_val, axis=-1)  # Shape: (num_samples, 256, 256, 1)
print(f"X_val shape after expanding dims: {X_val.shape}")


X_val shape after expanding dims: (40, 256, 256, 1, 1)


In [20]:
# If y_val has shape (num_samples, 256, 256, num_channels, 1)
if y_val.ndim == 5:
    y_val = y_val.reshape(-1, 256, 256, y_val.shape[3])  # Result: (num_samples, 256, 256, num_channels)
    print(f"y_val shape after reshaping: {y_val.shape}")

# Combine mask channels into one (for binary segmentation)
if y_val.shape[-1] > 1:
    y_val = np.sum(y_val, axis=-1)  # Sum over channels; shape: (num_samples, 256, 256)
    print(f"y_val shape after summing channels: {y_val.shape}")

    # Ensure mask is binary
    y_val[y_val > 0] = 1

    # Expand dimensions to add the channel axis
    y_val = np.expand_dims(y_val, axis=-1)  # Shape: (num_samples, 256, 256, 1)
    print(f"y_val shape after expanding dims: {y_val.shape}")
else:
    # If y_val already has one channel
    y_val = np.squeeze(y_val, axis=-1)  # Remove last singleton dimension
    print(f"y_val shape after squeezing: {y_val.shape}")
    y_val = np.expand_dims(y_val, axis=-1)
    print(f"y_val shape after expanding dims: {y_val.shape}")


y_val shape after squeezing: (40, 256, 256)
y_val shape after expanding dims: (40, 256, 256, 1)


In [22]:
print(f"X_val dtype: {X_val.dtype}")
print(f"y_val dtype: {y_val.dtype}")


X_val dtype: float64
y_val dtype: uint64


In [23]:
history = model.fit(
    augmented_X_train,
    augmented_y_train,
    batch_size=batch_size,
    epochs=4,
    validation_data=(X_val, y_val),
)


Epoch 1/4

ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 2066, in test_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 2049, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 2037, in run_step  **
        outputs = model.test_step(data)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1917, in test_step
        y_pred = self(x, training=False)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/input_spec.py", line 280, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'model_2' (type Functional).
    
    Input 0 of layer "conv2d_21" is incompatible with the layer: expected axis -1 of input shape to have value 1, but received input with shape (8, 256, 256, 4)
    
    Call arguments received by layer 'model_2' (type Functional):
      • inputs=tf.Tensor(shape=(8, 256, 256, 4, 1), dtype=float32)
      • training=False
      • mask=None
