In [None]:
# Import Libraries
import os
import cv2
import numpy as np
import random
import shutil
from tqdm import tqdm # Progress Bar.
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Define paths for jaundice and normal images
jaundice_path = "/content/drive/MyDrive/Project 5: Jaundice Tracker/jaundice_dataset/jaundice/"
normal_path = "/content/drive/MyDrive/Project 5: Jaundice Tracker/jaundice_dataset/normal/"
output_dir = "/content/drive/MyDrive/Project 5: Jaundice Tracker/jaundice_dataset/jaundice_preprocessed/"

# os.listdir returns entries in arbitrary order; sorting makes index-based
# sample picks (e.g. jaundice_images[9]) refer to the same file on every run.
jaundice_images = sorted(os.listdir(jaundice_path))
target_size = (224, 224) # Resizing images

# Create output directories: <output_dir>/<split>/<category>/
for split in ("train", "val", "test"):
  for category in ("jaundice", "normal"):
    os.makedirs(os.path.join(output_dir, split, category), exist_ok=True)

# Define dataset split ratios
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1

# The test split is whatever remains after train and val, so test_ratio is
# only honoured when the three ratios add up to 1. Fail early otherwise.
if abs(train_ratio + val_ratio + test_ratio - 1.0) > 1e-9:
  raise ValueError(
      "train_ratio + val_ratio + test_ratio must equal 1.0, got "
      f"{train_ratio + val_ratio + test_ratio}"
  )

# Preprocessing Function
def _standardize(channel, epsilon=1e-8):
  """Return `channel` shifted to zero mean and scaled to unit standard deviation.

  `epsilon` keeps the division finite for constant-valued channels.
  """
  return (channel - np.mean(channel)) / (np.std(channel) + epsilon)


def preprocess_image(image_path, target_size=(224,224), augment=False):
  """Load an image and build a 7-channel feature stack from it.

  Steps: read with OpenCV, convert BGR to RGB, optionally augment, derive the
  Cr (YCrCb), b (LAB) and H (HSV) planes, apply CLAHE to Cr and b, compute an
  Otsu mask from Cr, resize everything to `target_size`, scale/standardize and
  stack along the last axis.

  Args:
    image_path: Path of the image file to read.
    target_size: `(width, height)` handed to `cv2.resize`.
    augment: When True, apply a random rotation in [-10, 10] degrees, a
      horizontal flip with probability 0.5 and a brightness factor in
      [0.9, 1.1]. Draws from the global `random` module state.

  Returns:
    None when OpenCV cannot read the file. Otherwise a float32 array of shape
    `(height, width, 7)` whose last axis holds, in order:
      0-2: RGB scaled to [0, 1]
      3:   CLAHE-enhanced Cr, standardized
      4:   CLAHE-enhanced LAB b, standardized
      5:   HSV hue, standardized
      6:   Otsu mask of the Cr plane with values 0.0 or 1.0
  """
  # cv2.imread reports failure by returning None rather than raising.
  image = cv2.imread(image_path)
  if image is None:
    return None
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # Converting BGR to RGB

  if augment:
    # Apply random rotation (-10 to 10 degrees) around the image centre
    angle = random.uniform(-10, 10)
    h, w = image.shape[:2]
    M = cv2.getRotationMatrix2D((w/2, h/2), angle, 1)
    image = cv2.warpAffine(image, M, (w, h))

    # Apply horizontal flipping with 50 % probability
    if random.random() > 0.5:
      image = cv2.flip(image, 1)

    # Apply brightness adjustment (random factor between 0.9 and 1.1);
    # clip before casting so bright pixels saturate instead of wrapping.
    factor = random.uniform(0.9, 1.1)
    image = np.clip(image * factor, 0, 255).astype(np.uint8)

  # Convert to YCrCb, LAB, and HSV color spaces
  ycrcb = cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb)
  lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
  hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)

  # Extract Cr (red chrominance), b (blue-yellow), and H (hue) planes.
  # OpenCV orders the YCrCb planes Y, Cr, Cb, so Cr is index 1 (index 2 is Cb).
  cr_channel = ycrcb[:, :, 1]
  b_channel = lab[:, :, 2]
  h_channel = hsv[:, :, 0]

  # Apply CLAHE for contrast enhancement
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
  cr_channel = clahe.apply(cr_channel)
  b_channel = clahe.apply(b_channel)

  # Apply Otsu's thresholding to the Cr channel (threshold value is chosen automatically)
  _, cr_thresh = cv2.threshold(cr_channel, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

  # Resize everything to the target size
  image = cv2.resize(image, target_size).astype(np.float32)
  cr_channel = cv2.resize(cr_channel, target_size).astype(np.float32)
  b_channel = cv2.resize(b_channel, target_size).astype(np.float32)
  h_channel = cv2.resize(h_channel, target_size).astype(np.float32)
  # Nearest-neighbour keeps the mask strictly binary; the default bilinear
  # interpolation would introduce intermediate grey values along edges.
  cr_thresh = cv2.resize(cr_thresh, target_size, interpolation=cv2.INTER_NEAREST).astype(np.float32)

  # Standardize (zero mean, unit standard deviation) the derived channels
  cr_channel = _standardize(cr_channel)
  b_channel = _standardize(b_channel)
  h_channel = _standardize(h_channel)

  # Normalize binary mask (Cr threshold) and RGB image to [0, 1]
  cr_thresh = cr_thresh / 255.0
  image = image / 255.0

  # Give each single-plane channel a trailing axis so it can be stacked with RGB
  extra_channels = [
      np.expand_dims(plane, axis=-1)
      for plane in (cr_channel, b_channel, h_channel, cr_thresh)
  ]

  # Concatenate channels: RGB + extra channels
  processed_image = np.concatenate([image] + extra_channels, axis=-1)

  return processed_image.astype(np.float32, copy=False)

def split_and_preprocess(category, category_path, seed=42):
  """Split one category into train/val/test and save preprocessed RGB images.

  Files in `category_path` are listed, shuffled and cut according to the
  module-level `train_ratio` and `val_ratio`; the remainder is the test split.
  Each file goes through `preprocess_image` (with augmentation for the train
  split only) and the RGB planes of the result are written under
  `output_dir/<split>/<category>/` using the original file name. Only the RGB
  planes are written because the saved files are ordinary 8-bit images.

  Args:
    category: Sub-folder name used below each split directory.
    category_path: Directory containing the source images of this category.
    seed: Seed for the shuffle that decides the split. The listing is sorted
      first, so a fixed seed yields the same split on every run. Pass None to
      get a different split each time.
  """
  # os.listdir order is arbitrary, so sort before the seeded shuffle.
  image_paths = sorted(os.path.join(category_path, img) for img in os.listdir(category_path))
  random.Random(seed).shuffle(image_paths)

  train_idx = int(len(image_paths) * train_ratio)
  val_idx = int(len(image_paths) * (train_ratio + val_ratio))

  split_dict = {
      "train": image_paths[:train_idx],
      "val": image_paths[train_idx:val_idx],
      "test": image_paths[val_idx:]
  }

  failed_paths = []
  for split, paths in split_dict.items():
    print(f"Processing {split} split for {category} ({len(paths)} images)...")
    for img_path in tqdm(paths):
      processed_image = preprocess_image(img_path, augment=(split == "train"))

      # None means OpenCV could not decode the file (e.g. not an image).
      if processed_image is None:
        failed_paths.append(img_path)
        continue

      save_path = os.path.join(output_dir, split, category, os.path.basename(img_path))
      # Planes 0-2 are RGB in [0, 1]. Round instead of truncating so the
      # float round trip does not lower pixel values by one.
      rgb_uint8 = np.clip(np.rint(processed_image[:, :, :3] * 255.0), 0, 255).astype(np.uint8)
      processed_image_bgr = cv2.cvtColor(np.ascontiguousarray(rgb_uint8), cv2.COLOR_RGB2BGR)
      # cv2.imwrite returns False when the file could not be written.
      if not cv2.imwrite(save_path, processed_image_bgr):
        failed_paths.append(img_path)

  if failed_paths:
    print(f"Skipped {len(failed_paths)} file(s) for {category} that could not be read or written.")

# Run the split + preprocessing pipeline once per class folder
for class_label, class_dir in (("jaundice", jaundice_path), ("normal", normal_path)):
  split_and_preprocess(class_label, class_dir)

print("All images processed and saved!")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Processing train split for jaundice (160 images)...


100%|██████████| 160/160 [00:20<00:00,  7.73it/s]


Processing val split for jaundice (20 images)...


100%|██████████| 20/20 [00:01<00:00, 16.36it/s]


Processing test split for jaundice (20 images)...


100%|██████████| 20/20 [00:01<00:00, 15.50it/s]


Processing train split for normal (448 images)...


100%|██████████| 448/448 [01:00<00:00,  7.41it/s]


Processing val split for normal (56 images)...


100%|██████████| 56/56 [00:03<00:00, 16.79it/s]


Processing test split for normal (56 images)...


100%|██████████| 56/56 [00:04<00:00, 12.84it/s]

All images processed and saved!





In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Sample Image from Dataset
sample_image_path = os.path.join(jaundice_path, jaundice_images[9])

# Load Original Image (cv2.imread returns None when the file cannot be read)
original_image = cv2.imread(sample_image_path)
if original_image is None:
  raise FileNotFoundError(f"Could not read sample image: {sample_image_path}")
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

# Process one image
processed_image = preprocess_image(sample_image_path)

# This cell indexes planes 3-6, so it needs at least 7 channels on the last axis.
if processed_image is None or processed_image.ndim != 3 or processed_image.shape[-1] < 7:
  raise ValueError(
      "preprocess_image must return an (H, W, 7) array "
      "(RGB + Cr + b + H + Cr threshold) for this visualization; got "
      f"{None if processed_image is None else processed_image.shape}"
  )

# Split the stack into its planes
cr_channel = processed_image[:, :, 3]
b_channel = processed_image[:, :, 4]
h_channel = processed_image[:, :, 5]
cr_thresh = processed_image[:, :, 6]
# Show the RGB planes (floats in [0, 1]) as the processed image. Scaling a
# standardized plane by 255 and casting to uint8 would wrap negative values.
processed_image_display = np.clip(processed_image[:, :, :3], 0.0, 1.0)


# Plot Original, Cr channel, B channel, H channel, and Cr threshold mask
panels = [
    (original_image, "Original Image", None),
    (cr_channel, "Cr Channel (Red Chrominance)", "gray"),
    (b_channel, "B Channel (Blue-Yellow)", "gray"),
    (h_channel, "Hue Channel", "gray"),
    (cr_thresh, "Cr Channel (Threshold)", "gray"),
]

plt.figure(figsize=(18,10))
for position, (panel, title, cmap) in enumerate(panels, start=1):
  plt.subplot(2, 3, position)
  plt.imshow(panel, cmap=cmap)
  plt.title(title)
  plt.axis("off")
plt.show()

plt.figure(figsize=(18,5))
plt.imshow(processed_image_display)
plt.title("Processed Image")
plt.axis("off")
plt.show()

IndexError: index 3 is out of bounds for axis 2 with size 3