In [45]:
# Create the directories that hold the images and their masks
import os

base_dir = "segmentation_dataset"

# Make sure both dataset sub-folders exist; re-running this cell is harmless
# because already-existing directories are accepted.
for _subfolder in ("images", "masks"):
    os.makedirs(os.path.join(base_dir, _subfolder), exist_ok=True)

In [46]:
# Create a grayscale copy of every dataset image to serve as its mask
import cv2
import os

input_folder = 'segmentation_dataset/images'
output_folder = 'segmentation_dataset/masks'
os.makedirs(output_folder, exist_ok=True)

# Write a grayscale version of every image as "mask_<original file name>".
# Sorted iteration keeps the processing order (and any log output) stable.
for filename in sorted(os.listdir(input_folder)):
    # Compare extensions case-insensitively so files such as "CAT.JPG" are not
    # silently left without a mask.
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    img_path = os.path.join(input_folder, filename)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable/corrupt file by returning None;
        # passing that on to cvtColor would raise.
        print(f"Skipping unreadable image: {img_path}")
        continue

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    mask_path = os.path.join(output_folder, "mask_" + filename)
    if not cv2.imwrite(mask_path, gray):
        # imwrite reports failure through its return value instead of raising.
        print(f"Failed to write mask: {mask_path}")


In [47]:
# Data Split
import os
import shutil
import random

def create_dir(path):
    """Create directory ``path``, including missing parents, if it is absent.

    Calling it for a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate "exists?" check, which could race
    # with another process and silently accepted a regular file at ``path``.
    os.makedirs(path, exist_ok=True)

def split_dataset(image_dir, mask_dir, output_dir, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15, seed=None):
    """Copy image/mask pairs into train/val/test sub-folders of ``output_dir``.

    Images and masks are paired by position after sorting the file names of
    both directories, so the two directories must sort into the same order.
    The pairs are shuffled and copied to
    ``<output_dir>/<split>/images`` and ``<output_dir>/<split>/masks``.

    Args:
        image_dir: Directory containing the image files.
        mask_dir: Directory containing the mask files.
        output_dir: Root directory that receives the split folders.
        train_ratio: Fraction of pairs for the training split.
        val_ratio: Fraction of pairs for the validation split.
        test_ratio: Fraction of pairs for the test split; the test split
            receives every pair left over after train and val are cut.
        seed: Optional seed for a reproducible shuffle. ``None`` (default)
            uses the global ``random`` state.

    Raises:
        AssertionError: If the ratios do not sum to 1 or the number of image
            and mask files differs.
    """
    # Compare with a tolerance: sums such as 0.6 + 0.3 + 0.1 are not exactly
    # 1.0 in floating point.
    assert abs(train_ratio + val_ratio + test_ratio - 1.0) <= 1e-9, "Ratios must sum to 1"

    def list_files(folder):
        # Regular files only: sub-directories such as ".ipynb_checkpoints"
        # would otherwise break the count check or crash shutil.copy.
        return sorted(
            name for name in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, name))
        )

    images = list_files(image_dir)
    masks = list_files(mask_dir)

    # Ensure that images and masks match
    assert len(images) == len(masks), "Number of images and masks do not match."

    # Shuffle the pairs together so each image stays with its mask
    data = list(zip(images, masks))
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(data)

    total = len(data)
    if total == 0:
        print(f"Warning: no files found in {image_dir}; all splits will be empty.")

    train_end = int(total * train_ratio)
    val_end = train_end + int(total * val_ratio)

    splits = {
        'train': data[:train_end],
        'val': data[train_end:val_end],
        'test': data[val_end:],
    }

    for split_name, split_data in splits.items():
        split_image_dir = os.path.join(output_dir, split_name, 'images')
        split_mask_dir = os.path.join(output_dir, split_name, 'masks')
        os.makedirs(split_image_dir, exist_ok=True)
        os.makedirs(split_mask_dir, exist_ok=True)

        for img, msk in split_data:
            shutil.copy(os.path.join(image_dir, img), os.path.join(split_image_dir, img))
            shutil.copy(os.path.join(mask_dir, msk), os.path.join(split_mask_dir, msk))

    train_data, val_data, test_data = splits['train'], splits['val'], splits['test']
    print(f"Dataset split complete.\nTrain: {len(train_data)}\nVal: {len(val_data)}\nTest: {len(test_data)}")

# Split the generated dataset 70 % / 15 % / 15 % into train / val / test.
split_dataset(
    'segmentation_dataset/images',
    'segmentation_dataset/masks',
    'segment_dataset',
    train_ratio=0.7, val_ratio=0.15, test_ratio=0.15,
)


Dataset split complete.
Train: 0
Val: 0
Test: 0


In [48]:
# Preprocessing the image for VGG-Unet
import os
import numpy as np
import cv2

def preprocess_data(image_dir, mask_dir, image_size=(128, 128)):
    """Load image/mask pairs with OpenCV as normalised float32 arrays.

    For every entry ``<name>`` in ``image_dir`` the mask is expected at
    ``<mask_dir>/mask_<name>``. Entries without a mask, or whose image or mask
    cannot be decoded, are reported on stdout and skipped.

    Args:
        image_dir: Directory with the input images.
        mask_dir: Directory with the masks, named ``"mask_" + image name``.
        image_size: Target size handed to ``cv2.resize`` as ``dsize``
            (OpenCV interprets it as ``(width, height)``).

    Returns:
        Tuple ``(X, Y)`` of float32 arrays scaled to ``[0, 1]``: ``X`` holds
        the images in RGB channel order, ``Y`` the single-channel masks with
        a trailing channel axis. Both are empty arrays when nothing loads.
    """
    X, Y = [], []

    for img_name in sorted(os.listdir(image_dir)):
        img_path = os.path.join(image_dir, img_name)
        mask_path = os.path.join(mask_dir, "mask_" + img_name)  # e.g. mask_cat_1.jpg

        if not os.path.exists(mask_path):
            print(f"Mask not found for {img_name}")
            continue

        image = cv2.imread(img_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        # cv2.imread returns None instead of raising on unreadable files.
        if image is None or mask is None:
            print(f"Failed to load: {img_path} or {mask_path}")
            continue

        # OpenCV decodes to BGR; convert to RGB so the arrays use the same
        # channel order as the Keras load_img based loaders in this notebook.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, image_size)
        mask = cv2.resize(mask, image_size)

        # float32 matches the Keras loaders and halves memory versus the
        # float64 produced by dividing a uint8 array by a Python float.
        X.append(image.astype(np.float32) / 255.0)
        Y.append(np.expand_dims(mask.astype(np.float32) / 255.0, axis=-1))  # (H, W, 1)

    return np.array(X, dtype=np.float32), np.array(Y, dtype=np.float32)


In [49]:
# Build the training and validation arrays from the split folders.
X_train, Y_train = preprocess_data(
    image_dir='./segment_dataset/train/images',
    mask_dir='./segment_dataset/train/masks',
)
X_val, Y_val = preprocess_data(
    image_dir='./segment_dataset/val/images',
    mask_dir='./segment_dataset/val/masks',
)

# Sanity check, one shape per line: images are expected as
# (num_images, 128, 128, 3) and masks as (num_images, 128, 128, 1).
print(X_train.shape, Y_train.shape, sep="\n")


(254, 128, 128, 3)
(254, 128, 128, 1)


In [None]:
# Build and train the VGG16 U-Net model
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import tensorflow as tf

# Input geometry used by the loader and the model: 128x128 pixels, 3 channels.
IMG_HEIGHT = IMG_WIDTH = 128
IMG_CHANNELS = 3

# Load and preprocess dataset
def load_dataset(image_dir, mask_dir):
    """Load image/mask pairs with Keras ``load_img`` as float32 arrays.

    For every entry ``<name>`` in ``image_dir`` the mask is expected at
    ``<mask_dir>/mask_<name>``; entries without a mask are reported on stdout
    and skipped. Images and masks are resized to the module-level
    ``(IMG_HEIGHT, IMG_WIDTH)`` and divided by 255.

    Args:
        image_dir: Directory with the input images.
        mask_dir: Directory with the masks, named ``"mask_" + image name``.

    Returns:
        Tuple ``(images, masks)`` of float32 arrays; masks are loaded with
        ``color_mode="grayscale"``.
    """
    images = []
    masks = []
    target_size = (IMG_HEIGHT, IMG_WIDTH)

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order reproducible across runs and machines.
    for img_name in sorted(os.listdir(image_dir)):
        img_path = os.path.join(image_dir, img_name)
        mask_name = f"mask_{img_name}"
        mask_path = os.path.join(mask_dir, mask_name)

        if not os.path.exists(mask_path):
            print(f"Skipping {img_name}, mask {mask_name} not found.")
            continue

        img = load_img(img_path, target_size=target_size)
        images.append(img_to_array(img) / 255.0)

        mask = load_img(mask_path, target_size=target_size, color_mode="grayscale")
        masks.append(img_to_array(mask) / 255.0)

    return np.array(images, dtype=np.float32), np.array(masks, dtype=np.float32)

# Locations of the pre-split training and validation data
train_img_dir, train_mask_dir = "segment_dataset/train/images", "segment_dataset/train/masks"
val_img_dir, val_mask_dir = "segment_dataset/val/images", "segment_dataset/val/masks"

# Read both splits into memory
X_train, Y_train = load_dataset(image_dir=train_img_dir, mask_dir=train_mask_dir)
X_val, Y_val = load_dataset(image_dir=val_img_dir, mask_dir=val_mask_dir)

# Define VGG16 + U-Net architecture
def build_vgg16_unet(input_shape):
    """Assemble a U-Net style model on top of a VGG16 encoder.

    The encoder is ``VGG16(weights='imagenet', include_top=False)``. Starting
    from ``block5_conv3``, each decoder stage applies a stride-2 transposed
    convolution, concatenates the matching encoder output and applies one
    3x3 ReLU convolution. A final 1x1 sigmoid convolution yields one channel.

    Args:
        input_shape: Input shape forwarded to ``VGG16``.

    Returns:
        A Keras ``Model`` from the VGG16 input to the sigmoid output.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)

    # (encoder layer used as skip connection, decoder filter count),
    # ordered from the deepest stage to the shallowest.
    decoder_stages = (
        ("block4_conv3", 512),
        ("block3_conv3", 256),
        ("block2_conv2", 128),
        ("block1_conv2", 64),
    )

    # Bottleneck: deepest encoder feature map.
    x = backbone.get_layer("block5_conv3").output

    for skip_name, filters in decoder_stages:
        x = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
        x = concatenate([x, backbone.get_layer(skip_name).output])
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)

    mask_output = Conv2D(1, (1, 1), activation='sigmoid')(x)

    return Model(inputs=backbone.input, outputs=mask_output)

# Instantiate the network for the configured input size and attach the
# optimizer, loss and metric.
model = build_vgg16_unet(input_shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

model.summary()

# Fit on the training arrays; the validation arrays are passed as
# validation_data.
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=8,
    epochs=25,
    validation_data=(X_val, Y_val),
)


Epoch 1/25
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 2s/step - accuracy: 0.4876 - loss: 0.9408 - val_accuracy: 0.8095 - val_loss: 0.3010
Epoch 2/25
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 4s/step - accuracy: 0.8135 - loss: 0.2635 - val_accuracy: 0.8556 - val_loss: 0.1735
Epoch 3/25
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4s/step - accuracy: 0.8634 - loss: 0.1552 - val_accuracy: 0.8664 - val_loss: 0.1387
Epoch 4/25
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 4s/step - accuracy: 0.8673 - loss: 0.1327 - val_accuracy: 0.8715 - val_loss: 0.1227
Epoch 5/25
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 5s/step - accuracy: 0.8790 - loss: 0.1213 - val_accuracy: 0.8784 - val_loss: 0.1030
Epoch 6/25
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 4s/step - accuracy: 0.8789 - loss: 0.1017 - val_accuracy: 0.8823 - val_loss: 0.0912
Epoch 7/25
[1m32/32[0m [32m━━━━━

In [None]:
# Persist the trained model to an HDF5 file in the working directory.
model.save(filepath="vgg16_unet.h5")

In [44]:
from PIL import Image
import matplotlib.pyplot as plt

# --- Save the model after training ---
model.save("vgg16_unet.h5")

# --- Load model (optional if you're in a new script) ---
from tensorflow.keras.models import load_model
model = load_model("vgg16_unet.h5")

# --- Test-set configuration ---
# Keep the image size at 128x128, the size the network is built and trained
# with and the default target size of load_test_images. Overwriting these
# globals with 256 would make any later load_dataset call produce arrays the
# model cannot accept.
IMG_HEIGHT = 128
IMG_WIDTH = 128
IMG_CHANNELS = 3  # three colour channels, as used when the model is built
TEST_IMAGE_DIR = "segment_dataset/test/images"
#X_test = load_dataset(image_dir="segment_dataset/test/images",mask_dir=None)
#X_test = load_dataset(
#    image_dir="segment_dataset/test/images",
#    mask_dir=None,  # <--- This is the key: set mask_dir to None
#)

# Function to load and preprocess all test images
def load_test_images(directory, target_size=(128, 128)):
    """Load every JPEG/PNG image in ``directory`` as a normalised array.

    Args:
        directory: Folder to scan (not recursive).
        target_size: Size passed to ``load_img`` as ``target_size``.

    Returns:
        Tuple ``(images, names)``: ``images`` is an array of the loaded images
        divided by 255, ``names`` the matching file names in the same order.
    """
    test_images = []
    image_names = []

    # Sorted so that index i of the returned array always refers to the same
    # file; os.listdir alone returns entries in arbitrary order.
    for filename in sorted(os.listdir(directory)):
        # Case-insensitive match so "IMG_01.JPG" is not silently dropped.
        if not filename.lower().endswith((".jpg", ".png", ".jpeg")):
            continue

        img_path = os.path.join(directory, filename)
        img = load_img(img_path, target_size=target_size)
        test_images.append(img_to_array(img) / 255.0)  # Normalize
        image_names.append(filename)

    return np.array(test_images), image_names

# Load every test image together with its file name.
X_test, test_filenames = load_test_images(directory=TEST_IMAGE_DIR)

# --- Predict on the test set ---
# Threshold the model output at 0.5 to obtain uint8 masks with values 0 or 1.
preds = (model.predict(X_test) > 0.5).astype(np.uint8)

# --- Visualize predictions ---


#def visualize_prediction(index):
    #plt.figure(figsize=(12, 4))

#    plt.subplot(1, 3, 1)
#    plt.title("Input Image")
#    plt.imshow(X_test[index])
#    plt.axis('off')

#    plt.subplot(1, 3, 2)
 ##   plt.title("True Mask")
 #   plt.title("True Mask")
 #   plt.imshow(Y_test[index].squeeze(), cmap='gray')
 #   plt.axis('off')

  #  plt.subplot(1, 3, 3)
 #   plt.title("Predicted Mask")
 #   plt.imshow(preds[index].squeeze(), cmap='gray')
 #   plt.axis('off')

   # plt.tight_layout()
 #   plt.show()
    
output_dir = "segment_dataset/test/masks"
# NOTE(review): this is the folder split_dataset fills with the test-split
# masks, so predictions end up next to them (distinguished only by the
# "pred_mask_" prefix). Consider a dedicated predictions folder.

# Create the folder if needed; Image.save does not create missing
# directories and would fail otherwise.
os.makedirs(output_dir, exist_ok=True)

image_ids = [f"id_{i:03d}" for i in range(len(preds))]

# Save every predicted mask as a PNG file
for i in range(len(preds)):
    mask_array = preds[i].squeeze()  # Remove singleton dimension: (H, W, 1) -> (H, W)
    mask_image = Image.fromarray(mask_array * 255)  # Scale 0/1 to 0/255 for saving as image

    # Construct filename for the predicted mask
    # You might want to use the original filename or a unique ID
    pred_filename = f"pred_mask_{image_ids[i]}.png"
    pred_filepath = os.path.join(output_dir, pred_filename)

    mask_image.save(pred_filepath)

    ##mask_filepaths.append(pred_filepath)
    ##original_image_filenames.append(original_filenames[i]) # Or image_ids[i]

    # Calculate metrics for each image and store (example)
    # current_iou = calculate_iou(preds[i], Y_test[i]) # Assuming you have this function
    # iou_scores.append(current_iou)

print("Masks saved.")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 448ms/step
Masks saved.
