In [None]:
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np
import os
from PIL import Image

In [None]:
# Download the Oxford Pets dataset (images and masks)
import tensorflow_datasets as tfds

dataset_name = "oxford_iiit_pet:3.*.*"

# NOTE: as_supervised=True is deliberately NOT used. For this dataset the
# supervised pair is (image, label), i.e. the breed id, not the segmentation
# mask, so every downstream "mask" would be a scalar class label. Load the
# full feature dictionaries and pick the segmentation mask explicitly.
raw_splits, info = tfds.load(dataset_name, with_info=True, split=["train", "test"])


def _to_image_mask(example):
    """Reduce a TFDS feature dict to an (image, segmentation_mask) pair."""
    return example["image"], example["segmentation_mask"]


# Same layout as before: dataset[0] is the train split, dataset[1] the test split,
# and each element is a 2-tuple that unpacks as (image, mask).
dataset = [split.map(_to_image_mask) for split in raw_splits]

print("Dataset Information:")
print(info)

# Visualize a sample image and its mask
def display_sample(dataset):
    """Plot the first (image, mask) element of ``dataset`` side by side.

    Args:
        dataset: Iterable dataset exposing ``take`` whose elements unpack
            into an image and a mask.
    """
    for image, mask in dataset.take(1):
        plt.figure(figsize=(8, 4))
        # Panels are drawn left to right: slot 1 holds the image, slot 2 the mask.
        panels = (("Image", image), ("Mask", mask))
        for slot, (title, pixels) in enumerate(panels, start=1):
            plt.subplot(1, 2, slot)
            plt.imshow(pixels)
            plt.title(title)
        plt.show()

# Show one example taken from the training split
display_sample(dataset[0])

In [None]:
# Resize and normalize the images and masks
# Target spatial size passed to tf.image.resize for both images and masks.
IMG_SIZE = (128, 128)

def preprocess(image, mask):
    """Resize an (image, mask) pair to IMG_SIZE and normalise both.

    Args:
        image: Image tensor; scaled to [0, 1] by dividing by 255.
        mask: Per-pixel class-id tensor with the same spatial layout as
            ``image``.

    Returns:
        Tuple ``(image, mask)``: the resized float image and the resized
        ``uint8`` mask with every label shifted down by one.
    """
    image = tf.image.resize(image, IMG_SIZE) / 255.0  # Normalize image to [0, 1]
    # Masks hold discrete class ids, so they must be resized with
    # nearest-neighbour sampling. The default bilinear interpolation blends
    # neighbouring ids at object boundaries and produces labels that belong
    # to neither class once cast back to integers.
    mask = tf.image.resize(mask, IMG_SIZE, method="nearest")
    # Shift labels so classes start from 0 (assumes the raw mask is 1-based).
    mask = tf.cast(mask, tf.uint8) - 1
    return image, mask

# Preprocess train and test datasets
BATCH_SIZE = 32
# Number of preprocessed examples held in memory for shuffling.
SHUFFLE_BUFFER_SIZE = 1000

# Training pipeline: shuffle AFTER the map so the buffer holds the small
# resized examples, and reshuffle every epoch (the default) so the model does
# not see the samples in the same fixed order each time.
train_dataset = (
    dataset[0]
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)  # overlap input preparation with training
)

# Evaluation pipeline: identical preprocessing, but order is left untouched.
test_dataset = (
    dataset[1]
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Define the U-Net model for semantic segmentation
def unet_model(input_shape=(128, 128, 3), num_classes=3):
    """Build a two-level U-Net that outputs per-pixel class probabilities.

    The encoder halves the spatial resolution twice with 2x2 max pooling and
    the decoder upsamples twice, concatenating the matching encoder features
    as skip connections. Because of those two pool/upsample stages the skip
    concatenations only line up when the spatial dimensions are divisible
    by 4.

    Args:
        input_shape: Shape of a single input, passed to ``layers.Input``.
        num_classes: Number of channels of the final softmax layer.

    Returns:
        An uncompiled ``Model`` mapping the input to a softmax map with
        ``num_classes`` channels.
    """
    def conv_pair(tensor, filters):
        """Apply two stacked 3x3, same-padded, ReLU-activated convolutions."""
        for _ in range(2):
            tensor = layers.Conv2D(filters, (3, 3), activation="relu", padding="same")(tensor)
        return tensor

    inputs = layers.Input(shape=input_shape)

    # Encoder: keep the pre-pooling features for the skip connections
    skip_full = conv_pair(inputs, 64)
    pooled_half = layers.MaxPooling2D((2, 2))(skip_full)

    skip_half = conv_pair(pooled_half, 128)
    pooled_quarter = layers.MaxPooling2D((2, 2))(skip_half)

    # Bottleneck at a quarter of the input resolution
    bottleneck = conv_pair(pooled_quarter, 256)

    # Decoder: upsample, merge with the encoder features of equal resolution
    merged_half = layers.concatenate([layers.UpSampling2D((2, 2))(bottleneck), skip_half])
    decoded_half = conv_pair(merged_half, 128)

    merged_full = layers.concatenate([layers.UpSampling2D((2, 2))(decoded_half), skip_full])
    decoded_full = conv_pair(merged_full, 64)

    # 1x1 convolution turns the features into per-pixel class probabilities
    outputs = layers.Conv2D(num_classes, (1, 1), activation="softmax")(decoded_full)

    return Model(inputs, outputs)

# Initialize the U-Net model
# Derive the input shape from IMG_SIZE instead of relying on the hard-coded
# default, so the network always matches the resolution that preprocess()
# produces; the trailing 3 is the channel count of the original default.
model = unet_model(input_shape=(*IMG_SIZE, 3))
# Masks hold integer class ids (not one-hot), hence the sparse loss.
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
model.summary()

In [None]:
# Fit the U-Net for 10 epochs, validating on the test split after each one
history = model.fit(
    x=train_dataset,
    epochs=10,
    validation_data=test_dataset,
)


In [None]:
# Measure loss and accuracy on the test split and report both
loss, accuracy = model.evaluate(test_dataset)
print(f"Test Loss: {loss:.4f}", f"Test Accuracy: {accuracy:.4f}", sep="\n")

# Function to display predictions
def display_predictions(dataset, model, num_samples=1):
    """Show images from the first batch next to their true and predicted masks.

    One figure with three panels (image, ground truth, prediction) is drawn
    per displayed sample.

    Args:
        dataset: Batched dataset whose elements unpack into (images, masks).
        model: Model whose ``predict`` output carries one score per class on
            the last axis.
        num_samples: Number of leading images of the first batch to show.
            Defaults to 1, which reproduces the single-sample behaviour.

    Raises:
        ValueError: If ``num_samples`` is smaller than 1.
    """
    if num_samples < 1:
        raise ValueError("num_samples must be at least 1")

    for images, masks in dataset.take(1):
        images, masks = images[:num_samples], masks[:num_samples]
        class_scores = model.predict(images)
        pred_masks = tf.argmax(class_scores, axis=-1)

        # Pin the colour scale to the full class range. Without this, imshow
        # rescales every panel to its own min/max, so the same class id can be
        # drawn in different colours in the ground-truth and predicted panels
        # whenever the prediction does not contain every class.
        color_scale = {"vmin": 0, "vmax": class_scores.shape[-1] - 1}

        for image, true_mask, pred_mask in zip(images, masks, pred_masks):
            plt.figure(figsize=(12, 4))
            plt.subplot(1, 3, 1)
            plt.imshow(image)
            plt.title("Original Image")

            plt.subplot(1, 3, 2)
            plt.imshow(true_mask, **color_scale)
            plt.title("Ground Truth Mask")

            plt.subplot(1, 3, 3)
            plt.imshow(pred_mask, **color_scale)
            plt.title("Predicted Mask")

            plt.show()

# Visualize predictions
display_predictions(test_dataset, model)