In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np

In [2]:
# Fetch COCO 2017 through TFDS; with_info=True also returns the dataset
# metadata object alongside the dictionary of splits.
dataset, info = tfds.load(name='coco/2017', with_info=True)

# Keep handles on the training and validation splits of the returned dict.
train_dataset, val_dataset = dataset['train'], dataset['validation']

# The dataset info returned above is printed in the next cell.




[1mDownloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to C:\Users\ruben\tensorflow_datasets\coco\2017\1.1.0...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

KeyboardInterrupt: 

In [None]:
# Show the metadata object returned by tfds.load(..., with_info=True).
print(info)

In [None]:
# Integer class id to segment; read by filter_class and preprocess below.
# NOTE(review): the original comment said "1 for 'person'". That matches the
# raw COCO category ids, but TFDS appears to expose labels as 0-based
# ClassLabel indices, so 1 may select a different class here -- confirm against
# the label names in `info` before training.
target_class_id = 1

# Function to filter images containing the target class
def filter_class(instance):
    """Return a scalar boolean tensor: does this example contain the target class?

    Intended as the predicate for ``tf.data.Dataset.filter``, which traces the
    function in graph mode and requires a scalar ``tf.bool`` result, so the
    check is expressed with TensorFlow ops rather than Python ``any``/``==``.

    Args:
        instance: One dataset example. ``instance['objects']['label']`` is
            expected to be a 1-D integer tensor holding the class id of every
            annotated object in the image (TFDS COCO feature layout).

    Returns:
        Scalar ``tf.bool`` tensor, True when at least one object's label equals
        the module-level ``target_class_id``. An image with no annotated
        objects yields False.
    """
    # 'objects' is a dict of per-object tensors, not a list of per-object
    # dicts, so the labels are compared as one vector.
    labels = instance['objects']['label']
    return tf.reduce_any(tf.equal(labels, target_class_id))

# Filter the dataset: keep only training examples that contain the target class.
# The training split is bound to `train_dataset` when the data is loaded;
# `train_ds` does not exist yet at this point and would raise a NameError on a
# fresh kernel.
filtered_train_ds = train_dataset.filter(filter_class)

# Preprocess the dataset to create images and masks
def preprocess(instance, image_size=(256, 256)):
    """Convert one dataset example into an ``(image, mask)`` training pair.

    Written entirely with TensorFlow ops so it can run inside
    ``tf.data.Dataset.map`` (graph mode), where NumPy allocation, ``int()`` on
    tensors and Python iteration over tensors are not available.

    Args:
        instance: One dataset example with an ``'image'`` tensor and an
            ``'objects'`` dict containing ``'label'`` (one class id per object)
            and ``'bbox'`` (one ``[ymin, xmin, ymax, xmax]`` row per object).
            Box coordinates are taken to be normalized to ``[0, 1]`` relative
            to the image size, as TFDS bounding-box features are documented.
        image_size: ``(height, width)`` every image and mask is brought to.
            A fixed size is required for batching; the default matches the
            256x256 input of the U-Net defined in this notebook.

    Returns:
        image: ``float32`` tensor of shape ``(height, width, channels)`` with
            values scaled to ``[0, 1]``.
        mask: ``float32`` tensor of shape ``(height, width, 1)`` that is 1.0
            inside any bounding box of the target class and 0.0 elsewhere.
            This is a box-level approximation, not a true instance mask.
    """
    height, width = image_size

    # Scale pixel values to [0, 1] first, then resize to the common size.
    image = tf.image.convert_image_dtype(instance['image'], dtype=tf.float32)
    image = tf.image.resize(image, (height, width))

    # Keep only the boxes whose label matches the target class -> shape (n, 4).
    objects = instance['objects']
    is_target = tf.equal(objects['label'], target_class_id)
    boxes = tf.boolean_mask(objects['bbox'], is_target)

    # Pixel-centre coordinates expressed in the same normalized space as the
    # boxes, so no conversion to absolute pixel coordinates is needed and the
    # mask is built directly at the output resolution.
    rows = (tf.range(height, dtype=tf.float32) + 0.5) / float(height)
    cols = (tf.range(width, dtype=tf.float32) + 0.5) / float(width)

    # For every box, mark the rows / columns that fall inside it.
    inside_rows = tf.logical_and(rows[tf.newaxis, :] >= boxes[:, 0:1],
                                 rows[tf.newaxis, :] < boxes[:, 2:3])   # (n, H)
    inside_cols = tf.logical_and(cols[tf.newaxis, :] >= boxes[:, 1:2],
                                 cols[tf.newaxis, :] < boxes[:, 3:4])   # (n, W)

    # Outer AND gives one rectangle per box; OR-reduce over boxes. With zero
    # matching boxes the reduction yields an all-False (empty) mask.
    per_box = tf.logical_and(inside_rows[:, :, tf.newaxis],
                             inside_cols[:, tf.newaxis, :])             # (n, H, W)
    covered = tf.reduce_any(per_box, axis=0)                            # (H, W)

    # Binary {0, 1} float target with a trailing channel axis so it lines up
    # with a single-channel sigmoid output trained with binary cross-entropy.
    mask = tf.cast(covered, tf.float32)[..., tf.newaxis]

    return image, mask

# Build the training input pipeline in one chain:
#   1. convert each filtered example into an (image, mask) pair in parallel,
#   2. shuffle with a 1000-element buffer,
#   3. group into batches of 8,
#   4. prefetch so data preparation overlaps with training.
train_ds = (
    filtered_train_ds
    .map(preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .shuffle(1000)
    .batch(8)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [None]:
from tensorflow.keras import layers, models

def _conv_block(x, filters):
    """Apply two 3x3, same-padded, ReLU-activated convolutions with `filters` channels."""
    x = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
    return layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(x)


def unet(input_size=(256, 256, 3)):
    """Build a U-Net for single-channel (binary) segmentation.

    The encoder has four stages (64, 128, 256, 512 filters), each a double
    convolution followed by 2x2 max pooling. A 1024-filter bottleneck follows,
    then four decoder stages that upsample with a stride-2 transposed
    convolution, concatenate the matching encoder output (skip connection) and
    apply another double convolution.

    Args:
        input_size: Shape passed to ``layers.Input`` (batch dimension
            excluded); the first two entries are treated as the spatial
            dimensions. Each known spatial dimension must be divisible by 16
            because the encoder halves the resolution four times and the
            decoder must reproduce the encoder's sizes exactly for the skip
            concatenations. ``None`` (unknown) dimensions are not checked.

    Returns:
        An uncompiled ``models.Model`` mapping the input image to a
        single-channel map of per-pixel sigmoid probabilities.

    Raises:
        ValueError: If a known spatial dimension is not a multiple of 16.
    """
    # Fail early with a clear message instead of an opaque shape mismatch
    # raised later by one of the skip-connection concatenations.
    for dim in input_size[:2]:
        if dim is not None and dim % 16 != 0:
            raise ValueError(
                "unet requires spatial dimensions divisible by 16, "
                f"got input_size={tuple(input_size)}"
            )

    inputs = layers.Input(input_size)

    # Contracting path (Encoder): remember each stage's output for the skips.
    skips = []
    x = inputs
    for filters in (64, 128, 256, 512):
        x = _conv_block(x, filters)
        skips.append(x)
        x = layers.MaxPooling2D((2, 2))(x)

    # Bottleneck
    x = _conv_block(x, 1024)

    # Expanding path (Decoder): consume the skips deepest-first.
    for filters, skip in zip((512, 256, 128, 64), reversed(skips)):
        x = layers.Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
        x = layers.concatenate([x, skip])
        x = _conv_block(x, filters)

    # Output layer: 1x1 convolution with sigmoid activation, i.e. one
    # foreground probability per pixel (binary segmentation, not softmax).
    output = layers.Conv2D(1, (1, 1), activation='sigmoid')(x)

    model = models.Model(inputs=[inputs], outputs=[output])
    return model

# Build the network for 256x256 three-channel inputs.
model = unet(input_size=(256, 256, 3))

# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# Fit the U-Net on the batched training pipeline for ten epochs.
model.fit(x=train_ds, epochs=10)