In [1]:
import tensorflow as tf


print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


Num GPUs Available:  1


In [2]:
import tensorflow as tf


physical_devices = tf.config.list_physical_devices('GPU')
tf.config.set_visible_devices(physical_devices[0], 'GPU')


In [3]:
import tensorflow as tf


physical_devices = tf.config.list_physical_devices('GPU')
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)


In [5]:
import tensorflow as tf
import cv2
import os
import numpy as np
from tensorflow.keras import layers, models


print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


physical_devices = tf.config.list_physical_devices('GPU')
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)


class_map = {
    "aeroplane": 1,
    "bicycle": 2,
    "bird": 3,
    "boat": 4,
    "bottle": 5,
    "bus": 6,
    "car": 7,
    "cat": 8,
    "chair": 9,
    "cow": 10,
    "diningtable": 11,
    "dog": 12,
    "horse": 13,
    "motorbike": 14,
    "person": 15,
    "pottedplant": 16,
    "sheep": 17,
    "sofa": 18,
    "train": 19,
    "tvmonitor": 20
}

def create_model(input_shape):
    inputs = layers.Input(shape=input_shape)

    #Encoder
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
    x = layers.MaxPooling2D((2, 2))(x)

    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2))(x)

    #Bottleneck
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)

    #Decoder
    x = layers.Conv2DTranspose(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)

    x = layers.Conv2DTranspose(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)

    
    outputs = layers.Conv2D(21, (1, 1), activation='softmax')(x)

    model = models.Model(inputs, outputs)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model


def parse_xml(xml_file, image_size=(256, 256)):
    import xml.etree.ElementTree as ET
    mask = np.zeros(image_size, dtype=np.uint8)
    tree = ET.parse(xml_file)
    root = tree.getroot()

    for obj in root.findall('object'):
        class_name = obj.find('name').text
        if class_name in class_map:
            class_id = class_map[class_name]
            bndbox = obj.find('bndbox')

            
            xmin = int(float(bndbox.find('xmin').text))
            ymin = int(float(bndbox.find('ymin').text))
            xmax = int(float(bndbox.find('xmax').text))
            ymax = int(float(bndbox.find('ymax').text))

            mask[ymin:ymax, xmin:xmax] = class_id

    return mask

def load_and_process_image(image_file, xml_file, image_size=(256, 256)):
    image = cv2.imread(image_file)
    image = cv2.resize(image, image_size)
    mask = parse_xml(xml_file, image_size)
    return image, mask

def prepare_data(images_dir, annotations_dir, image_size=(256, 256)):
    images = []
    masks = []
    for image_name in os.listdir(images_dir):
        image_path = os.path.join(images_dir, image_name)
        xml_path = os.path.join(annotations_dir, image_name.replace('.jpg', '.xml'))
        if os.path.exists(xml_path):
            image, mask = load_and_process_image(image_path, xml_path, image_size)
            images.append(image)
            masks.append(mask)
    
    return np.array(images), np.array(masks)



Num GPUs Available:  1


In [6]:

images_dir = "/kaggle/input/pascal-voc-2012-dataset/VOC2012_train_val/VOC2012_train_val/JPEGImages"
annotations_dir = "/kaggle/input/pascal-voc-2012-dataset/VOC2012_train_val/VOC2012_train_val/Annotations"
images, masks = prepare_data(images_dir, annotations_dir, image_size=(256, 256))


model = create_model((256, 256, 3))
model.fit(images, masks, epochs=10, batch_size=8)


Epoch 1/10
[1m2141/2141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 32ms/step - accuracy: 0.5593 - loss: 2.1504
Epoch 2/10
[1m2141/2141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 30ms/step - accuracy: 0.5699 - loss: 1.6706
Epoch 3/10
[1m2141/2141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 30ms/step - accuracy: 0.5695 - loss: 1.6690
Epoch 4/10
[1m2141/2141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 30ms/step - accuracy: 0.5723 - loss: 1.6469
Epoch 5/10
[1m2141/2141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 30ms/step - accuracy: 0.5709 - loss: 1.6355
Epoch 6/10
[1m2141/2141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 30ms/step - accuracy: 0.5683 - loss: 1.6277
Epoch 7/10
[1m2141/2141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 30ms/step - accuracy: 0.5706 - loss: 1.6083
Epoch 8/10
[1m2141/2141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 30ms/step - accuracy: 0.5732 - loss: 1.5920
Epoch 9/

<keras.src.callbacks.history.History at 0x7bff10714b20>

In [7]:

model.save("/kaggle/working//unet_model.h5")
