In [5]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate

# Data preparation
def load_data(folder, size=(256, 256)):
    """Load every readable image in *folder* as a fixed-size RGB array.

    Files are visited in sorted filename order. ``os.listdir`` makes no
    ordering guarantee, and callers load images and their masks from two
    separate folders, so a deterministic order is needed for the i-th image
    and the i-th mask to correspond (this still relies on matching files
    sorting the same way in both folders).

    Args:
        folder: Directory whose entries are read with ``cv2.imread``.
        size: Target ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        A NumPy array stacking the loaded images, shape
        ``(n, height, width, 3)``. When nothing could be loaded, an empty
        ``uint8`` array of shape ``(0, height, width, 3)`` is returned so
        downstream slicing and shape checks still see four dimensions.

    Entries that cannot be decoded, or that OpenCV fails to convert or
    resize, are reported on stdout and skipped.
    """
    width, height = size
    images = []
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        try:
            img = cv2.imread(path)
            if img is None:
                # imread signals an unreadable/non-image entry with None.
                print(f"Unable to read image: {path}")
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads as BGR
            img = cv2.resize(img, (width, height))
        except cv2.error as e:
            print(f"Error processing image: {path} - {e}")
            continue
        images.append(img)

    if not images:
        return np.empty((0, height, width, 3), dtype=np.uint8)
    return np.array(images)

# Load inputs and their ground-truth masks for the training and test splits.
train_images, train_masks, test_images, test_masks = (
    load_data(folder)
    for folder in (
        'sample_data/Train',
        'sample_data/Result',
        'sample_data/Test',
        'sample_data/Test_Result',
    )
)

# load_data returns 3-channel arrays; keep only the first channel of each
# mask while retaining the trailing channel axis.
train_masks = train_masks[..., :1]
test_masks = test_masks[..., :1]


def _print_shapes(labelled_arrays):
    """Print each label followed by the shape of its array."""
    for label, array in labelled_arrays:
        print(label, array.shape)


# Verify shapes
_print_shapes((
    ("Train Images Shape:", train_images),
    ("Train Masks Shape:", train_masks),
    ("Test Images Shape:", test_images),
    ("Test Masks Shape:", test_masks),
))

# Scale every array from byte values to float32 in [0, 1].
train_images, train_masks, test_images, test_masks = (
    array.astype('float32') / 255.0
    for array in (train_images, train_masks, test_images, test_masks)
)

# Verify shapes after normalization
_print_shapes((
    ("Train Images Shape (after normalization):", train_images),
    ("Train Masks Shape (after normalization):", train_masks),
    ("Test Images Shape (after normalization):", test_images),
    ("Test Masks Shape (after normalization):", test_masks),
))


def unet_model(input_shape):
    """Assemble a four-level U-Net ending in a one-channel sigmoid map.

    The encoder applies two 3x3 ReLU convolutions followed by 2x2 max
    pooling at 64, 128, 256 and 512 filters; the bottleneck uses 1024
    filters. Each decoder level upsamples by 2, applies a 2x2 convolution,
    concatenates the matching encoder output on the channel axis, and
    applies two more 3x3 convolutions. All convolutions in the encoder and
    decoder use ``padding='same'``.

    Args:
        input_shape: Shape tuple passed to ``Input`` (no batch dimension).
            Because of the four pool/upsample stages, height and width
            should be divisible by 16 for the skip concatenations to match.

    Returns:
        An uncompiled Keras ``Model`` mapping the input to a
        single-channel sigmoid output.
    """

    def conv_pair(tensor, filters):
        # Two stacked 3x3 same-padded ReLU convolutions.
        for _ in range(2):
            tensor = Conv2D(filters, 3, activation='relu', padding='same')(tensor)
        return tensor

    inputs = Input(input_shape)

    # Encoder: remember each level's pre-pooling output for the skip links.
    skips = []
    x = inputs
    for filters in (64, 128, 256, 512):
        x = conv_pair(x, filters)
        skips.append(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    # Bottom/Center
    x = conv_pair(x, 1024)

    # Decoder: walk the skip connections from deepest to shallowest.
    for filters, skip in zip((512, 256, 128, 64), reversed(skips)):
        up_conv = Conv2D(filters, 2, activation='relu', padding='same')
        x = up_conv(UpSampling2D(size=(2, 2))(x))
        x = concatenate([skip, x], axis=3)
        x = conv_pair(x, filters)

    # Output
    outputs = Conv2D(1, 1, activation='sigmoid')(x)
    return Model(inputs=inputs, outputs=outputs)

# Build the network for 256x256 three-channel inputs and show its output shape.
model = unet_model(input_shape=(256, 256, 3))
print(model.output_shape)

# Model training
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    x=train_images,
    y=train_masks,
    batch_size=8,
    epochs=10,
    validation_split=0.2,
)

# Model evaluation. Note the training figures are computed over all of
# train_images, including the samples held out by validation_split above.
train_loss, train_acc = model.evaluate(x=train_images, y=train_masks)
test_loss, test_acc = model.evaluate(x=test_images, y=test_masks)
print(f'Training Loss: {train_loss}, Accuracy: {train_acc}')
print(f'Testing Loss: {test_loss}, Accuracy: {test_acc}')

def segment_images(images):
    """Predict a binary mask per image, save each as a JPEG, and return them.

    Args:
        images: Array-like batch of inputs accepted by the module-level
            ``model`` (presumably float images shaped like the training
            data -- confirm against the caller).

    Returns:
        A list with one ``uint8`` array per input image, holding 255 where
        the model output exceeds 0.5 and 0 elsewhere. An empty input yields
        an empty list without invoking the model.

    Side effects:
        Writes ``segmented_image_<i>.jpg`` to the current working directory
        for every mask, where ``i`` is the image's position in *images*.
    """
    batch = np.asarray(images)
    if batch.size == 0:
        return []

    # Predict the whole batch in one call instead of once per image;
    # Model.predict is meant for batches and is slow when called in a loop.
    probabilities = model.predict(batch)

    # Thresholding: sigmoid output -> {0, 255} mask.
    masks = (probabilities > 0.5).astype(np.uint8) * 255
    segmented_images = list(masks)

    # Save the segmented images. The masks are written as-is; nothing here
    # depends on any array other than the *images* argument.
    for i, img in enumerate(segmented_images):
        cv2.imwrite(f'segmented_image_{i}.jpg', img)

    return segmented_images

# Segment the test set with the fitted model; segment_images also writes
# each resulting mask to segmented_image_<i>.jpg in the working directory.
segmented_test_images = segment_images(test_images)

Train Images Shape: (8, 256, 256, 3)
Train Masks Shape: (8, 256, 256, 1)
Test Images Shape: (8, 256, 256, 3)
Test Masks Shape: (8, 256, 256, 1)
Train Images Shape (after normalization): (8, 256, 256, 3)
Train Masks Shape (after normalization): (8, 256, 256, 1)
Test Images Shape (after normalization): (8, 256, 256, 3)
Test Masks Shape (after normalization): (8, 256, 256, 1)
(None, 256, 256, 1)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training Loss: 0.1951979696750641, Accuracy: 0.9366970062255859
Testing Loss: 0.16530221700668335, Accuracy: 0.9334011077880859
