In [1]:
import cv2
import numpy as np
import tensorflow as tf
from PIL import Image

# Load the saved U-Net model
model = tf.keras.models.load_model('best_unet_model.keras')

# Function to preprocess the frame for model prediction
def preprocess_frame(frame):
    # Resize frame to model's input size (128x128 in this case)
    resized_frame = cv2.resize(frame, (128, 128))  # Ensure frame is valid before resizing
    
    # Normalize the image to [0, 1]
    resized_frame = resized_frame / 255.0
    
    # Add a batch dimension
    input_frame = np.expand_dims(resized_frame, axis=0)
    
    return input_frame

# Function to post-process and display the predicted mask
def postprocess_mask(mask):
    # Postprocess the predicted mask (argmax over classes if it's a multi-class model)
    predicted_mask = tf.argmax(mask, axis=-1)
    
    # Remove batch dimension and return as a NumPy array
    return np.squeeze(predicted_mask)

# Open a connection to the webcam
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open video capture.")
    exit()

while True:
    # Capture frame-by-frame from the webcam
    ret, frame = cap.read()

    if not ret:
        print("Error: Failed to capture frame.")
        break

    if frame is None:
        print("Error: Captured frame is None.")
        continue  # Skip this iteration and wait for the next valid frame

    # Preprocess the captured frame
    input_frame = preprocess_frame(frame)

    # Make predictions
    predicted_mask = model.predict(input_frame)

    # Post-process the predicted mask
    mask = postprocess_mask(predicted_mask)
    
    mask = mask.astype('float32')

    # Resize mask to match the original frame size
    mask_resized = cv2.resize(mask, (frame.shape[1], frame.shape[0]))

    # Overlay the mask on the original frame (optional: use different colors or blending)
    overlay = frame.copy()
    overlay[mask_resized == 1] = [0, 255, 0]  # Example: set mask area to green

    # Display the original frame with overlay
    cv2.imshow("Original Frame with Segmentation", overlay)

    # Display the mask alone
    cv2.imshow("Predicted Mask", mask_resized)

    # Press 'q' to exit the real-time loop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close windows
cap.release()
cv2.destroyAllWindows()


I0000 00:00:1728375189.576466  279193 service.cc:146] XLA service 0x28269a350 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1728375189.576780  279193 service.cc:154]   StreamExecutor device (0): Host, Default Version
2024-10-08 11:13:09.634775: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 844ms/step


I0000 00:00:1728375190.210329  279193 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40

#### With mask and whole frame overlay

In [4]:
import cv2
import numpy as np
import tensorflow as tf

# Load the pre-trained U-Net model (adjust the path to your model)
model = tf.keras.models.load_model('best_unet_model.keras')

# Function to preprocess the frame for model prediction
def preprocess_frame(frame):
    # Resize frame to model's input size (assuming 128x128 in this case)
    resized_frame = cv2.resize(frame, (128, 128))
    resized_frame = resized_frame / 255.0  # Normalize pixel values to [0, 1]
    input_frame = np.expand_dims(resized_frame, axis=0)  # Add batch dimension
    return input_frame

# Function to post-process the mask (resize to original frame size and convert to binary)
def postprocess_mask(mask, original_frame):
    # Remove the batch dimension and resize the mask to the original frame size
    mask = tf.argmax(mask, axis=-1)
    mask = tf.squeeze(mask)  # Remove single-dimensional entries
    mask = mask.numpy().astype('float32')  # Convert TensorFlow tensor to NumPy array
    mask_resized = cv2.resize(mask, (original_frame.shape[1], original_frame.shape[0]))  # Resize mask
    return mask_resized

# Function to overlay the mask on the original frame
def overlay_mask_on_image(image, mask):
    colored_mask = np.zeros_like(image)
    colored_mask[mask == 1] = [0, 255, 0]  # Green mask for the segmentation
    
    # Blend the original image and the mask (70% original, 30% mask)
    blended_image = cv2.addWeighted(image, 0.7, colored_mask, 0.3, 0)
    return blended_image

# Open a connection to the webcam
cap = cv2.VideoCapture(0)  # Change the device index if necessary

# Check if the webcam is opened successfully
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

while True:
    # Capture frame-by-frame from the webcam
    ret, frame = cap.read()

    if not ret:
        print("Error: Failed to capture frame.")
        break

    # Preprocess the captured frame for prediction
    input_frame = preprocess_frame(frame)

    # Make predictions using the model
    predicted_mask = model.predict(input_frame)

    # Post-process the predicted mask
    mask_resized = postprocess_mask(predicted_mask, frame)

    # Overlay the mask onto the original frame
    blended_frame = overlay_mask_on_image(frame, mask_resized)

    # Display the blended frame with overlay
    cv2.imshow("Image with Mask Overlay", blended_frame)

    # Press 'q' to exit the real-time loop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close windows
cap.release()
cv2.destroyAllWindows()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 680ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5