In [None]:
import os; os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import sys
# Make the local cds_vision_tools checkout, and its pytorch sub-package,
# importable by appending both relative locations to the module search path.
sys.path.extend(
    [
        "../../../../cds_vision_tools",
        "../../../../cds_vision_tools/cds_vision_tools/pytorch",
    ]
)

In [None]:
from pathlib import Path
import torch
import numpy as np
import cv2
import matplotlib.pyplot as plt
import torchvision
from torchvision.transforms import v2
import time
from PIL import Image
from collections import deque

In [None]:
from segmentation.annotator import coco2mask, read_coco_annotation_json_file, load_and_display_image_with_mask, add_background_images, get_image_mask_tensor
from segmentation.dataset import SegmentationDataset
from segmentation.model import LoadSegmentationModel
from segmentation.trainer import SegmentationTrainer
from segmentation.inference import img_inference, display_results
from vision_utils import preprocess_image, save_image

# Data Prep

### Data Input Parameters

In [None]:
# Root of the segmentation test project; every other path is derived from it
seg_data_dir = Path("../../../test/cds_vision_tools/pytorch/segmentation/TestProject")

# COCO annotation file
path_to_annotation = seg_data_dir.joinpath("annotations-unittest.json")

# Image directories: originals, background-only images, and augmented images
original_images_dir, background_images_dir, augmented_images_dir = (
    seg_data_dir / sub_dir
    for sub_dir in ("images", "background", "augmented_images_path")
)

# Mask directories, with one sub-directory each for the train and val splits
masks_dir = seg_data_dir / "masks"
train_masks_dir, val_masks_dir = masks_dir / "train", masks_dir / "val"

# Batch size used by the training DataLoader
batch_size = 4

### Create augmented images and masks from the original images

In [None]:
# Split and augmentation settings, grouped so they are easy to find and tune.
# NOTE(review): train_portion + val_portion = 0.9; presumably the remainder is
# a held-out split - confirm against coco2mask.
split_options = {
    "train_portion": 0.7,
    "val_portion": 0.2,
    "shuffle": True,
    "augmentation_portion": 1.0,
}

# Generate masks (and augmented images) from the COCO annotations
coco2mask(
    [path_to_annotation],
    original_images_dir,
    masks_dir,
    augmented_images_dir,
    background_images_dir,
    **split_options,
)

### Display image and mask of a sample image file

In [None]:
# Option 1: display one augmented image together with its saved mask.
# The paths are built from the directories configured in the data-input cell
# rather than repeating the full project path, so changing seg_data_dir is
# enough to retarget this cell.
# NOTE(review): coco2mask is called with shuffle=True, so this sample is not
# guaranteed to be in the train split on every run - confirm, or pick the file
# from train_masks_dir dynamically.
sample_stem = "10-29-22-66_20231205-190806-6_augmented"
image_file = str(augmented_images_dir / f"{sample_stem}.jpg")
mask_file = str(train_masks_dir / f"{sample_stem}.pt")
load_and_display_image_with_mask(image_file, mask_file)

In [None]:
# Option 2: build the annotation table (COCO annotations plus background
# images) and rebuild the mask of one entry directly from its annotation.
annotations_df = add_background_images(
    background_images_dir,
    read_coco_annotation_json_file(Path(path_to_annotation)),
)
display(annotations_df)

# Check the mask of an image with annotation (first row of the table)
annotated_row = annotations_df.iloc[0]
img_t, mask_t = get_image_mask_tensor(
    original_images_dir,
    background_images_dir,
    annotated_row["file_name"],
    annotated_row["segmentation"],
)
mask_array = mask_t.squeeze().numpy()
plt.imshow(mask_array)

In [None]:
# Check if the mask is empty for images with only background.
# NOTE(review): row 12 is assumed to be a background-only entry - confirm
# against the displayed annotations table.
background_row = annotations_df.iloc[12]
img_t, mask_t = get_image_mask_tensor(
    original_images_dir,
    background_images_dir,
    background_row["file_name"],
    background_row["segmentation"],
)
mask_array = mask_t.squeeze().numpy()
plt.imshow(mask_array)


### Generate Training and Validation data

In [None]:
def _make_dataset(split_masks_dir):
    """Build a SegmentationDataset for one split from the files in split_masks_dir."""
    return SegmentationDataset(
        [original_images_dir, augmented_images_dir],
        split_masks_dir,
        os.listdir(split_masks_dir),
    )


# One dataset per split, each driven by the mask files found in its directory
train_dataset = _make_dataset(train_masks_dir)
val_dataset = _make_dataset(val_masks_dir)

# Training batches use the configured batch size; validation yields one sample
# at a time. Both loaders shuffle and drop an incomplete final batch.
train_data_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)
val_data_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=1,
    shuffle=True,
    drop_last=True,
)

# Modeling

In [None]:
# Load the default segmentation model, i.e. deeplabv3 with MobileNet.
# The wrapped network is accessed as `model_loader.model` in the cells below.
model_loader = LoadSegmentationModel()

### Model Input Parameters

In [None]:
# Training hyper-parameters
epochs = 10
LR = 0.001
loss_fun = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_loader.model.parameters(), lr=LR)

# Directory handed to the trainer. Created in one race-free call, including
# any missing parents, instead of a separate exists-check followed by makedirs.
model_dir = seg_data_dir / "models"
model_dir.mkdir(parents=True, exist_ok=True)

# Training

In [None]:
# Wire the model, data loaders, optimizer, output directory and loss together
trainer = SegmentationTrainer(
    model_loader.model,
    train_data_loader,
    val_data_loader,
    optimizer,
    model_dir,
    loss_fun,
    num_epochs=epochs,
)

# Run training; train() returns the metrics, shown as the cell output
metrics_dict = trainer.train()
metrics_dict

# Load trained Segmentation model

### Saved Model Input Parameters

In [None]:
# Location of the saved checkpoint. The file sits directly in model_dir, so
# the previous "../models/" round trip is dropped.
model_dir = seg_data_dir / "models"
model_path = model_dir / "wreckathon_seg_epoch_10.pt"

# Device used both for loading the weights and for inference
device = "cpu"

### Load model

In [None]:
# wreckathon_seg_epoch_10 is deeplabv3_mobilenet model
model_loader = LoadSegmentationModel()

# Load model state dictionary.
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
state_dict = torch.load(model_path, map_location=device, weights_only=True)
model_loader.model.load_state_dict(state_dict)

# Switch to inference mode and place the model on the target device.
# The trailing semicolon keeps the (very long) model repr out of the cell output.
model_loader.model.eval()
model_loader.model.to(device);

# Inference on Single image

### Image Input Parameters

In [None]:
image_file = "10-29-22-66_20231205-200437-10.jpeg"
image_file_path = original_images_dir / image_file

# Read image with OpenCV.
# cv2.imread does not raise on a missing or unreadable file - it returns None -
# so fail here with a clear message instead of deep inside preprocessing.
img = cv2.imread(str(image_file_path))
if img is None:
    raise FileNotFoundError(f"Could not read image file: {image_file_path}")

# Region of interest passed to preprocess_image (pixel bounds)
hotspot = {"top": 100, "bottom": 415, "left": 500, "right": 900}

### Run Inference


In [None]:
# Time preprocessing and inference together
start = time.perf_counter()
resized_img, processed_img = preprocess_image(img, hotspot)
overlay_img = img_inference(model_loader.model, resized_img, processed_img)
inf_time = time.perf_counter() - start
print(f'Cost {inf_time} s')

# Show the resized input next to the overlay
display_results(resized_img, overlay_img)

# Save the overlay alongside the input image under an "overlay-" prefix.
# NOTE(review): this writes into original_images_dir, which is also a dataset
# source directory - confirm that is intended.
overlay_img_file = f"overlay-{image_file}"
save_image(overlay_img, original_images_dir / overlay_img_file)

# Inference on images from the validation DataLoader

### DataLoader Input Parameters

In [None]:
# Maximum number of images to run through the model and display
image_display_limit = 3
# Region of interest handed to preprocess_image for every image (pixel bounds)
hotspot = {"top": 100, "bottom": 250, "left": 180, "right": 350}
# Uncomment to pass no hotspot to preprocess_image instead
# hotspot = None

### Run Inference
Note: The model results shown in the displayed images are not great because the model was trained on cropped hotspot images but is currently run on the entire frame.

In [None]:
# Number of images displayed so far (stays 0 if the loader yields nothing)
i = 0
with torch.no_grad():
    # Flatten the loader into a single stream of image tensors so that one
    # `break` is enough to stop once the display limit is reached.
    val_images = (
        x_img
        for x_batch, _y_true_batch in val_data_loader
        for x_img in x_batch
    )

    for i, x_img in enumerate(val_images, start=1):
        # convert image from [channels, height, width] to [height, width, channels]
        img = np.array(x_img.permute(1, 2, 0).type(torch.int))

        # Preprocess the image based on hotspot information
        resized_img, processed_img = preprocess_image(img, hotspot)

        # Inference
        overlay_img = img_inference(model_loader.model, resized_img, processed_img)

        # Display the results
        display_results(resized_img, overlay_img)

        # Stop processing once the requested number of images has been shown
        if i == image_display_limit:
            break

# Inference on the whole frame of a video

### Input Parameters

In [None]:
# Input video, located in the project's "videos" directory
video_dir = seg_data_dir / "videos"
video_file = "video2_crop.mp4"
video_path = video_dir.joinpath(video_file)

# Output video (frames with overlay), written next to the input
output_video_file = "video2_crop_whole_frame.avi"
output_video_path = video_dir.joinpath(output_video_file)

### Video Capture and Output Initialization

In [None]:
# Open the video capture object using the video path
cap = cv2.VideoCapture(str(video_path))

# Stop immediately if the video cannot be opened. The previous version called
# an undefined `logger` and then carried on using the released capture.
if not cap.isOpened():
    cap.release()
    raise IOError(f"Error opening video stream or file: {video_path}")

# Get the video frame width and height
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the output video writer object (MJPG codec, a quarter of the source
# frame rate, same frame size as the input)
output = cv2.VideoWriter(
    str(output_video_path),
    cv2.VideoWriter_fourcc(*'MJPG'),
    cap.get(cv2.CAP_PROP_FPS) * 0.25,
    (width, height),
)

### Run Inference
Note: The model results shown in the displayed images are not great because the model was trained on cropped hotspot images but is currently run on the entire frame.

In [None]:
# Last overlay frame written; displayed once after the loop. Calling
# plt.imshow for every frame stacks one image per frame on the same axes
# while the cell output still only shows the last one.
masked_image = None

# Main video processing loop. try/finally guarantees the capture and writer
# are released even if preprocessing or inference raises mid-video.
try:
    while True:
        # Read a frame from the video capture object
        ret, frame = cap.read()

        # Exit the loop at end of video or on a read error
        if not ret:
            break

        # Preprocess the frame for model inference
        resized_img, processed_img = preprocess_image(frame)

        # Perform inference on the preprocessed frame using the loaded model
        overlay_img = img_inference(model_loader.model, resized_img, processed_img)

        # Resize the overlay image to match the original frame size
        # NOTE(review): unlike the IP-camera loop, no channel-order conversion
        # is applied before writing - confirm which colour order img_inference
        # returns.
        masked_image = cv2.resize(
            overlay_img,
            (frame.shape[1], frame.shape[0]),
            interpolation=cv2.INTER_AREA,
        )

        # Write the processed frame (with segmentation mask) to the output video
        output.write(masked_image)
finally:
    cap.release()  # Release video capture object
    output.release()  # Release video writer object
    cv2.destroyAllWindows()  # Close any OpenCV windows

# Show the last processed frame as the cell output
if masked_image is not None:
    plt.imshow(masked_image)

# Inference on a hotspot within the whole frame of the video

### Input Parameters

In [None]:
# Input video and the output file for the hotspot overlay, both in "videos"
video_dir = seg_data_dir / "videos"
video_file, output_video_file = "video2_crop.mp4", "video2_crop_hotspot.avi"
video_path = video_dir.joinpath(video_file)
output_video_path = video_dir.joinpath(output_video_file)

# Hotspot area (pixel bounds within the full frame)
hotspot = dict(top=5, bottom=320, left=500, right=900)

# State for the entropy-based event detection used in the processing loop
entropy_list = deque(maxlen=1000)  # bounded history passed to entropy_calculator
entropy_change_threshold = 2  # threshold passed to get_event_status
prev_entropy_change_percent = 0  # previous frame's entropy change
event_number = 0  # number of "event_end" statuses seen so far

### Video Capture and Output Initialization

In [None]:
# Open the video capture object using the video path
cap = cv2.VideoCapture(str(video_path))

# Stop immediately if the video cannot be opened. The previous version called
# an undefined `logger` and then carried on using the released capture.
if not cap.isOpened():
    cap.release()
    raise IOError(f"Error opening video stream or file: {video_path}")

# Get the video frame width and height
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the output video writer object (MJPG codec, a quarter of the source
# frame rate, same frame size as the input)
output = cv2.VideoWriter(
    str(output_video_path),
    cv2.VideoWriter_fourcc(*'MJPG'),
    cap.get(cv2.CAP_PROP_FPS) * 0.25,
    (width, height),
)

### Run Inference and count the number of bones passed through the hotspot area

In [None]:
# NOTE(review): get_entropy, entropy_calculator and get_event_status are not
# imported or defined anywhere in this notebook - add the import before running
# this cell on a fresh kernel.

# Last annotated frame; displayed once after the loop. Calling plt.imshow for
# every frame stacks one image per frame on the same axes.
last_frame = None

# Main video processing loop. try/finally guarantees the capture and writer
# are released even if a step raises mid-video.
try:
    while True:
        # Read a frame from the video capture object
        ret, frame = cap.read()

        # Exit the loop at end of video or on a read error
        if not ret:
            break

        # Calculate entropy of the hotspot area
        entropy, hot_spot_img = get_entropy(frame, hotspot)

        # Update entropy tracking and calculate entropy change percent
        entropy_list, entropy_change_percent = entropy_calculator(entropy, entropy_list)

        # Determine event status based on entropy change
        event_status = get_event_status(
            entropy_change_percent,
            prev_entropy_change_percent,
            entropy_change_threshold,
        )

        if event_status == "image_collection":
            # Preprocess the hotspot region of the frame for model inference
            resized_img, processed_img = preprocess_image(frame, hotspot)

            # Perform inference on the preprocessed frame using the loaded model
            overlay_img = img_inference(model_loader.model, resized_img, processed_img)

            # Resize the overlay image to match the hotspot region
            masked_image = cv2.resize(
                overlay_img,
                (hot_spot_img.shape[1], hot_spot_img.shape[0]),
                interpolation=cv2.INTER_AREA,
            )
            # Swap the red and blue channels of the overlay.
            # NOTE(review): the frame it is pasted into is BGR (OpenCV's
            # order); confirm the overlay's channel order from img_inference.
            masked_image = cv2.cvtColor(masked_image, cv2.COLOR_BGR2RGB)

            # Paste the overlay into the hotspot region of the original frame
            frame[hotspot["top"]:hotspot["bottom"],
                  hotspot["left"]:hotspot["right"],
                  :] = masked_image

        elif event_status == "event_end":
            # An event has finished: count it
            event_number += 1

        # Remember this frame's entropy change for the next iteration
        prev_entropy_change_percent = entropy_change_percent

        # Draw a rectangle around the hotspot region on the frame
        frame = cv2.rectangle(
            frame,
            (hotspot["left"], hotspot["top"]),
            (hotspot["right"], hotspot["bottom"]),
            (255, 0, 0),
            1,
        )

        # Add text overlay on the frame to display the current event count
        frame = cv2.putText(
            frame,
            f"Events : {event_number}",
            (100, 100),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (255, 0, 0),
            2,
            cv2.LINE_AA,
        )

        # Write the processed frame (with segmentation mask) to the output video
        output.write(frame)
        last_frame = frame
finally:
    cap.release()  # Release video capture object
    output.release()  # Release video writer object
    cv2.destroyAllWindows()  # Close any OpenCV windows

# Show the last annotated frame. Frames read by OpenCV are BGR while
# matplotlib expects RGB, so convert for display only.
if last_frame is not None:
    plt.imshow(cv2.cvtColor(last_frame, cv2.COLOR_BGR2RGB))

# Inference on the whole frame of an IP camera stream

### Input Parameters

In [None]:
# Image capture settings
images_per_cycle = 400  # maximum number of frames processed by the loop below
cam_id = 63
stream_protocol = "rtsp"

# Camera credentials are read from the environment so they are never stored
# in the notebook. Previously `username` and `password` were not defined
# anywhere, which raised a NameError on a fresh kernel.
try:
    username = os.environ["CAMERA_USERNAME"]
    password = os.environ["CAMERA_PASSWORD"]
except KeyError as missing_var:
    raise RuntimeError(
        "Set the CAMERA_USERNAME and CAMERA_PASSWORD environment variables "
        "before connecting to the IP camera"
    ) from missing_var

# connect to ip camera
ip_address = f"10.29.22.{cam_id}/profile2/media.smp"
# AXIS camera - f"10.29.22.{cam_id}/axis-media/media.amp"
video_address = f"{stream_protocol}://{username}:{password}@{ip_address}"

# path to save the video from IP address with overlay
# (video_dir is defined in the video input-parameter cells above)
output_video_file = f"ip_{cam_id}.avi"
output_video_path = video_dir / output_video_file

### Video Capture and Output Initialization

In [None]:
# Open the video capture object using the IP address
cap = cv2.VideoCapture(video_address)

# Stop immediately if the stream cannot be opened; continuing would create a
# writer from a released capture. The message deliberately omits video_address
# because it embeds the credentials.
if not cap.isOpened():
    cap.release()
    raise ConnectionError(f"Cannot open RTSP stream for camera - {cam_id}")

# fps of the camera, and the fraction of it used for the output video
input_fps = cap.get(cv2.CAP_PROP_FPS)
frame_rate = 0.25
output_fps = input_fps * frame_rate

# Get the video frame width and height
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the output video writer object
output = cv2.VideoWriter(
    str(output_video_path),
    cv2.VideoWriter_fourcc(*'MJPG'),
    output_fps,
    (width, height),
)

### Run Inference on limited frames

In [None]:
frame_count = 0

# Last overlay frame written; displayed once after the loop. Calling
# plt.imshow for every frame stacks one image per frame on the same axes.
masked_image = None

# Main video processing loop, limited to images_per_cycle frames.
# try/finally guarantees the stream and writer are released even if a step
# raises part-way through.
try:
    while frame_count < images_per_cycle:
        # Read a frame from the video capture object
        ret, frame = cap.read()

        # Exit the loop if the stream ends or a read fails
        if not ret:
            break

        # Preprocess the frame for model inference
        resized_img, processed_img = preprocess_image(frame)

        # Perform inference on the preprocessed frame using the loaded model
        overlay_img = img_inference(model_loader.model, resized_img, processed_img)

        # Resize the overlay image to match the original frame size
        masked_image = cv2.resize(
            overlay_img,
            (frame.shape[1], frame.shape[0]),
            interpolation=cv2.INTER_AREA,
        )
        # Swap the red and blue channels of the overlay.
        # NOTE(review): the same array is both displayed and written to the
        # video - confirm the channel order returned by img_inference.
        masked_image = cv2.cvtColor(masked_image, cv2.COLOR_BGR2RGB)

        # increase counter
        frame_count += 1

        # Write the processed frame (with segmentation mask) to the output video
        output.write(masked_image)
finally:
    cap.release()  # Release video capture object
    output.release()  # Release video writer object
    cv2.destroyAllWindows()  # Close any OpenCV windows

# Show the last processed frame as the cell output
if masked_image is not None:
    plt.imshow(masked_image)