In [20]:
import numpy as np
import open3d as o3d
from PIL import Image
import matplotlib.pyplot as plt
import os
from utils.vis_utils import show_mask_ins, show_points_color

In [21]:
device = "cuda:0"

# dataset params
dataset_dir = "/home/rpartsey/code/eai/SAMPro3D-fork/data/scannet/scans"
scene_name = "scene0000_02"
model = "sam"
# scene_name = "scene0008_00"
# scene_name = "scene0013_01"

In [22]:
# experiment params
# experiments_dir = "/home/rpartsey/code/eai/SAMPro3D-fork/experiments/scannet/scans/3js_prompt"
experiments_dir = "/home/kuzhum/sam/SAMPro3D/experiments"
# experiment_name = "guitar"

prompt_path = f"{dataset_dir}/{scene_name}/{scene_name}_guitar.ply"
# sam_output_path =  f"{experiments_dir}/{scene_name}/{experiment_name}/sam_output"
# sampro3d_predictions =  f"{experiments_dir}/{scene_name}/{experiment_name}/sampro3d_predictions"
# output_vis_path = f"{experiments_dir}/{scene_name}/{experiment_name}/visualization"

In [23]:
import cv2
import numpy as np
import os
from tqdm import tqdm

# Define your dataset and experiment directories
# dataset_dir = 'path_to_dataset_dir'       # Replace with your dataset directory
# experiments_dir = 'path_to_experiments_dir'  # Replace with your experiments directory
# scene_name = 'scene_name'                   # Replace with your scene name

# Generate random colors for each object
colors = [np.random.randint(0, 256, 3).tolist() for _ in range(1000)]  # BGR format
object_names = [name for name in os.listdir(f"{experiments_dir}/{scene_name}") if name.endswith(model)]
print(f"Object names: {object_names}")

def show_mask_ins(mask, frame, color):
    # print(mask)
    # Create a color mask where the mask is True
    color_mask = frame.copy()
    color_mask[mask > 0] = color

    # Blend the color mask with the frame
    alpha = 0.5  # Transparency factor
    frame = cv2.addWeighted(frame, 1 - alpha, color_mask, alpha, 0)

    return frame

def show_points_color(points, frame, color, marker_size=5):
    for coord in points:
        x, y = int(coord[0]), int(coord[1])
        cv2.circle(frame, (x, y), marker_size, color, -1)  # Filled circle
    return frame

def create_visualization_video_with_opencv(start_frame_idx, end_frame_idx, output_video_path, fps=30, frame_size=(1920, 1080)):
    # Define the codec and initialize the video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # You can change the codec if needed
    video_writer = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)

    for i in tqdm(range(start_frame_idx, end_frame_idx + 1)):
        frame_path = f"{dataset_dir}/{scene_name}/color/{i}.jpg"
        frame = cv2.imread(frame_path)

        if frame is None:
            print(f"Frame {i} not found at {frame_path}")
            continue

        original_height, original_width = frame.shape[:2]

        # Resize frame to desired size if necessary
        if (original_width, original_height) != frame_size:
            frame = cv2.resize(frame, frame_size)

        for j, object_name in enumerate(object_names):
            sam_output_path = f"{experiments_dir}/{scene_name}/{object_name}/sam_output"
            mask_path = f"{sam_output_path}/masks_npy/{i}.npy"
            points_path = f"{sam_output_path}/points_npy/{i}.npy"

            # Overlay mask if it exists
            if os.path.exists(mask_path):
                mask = np.load(mask_path)
                
                if model == "sam2":
                    mask = mask.reshape(-1, 480, 640)

                if mask is None or mask.size == 0:
                    print(f"Mask at {mask_path} is empty or invalid.")
                    continue

                # **Squeeze the mask to remove singleton dimensions**
                mask = np.squeeze(mask)

                # # Ensure mask is 2D
                # if mask.ndim != 2:
                #     print(f"Mask at {mask_path} is not 2D after squeezing.")
                #     continue

                # # Convert mask to uint8 if necessary
                # if mask.dtype != np.uint8:
                #     mask = mask.astype(np.uint8)

                # Resize mask to match frame size if necessary
                if mask.shape != (frame_size[1], frame_size[0]):
                    if frame_size[0] > 0 and frame_size[1] > 0:
                        mask = cv2.resize(mask, (frame_size[0], frame_size[1]), interpolation=cv2.INTER_NEAREST)
                    else:
                        print(f"Invalid frame size: {frame_size}")
                        continue

                frame = show_mask_ins(mask, frame, colors[j])

            # Draw points if they exist
            if os.path.exists(points_path):
                points = np.load(points_path)

                # Adjust points if frame size changed
                if (original_width, original_height) != frame_size:
                    scale_x = frame_size[0] / original_width
                    scale_y = frame_size[1] / original_height
                    points[:, 0] = points[:, 0] * scale_x
                    points[:, 1] = points[:, 1] * scale_y

                frame = show_points_color(points, frame, colors[j], marker_size=5)

        # Write the processed frame to the video
        video_writer.write(frame)

    # Release the video writer
    video_writer.release()
    print(f"Video saved to {output_video_path}")

# Get number of frames in the scene
num_frames = len(os.listdir(f"{dataset_dir}/{scene_name}/color"))

# Example usage
create_visualization_video_with_opencv(
    start_frame_idx=0,
    end_frame_idx=num_frames,
    output_video_path=f'{scene_name}_{model}.mp4',
    frame_size=(640, 480)
)


Object names: ['guitar_sam']


  0%|          | 0/6171 [00:00<?, ?it/s]

100%|██████████| 6171/6171 [00:21<00:00, 285.17it/s][ WARN:0@7410.506] global loadsave.cpp:241 findDecoder imread_('/home/rpartsey/code/eai/SAMPro3D-fork/data/scannet/scans/scene0000_02/color/6170.jpg'): can't open/read file: check file path/integrity



Frame 6170 not found at /home/rpartsey/code/eai/SAMPro3D-fork/data/scannet/scans/scene0000_02/color/6170.jpg
Video saved to scene0000_02_sam.mp4


In [None]:
i = 772
frame_path = f"{dataset_dir}/{scene_name}/color/{i}.jpg"
frame = Image.open(frame_path)
frame.show()


mask_path = f"{sam_output_path}/masks_npy/{i}.npy"
mask = np.load(mask_path)
print(mask.shape)

points_path = f"{sam_output_path}/points_npy/{i}.npy"
points = np.load(points_path)
print(points.shape)


def visualize_masks_and_points(frame, points):
    """
    Visualize masks and points on the given frame for specified mask indices.
    
    Args:
    frame (PIL.Image): The input image
    mask (np.array): Array of masks
    points (np.array): Array of points
    start_idx (int): Starting index of masks to visualize
    end_idx (int): Ending index of masks to visualize
    """
    # Convert PIL Image to numpy array
    frame_np = np.array(frame)
    
    # Create a figure and axis
    fig, ax = plt.subplots(1, 1, figsize=(15, 10))
    
    # Display the frame
    ax.imshow(frame_np)
    
    # Plot masks and points
    show_points_color(points, np.ones(points.shape[0]), ax, np.random.rand(3), marker_size=200)
    
    ax.axis('off')
    plt.tight_layout()
    plt.show()

# Example usage:
start_idx = 0
end_idx = 2  # Visualize first 5 masks and points
visualize_masks_and_points(frame, points)