In [None]:
# segment image region using  fine tune model
# See Train.py on how to fine tune/train the model
import numpy as np
import torch
import cv2
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

# use bfloat16 for the entire script (memory efficient)
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()

# Load image

image_path = r"C:\Users\sinha\Desktop\python\captured_image.png" # path to image
mask_path = r"C:\Users\sinha\segment-anything-2\notebooks\mask.png" # path to mask, the mask will define the image region to segment

def read_image(image_path, mask_path): # read and resize image and mask
        img = cv2.imread(image_path)[...,::-1]  # read image as rgb
        mask = cv2.imread(mask_path,0) # mask of the region we want to segment

        # Resize image to maximum size of 1024

        r = np.min([1024 / img.shape[1], 1024 / img.shape[0]])
        img = cv2.resize(img, (int(img.shape[1] * r), int(img.shape[0] * r)))
        mask = cv2.resize(mask, (int(mask.shape[1] * r), int(mask.shape[0] * r)),interpolation=cv2.INTER_NEAREST)
        return img, mask
image,mask = read_image(image_path, mask_path)
num_samples = 30 # number of points/segment to sample
def get_points(mask,num_points): # Sample points inside the input mask
        points=[]
        for i in range(num_points):
            coords = np.argwhere(mask > 0)
            yx = np.array(coords[np.random.randint(len(coords))])
            points.append([[yx[1], yx[0]]])
        return np.array(points)
input_points = get_points(mask,num_samples)
# read image and sample points


# Load model you need to have pretrained model already made
sam2_checkpoint = r"C:\Users\sinha\segment-anything-2\checkpoints\sam2_hiera_small.pt" # "sam2_hiera_large.pt"
model_cfg = "sam2_hiera_s.yaml" # "sam2_hiera_l.yaml"
sam2_model = build_sam2(model_cfg, sam2_checkpoint, device="cuda")

# Build net and load weights
predictor = SAM2ImagePredictor(sam2_model)
predictor.model.load_state_dict(torch.load("model.torch"))

# predict mask

with torch.no_grad():
        predictor.set_image(image)
        masks, scores, logits = predictor.predict(
            point_coords=input_points,
            point_labels=np.ones([input_points.shape[0],1])
        )

# Short predicted masks from high to low score

masks=masks[:,0].astype(bool)
shorted_masks = masks[np.argsort(scores[:,0])][::-1].astype(bool)

# Stitch predicted mask into one segmentation mask

seg_map = np.zeros_like(shorted_masks[0],dtype=np.uint8)
occupancy_mask = np.zeros_like(shorted_masks[0],dtype=bool)
for i in range(shorted_masks.shape[0]):
    mask = shorted_masks[i]
    if (mask*occupancy_mask).sum()/mask.sum()>0.15: continue
    mask[occupancy_mask]=0
    seg_map[mask]=i+1
    occupancy_mask[mask]=1

# create colored annotation map
height, width = seg_map.shape

# Create an empty RGB image for the colored annotation
rgb_image = np.zeros((seg_map.shape[0], seg_map.shape[1], 3), dtype=np.uint8)
for id_class in range(1,seg_map.max()+1):
    rgb_image[seg_map == id_class] = [np.random.randint(255), np.random.randint(255), np.random.randint(255)]

# save and display

cv2.imwrite("annotation.png",rgb_image)
# cv2.imwrite("mix.png",(rgb_image/2+image/2).astype(np.uint8))
cv2.imwrite("mix.png",(2 * (rgb_image/5)+3*(image/5)).astype(np.uint8))
cv2.imwrite("image.png" , (image))

cv2.imshow("annotation",rgb_image)
# cv2.imshow("mix",(rgb_image / 2 + image / 2).astype(np.uint8))
cv2.imshow("mix",(2 * (rgb_image / 5) + 3 * (image / 5)).astype(np.uint8))
cv2.imshow("image",image)
cv2.waitKey()

In [4]:
from sam2.sam2_image_predictor import SAM2ImagePredictor
from sam2.build_sam import build_sam2
import cv2
import numpy as np
import torch

# Define paths
checkpoint_path = r"C:\Users\sinha\segment-anything-2\notebooks\model.torch"
config_file = "sam2_hiera_l.yaml"  # Adjust this path as needed

# Initialize SAM model and predictor
sam_model = build_sam2(checkpoint_path=checkpoint_path, config_file=config_file)
predictor = SAM2ImagePredictor(sam_model)

mask_path = r"C:\Users\sinha\segment-anything-2\notebooks\mask.png"  # Adjust the mask path if needed

# Load and preprocess image
image_path = r"C:\Users\sinha\Desktop\python\captured_image.png"# Add the path to your image
image = cv2.imread(image_path)[..., ::-1]  # BGR to RGB

# Create a copy of the image array to avoid negative strides issue
image = image.copy()

predictor.set_image(image)

# Define input points and labels for segmentation
input_point = np.array([[587, 447]])  # replace x, y with coordinates
input_label = np.array([1])  # 1 for foreground, 0 for background

# Generate mask
# mask_input, unnorm_coords, labels, unnorm_box = predictor._prep_prompts(input_point, input_label)
# sparse_embeddings, dense_embeddings = predictor.model.sam_prompt_encoder(
#     points=(unnorm_coords, labels), boxes=None, masks=None,
# )
# prd_masks, prd_scores, _, _ = predictor.model.sam_mask_decoder(
#     image_embeddings=predictor._features["image_embed"][-1].unsqueeze(0).to(device),
#     image_pe=predictor.model.sam_prompt_encoder.get_dense_pe(),
#     sparse_prompt_embeddings=sparse_embeddings,
#     dense_prompt_embeddings=dense_embeddings,
#     multimask_output=True,
#     repeat_image=False,
#     high_res_features=[feat_level[-1].unsqueeze(0).to(device) for feat_level in predictor._features["high_res_feats"]],
# )
# final_mask = predictor._transforms.postprocess_masks(prd_masks, predictor._orig_hw[-1])
masks, scores, logits = predictor.predict(
    point_coords=input_point,
    point_labels=input_label,
    multimask_output=False,
)
print(masks)

# Save the mask
# cv2.imwrite(mask_path, masks)


[[[0. 0. 0. ... 1. 1. 1.]
  [0. 0. 0. ... 1. 1. 1.]
  [0. 0. 0. ... 1. 1. 1.]
  ...
  [0. 0. 1. ... 1. 1. 1.]
  [0. 0. 0. ... 0. 1. 1.]
  [0. 0. 0. ... 0. 1. 1.]]]


In [5]:
import cv2
import numpy as np
import os

# Assuming masks is a binary numpy array (0s and 1s) with an extra dimension
# Remove the extra dimension
if masks.ndim == 3 and masks.shape[0] == 1:
    masks = masks.squeeze(0)  # Remove the first dimension

# Convert binary mask to uint8 format (0 or 255)
masks = (masks * 255).astype(np.uint8)

# Define the path where the mask should be saved
mask_output_path = r"C:\Users\sinha\segment-anything-2\notebooks\mask.png"

# Check if the directory exists
output_dir = os.path.dirname(mask_output_path)
if not os.path.exists(output_dir):
    print(f"Directory does not exist: {output_dir}")
else:
    # Attempt to save the mask and check the result
    success = cv2.imwrite(mask_output_path, masks)
    if success:
        print(f"Mask successfully saved to {mask_output_path}")
    else:
        print(f"Failed to save mask to {mask_output_path}")


Mask successfully saved to C:\Users\sinha\segment-anything-2\notebooks\mask.png
