### 1. Start by installing the Personalised Segment Anything Model from Hugging Face

In [None]:
# Install transformers straight from its GitHub repository (IPython shell magic, quiet mode).
!pip install -q git+https://github.com/huggingface/transformers.git
from transformers import AutoProcessor, SamModel
# from transformers import PerSamModel

# Processor for the "facebook/sam-vit-huge" checkpoint; later cells use it to build model inputs.
processor = AutoProcessor.from_pretrained("facebook/sam-vit-huge")
# model = PerSamModel.from_pretrained("facebook/sam-vit-huge")
# SAM model for the same checkpoint; later cells call model.get_image_embeddings on it.
model = SamModel.from_pretrained("facebook/sam-vit-huge")

# 2. Import Necessary python libraries

In [None]:
from huggingface_hub import hf_hub_download
from PIL import Image
import torch
import torch.nn.functional as F
import numpy as np
from torchvision.transforms.functional import resize, to_pil_image
import matplotlib.pyplot as plt
import cv2

from transformers import AutoProcessor, SamModel
# from transformers import PerSamModel

# 3. You can try mounting your Google Drive, from which additional models can be downloaded and imported directly into the Colab notebook

In [None]:
# Mount Google Drive at /content/drive; later cells read and write paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

# This is where helper functions are defined to simplify the rest of the code

In [None]:
import os
import torch
import numpy as np
from PIL import Image
from sklearn.cluster import KMeans
from huggingface_hub import hf_hub_download
import torch.nn.functional as F
from torchvision.transforms.functional import resize, to_pil_image
import matplotlib.pyplot as plt
import math

# Function to load images from a directory
def load_images_from_directory(directory):
    """Load every ``.jpg``/``.png`` file in *directory* as an RGB PIL image.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        A list of ``(filename, image)`` tuples, ordered by filename. Files
        with any other suffix are ignored; matching is case-sensitive.
    """
    images = []
    # os.listdir returns entries in arbitrary order; sorting keeps the order
    # of the returned list (and anything derived from it) reproducible.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith((".jpg", ".png")):
            continue
        # Image.open keeps the file open until the pixel data is read.
        # convert() loads the data into a new image, so the source handle can
        # be closed as soon as the conversion is done.
        with Image.open(os.path.join(directory, filename)) as source:
            images.append((filename, source.convert("RGB")))
    return images

# Function to extract features from an image using spatial average
def get_image_feature(ref_image):
    """Return the spatially averaged SAM image embedding of *ref_image*.

    Uses the module-level ``processor`` and ``model``. The model is moved to
    the GPU when one is available and put in eval mode on every call.

    Args:
        ref_image: Image accepted by ``processor(images=...)``.

    Returns:
        A tensor viewed as ``(1, 1, C)``: the embedding averaged over its two
        spatial axes (assumes the squeezed embedding is ``(C, H, W)``).
    """
    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(target_device)
    model.eval()

    encoded = processor(images=ref_image, return_tensors="pt")
    with torch.no_grad():
        embedding = model.get_image_embeddings(encoded.pixel_values.to(target_device))
        # Drop the size-1 axes and move the leading axis last.
        channels_last = embedding.squeeze().permute(1, 2, 0)

    # Average over the first two axes, then expose the result as (1, 1, C).
    return channels_last.mean(dim=(0, 1)).view(1, 1, -1)

# Function to count the number of images in a directory
def count(directory):
    """Count the entries of *directory* whose name ends in an image suffix.

    The suffix check is case-insensitive and covers ``.jpg``, ``.jpeg``,
    ``.png``, ``.bmp`` and ``.gif``.
    """
    image_suffixes = tuple({'.jpg', '.jpeg', '.png', '.bmp', '.gif'})
    total = 0
    for entry in os.listdir(directory):
        if entry.lower().endswith(image_suffixes):
            total += 1
    return total

# Segment images using k-means clustering
def segment_images(image_direc,cluster_direc, images, k):
    """Cluster *images* with k-means and save each image into its cluster folder.

    Args:
        image_direc: Unused; kept so existing callers keep working.
        cluster_direc: Root folder that receives ``cluster_<i>`` sub-folders.
        images: List of ``(filename, PIL image)`` tuples.
        k: Number of clusters.

    Returns:
        ``(labels, cluster_log)``: ``labels`` is the k-means label array in
        the order of *images*; ``cluster_log`` maps every cluster index
        ``0..k-1`` to the filenames assigned to it.

    Raises:
        ValueError: If *images* is empty. scikit-learn also raises for
            invalid ``k``.
    """
    if not images:
        # Fail early with a clear message instead of a scikit-learn error
        # about the shape of an empty feature array.
        raise ValueError("segment_images needs at least one image to cluster")

    # One flattened feature vector per image.
    features = [get_image_feature(img).cpu().numpy().flatten() for _, img in images]

    # Fixed random_state keeps the clustering reproducible for a given input.
    kmeans = KMeans(n_clusters=k, random_state=0).fit(features)
    labels = kmeans.labels_

    # Create every cluster folder up front, including ones that end up empty.
    for i in range(k):
        os.makedirs(os.path.join(cluster_direc, f'cluster_{i}'), exist_ok=True)

    cluster_log = {i: [] for i in range(k)}
    for (filename, img), cluster_label in zip(images, labels):
        # Cast the NumPy integer so dict keys and folder names are plain ints.
        cluster_id = int(cluster_label)
        img.save(os.path.join(cluster_direc, f'cluster_{cluster_id}', filename))
        cluster_log[cluster_id].append(filename)

    return labels,cluster_log



import shutil

def clusters(cluster_log, image_direc, saliency_direc, output_direc):
    """Copy each image's saliency map into a per-cluster folder.

    For every filename in *cluster_log* the saliency map is looked up as
    ``<saliency_direc>/<stem>.png`` and copied to
    ``<output_direc>/cluster_<id>/<stem>.png``.

    Args:
        cluster_log: Mapping of cluster id to the image filenames in it.
        image_direc: Folder holding the original images.
        saliency_direc: Folder holding the saliency maps.
        output_direc: Root folder for the per-cluster copies; created as needed.

    Images or saliency maps that are missing are reported with ``print`` and
    skipped.
    """
    for cluster, filenames in cluster_log.items():
        cluster_output_dir = os.path.join(output_direc, f'cluster_{cluster}')
        os.makedirs(cluster_output_dir, exist_ok=True)

        for filename in filenames:
            img_path = os.path.join(image_direc, filename)
            # Only the existence of the image matters here, so check the path
            # instead of opening the file and leaving the handle unclosed.
            if not os.path.isfile(img_path):
                print(f"Image {filename} not found in {img_path}. Skipping...")
                continue

            # Saliency maps share the image's stem and always use .png.
            saliency_filename = os.path.splitext(filename)[0] + '.png'
            saliency_path = os.path.join(saliency_direc, saliency_filename)

            if os.path.exists(saliency_path):
                shutil.copy(saliency_path, os.path.join(cluster_output_dir, saliency_filename))
            else:
                print(f"Saliency map {saliency_filename} not found in {saliency_direc}. Skipping...")




# Example usage:
# clusters(cluster_log, 'path/to/image_directory', 'path/to/saliency_directory', 'path/to/output_directory')


# Main function to execute the workflow
def main(image_directory,cluster_direc, k):
    """Load the images in *image_directory* and cluster them into *k* groups.

    Returns the ``(labels, cluster_log)`` pair produced by ``segment_images``.
    """
    loaded = load_images_from_directory(image_directory)
    return segment_images(image_directory, cluster_direc, loaded, k)


In [None]:
from huggingface_hub import hf_hub_download
from PIL import Image, ImageDraw, ImageFont
import cv2
import numpy as np
import os
import torch
import torch.nn.functional as F
from torchvision.transforms.functional import resize, to_pil_image
import matplotlib.pyplot as plt
from typing import Tuple


import os
import math
import torch
import numpy as np
import cv2
from PIL import Image
import torch.nn.functional as F
from google.colab.patches import cv2_imshow
from torchvision.transforms import ToTensor, ToPILImage
from skimage.measure import label, regionprops
from torchvision.transforms.functional import resize, to_pil_image

def prepare_mask(image, target_length=1024):
    """Resize *image*, convert it to a batched tensor and run ``preprocess`` on it.

    Args:
        image: Array accepted by ``to_pil_image``; its first two axes are
            read as height and width.
        target_length: Length the longer side is scaled to.

    Returns:
        The tensor returned by ``preprocess`` for the channel-first input
        with a leading batch axis.
    """
    height, width = image.shape[0], image.shape[1]
    resized = resize(to_pil_image(image), get_preprocess_shape(height, width, target_length))
    pixels = np.array(resized)

    # Single-channel images come back 2-D; give them a trailing channel axis.
    if pixels.ndim == 2:
        pixels = pixels[:, :, None]

    # Channels first, then a leading batch axis.
    batched = torch.as_tensor(pixels).permute(2, 0, 1).contiguous().unsqueeze(0)
    return preprocess(batched)

# def point_selection(mask_sim, topk=1):
#     w, h = mask_sim.shape
#     topk_xy = mask_sim.flatten(0).topk(topk)[1]
#     topk_x = (topk_xy // h).unsqueeze(0)
#     topk_y = (topk_xy - topk_x * h)
#     topk_xy = torch.cat((topk_y, topk_x), dim=0).permute(1, 0)
#     topk_label = np.array([1] * topk)
#     topk_xy = topk_xy.cpu().numpy()
#     last_xy = mask_sim.flatten(0).topk(topk, largest=False)[1]
#     last_x = (last_xy // h).unsqueeze(0)
#     last_y = (last_xy - last_x * h)
#     last_xy = torch.cat((last_y, last_x), dim=0).permute(1, 0)
#     last_label = np.array([0] * topk)
#     last_xy = last_xy.cpu().numpy()
#     return topk_xy, topk_label, last_xy, last_label


def point_selection22(sim,original_image):
    """Turn a similarity map into positive point prompts, one per salient region.

    The map is passed through a sigmoid and thresholded at 0.7. Connected
    regions with at least 10% of the largest region's area are kept, and each
    kept region's centroid is rescaled to the size of *original_image*.

    Args:
        sim: Similarity tensor. Assumed to be 2-D, since centroids are
            unpacked as ``(row, col)`` — TODO confirm with callers. It is not
            modified.
        original_image: PIL image whose size defines the output scale.

    Returns:
        ``(topk_coords, topk_labels, bottomk_coords, bottomk_labels)``.
        Coordinates are integer ``(row, col)`` pairs and labels are all 1.
        The "bottom" arrays are always empty. All four are empty when nothing
        exceeds the threshold, and all four are ``None`` when
        *original_image* is not a usable PIL image.
    """
    if not isinstance(original_image, Image.Image) or not original_image.size:
        print("Error: Invalid original image. Unable to resize mask.")
        return None, None, None, None

    # Out-of-place sigmoid: the in-place variant would overwrite the caller's tensor.
    attention_similarity = sim.sigmoid()
    threshold = 0.7
    binary_mask_np = (attention_similarity > threshold).cpu().numpy().astype(np.uint8)

    # Connected-component analysis of the thresholded map.
    labeled_mask = label(binary_mask_np)
    regions = regionprops(labeled_mask)
    if not regions:
        # Nothing exceeded the threshold; max() below would fail on an empty sequence.
        print("Warning: no region exceeds the similarity threshold; returning no points.")
        no_coords = np.empty((0, 2), dtype=int)
        no_labels = np.empty((0,), dtype=int)
        return no_coords, no_labels, no_coords.copy(), no_labels.copy()

    # Only keep regions that are at least 10% the size of the largest region.
    size_threshold = max(region.area for region in regions) * 0.1
    centroids = [
        (region.centroid[0], region.centroid[1])
        for region in regions
        if region.area >= size_threshold
    ]

    # PIL reports (width, height); reverse it to match the mask's (rows, cols).
    original_size = original_image.size[::-1]
    print("Original image size:", original_image.size)
    print("final_mask size:", labeled_mask.size)

    # Convert centroids to the original image scale.
    scale_factor_y = original_size[0] / labeled_mask.shape[0]
    scale_factor_x = original_size[1] / labeled_mask.shape[1]
    centroids = [(int(y * scale_factor_y), int(x * scale_factor_x)) for y, x in centroids]
    print("Centroid  ", centroids)

    topk_coords = np.array(centroids)
    bottomk_coords = topk_coords[:0]
    topk_labels = np.array([1] * len(centroids))
    bottomk_labels = np.array([0] * 0)
    print("Top K Coordinates: ", topk_coords)
    print("Top K Labels: ", topk_labels)
    print("Bottom K Coordinates: ", bottomk_coords)
    print("Bottom K Labels: ", bottomk_labels)
    return topk_coords, topk_labels, bottomk_coords, bottomk_labels

def preprocess(x: torch.Tensor, pixel_mean=(123.675, 116.28, 103.53), pixel_std=(58.395, 57.12, 57.375), img_size=1024) -> torch.Tensor:
    """Normalize *x* per channel and zero-pad it to ``img_size`` x ``img_size``.

    Args:
        x: Tensor whose last two axes are height and width and whose channel
            axis broadcasts against a ``(C, 1, 1)`` mean/std.
        pixel_mean: Per-channel mean subtracted from *x*.
        pixel_std: Per-channel standard deviation *x* is divided by.
        img_size: Target height and width after padding.

    Returns:
        The normalized tensor, padded with zeros on the bottom and right.
    """
    # Build the statistics on x's device so GPU inputs work as well as CPU ones.
    mean = torch.as_tensor(pixel_mean, dtype=torch.float32, device=x.device).view(-1, 1, 1)
    std = torch.as_tensor(pixel_std, dtype=torch.float32, device=x.device).view(-1, 1, 1)
    x = (x - mean) / std

    h, w = x.shape[-2:]
    # F.pad takes (left, right, top, bottom) for the last two axes.
    return F.pad(x, (0, img_size - w, 0, img_size - h))

def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]:
    """Scale ``(oldh, oldw)`` so the longer side equals *long_side_length*.

    Both sides are multiplied by the same factor and rounded half-up.

    Returns:
        ``(new_height, new_width)`` as integers.
    """
    scale = long_side_length * 1.0 / max(oldh, oldw)
    # int(v + 0.5) rounds the non-negative scaled side to the nearest integer.
    return tuple(int(side * scale + 0.5) for side in (oldh, oldw))


def show_mask(mask, ax, random_color=False):
    """Draw *mask* on *ax* as a semi-transparent colour overlay.

    Args:
        mask: Array whose last two axes are height and width.
        ax: Axes-like object with an ``imshow`` method.
        random_color: Use a random RGB colour instead of red. Alpha is 0.6
            either way.
    """
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        # Float RGBA values belong in [0, 1]. The red channel was 255 here,
        # which imshow clips back to 1.0.
        color = np.array([1.0, 0.0, 0.0, 0.6])
    h, w = mask.shape[-2:]
    # Broadcast the (h, w, 1) mask against the (1, 1, 4) colour.
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)

# Use the GPU when one is available, otherwise the CPU, and move the model there.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# 4. Importing the Segment Anything model for using its decoder after the point prompts are generated


> Add blockquote



In [None]:
# Always-true guard, so everything below runs; presumably a manual on/off switch for this setup cell.
if True:
    import torch
    import torchvision
    # Report the library versions and whether a CUDA device is visible.
    print("PyTorch version:", torch.__version__)
    print("Torchvision version:", torchvision.__version__)
    print("CUDA is available:", torch.cuda.is_available())
    import sys
    # Install through the interpreter that runs this notebook.
    !{sys.executable} -m pip install opencv-python matplotlib
    !{sys.executable} -m pip install 'git+https://github.com/facebookresearch/segment-anything.git'

    # Download two example images from the segment-anything repository into ./images.
    !mkdir images
    !wget -P images https://raw.githubusercontent.com/facebookresearch/segment-anything/main/notebooks/images/truck.jpg
    !wget -P images https://raw.githubusercontent.com/facebookresearch/segment-anything/main/notebooks/images/groceries.jpg

    # Download the checkpoint file sam_vit_h_4b8939.pth into the working directory.
    !wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

# 5. Load the SAM model and build necessary functions

In [None]:
import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamPredictor
import torch

# Checkpoint file and the registry key of the matching architecture.
sam_checkpoint = "sam_vit_h_4b8939.pth"
model_type = "vit_h"

# Fall back to the CPU when no GPU is present instead of failing on "cuda".
# This matches the device selection used elsewhere in the notebook.
device = "cuda" if torch.cuda.is_available() else "cpu"

sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)

# Predictor wrapper around the loaded SAM model; used for box-prompted mask prediction.
predictor = SamPredictor(sam)

In [None]:
def show_mask(mask, ax, random_color=False):
    """Overlay *mask* on *ax* as a semi-transparent colour layer.

    The colour is a fixed blue (alpha 0.6) unless *random_color* is true, in
    which case a random RGB colour with the same alpha is used.
    """
    if not random_color:
        rgba = np.array([30/255, 144/255, 255/255, 0.6])
    else:
        rgba = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    rows, cols = mask.shape[-2:]
    # (rows, cols, 1) * (1, 1, 4) broadcasts to an RGBA image.
    overlay = mask.reshape(rows, cols, 1) * rgba.reshape(1, 1, -1)
    ax.imshow(overlay)

def show_points(coords, labels, ax, marker_size=375):
    """Scatter prompt points on *ax*: label 1 as green stars, label 0 as red stars.

    Args:
        coords: Array of points; column 0 and column 1 are passed to
            ``ax.scatter`` as x and y.
        labels: Array of 0/1 labels aligned with *coords*.
        ax: Axes-like object with a ``scatter`` method.
        marker_size: Marker size passed as ``s``.
    """
    # Positive points are drawn first, then negative ones.
    for wanted_label, colour in ((1, 'green'), (0, 'red')):
        selected = coords[labels == wanted_label]
        ax.scatter(selected[:, 0], selected[:, 1], color=colour, marker='*', s=marker_size, edgecolor='white', linewidth=1.25)

def show_box(box, ax):
    """Draw *box*, given as ``[x0, y0, x1, y1]``, on *ax* as an unfilled green rectangle."""
    left, top, right, bottom = box[0], box[1], box[2], box[3]
    outline = plt.Rectangle((left, top), right - left, bottom - top, edgecolor='green', facecolor=(0,0,0,0), lw=2)
    ax.add_patch(outline)

Now we need to create a few directories where our output will be stored.

In [None]:
!mkdir  '/content/IMAGES_IN_CLUSTERS/14'
!mkdir  '/content/OUT-PUT-MASK/14'
!mkdir  '/content/SALIENCY_IN_CLUSTERS/14'


mkdir: cannot create directory ‘/content/OUT-PUT-MASK/14’: No such file or directory


In [None]:
import os
import math
import torch
import numpy as np
import cv2
from PIL import Image
import torch.nn.functional as F
from torchvision.transforms import ToTensor, ToPILImage
from skimage.measure import label, regionprops

# Directory setup
# Inputs: the original images and their saliency maps on Google Drive.
image_direct = '/content/drive/MyDrive/SURGEIMAGEfolder/images/018 Agra Taj Mahal-Inde du Nord 2004-Mhln'
saliency_direc = '/content/drive/MyDrive/TOTAL-SALIENCY/18'
# Working folders for the per-cluster copies, and the folder for the final masks.
saliency_map_cluster_direc = '/content/SALIENCY_IN_CLUSTERS/18'
image_cluster_direc = '/content/IMAGES_IN_CLUSTERS/18'
output_directory = '/content/drive/MyDrive/OUT-PUT-MASK(BB)/18'
# os.makedirs replaces the `!mkdir -p` shell magics: same effect, plain Python.
for _working_dir in (saliency_map_cluster_direc, image_cluster_direc, output_directory):
    os.makedirs(_working_dir, exist_ok=True)

# Number of clusters: roughly one per ten images.
k = math.ceil(count(image_direct) / 10)  # Number of clusters

# Cluster the images, then mirror the grouping for their saliency maps.
labels, cluster_log = main(image_direct, image_cluster_direc, k)
print(labels)
clusters(cluster_log, image_direct, saliency_direc, saliency_map_cluster_direc)



def _reference_direction(ref_image_path, ref_mask_path):
    """Return the unit-norm mean embedding of one reference image's salient region.

    The saliency map is normalized and resized to the embedding grid by
    ``prepare_mask`` and ``F.interpolate``. Embedding vectors where the
    resulting mask is positive are averaged and L2-normalized.

    Returns:
        A ``(1, C)`` tensor, or ``None`` when the mask cannot be read or
        selects no embedding location.
    """
    ref_mask = cv2.imread(ref_mask_path)
    if ref_mask is None:
        # cv2.imread returns None instead of raising for unreadable files.
        print(f"Could not read saliency map {ref_mask_path}. Skipping reference...")
        return None
    ref_mask = cv2.cvtColor(ref_mask, cv2.COLOR_BGR2RGB)

    ref_image = Image.open(ref_image_path).convert("RGB")
    pixel_values = processor(images=ref_image, return_tensors="pt").pixel_values
    with torch.no_grad():
        ref_feat = model.get_image_embeddings(pixel_values.to(device))
        ref_feat = ref_feat.squeeze().permute(1, 2, 0)

    ref_mask = prepare_mask(ref_mask)
    ref_mask = F.interpolate(ref_mask, size=ref_feat.shape[0:2], mode="bilinear")
    ref_mask = ref_mask.squeeze()[0]

    target_feat = ref_feat[ref_mask > 0]
    if target_feat.shape[0] == 0:
        # The mean of zero vectors would be NaN and poison the averaged similarity.
        print(f"Saliency map {ref_mask_path} selects no region. Skipping reference...")
        return None
    target_embedding = target_feat.mean(0).unsqueeze(0)
    return target_embedding / target_embedding.norm(dim=-1, keepdim=True)


# List all image cluster folders
image_clusters = [d for d in os.listdir(image_cluster_direc) if os.path.isdir(os.path.join(image_cluster_direc, d))]
for image_cluster in image_clusters:
    # Construct the corresponding saliency map cluster directory
    saliency_map_cluster = os.path.join(saliency_map_cluster_direc, image_cluster)

    # Skip clusters that have no saliency-map folder.
    if not os.path.isdir(saliency_map_cluster):
        continue

    image_cluster_path = os.path.join(image_cluster_direc, image_cluster)
    image_files = [
        os.path.join(image_cluster_path, f)
        for f in sorted(os.listdir(image_cluster_path))
        if f.endswith((".jpg", ".png"))
    ]
    # Index the saliency maps by stem so each image is paired with its own
    # map even when some maps are missing. Zipping two sorted lists would
    # misalign every pair after the first gap.
    mask_by_stem = {
        os.path.splitext(f)[0]: os.path.join(saliency_map_cluster, f)
        for f in sorted(os.listdir(saliency_map_cluster))
        if f.endswith((".jpg", ".png"))
    }

    # The reference directions do not depend on the test image, so compute
    # them once per cluster rather than once per (test, reference) pair.
    references = []
    for ref_image_filename in image_files:
        stem = os.path.splitext(os.path.basename(ref_image_filename))[0]
        ref_mask_filename = mask_by_stem.get(stem)
        if ref_mask_filename is None:
            print(f"No saliency map for {ref_image_filename}. Skipping reference...")
            continue
        target_feat = _reference_direction(ref_image_filename, ref_mask_filename)
        if target_feat is not None:
            references.append((ref_image_filename, ref_mask_filename, target_feat))

    if not references:
        print(f"No usable reference images in {image_cluster_path}. Skipping cluster...")
        continue

    for test_image_filename in image_files:
        test_image = Image.open(test_image_filename).convert("RGB")

        # Prepare test image
        inputs = processor(images=test_image, return_tensors="pt").to(device)
        with torch.no_grad():
            test_feat = model.get_image_embeddings(inputs.pixel_values).squeeze()

        num_channels, height, width = test_feat.shape
        test_feat = test_feat / test_feat.norm(dim=0, keepdim=True)
        test_feat_reshaped = test_feat.reshape(num_channels, height * width)

        original_sizes = inputs["original_sizes"].tolist()
        reshaped_input_sizes = inputs["reshaped_input_sizes"].tolist()

        # One similarity map per reference: dot product of its unit-norm
        # target direction with every normalized test embedding.
        sims_list = []
        for ref_image_filename, ref_mask_filename, target_feat in references:
            sim = target_feat @ test_feat_reshaped
            sim = sim.reshape(1, 1, height, width)
            sim = F.interpolate(sim, scale_factor=4, mode="bilinear")
            sim = processor.post_process_masks(sim.unsqueeze(1), original_sizes=original_sizes, reshaped_input_sizes=reshaped_input_sizes, binarize=False)
            sims_list.append(sim[0].squeeze())
            print(f"Processing: Test Image: {test_image_filename}, Reference Image: {ref_image_filename}, Reference Mask: {ref_mask_filename}")

        # Compute the average similarity matrix
        avg_sim = torch.mean(torch.stack(sims_list), dim=0)
        if avg_sim.max() > 1.0:
            avg_sim = avg_sim / avg_sim.max()

        # Standardize, squash to (0, 1) and threshold.
        sim = (avg_sim - avg_sim.mean()) / torch.std(avg_sim)
        attention_similarity = sim.sigmoid()
        threshold = 0.69

        # Ensure the mask is 2D
        binary_mask_np = (attention_similarity > threshold).squeeze().cpu().numpy().astype(np.uint8)
        print(f"binary_mask_np shape: {binary_mask_np.shape}, dtype: {binary_mask_np.dtype}")

        # Perform connected component analysis
        labeled_mask, num_labels = label(binary_mask_np, return_num=True)
        print(f"labeled_mask shape: {labeled_mask.shape}, dtype: {labeled_mask.dtype}, num_labels: {num_labels}")

        regions = regionprops(labeled_mask)
        if not regions:
            # Nothing passed the threshold, so there is no box to prompt SAM with.
            print(f"No region above threshold for {test_image_filename}. Skipping...")
            continue

        # Only keep regions that are at least 10% the size of the largest region
        size_threshold = max(region.area for region in regions) * 0.1
        kept_regions = [region for region in regions if region.area >= size_threshold]
        centroids = [region.centroid for region in kept_regions]
        bounding_boxes = [region.bbox for region in kept_regions]

        # PIL Image size is (width, height), so reverse it to (rows, cols)
        original_size = test_image.size[::-1]
        scale_factor_y = original_size[0] / labeled_mask.shape[0]
        scale_factor_x = original_size[1] / labeled_mask.shape[1]

        # Convert centroids to original image scale (printed for inspection only)
        centroids_original_scale = [(int(y * scale_factor_y), int(x * scale_factor_x)) for y, x in centroids]
        print(np.array(centroids_original_scale))

        # Convert bounding boxes from (min_row, min_col, max_row, max_col) to
        # [x1, y1, x2, y2] in the original image scale
        bounding_boxes_original_scale = [
            [
                int(min_col * scale_factor_x),
                int(min_row * scale_factor_y),
                int(max_col * scale_factor_x),
                int(max_row * scale_factor_y),
            ]
            for min_row, min_col, max_row, max_col in bounding_boxes
        ]
        input_boxes = torch.tensor(bounding_boxes_original_scale, device=device)
        print(input_boxes)

        # Prompt SAM with the boxes only
        test_image_np = np.array(test_image)
        predictor.set_image(test_image_np)
        transformed_boxes = predictor.transform.apply_boxes_torch(input_boxes, test_image_np.shape[:2])
        masks, _, _ = predictor.predict_torch(
            point_coords=None,
            point_labels=None,
            boxes=transformed_boxes,
            multimask_output=False,
        )

        fig, ax = plt.subplots(figsize=(10, 10))
        ax.imshow(test_image)

        # Combine masks and display them
        combined_mask = np.zeros(test_image_np.shape[:2], dtype=np.uint8)
        for mask in masks:
            mask_np = mask.cpu().numpy().squeeze()
            combined_mask = np.maximum(combined_mask, mask_np)
            show_mask(mask_np, ax, random_color=True)

        # Display boxes
        for box in input_boxes:
            show_box(box.cpu().numpy(), ax)

        # Output names: the overlay keeps the image's name and the binary
        # mask is always <stem>.png. For a .png input the two would collide
        # and the mask would overwrite the overlay, so rename the overlay.
        image_basename = os.path.basename(test_image_filename)
        stem, extension = os.path.splitext(image_basename)
        binary_mask_output_path = os.path.join(output_directory, stem + '.png')
        output_path = os.path.join(output_directory, image_basename)
        if output_path == binary_mask_output_path:
            output_path = os.path.join(output_directory, f"{stem}_overlay{extension}")

        # Save the figure with the masks and boxes
        plt.savefig(output_path, bbox_inches='tight', pad_inches=0)
        plt.close(fig)
        print(f"Saved visualization to: {output_path}")

        # Save the union of all predicted masks as a 0/255 image
        binary_mask = combined_mask.astype(np.uint8)
        binary_mask_pil = Image.fromarray(binary_mask * 255)
        binary_mask_pil.save(binary_mask_output_path)
        print(f"Saved binary mask to: {binary_mask_output_path}")

# --------new above --------------










                # all_masks = []
                # all_scores = []

                # for points, labels, box in zip(points_sets, labels_sets, boxes):
                #       masks, scores, _ = predictor.predict(point_coords=points, point_labels=labels, box=box)
                #       all_masks.append(masks)
                #       all_scores.append(scores)
                # plt.figure(figsize=(10, 10))
                # plt.imshow(test_image)
                # show_mask(all_masks.cpu().numpy(), plt.gca(), random_color=True)


#                 def show_points(coords, labels, ax, marker_size=375):
#                     pos_points = coords[labels==1]
#                     neg_points = coords[labels==0]
#                     ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
#                     ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)




#                 plt.figure(figsize=(10, 10))
#                 plt.imshow(test_image)
#                 combined_mask = np.zeros(test_image_np.shape[:2], dtype=np.uint8)
#                 for mask in masks:
#                     mask_np = mask.cpu().numpy().squeeze()
#                     combined_mask = np.maximum(combined_mask, mask_np)
#                     show_mask(mask.cpu().numpy(), plt.gca(), random_color=True)
#                 for box in input_boxes:
#                     show_box(box.cpu().numpy(), plt.gca())
#                 plt.axis('off')
#                 plt.show()

# # Convert the selected mask to a binary mask
#                 binary_mask = combined_mask.astype(np.uint8)

# # Convert binary mask to PIL image
#                 binary_mask_pil = Image.fromarray(binary_mask * 255)

# # Save the binary mask
#                 output_path = os.path.join(output_directory, os.path.basename(test_image_filename))
#                 binary_mask_pil.save(output_path)
#                 print(f"Saved mask to: {output_path}")

                # # Convert to binary mask
                # binary_mask = (masks[2] > 0.5).astype(np.uint8)

                # # Convert binary mask to PIL image
                # binary_mask_pil = Image.fromarray(binary_mask * 255)

                # # Create a draw object

                # # Draw the points on the mask


                # # Save the binary mask
                # output_path = os.path.join(output_directory, os.path.basename(test_image_filename))
                # binary_mask_pil.save(output_path)
                # print(f"Saved mask to: {output_path}")



# After the results are stored, you can also check the IoU score against the ground truth

In [None]:
#IOU SCORE.
import os
import numpy as np
from PIL import Image

def calculate_iou(ground_truth, prediction):
    """Return the intersection-over-union of two binary (0/1) masks.

    If the shapes differ, the prediction is resized to the ground-truth shape
    with nearest-neighbour sampling. Returns 0 when both masks are empty.
    """
    truth = ground_truth.astype(np.float32)
    guess = prediction.astype(np.float32)

    if truth.shape != guess.shape:
        # PIL's resize expects (width, height), hence the reversed shape.
        width_height = truth.shape[::-1]
        guess = np.array(Image.fromarray(guess).resize(width_height, Image.NEAREST))

    # After adding, 2 marks pixels set in both masks and >= 1 pixels set in either.
    overlap_map = truth + guess
    n_intersection = np.sum(overlap_map == 2)
    n_union = np.sum(overlap_map >= 1)

    if n_union == 0:
        return 0
    return n_intersection / n_union

def find_file_with_extension(directory, basename, extensions):
    """Return the first ``basename + ext`` that is a file in *directory*, else ``None``.

    Extensions are tried in the order given.
    """
    candidates = (f"{basename}{ext}" for ext in extensions)
    return next(
        (name for name in candidates if os.path.isfile(os.path.join(directory, name))),
        None,
    )

def main2(ground_truth_dir="/content/drive/MyDrive/ground_truth/040 Monks-LAO PDR 2008-Rolandito",
          predicted_dir="/content/drive/MyDrive/OUT-PUT-MASK/40",
          output_file="/content/drive/MyDrive/IOU-NEW/iou_scores_40.txt"):
    """Score predicted masks against ground-truth masks and write an IoU report.

    Masks are matched by filename stem and binarized at 128 before scoring.

    Args:
        ground_truth_dir: Folder with the ground-truth masks (.jpg/.png).
        predicted_dir: Folder with the predicted masks (.jpg/.png).
        output_file: Report path; its parent folder is created when missing.

    Returns:
        The average IoU as a float, or ``None`` when no pair could be scored.
        No report is written in that case.

    A difference in file counts between the two folders is reported as a
    warning rather than aborting, because unmatched masks are skipped
    individually below.
    """
    # Prefer .png: the segmentation cell saves the binary mask as <stem>.png
    # and may save an overlay picture under the image's own .jpg name.
    extensions = ['.png', '.jpg']
    mask_suffixes = ('.jpg', '.png')

    # List of mask filenames (assuming filenames are the same in both directories)
    ground_truth_files = [f for f in os.listdir(ground_truth_dir) if os.path.isfile(os.path.join(ground_truth_dir, f)) and f.endswith(mask_suffixes)]
    predicted_files = [f for f in os.listdir(predicted_dir) if os.path.isfile(os.path.join(predicted_dir, f)) and f.endswith(mask_suffixes)]

    if len(ground_truth_files) != len(predicted_files):
        print(f"Warning: {len(ground_truth_files)} ground-truth masks but {len(predicted_files)} predicted files; unmatched masks are skipped.")

    # One entry per stem, so a mask stored under both suffixes is scored once.
    ground_truth_basenames = {os.path.splitext(f)[0]: f for f in ground_truth_files}

    # Calculate IoU for each pair of masks
    iou_scores = []
    for basename, gt_filename in ground_truth_basenames.items():
        pred_filename = find_file_with_extension(predicted_dir, basename, extensions)
        if not pred_filename:
            print(f"Corresponding predicted file for {gt_filename} not found.")
            continue

        # Context managers close the image files once the pixels are copied out.
        with Image.open(os.path.join(ground_truth_dir, gt_filename)) as gt_image:
            gt_mask = np.array(gt_image.convert('L'))
        with Image.open(os.path.join(predicted_dir, pred_filename)) as pred_image:
            pred_mask = np.array(pred_image.convert('L'))

        # Convert to binary masks (assuming threshold at 128)
        gt_mask = (gt_mask >= 128).astype(np.float32)
        pred_mask = (pred_mask >= 128).astype(np.float32)

        iou_scores.append((basename, calculate_iou(gt_mask, pred_mask)))

    if not iou_scores:
        # Averaging an empty list would give NaN and a misleading report.
        print("No ground-truth/prediction pairs were evaluated; nothing written.")
        return None

    # Calculate average IoU
    average_iou = float(np.mean([score for _, score in iou_scores]))

    # Save individual IoU scores and final average to a text file
    os.makedirs(os.path.dirname(output_file) or ".", exist_ok=True)
    with open(output_file, 'w') as f:
        for filename, score in iou_scores:
            f.write(f"{filename}: {score:.4f}\n")
        f.write(f"\nAverage IoU: {average_iou:.4f}\n")

    print(f"Individual IoU scores and average IoU saved to {output_file}")
    return average_iou

if __name__ == "__main__":
    main2()

In [None]:
import shutil
import os

def delete_non_empty_folder(folder_path):
    """Delete a directory tree, or unlink a symbolic link, at ``folder_path``.

    Exactly one status line is printed describing what happened; nothing is
    returned. A path that is neither a symbolic link nor a directory (for
    example a regular file) is left untouched.

    Args:
        folder_path: Path of the folder or symbolic link to remove.
    """
    # Check for a symlink BEFORE checking existence: os.path.exists() follows
    # links and returns False for a dangling one, which would otherwise be
    # misreported as "does not exist" and left behind on disk.
    if os.path.islink(folder_path):
        os.unlink(folder_path)  # Remove the link itself, never its target
        print(f"Deleted symbolic link: {folder_path}")
    elif not os.path.exists(folder_path):
        print(f"Path does not exist: {folder_path}")
    elif os.path.isdir(folder_path):
        shutil.rmtree(folder_path)  # Recursively removes the folder's contents
        print(f"Deleted non-empty folder: {folder_path}")
    else:
        print(f"Path is not a folder or symbolic link: {folder_path}")

# Usage: remove one output folder. The path is hard-coded; change it to the
# folder that should be deleted before running this cell.
folder_to_delete = '/content/Output/50_2'
delete_non_empty_folder(folder_to_delete)

In [None]:
import os

def extract_average_iou(filename):
    """Return the average IoU recorded in a score file, or None if absent.

    The file is scanned from the top and the first line containing the text
    "Average IoU" is used: whatever follows its last colon is parsed as a
    float.

    Args:
        filename: Path of the IoU score text file to read.

    Returns:
        The parsed average as a float, or None when no line mentions
        "Average IoU".
    """
    with open(filename, 'r') as score_file:
        summary_rows = [row for row in score_file.readlines() if "Average IoU" in row]
    if not summary_rows:
        return None
    # Only the first matching row counts; take the text after the last colon.
    _, _, value_text = summary_rows[0].rpartition(":")
    return float(value_text.strip())

def main3(iou_scores_dir="/content/drive/MyDrive/IOU_SCORE",
          output_file="final_iou_average.txt"):
    """Average the per-file "Average IoU" values found in a directory.

    Every ``.txt`` file in ``iou_scores_dir`` is passed to
    ``extract_average_iou``; files for which it returns None are ignored.
    The result is the unweighted mean of those per-file averages (each file
    counts once, regardless of how many images it covers). The mean and the
    number of contributing files are written to ``output_file`` and the mean
    is printed. When no file yields a value, only a notice is printed and no
    file is written.

    Args:
        iou_scores_dir: Directory containing the IoU score text files.
            Defaults to the Google Drive folder originally hard-coded here.
        output_file: Path of the summary file to write.
    """
    # os.listdir() returns names in arbitrary order; sorting keeps the float
    # summation order (and therefore the result) reproducible between runs.
    score_files = sorted(
        name for name in os.listdir(iou_scores_dir) if name.endswith('.txt')
    )

    # Collect the average IoU reported by each file that has one.
    average_ious = []
    for name in score_files:
        avg_iou = extract_average_iou(os.path.join(iou_scores_dir, name))
        if avg_iou is not None:
            average_ious.append(avg_iou)

    if not average_ious:
        print("No valid IoU scores found.")
        return

    overall_average_iou = sum(average_ious) / len(average_ious)

    with open(output_file, 'w') as f:
        f.write(f"Overall Average IoU: {overall_average_iou:.4f}\n")
        f.write(f"Total number of IoU scores: {len(average_ious)}\n")

    print(f"Results saved to {output_file}")
    print(f"Overall Average IoU: {overall_average_iou:.4f}")

# Entry point: call main3() only when this code runs as the top-level program,
# not when it is imported as a module.
if __name__ == "__main__":
    main3()


In [None]:
import os

def extract_individual_ious(filename):
    """Return every per-entry IoU value listed in a score file.

    A line contributes a value when it contains a colon and does not contain
    the text "Average IoU"; the text after the line's last colon is parsed as
    a float. Lines without a colon (such as blank lines) are ignored.

    Args:
        filename: Path of the IoU score text file to read.

    Returns:
        A list of floats in file order; empty when no line qualifies.
    """
    with open(filename, 'r') as score_file:
        rows = score_file.readlines()
    return [
        float(row.rpartition(":")[2].strip())
        for row in rows
        if "Average IoU" not in row and ":" in row
    ]

def main4(iou_scores_dir="/content/drive/MyDrive/IOU_SCORE",
          output_file="final_iou_average_per_image.txt"):
    """Average all individual IoU values across the files in a directory.

    Every ``.txt`` file in ``iou_scores_dir`` is passed to
    ``extract_individual_ious`` and the returned values are pooled, so each
    individual score carries equal weight regardless of which file it came
    from. The pooled mean and the number of scores are printed and written
    to ``output_file``. When no score is found, only a notice is printed and
    no file is written.

    Args:
        iou_scores_dir: Directory containing the IoU score text files.
            Defaults to the Google Drive folder originally hard-coded here.
        output_file: Path of the summary file to write.
    """
    # os.listdir() returns names in arbitrary order; sorting keeps the float
    # summation order (and therefore the result) reproducible between runs.
    score_files = sorted(
        name for name in os.listdir(iou_scores_dir) if name.endswith('.txt')
    )

    # Pool the individual scores from every file.
    all_ious = []
    for name in score_files:
        all_ious.extend(extract_individual_ious(os.path.join(iou_scores_dir, name)))

    if not all_ious:
        print("No valid IoU scores found.")
        return

    overall_average_iou = sum(all_ious) / len(all_ious)
    print(f"Overall Average IoU: {overall_average_iou:.4f}")
    print(f"Total number of IoU scores: {len(all_ious)}")

    # Save the result to a text file
    with open(output_file, 'w') as f:
        f.write(f"Overall Average IoU: {overall_average_iou:.4f}\n")
        f.write(f"Total number of IoU scores: {len(all_ious)}\n")

    print(f"Results saved to {output_file}")

# Entry point: call main4() only when this code runs as the top-level program,
# not when it is imported as a module.
if __name__ == "__main__":
    main4()


In [None]:
import os

def extract_average_iou(filename):
    """Read a score file and return its recorded average IoU.

    The first line containing "Average IoU" wins; the text after that line's
    last colon is converted to a float.

    Args:
        filename: Path of the IoU score text file to read.

    Returns:
        The average as a float, or None if no line contains "Average IoU".
    """
    with open(filename, 'r') as handle:
        contents = handle.readlines()
    # First matching line, or None when the file has no summary line.
    summary = next((text for text in contents if "Average IoU" in text), None)
    if summary is None:
        return None
    return float(summary.split(":")[-1].strip())

def main(iou_scores_dir="/content/drive/MyDrive/IOU_SCORE",
         output_file="folder_iou_statistics2.txt",
         thresholds=(0.9, 0.8, 0.7, 0.6)):
    """Write threshold statistics over the per-file average IoU values.

    Every ``.txt`` file in ``iou_scores_dir`` is passed to
    ``extract_average_iou``; files for which it returns None are ignored.
    Each remaining file is counted as one "folder" in the report. The report
    holds the overall mean, the number of folders, and, for each threshold,
    how many folders have an average strictly greater than it together with
    the mean of those averages (0 when none qualify). The buckets are
    cumulative: a folder above 0.9 is also counted above 0.8, 0.7 and 0.6.
    When no file yields a value, only a notice is printed and no file is
    written.

    Args:
        iou_scores_dir: Directory containing the IoU score text files.
            Defaults to the Google Drive folder originally hard-coded here.
        output_file: Path of the statistics file to write.
        thresholds: IoU cut-offs to report on, in the order they should
            appear in the output.
    """
    # os.listdir() returns names in arbitrary order; sorting keeps the float
    # summation order (and therefore the result) reproducible between runs.
    score_files = sorted(
        name for name in os.listdir(iou_scores_dir) if name.endswith('.txt')
    )

    # Collect the average IoU reported by each file that has one.
    average_ious = []
    for name in score_files:
        avg_iou = extract_average_iou(os.path.join(iou_scores_dir, name))
        if avg_iou is not None:
            average_ious.append(avg_iou)

    if not average_ious:
        print("No valid IoU scores found.")
        return

    total_average_iou = sum(average_ious) / len(average_ious)
    report = [
        f"Total Average IoU: {total_average_iou:.4f}\n",
        f"Total number of folders: {len(average_ious)}\n",
    ]

    # One pass per threshold replaces the separate counter/list pairs; the
    # count is simply the length of the filtered list.
    for threshold in thresholds:
        above = [value for value in average_ious if value > threshold]
        mean_above = sum(above) / len(above) if above else 0
        report.append(f"Number of folders with IoU > {threshold}: {len(above)}\n")
        report.append(f"Average IoU for folders with IoU > {threshold}: {mean_above:.4f}\n")

    # Save the result to a text file
    with open(output_file, 'w') as f:
        f.writelines(report)

    print(f"Results saved to {output_file}")

# Entry point: call main() only when this code runs as the top-level program,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
