In [99]:
import os
# if using Apple MPS, fall back to CPU for unsupported ops
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
import numpy as np
import torch
import matplotlib.pyplot as plt
from PIL import Image

In [100]:
# Choose the compute backend in priority order: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    _backend_name = "cuda"
elif torch.backends.mps.is_available():
    _backend_name = "mps"
else:
    _backend_name = "cpu"
device = torch.device(_backend_name)
print(f"using device: {device}")

_MPS_NOTICE = (
    "\nSupport for MPS devices is preliminary. SAM 2 is trained with CUDA and might "
    "give numerically different outputs and sometimes degraded performance on MPS. "
    "See e.g. https://github.com/pytorch/pytorch/issues/84936 for a discussion."
)

if device.type == "mps":
    print(_MPS_NOTICE)
elif device.type == "cuda":
    # Enter a bfloat16 autocast context and deliberately never leave it, so that
    # it stays active for every later cell of the notebook.
    torch.autocast("cuda", dtype=torch.bfloat16).__enter__()
    # TF32 matmul/cuDNN kernels are switched on for GPUs with compute capability >= 8
    # (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
    _supports_tf32 = torch.cuda.get_device_properties(0).major >= 8
    if _supports_tf32:
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True

using device: cuda


In [101]:
# Seed NumPy's global RNG so the random overlay colours drawn by the plotting
# helpers (np.random.random calls) are reproducible from run to run.
np.random.seed(3)

def show_mask(mask, ax, random_color=False, borders = True):
    """Overlay a segmentation mask on ``ax`` as a semi-transparent RGBA image.

    Args:
        mask: Array whose last two dimensions are (height, width). Any leading
            dimensions must have size 1 (e.g. ``(H, W)`` or ``(1, H, W)``).
            Expected to be a binary (boolean or 0/1) mask; it is cast to uint8.
        ax: Object with an ``imshow`` method (a matplotlib Axes).
        random_color: If True, use a random RGB colour from NumPy's global RNG;
            otherwise a fixed blue. Alpha is 0.6 in both cases.
        borders: If True, also draw the mask's external contours (requires OpenCV).
    """
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        color = np.array([30/255, 144/255, 255/255, 0.6])
    h, w = mask.shape[-2:]
    # Collapse to a plain 2-D uint8 image up front. The overlay already assumed the
    # mask holds exactly h*w elements, but cv2.findContours was previously handed
    # the un-reshaped array and would reject e.g. a (1, H, W) mask.
    mask = mask.astype(np.uint8).reshape(h, w)
    mask_image = mask[:, :, None] * color.reshape(1, 1, -1)
    if borders:
        # Imported lazily so the helper works without OpenCV when borders=False.
        import cv2
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        # Try to smooth contours
        contours = [cv2.approxPolyDP(contour, epsilon=0.01, closed=True) for contour in contours]
        mask_image = cv2.drawContours(mask_image, contours, -1, (1, 1, 1, 0.5), thickness=2)
    ax.imshow(mask_image)

def show_points(coords, labels, ax, marker_size=375):
    """Scatter prompt points on ``ax`` as white-edged stars.

    Points whose label equals 1 are drawn in green first, then points whose
    label equals 0 in red. ``coords`` is indexed as ``[:, 0]`` (x) and
    ``[:, 1]`` (y) after boolean selection with ``labels``.
    """
    star_style = dict(marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
    for label_value, point_color in ((1, 'green'), (0, 'red')):
        selected = coords[labels == label_value]
        ax.scatter(selected[:, 0], selected[:, 1], color=point_color, **star_style)

def show_box(box, ax):
    """Draw ``box`` on ``ax`` as an unfilled green rectangle.

    ``box`` is read as ``(x0, y0, x1, y1)``: elements 0 and 1 give the anchor
    corner, and the width/height are the differences to elements 2 and 3.
    """
    left, top = box[0], box[1]
    width = box[2] - box[0]
    height = box[3] - box[1]
    outline = plt.Rectangle((left, top), width, height, edgecolor='green', facecolor=(0, 0, 0, 0), lw=2)
    ax.add_patch(outline)

def show_masks(image, masks, scores, point_coords=None, box_coords=None, input_labels=None, borders=True):
    """Plot every (mask, score) pair over ``image``, one 10x10 inch figure per mask.

    Args:
        image: Image handed to ``plt.imshow`` as the background.
        masks: Iterable of masks, each forwarded to ``show_mask``.
        scores: Sized iterable of scores, zipped with ``masks``.
        point_coords: Optional prompt points; when given, ``input_labels`` is required.
        box_coords: Optional prompt box forwarded to ``show_box``.
        input_labels: Labels for ``point_coords``.
        borders: Forwarded to ``show_mask``.

    A "Mask k, Score: s" title is added only when there is more than one score.
    """
    for index, (mask, score) in enumerate(zip(masks, scores), start=1):
        plt.figure(figsize=(10, 10))
        plt.imshow(image)
        axes = plt.gca()
        show_mask(mask, axes, borders=borders)

        if point_coords is not None:
            assert input_labels is not None
            show_points(point_coords, input_labels, axes)

        if box_coords is not None:
            # box prompt
            show_box(box_coords, axes)

        if len(scores) > 1:
            plt.title(f"Mask {index}, Score: {score:.3f}", fontsize=18)

        plt.axis('off')
        plt.show()

In [102]:
import torch
import torchvision
# Print the library versions and CUDA availability so they are recorded in the
# notebook output next to the results.
print("PyTorch version:", torch.__version__)
print("Torchvision version:", torchvision.__version__)
print("CUDA is available:", torch.cuda.is_available())
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
import json
from monai.metrics import DiceMetric, MeanIoU, SurfaceDiceMetric, SSIMMetric, GeneralizedDiceScore
#from segment_anything.utils.transforms import ResizeLongestSide
from collections import defaultdict
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split
from monai.losses import GeneralizedDiceLoss, DiceLoss, GeneralizedDiceFocalLoss
from monai.metrics import DiceMetric, GeneralizedDiceScore
from LinearWarmupCosine import LinearWarmupCosineAnnealingLR

PyTorch version: 2.4.0+cu121
Torchvision version: 0.19.0+cu121
CUDA is available: True


In [103]:
def show_anns(anns):
    """Overlay each annotation's mask on the current matplotlib axes in a random colour.

    Args:
        anns: Sequence of dicts, each with a 2-D ``'segmentation'`` array and an
            ``'area'`` value used for ordering. An empty sequence is a no-op.

    Each mask is drawn as a solid random colour whose alpha channel is the mask
    multiplied by 0.35. Colours come from NumPy's global RNG.
    """
    if len(anns) == 0:
        return
    # Largest masks are drawn first so that smaller ones end up on top.
    sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)
    ax = plt.gca()
    ax.set_autoscale_on(False)
    for ann in sorted_anns:
        m = ann['segmentation']
        # One RGB triple per annotation, broadcast over the whole (H, W, 3) image.
        color_mask = np.random.random((1, 3))
        img = np.ones((m.shape[0], m.shape[1], 3)) * color_mask
        ax.imshow(np.dstack((img, m*0.35)))

In [104]:
# Dataset locations.
# NOTE(review): these are machine-specific absolute paths; point them at your own
# copy of the dataset before running.
# image_folder = "C:/Users/39327/Desktop/SAM/DataSet/CVC-300/images"
# mask_folder =  "C:/Users/39327/Desktop/SAM/DataSet/CVC-300/masks"
# save_folder = "C:/Users/39327/Desktop/SAM/DataSet/CVC-300/dtatset"
image_folder = "C:/Users/39327/Desktop/test/dataset/DataSet/CVC-ClinicDB/images"
mask_folder = "C:/Users/39327/Desktop/test/dataset/DataSet/CVC-ClinicDB/masks"
save_folder = "C:/Users/39327/Desktop/test"

os.makedirs(save_folder, exist_ok = True)


def _find_png_files(folder):
    """Return the sorted paths of every ``.png`` file found under ``folder`` (recursively)."""
    return sorted(
        os.path.join(root, name)
        for root, _dirs, files in os.walk(folder)
        for name in files
        if name.endswith(".png")
    )


# Images and masks are paired purely by list position below, so both lists are
# sorted: the pairing must not depend on the order the filesystem lists files in.
# This assumes an image and its mask share the same relative file name.
image_path = _find_png_files(image_folder)
mask_path = _find_png_files(mask_folder)

if not image_path or not mask_path:
    raise FileNotFoundError(
        f"No .png files found (images: {len(image_path)} in {image_folder!r}, "
        f"masks: {len(mask_path)} in {mask_folder!r})"
    )
if len(image_path) != len(mask_path):
    raise ValueError(
        f"Image/mask count mismatch: {len(image_path)} images vs {len(mask_path)} masks"
    )

# Quick sanity check that the last image and the last mask line up.
print(image_path[-1], mask_path[-1])

# with open('D:\Yuheng Li\Segment Anything\kvasir-seg\\kavsir_bboxes.json') as f:
#     labels = json.load(f)

# 90/10 split with a fixed seed; image and mask lists are split in lockstep.
X_train, X_test, y_train, y_test = train_test_split(image_path, mask_path, test_size=0.1, random_state=49)

C:/Users/39327/Desktop/test/dataset/DataSet/CVC-ClinicDB/images\99.png C:/Users/39327/Desktop/test/dataset/DataSet/CVC-ClinicDB/masks\99.png


In [105]:
# import numpy as np
# import torch
# from ultralytics import YOLO
# 
# # Define the function to convert YOLO bounding boxes to the required format
# def extract_bboxes_from_yolo(yolo_data):
#     """
#     Compute bounding boxes from YOLO data.
# 
#     yolo_data: Tensor with shape (num_instances, 4) containing detected bounding boxes.
#                Each bounding box is represented as [x_min, y_min, x_max, y_max].
# 
#     Returns: bbox array [num_instances, (y1, x1, y2, x2)].
#     """
#     num_instances = yolo_data.shape[0]
#     boxes = np.zeros([num_instances, 4], dtype=np.int32)
# 
#     for i in range(num_instances):
#         x_min, y_min, x_max, y_max = yolo_data[i, :4]
#         boxes[i] = np.array([y_min, x_min, y_max, x_max])
#     
#     return boxes
# 
# # Load the model
# model = YOLO("C:/Users/39327/runs/detect/train57/weights/best.pt")
# 
# def extract_bboxes(image, num_instances):
#     """
#     Perform object detection on the image and extract bounding boxes.
# 
#     image: The input image on which to perform object detection.
# 
#     Returns: bbox array [num_instances, (y1, x1, y2, x2)].
#              If no boxes are detected, returns a bounding box that covers the entire image.
#     """
#     results = model(image, device='cuda', conf=0.5)  # Perform inference
#     #print("results", results)
# 
#     if not results:  # If results list is empty
#         height, width = image.shape[:2]
#         return np.array([[0, 0, height, width]], dtype=np.int32)
#     
#     result = results[0]  # Assuming only one result is returned for one image
#     boxes = result.boxes  # Boxes object for bounding box outputs
#     #print("boxes", boxes)
# 
#     # Extract the 'xyxy' attribute
#     xyxy = boxes.xyxy.cpu().numpy()  # Convert tensor to numpy array
# 
#     if xyxy.size == 0:  # No boxes detected
#         height, width = image.shape[:2]
#         return np.array([[0, 0, height, width]], dtype=np.int32)
# 
#     # Convert to the required format
#     extracted_boxes = extract_bboxes_from_yolo(xyxy)
#     
# 
# 
# 
#     print("Extracted boxes:", extracted_boxes)
#     return extracted_boxes


In [106]:
from ultralytics import YOLO

# Load the YOLO detector whose boxes are later used as box prompts for SAM 2.
# NOTE(review): the weights path is machine-specific; "best.pt" under
# runs/detect/train10 looks like the output of an earlier training run — confirm
# and make it configurable.
# Alternative kept for reference (build from the yolov8x config and load yolov8x.pt):
#model = YOLO("yolov8x.yaml").load("yolov8x.pt")
model=YOLO("C:/Users/39327/runs/detect/train10/weights/best.pt")
# Detection helper: runs the detector on one image per call.
def extract_bboxes(image, num_instances):
    """Detect objects in ``image`` with the module-level YOLO ``model`` and return their boxes.

    Args:
        image: Image as a NumPy array whose first two dimensions are (height, width);
            it is passed unchanged to ``model``.
        num_instances: Unused; kept so existing callers keep working.

    Returns:
        ``(N, 4)`` array of boxes in ``[x_min, y_min, x_max, y_max]`` order (the
        detector's ``xyxy`` output). If nothing is detected, a single ``int32``
        box covering the whole image: ``[[0, 0, width, height]]``.
    """
    #print("image address for YOLO model:", image)
    results = model(image,device='cuda',conf=0.2)  # return a list of Results objects

    # A single image is passed in, so only the first Results entry is relevant.
    if results:
        xyxy_numpy = results[0].boxes.xyxy.cpu().numpy()
        if xyxy_numpy.size > 0:
            return xyxy_numpy

    # Nothing detected (or no result at all): fall back to the full image. Boxes
    # are x-first, so the far corner is (width, height), not (height, width).
    height, width = image.shape[:2]
    return np.array([[0, 0, width, height]], dtype=np.int32)

In [107]:
class ColonDataset(Dataset):
    """Dataset yielding ``(image, mask, boxes)`` samples for box-prompted segmentation.

    Each sample is read from a pair of files: an RGB image and a grayscale mask.
    The boxes come from ``extract_bboxes`` (run on the image before ``transform``).

    Args:
        image_paths: Sequence of image file paths.
        mask_paths: Sequence of mask file paths, aligned index-by-index with
            ``image_paths``.
        transform: Optional callable applied to the image array only (not the mask).
            NOTE(review): its output is still treated as an (H, W, C) array below —
            confirm before passing transforms that return tensors or change geometry.

    Raises:
        ValueError: If the two path sequences differ in length.
    """

    def __init__(self, image_paths, mask_paths, transform=None):
        if len(image_paths) != len(mask_paths):
            raise ValueError(
                f"image_paths and mask_paths must have the same length "
                f"(got {len(image_paths)} and {len(mask_paths)})"
            )
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, mask, bbox_arr)`` for sample ``idx``.

        ``image`` is a float32 tensor of shape (C, H, W) and ``mask`` a float32
        tensor of shape (1, H, W); both keep the pixel values read from disk
        (no rescaling). ``bbox_arr`` is whatever ``extract_bboxes`` returns.
        """
        # Load as NumPy arrays; the context managers close the files promptly.
        with Image.open(self.image_paths[idx]) as image_file:
            image = np.array(image_file.convert("RGB"))
        with Image.open(self.mask_paths[idx]) as mask_file:
            mask = np.array(mask_file.convert("L"))  # grayscale mask

        # The Ultralytics detector interprets NumPy inputs as BGR (OpenCV
        # convention), whereas PIL produced RGB, so flip the channel axis for
        # detection only. The second argument is unused by extract_bboxes.
        detector_input = np.ascontiguousarray(image[:, :, ::-1])
        bbox_arr = extract_bboxes(detector_input, len(self.image_paths))

        # Apply transformations (if any)
        if self.transform:
            image = self.transform(image)

        image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1)  # (H, W, C) -> (C, H, W)
        mask = torch.tensor(mask, dtype=torch.float32).unsqueeze(0)  # add channel dimension

        return image, mask, bbox_arr


In [108]:
def my_collate(batch):
    """Collate ``(image, mask, boxes)`` samples into one batch.

    Images and masks are stacked along a new leading dimension. The boxes are
    returned as a plain list with one entry per sample, left un-stacked.

    Returns:
        Tuple ``(images, masks, bboxes)``.
    """
    image_list = [sample[0] for sample in batch]
    mask_list = [sample[1] for sample in batch]
    bboxes = [sample[2] for sample in batch]

    return torch.stack(image_list), torch.stack(mask_list), bboxes


In [109]:
# Training split: shuffled batches of 64; `my_collate` keeps the per-image boxes as a list.
train_dataset = ColonDataset(image_paths=X_train, mask_paths=y_train)
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
    collate_fn=my_collate,
)

# Held-out split: one image per batch, also shuffled.
val_dataset = ColonDataset(image_paths=X_test, mask_paths=y_test)
val_dataloader = DataLoader(
    dataset=val_dataset,
    batch_size=1,
    shuffle=True,
    collate_fn=my_collate,
)

In [110]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import torch
import numpy as np


In [111]:
import PIL.Image as Image
import numpy as np
import matplotlib.pyplot as plt 
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

checkpoint = "segment-anything-2/checkpoints/sam2_hiera_tiny.pt"
model_cfg = "sam2_hiera_t.yaml"

# Build the SAM 2 model once and wrap it in an image predictor. (The model used
# to be built twice, with the first predictor immediately overwritten.)
# NOTE(review): the device is hard-coded to CUDA here although a `device` is
# selected near the top of the notebook — consider passing that instead.
sam2_model = build_sam2(model_cfg,checkpoint, device='cuda')
predictor = SAM2ImagePredictor(sam2_model)

# One metric object is enough; it is reset before every image so that each
# aggregate() call yields the Dice score of a single image.
dice = DiceMetric()
batch_dice = []  # per-image Dice scores, accumulated over all batches seen so far
for batch in train_dataloader:
    images, masks, bboxes = batch

    # Loop through each image, mask, and bounding box in the current batch
    for i in range(len(images)):
        mask = masks[i]      # ground-truth mask, shape (1, H, W)
        # Only the first detected box is used as the prompt; further detections
        # for the same image are ignored.
        bbox = bboxes[i][0]

        # (C, H, W) float tensor -> (H, W, C) array scaled by 1/255.
        image_np = images[i].permute(1, 2, 0).numpy()
        image = image_np
        image_np_normalized = image/255.0

        predictor.set_image(image_np_normalized)
        masks1, scores, _ = predictor.predict(
                point_coords=None,
                point_labels=None,
                box=bbox,
                #box=None,
                multimask_output=False,
            )

        # Binarise prediction and ground truth.
        # NOTE(review): presumably `masks1` is already binary, in which case the
        # 0.2 threshold has no effect — confirm.
        masks1_tensor = torch.tensor(masks1, dtype=torch.float32)
        masks1_tensor = (masks1_tensor > 0.2).float()
        # NOTE(review): the ground-truth mask is loaded without rescaling (0-255 for
        # 8-bit PNGs), so `> 0.5` marks every non-zero pixel as foreground — confirm
        # this is intended.
        mask=(mask>0.5).float()

        # Dice for this image alone; arguments are passed by keyword in the
        # documented (y_pred, y) order, each with a batch dimension added.
        dice.reset()
        dice(y_pred=masks1_tensor.unsqueeze(0), y=mask.unsqueeze(0))
        final_dice = dice.aggregate().numpy()[0]
        batch_dice.append(final_dice)
        #print(f"Dice Score for image {i}: {final_dice}")
        #show_masks(image_np_normalized, masks1, scores, box_coords=bbox)

    # Running mean over every image processed so far. This loop iterates
    # `train_dataloader`, so the score is labelled as the train split.
    print(f'Running mean dice (train split): {sum(batch_dice) / len(batch_dice)}')
    #print("batch_dice", batch_dice)
    #print("len batch dice", len(batch_dice))
    #print(f"Batch Dice Scores: {batch_dice}")
        

        
        # Example: You could visualize, save, or pass the image, mask, bbox to a model, etc.



0: 480x640 1 polyp, 41.4ms
Speed: 2.0ms preprocess, 41.4ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 polyp, 34.0ms
Speed: 1.0ms preprocess, 34.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 polyps, 34.1ms
Speed: 1.0ms preprocess, 34.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 polyp, 34.0ms
Speed: 1.0ms preprocess, 34.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 polyp, 32.5ms
Speed: 1.0ms preprocess, 32.5ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 20.5ms
Speed: 1.0ms preprocess, 20.5ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 polyp, 19.5ms
Speed: 2.0ms preprocess, 19.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 polyp, 19.0ms
Speed: 1.0ms preprocess, 19.0ms inference, 2.0ms postprocess per image at shape (1, 3, 4