In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

#libs
import matplotlib.pyplot as plt
from matplotlib import colormaps
import numpy as np
import os
import gc

import torch 
from torch.utils.data import DataLoader
from torch.utils.cpp_extension import CUDA_HOME

import torchvision
from torchvision.transforms.functional import pil_to_tensor
from torchmetrics import JaccardIndex

#our classes
import utils #contains sam_utils, visual_utils, and other utility functions
from datasets.dataset_loading import CocoLoader 

#sam
from segment_anything import SamPredictor, sam_model_registry, SamAutomaticMaskGenerator
from segment_anything.utils.transforms import ResizeLongestSide

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# CUDA tests and setup

os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # might not be viable, check again!
torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("CUDA available:" + str(torch.cuda.is_available()))
print("CUDA_HOME:" + str(CUDA_HOME))

available_gpus = [torch.cuda.device(i) for i in range(torch.cuda.device_count())]
print(available_gpus)
print(torch.cuda.device_count())

CUDA available:True
CUDA_HOME:/home.stud/svobo114/.conda/envs/detect_env_clone
[<torch.cuda.device object at 0x7f9f98e1d750>]
1


In [3]:
coco = CocoLoader()
transforms = None
data_train, api = coco.load_train(transformations=transforms)
print(len(data_train))
# len 2014 = 82783
# len 2017 = 118287

loading annotations into memory...
Done (t=10.01s)
creating index...
index created!
118287


In [4]:
predictor, sam = utils.prepare_sam("cuda", model="b")
resize_transform = ResizeLongestSide(sam.image_encoder.img_size)

  state_dict = torch.load(f)


In [46]:
# Set batch size and number of workers
batch_size = 7
num_workers = 4
shuffle = True

# Clear CUDA cache
with torch.no_grad():
    torch.cuda.empty_cache()
gc.collect()

# Create DataLoader from the training dataset
data_loader = DataLoader(
    data_train,
    batch_size=batch_size,
    shuffle=shuffle,
    num_workers=num_workers,
    collate_fn=lambda x: tuple(zip(*x)),
)

# large SAM-1 for 20 GB GPU with cca 7 boxes per image      -> batch<=4, (maybe 5 if lucky)
# base SAM-1 for 20 GB  , 7 boxes                           -> batch<=7 

In [47]:
debug_visuals = False
batch_max = 10 #7200*7=50400 images cca 1/2 of 2017 dataset

# CUDA cleanup before running
with torch.no_grad():
    torch.cuda.empty_cache()
gc.collect()
torch.cuda.reset_peak_memory_stats()

dataset_IoU = JaccardIndex(task="binary")  # bg x mask

# run batches
for i, batch in enumerate(data_loader):
    print("Batch: " + str(i))
    images_pil = list(batch[0])
    metadata = list(batch[1])

    # separate GT for metrics
    gt_boxes = []
    gt_masks = []
    images_to_process = []
    for j in range(len(images_pil)):
        masks_img, boxes_img = utils.coco_masks_boxes(
            metadata[j], api
        )  # load boxes x0,y0,x1,y1
        if len(boxes_img) == 0:
            continue

        boxes_img = torch.Tensor(boxes_img)  # change format and to tensor
        masks_img = masks_img
        gt_boxes.append(boxes_img)
        gt_masks.append(masks_img)
        images_to_process.append(images_pil[j])

    if len(gt_masks) == 0:
        print("no masks in batch")
        continue  # next batch alltogether


    # TODO for now using GT boxes, later detection module HERE
    inferrence_boxes = gt_boxes

    # prepare input for batch
    sam_batched_inputs = []
    for j in range(len(images_to_process)):
        img = np.array(images_to_process[j])
        dict_img = {  # written according to official sam notebook predictor.ipynb
            "image": utils.prepare_image_for_batch(img, resize_transform, sam.device),
            "boxes": resize_transform.apply_boxes_torch(
                inferrence_boxes[j].to(sam.device), img.shape[:2]
            ),
            "original_size": img.shape[:2],
        }
        if len(inferrence_boxes[j]) > 0:
            sam_batched_inputs.append(dict_img)

    # run inference
    batched_output = sam(sam_batched_inputs, multimask_output=True)

    # Take best masks in each image
    for j, dict_output in enumerate(
        batched_output
    ):  # dict_keys(['masks', 'iou_predictions', 'low_res_logits'])
        pred_quality = dict_output["iou_predictions"]
        best = np.argmax(pred_quality.cpu(), axis=1)

        arange = torch.arange(best.shape[0])
        best_masks = dict_output["masks"][arange, best]  # take best mask for each box

        # visualize if needed?
        if debug_visuals:
            plt.imshow(images_pil[j])
            utils.plot_box(box=gt_boxes[j][0], ax=plt.gca())
            plt.axis("off")
            plt.show()

            plt.imshow(best_masks.cpu().numpy()[0])
            plt.axis("off")
            plt.show()

            plt.imshow(gt_masks[j][0])
            plt.axis("off")
            plt.show()

        # calculate metric over one
        if len(gt_masks[j]) == 0:
            print("no masks! but " + str(len(gt_boxes[j])) + " boxes")

        dataset_IoU.update(best_masks.cpu(), torch.Tensor(gt_masks[j]))  # both on cpu?

    print(
        "Max GB allocated: "
        + str(torch.cuda.max_memory_allocated() // 1000000000)  # full GB
        + "."
        + str(
            (torch.cuda.max_memory_allocated() % 1000_000_000) // 1_000_000
        )  # 3 digits
    )
    if i >= batch_max:  # just few batches for now, print final IoU
        print("Mean IoU: " + str(dataset_IoU.compute()))
        break


# CUDA cleanup after running
with torch.no_grad():
    gc.collect()

Batch: 0
Max GB allocated: 18.353
Batch: 1
Max GB allocated: 18.353
Batch: 2
Max GB allocated: 18.376
Batch: 3
Max GB allocated: 18.376
Batch: 4
Max GB allocated: 18.432
Batch: 5
Max GB allocated: 18.432
Batch: 6
Max GB allocated: 18.432
Batch: 7
Max GB allocated: 18.432
Batch: 8
Max GB allocated: 18.432
Batch: 9
Max GB allocated: 18.432
Batch: 10
Max GB allocated: 18.432
Mean IoU: tensor(0.7451)


In [43]:
print(1 / 0)

ZeroDivisionError: division by zero