In [None]:
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/engine.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/utils.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/coco_utils.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/coco_eval.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/transforms.py

In [1]:
!python -m pip show torch

Name: torch
Version: 1.8.0
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org/
Author: PyTorch Team
Author-email: packages@pytorch.org
License: BSD-3
Location: c:\users\m.aboelmagd\.conda\envs\yolov8_segmentation\lib\site-packages
Requires: numpy, typing_extensions
Required-by: torchaudio, torchvision


In [None]:
!unrar x /content/drive/MyDrive/DEV_000F3102E45A_22_October_2023_10_44_29_jpg.rar

In [None]:
!pip install numpy==1.23.5

In [1]:
import torch
torch.cuda.is_available()

True

In [2]:
import json
import os
import numpy as np
import pandas as pd
import torch
import torchvision
from PIL import Image
from torch.utils.data import Dataset
from shapely.geometry import Polygon
from torchvision.io import read_image
from torchvision.ops.boxes import masks_to_boxes
from torchvision import tv_tensors
from torchvision.transforms.v2 import functional as F
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

In [3]:
from detection import utils,transforms,coco_eval,coco_utils
from detection.engine import train_one_epoch, evaluate

In [4]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        self.imgs = list(sorted(os.listdir(os.path.join(root, "masks"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "masks"))))
    def __getitem__(self, idx):
        # load images and masks
        img_path = os.path.join(self.root, "images", self.imgs[idx])
        mask_path = os.path.join(self.root, "masks", self.masks[idx])
        img = read_image(img_path)
        mask = read_image(mask_path)
        # instances are encoded as different colors
        obj_ids = torch.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]
        num_objs = len(obj_ids)

        # split the color-encoded mask into a set
        # of binary masks
        masks = (mask == obj_ids[:, None, None]).to(dtype=torch.uint8)

        # get bounding box coordinates for each mask
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            if xmin == xmax or ymin == ymax:
                continue
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)

        image_id = idx
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        # Wrap sample and targets into torchvision tv_tensors:
        img = tv_tensors.Image(img)

        target = {}
        target["boxes"] = tv_tensors.BoundingBoxes(boxes, format="XYXY", canvas_size=F.get_size(img))
        target["masks"] = tv_tensors.Mask(masks)
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.imgs)

In [5]:
from torchvision.transforms import v2 as T
def get_transform(train):
    transforms = []
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    transforms.append(T.ToDtype(torch.float, scale=True))
    transforms.append(T.ToPureTensor())
    return T.Compose(transforms)

In [6]:
def get_model_instance_segmentation(num_classes:int):
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask,
        hidden_layer,
        num_classes
    )

    return model

In [7]:
model = get_model_instance_segmentation(num_classes=2)

In [8]:
root_dir = r"D:\graval detection project\datasets\unperpared data\images under water\DEV_000F3102E45A_22_October_2023_10_44_29_jpg"
transforms = get_transform(train=True)
dataset = CustomDataset(
        root=root_dir,
        transforms=transforms)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=1e-2, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=5,
                                                   gamma=0.1)
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    num_workers=0,
    collate_fn=utils.collate_fn,
    pin_memory=False
)
num_epochs = 10
device = torch.device("cuda")
print(device)
model.to(device)
for epoch in range(num_epochs):
        run=train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=100)
        lr_scheduler.step()
        evaluate(model, data_loader, device=device)

cuda
Epoch: [0]  [  0/107]  eta: 0:04:59  lr: 0.000104  loss: 3.5689 (3.5689)  loss_classifier: 0.5987 (0.5987)  loss_box_reg: 0.3817 (0.3817)  loss_mask: 1.3363 (1.3363)  loss_objectness: 1.1008 (1.1008)  loss_rpn_box_reg: 0.1513 (0.1513)  time: 2.7987  data: 0.1758  max mem: 2118
Epoch: [0]  [100/107]  eta: 0:00:05  lr: 0.009529  loss: 0.3449 (0.6084)  loss_classifier: 0.0849 (0.1563)  loss_box_reg: 0.1637 (0.2107)  loss_mask: 0.0346 (0.0957)  loss_objectness: 0.0289 (0.0934)  loss_rpn_box_reg: 0.0148 (0.0522)  time: 0.7950  data: 0.1849  max mem: 2291
Epoch: [0]  [106/107]  eta: 0:00:00  lr: 0.010000  loss: 0.3962 (0.5980)  loss_classifier: 0.0827 (0.1535)  loss_box_reg: 0.1874 (0.2098)  loss_mask: 0.0346 (0.0923)  loss_objectness: 0.0394 (0.0924)  loss_rpn_box_reg: 0.0114 (0.0501)  time: 0.7918  data: 0.1833  max mem: 2291
Epoch: [0] Total time: 0:01:26 (0.8073 s / it)
creating index...
index created!
Test:  [  0/107]  eta: 0:01:54  model_time: 0.6925 (0.6925)  evaluator_time: 0.15

In [10]:
import matplotlib.pyplot as plt
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
test=r"D:\graval detection project\datasets\unperpared data\images under water\images\133_left_2023_10_22_10_44_52.jpg"
image = read_image(test)
eval_transform = get_transform(train=False)

model.eval()
with torch.no_grad():
    x = eval_transform(image)
    # convert RGBA -> RGB and move to device
    x = x[:3, ...].to(device)
    predictions = model([x, ])
    pred = predictions[0]


image = (255.0 * (image - image.min()) / (image.max() - image.min())).to(torch.uint8)
image = image[:3, ...]
pred_labels = [f"stone: {score:.3f}" for label, score in zip(pred["labels"], pred["scores"])]
pred_boxes = pred["boxes"]
output_image = draw_bounding_boxes(image, pred_boxes, pred_labels, colors="red")

masks = (pred["masks"] >=0.1).squeeze(1)
output_image = draw_segmentation_masks(output_image, masks, alpha=0.2, colors="green")


plt.figure(figsize=(12, 12))
plt.imshow(output_image.permute(1, 2, 0))

RuntimeError: [Errno 2] No such file or directory: 'D:\graval detection project\datasets\unperpared data\images under water\images\133_left_2023_10_22_10_44_52.jpg'

In [None]:
print(pred["boxes"])

tensor([[399.5537, 292.3679, 778.7164, 510.3171],
        [412.6888, 303.2516, 666.5576, 456.2682],
        [447.0803, 324.2172, 720.3475, 389.8848],
        [211.7322, 293.5160, 783.3342, 692.0176],
        [388.9870, 333.2180, 586.0803, 473.3236],
        [383.1368, 392.2585, 712.9292, 470.8636],
        [430.7133, 360.9658, 633.7264, 421.0163],
        [501.2540, 311.2120, 640.8521, 429.0163],
        [ 29.7483, 311.2657, 721.9089, 861.6812]], device='cuda:0')
