In [73]:
import lib.Mask2Former as m2f
import lib.Mask2Former.mask2former as mask2former
import os
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from detectron2.engine import (launch)
from detectron2.config import get_cfg
from detectron2.projects.deeplab import add_deeplab_config
from detectron2.data import build_detection_train_loader
import numpy as np

In [74]:
# Dataset layout, all relative to the notebook's working directory:
#   DATA_LOCATION/DATA_SOURCE  - the original images and PNG instance masks
#   DATA_LOCATION/DATA_DIR     - the COCO-formatted copy of the dataset
DATA_LOCATION = "_data"
DATA_SOURCE = "combined"
DATA_DIR = "coco"

# Tell detectron2 where the COCO-formatted dataset root is.
# NOTE(review): detectron2 has already been imported by the time this runs; if
# its built-in dataset registration reads this variable at import time, the
# assignment comes too late to take effect - confirm.
os.environ.update({"DETECTRON2_DATASETS": os.path.join(DATA_LOCATION, DATA_DIR)})

# Convert the dataset to COCO format
The following commands convert the existing PNG-mask-based dataset into the COCO annotation files required for training Mask2Former.

In [4]:
!cd {DATA_LOCATION} && python mask_to_coco.py --images {DATA_SOURCE}/val/images/ --masks {DATA_SOURCE}/val/leaf_instances/ --output {DATA_DIR}/annotations/instances_val2017.json --fixed-category-id 58 --fixed-category-name "potted plant" --default-categories
!cd {DATA_LOCATION} && python mask_to_coco.py --images {DATA_SOURCE}/train/images/ --masks {DATA_SOURCE}/train/leaf_instances/ --output {DATA_DIR}/annotations/instances_train2017.json --fixed-category-id 58 --fixed-category-name "potted plant" --default-categories

Spawning pool with 102 workers
Starting Pool
100%|█████████████████████████████████████████| 772/772 [00:24<00:00, 31.39it/s]
Writing to coco/annotations/instances_val2017.json
Spawning pool with 102 workers
Starting Pool
100%|█████████████████████████████████████| 11407/11407 [02:02<00:00, 93.30it/s]
Writing to coco/annotations/instances_train2017.json


In [4]:
!cd {DATA_LOCATION} && mkdir {DATA_DIR}/train2017
!cd {DATA_LOCATION} && cp {DATA_SOURCE}/train/images/* {DATA_DIR}/train2017
!cd {DATA_LOCATION} && mkdir {DATA_DIR}/val2017
!cd {DATA_LOCATION} && cp {DATA_SOURCE}/val/images/* {DATA_DIR}/val2017

mkdir: cannot create directory ‘coco/train2017’: File exists


In [39]:
# --- Model configuration -----------------------------------------------------
# YAML file that is merged into the detectron2 config before training.
CONFIG = "lib/Mask2Former/configs/coco/instance-segmentation/swin/maskformer2_swin_base_IN21k_384_bs16_50ep.yaml"

# --- Training data locations ---------------------------------------------------
# Images and their instance masks sit in sibling sub-directories of DATASET_DIR.
DATASET_DIR = "_data/combined/train"
IMAGES_DIR_NAME = "images"
INSTANCES_DIR_NAME = "leaf_instances"
IMAGE_DIR = os.path.join(DATASET_DIR, IMAGES_DIR_NAME)
INSTANCES_DIR = os.path.join(DATASET_DIR, INSTANCES_DIR_NAME)

# --- Run settings ----------------------------------------------------------------
# NOTE(review): NUM_GPUS and LEARNING_RATE are not applied to the detectron2
# config in any cell visible here - confirm whether they are meant to be.
NUM_GPUS = 1
BATCH_SIZE = 8
LEARNING_RATE = 0.001

# Custom Data Loader

In [75]:
class LeavesDataset(Dataset):
    """Map-style dataset pairing each image with its PNG instance-label mask.

    For every file name found in ``image_dir`` a file with the same name is
    read from ``label_dir``. The label is loaded as an 8-bit single-channel
    image; pixel value 0 is skipped and every other distinct value yields one
    entry (box, class, binary mask) in the returned ``instances`` dict.

    Args:
        image_dir: Directory containing the input images.
        label_dir: Directory containing the label masks, named like the images.
        transform: Optional callable applied to the RGB PIL image. It may
            return a CHW tensor or a PIL image.
    """

    def __init__(self, image_dir, label_dir, transform=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes a given
        # index refer to the same file on every run.
        self.image_files = sorted(os.listdir(image_dir))

    def __len__(self):
        """Return the number of files found in ``image_dir``."""
        return len(self.image_files)

    @staticmethod
    def _instances_from_label(label):
        """Build the ``instances`` dict from a 2-D label map.

        Args:
            label: 2-D (height, width) integer array-like or tensor; 0 is
                ignored, every other distinct value becomes one instance.

        Returns:
            dict with
              ``gt_boxes``   - int64 tensor (N, 4) as [xmin, ymin, xmax, ymax],
                               where the maxima are the last pixel covered by
                               the mask (inclusive),
              ``gt_classes`` - int64 tensor (N,) holding the label values,
              ``gt_masks``   - bool tensor (N, height, width).
            N is 0 when the label contains no positive value.
        """
        label = torch.as_tensor(label)
        boxes, classes, masks = [], [], []
        for obj_class in torch.unique(label):
            if obj_class > 0:
                mask = label == obj_class
                # torch.nonzero yields (row, col) pairs, i.e. (y, x): unpack
                # in that order so the box really is x-first.
                coords = torch.nonzero(mask)
                ymin, xmin = coords.min(dim=0).values.tolist()
                ymax, xmax = coords.max(dim=0).values.tolist()
                boxes.append([xmin, ymin, xmax, ymax])
                # NOTE(review): the raw pixel value is used as the class id. If
                # the masks encode per-instance ids rather than categories,
                # this is probably not the intended class - confirm.
                classes.append(int(obj_class.item()))
                masks.append(mask)

        if masks:
            gt_masks = torch.stack(masks)
        else:
            # torch.stack rejects an empty list; return an empty, correctly
            # shaped tensor for images without any instance instead.
            gt_masks = torch.zeros((0, *label.shape), dtype=torch.bool)

        return {
            "gt_boxes": torch.tensor(boxes, dtype=torch.int64).reshape(-1, 4),
            "gt_classes": torch.tensor(classes, dtype=torch.long),
            "gt_masks": gt_masks,
        }

    def __getitem__(self, index):
        """Load sample ``index``.

        Returns:
            dict with keys ``image`` (the transformed image), ``height``,
            ``width`` (size of the returned image) and ``instances`` (see
            ``_instances_from_label``).
        """
        file_name = self.image_files[index]

        # Context managers close the underlying files as soon as the pixel
        # data has been converted.
        with Image.open(os.path.join(self.image_dir, file_name)) as raw_image:
            image = raw_image.convert("RGB")
        # NOTE(review): "L" is 8 bits per pixel, so label images with more
        # than 255 distinct values or a colour palette would not survive this
        # conversion - confirm the mask format.
        with Image.open(os.path.join(self.label_dir, file_name)) as raw_label:
            label = raw_label.convert("L")

        if self.transform:
            image = self.transform(image)

        # The transform may or may not turn the PIL image into a CHW tensor.
        if isinstance(image, torch.Tensor):
            height, width = image.shape[1], image.shape[2]
        else:
            width, height = image.size

        # Keep the masks aligned with a resized image. Nearest-neighbour
        # resampling preserves the label values. Only a plain resize is
        # compensated here; crops or flips inside `transform` are not.
        if label.size != (width, height):
            label = label.resize((width, height), resample=Image.NEAREST)

        # Convert label to tensor
        label = torch.from_numpy(np.array(label))

        return {
            "image": image,
            "height": height,
            "width": width,
            "instances": self._instances_from_label(label),
        }

In [79]:
def collate_fn(batch):
    """Turn a list of dataset samples into a list of per-image input dicts.

    Args:
        batch: Iterable of sample dicts with keys ``image``, ``height``,
            ``width`` and ``instances``; ``instances`` must provide
            ``gt_boxes``, ``gt_classes`` and ``gt_masks`` as tensors or
            nested sequences.

    Returns:
        One dict per sample, in input order, with keys ``image``,
        ``instances``, ``height`` and ``width``. The three ``gt_*`` entries
        are tensors (``gt_classes`` as int64). The input dicts are left
        unmodified.
    """
    batched_inputs = []
    for item in batch:
        # Work on a shallow copy so the caller's sample is not mutated and any
        # additional keys are carried over.
        instances = dict(item["instances"])
        # as_tensor converts sequences and passes existing tensors through
        # without the copy (and warning) that torch.tensor(tensor) produces.
        instances["gt_boxes"] = torch.as_tensor(instances["gt_boxes"])
        instances["gt_classes"] = torch.as_tensor(instances["gt_classes"], dtype=torch.long)
        instances["gt_masks"] = torch.as_tensor(instances["gt_masks"])

        batched_inputs.append({
            "image": item["image"],
            "instances": instances,
            # Height and width are taken from this very sample; a dict shared
            # across the loop would hand every sample the last item's size.
            "height": item["height"],
            "width": item["width"],
        })

    return batched_inputs

class LeavesTrainer(m2f.train_net.Trainer):
    """Mask2Former trainer whose training batches come from ``LeavesDataset``."""

    @classmethod
    def build_train_loader(cls, _):
        """Build the training data loader over the leaf images and masks.

        The config argument is ignored: the data directories and the batch
        size come from the notebook constants ``IMAGE_DIR``, ``INSTANCES_DIR``
        and ``BATCH_SIZE``.

        Returns:
            The loader created by ``build_detection_train_loader``.
        """
        # Resize to a fixed 800x800, convert to a float tensor and normalise
        # with the usual ImageNet channel statistics.
        # NOTE(review): detectron2 models commonly normalise inside the model
        # using cfg.MODEL.PIXEL_MEAN / PIXEL_STD - confirm that the image is
        # not being normalised twice.
        transform = transforms.Compose([
            transforms.Resize((800, 800)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        dataset = LeavesDataset(IMAGE_DIR, INSTANCES_DIR, transform=transform)

        # mapper=None: the dataset items are passed on as they are. The batch
        # size follows the notebook-level BATCH_SIZE constant instead of a
        # separate hard-coded number that could silently drift from it.
        return build_detection_train_loader(dataset, mapper=None, total_batch_size=BATCH_SIZE)

In [80]:
def get_trainer(cfg):
    """Instantiate ``LeavesTrainer`` with ``cfg`` and run its training loop.

    Args:
        cfg: Config object handed to the ``LeavesTrainer`` constructor.

    Returns:
        Whatever ``LeavesTrainer.train()`` returns. Despite the function's
        name this is the outcome of training, not the trainer object.
    """
    # Checkpoint loading stays disabled: the original call
    # `trainer.resume_or_load(resume=args.resume)` relies on an `args` object
    # that is not defined in the cells visible here.
    leaves_trainer = LeavesTrainer(cfg)
    outcome = leaves_trainer.train()
    return outcome

In [81]:
# Assemble the configuration: start from get_cfg(), register the DeepLab and
# Mask2Former config extensions, then merge the values from the CONFIG YAML.
cfg = get_cfg()
add_deeplab_config(cfg)
mask2former.add_maskformer2_config(cfg)
cfg.merge_from_file(CONFIG)

# get_trainer() runs the whole training loop and hands back the return value
# of train(). That value is not an iterator, so it must not be advanced with
# next(); the name `trainer` is kept only so that other cells that refer to it
# keep working.
# NOTE(review): the recorded run of this cell stopped with a CUDA
# out-of-memory error - a smaller batch size or input resolution may be needed.
trainer = get_trainer(cfg)

#launch(get_trainer, 1, args=(cfg,))

[32m[07/16 16:45:32 d2.engine.defaults]: [0mModel:
MaskFormer(
  (backbone): D2SwinTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
      (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0): BasicLayer(
        (blocks): ModuleList(
          (0): SwinTransformerBlock(
            (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
            (attn): WindowAttention(
              (qkv): Linear(in_features=128, out_features=384, bias=True)
              (attn_drop): Dropout(p=0.0, inplace=False)
              (proj): Linear(in_features=128, out_features=128, bias=True)
              (proj_drop): Dropout(p=0.0, inplace=False)
              (softmax): Softmax(dim=-1)
            )
            (drop_path): Identity()
            (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
            (mlp): Mlp(
      

OutOfMemoryError: CUDA out of memory. Tried to allocate 254.00 MiB. GPU 