In [1]:
import os

import numpy as np
import torch
from detectron2.data import build_detection_train_loader
from detectron2.engine import (launch, default_argument_parser)
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

import lib.Mask2Former as m2f

In [2]:
# Folder names; the shell cells below `cd` into DATA_LOCATION and address the
# other two relative to it.
DATA_LOCATION = "_data"    # root folder that holds every dataset variant
DATA_SOURCE = "combined"   # sub-folder with the original images and PNG masks
DATA_DIR = "coco"          # sub-folder receiving the COCO-style copy

# Export the COCO folder through the environment.
# NOTE(review): presumably read by detectron2 to locate its datasets - confirm.
os.environ.update(DETECTRON2_DATASETS=os.path.join(DATA_LOCATION, DATA_DIR))

# Convert the dataset to COCO format
The following commands convert the existing PNG mask-based dataset into the COCO annotations required for training Mask2Former.

In [None]:
!cd {DATA_LOCATION} && python mask_to_coco.py --images {DATA_SOURCE}/val/images/ --masks {DATA_SOURCE}/val/leaf_instances/ --output {DATA_DIR}/annotations/instances_val2017.json --fixed-category-id 58 --fixed-category-name "potted plant"
!cd {DATA_LOCATION} && python mask_to_coco.py --images {DATA_SOURCE}/train/images/ --masks {DATA_SOURCE}/train/leaf_instances/ --output {DATA_DIR}/annotations/instances_train2017.json --fixed-category-id 58 --fixed-category-name "potted plant"

  File "/home/stefan.steinheber/bachelor/processing/leaf_segmentation/_data/mask_to_coco.py", line 141
    convert_masks_to_coco(images, masks, output, pool_size=pool_size, category=(fixed_category_id, fixed_category_name), default_categories)
                                                                                                                                           ^
SyntaxError: positional argument follows keyword argument
  File "/home/stefan.steinheber/bachelor/processing/leaf_segmentation/_data/mask_to_coco.py", line 141
    convert_masks_to_coco(images, masks, output, pool_size=pool_size, category=(fixed_category_id, fixed_category_name), default_categories)
                                                                                                                                           ^
SyntaxError: positional argument follows keyword argument


In [4]:
!cd {DATA_LOCATION} && mkdir {DATA_DIR}/train2017
!cd {DATA_LOCATION} && cp {DATA_SOURCE}/train/images/* {DATA_DIR}/train2017
!cd {DATA_LOCATION} && mkdir {DATA_DIR}/val2017
!cd {DATA_LOCATION} && cp {DATA_SOURCE}/val/images/* {DATA_DIR}/val2017

mkdir: cannot create directory ‘coco/train2017’: File exists


In [10]:
# --- Dataset folders used by the custom data loader -------------------------
# NOTE(review): the shell cells above read images from <source>/train/images
# and <source>/val/images; confirm that "_data/combined/images" and
# "_data/combined/leaf_instances" really exist without a split folder.
DATASET_DIR = "_data/combined"
IMAGES_DIR_NAME = "images"
INSTANCES_DIR_NAME = "leaf_instances"
IMAGE_DIR, INSTANCES_DIR = (
    os.path.join(DATASET_DIR, sub_dir)
    for sub_dir in (IMAGES_DIR_NAME, INSTANCES_DIR_NAME)
)

# --- Model config file and training settings --------------------------------
CONFIG = "configs/coco/instance-segmentation/swin/maskformer2_swin_base_IN21k_384_bs16_50ep.yaml"
NUM_GPUS = 1
BATCH_SIZE = 8
LEARNING_RATE = 0.001

# Custom Data Loader

In [None]:
class LeavesDataset(Dataset):
    """Dataset pairing RGB images with same-named instance-label masks.

    For every file name found in ``image_dir`` the image is read from
    ``image_dir`` and its label map from ``label_dir``. Each distinct positive
    pixel value of the label map becomes one instance.

    Args:
        image_dir: Folder containing the input images.
        label_dir: Folder containing one label image per input image, stored
            under the same file name.
        transform: Optional callable applied to the RGB PIL image only. If it
            changes the image size, the label map is resized to match using
            nearest-neighbour sampling. Other geometric transforms (crops,
            flips) are NOT mirrored onto the label map.
    """

    def __init__(self, image_dir, label_dir, transform=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sort so that an index
        # always refers to the same file.
        self.image_files = sorted(os.listdir(image_dir))

    def __len__(self):
        """Return the number of files found in ``image_dir``."""
        return len(self.image_files)

    @staticmethod
    def _instances_from_label(label):
        """Turn a 2-D integer label map into per-instance boxes, classes, masks.

        Args:
            label: 2-D integer tensor; values <= 0 are treated as background.

        Returns:
            dict with ``gt_boxes`` (N, 4) int64 as [xmin, ymin, xmax, ymax]
            with inclusive pixel indices, ``gt_classes`` (N,) int64 and
            ``gt_masks`` (N, H, W) bool. N may be 0.
        """
        boxes, classes, masks = [], [], []
        for obj_class in torch.unique(label):
            if obj_class <= 0:
                continue
            mask = label == obj_class
            # torch.nonzero yields (row, col) pairs, i.e. (y, x) - not (x, y).
            coords = torch.nonzero(mask)
            ymin, xmin = coords.min(dim=0).values.tolist()
            ymax, xmax = coords.max(dim=0).values.tolist()
            boxes.append([xmin, ymin, xmax, ymax])
            # NOTE(review): the pixel value is used as the class id. If the
            # label images encode instance ids rather than categories, every
            # instance should probably share one class - confirm.
            classes.append(int(obj_class))
            masks.append(mask)

        if masks:
            gt_masks = torch.stack(masks)
        else:
            # torch.stack refuses an empty list; return a correctly shaped
            # empty tensor for images without any instance.
            gt_masks = torch.zeros((0, *label.shape), dtype=torch.bool)

        return {
            # reshape keeps the (N, 4) layout even when N == 0.
            "gt_boxes": torch.tensor(boxes, dtype=torch.int64).reshape(-1, 4),
            "gt_classes": torch.tensor(classes, dtype=torch.long),
            "gt_masks": gt_masks,
        }

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            dict with ``image`` (the transformed image, or the PIL image when
            no transform is set), ``height``, ``width`` and ``instances`` (see
            ``_instances_from_label``).
        """
        file_name = self.image_files[index]
        image_path = os.path.join(self.image_dir, file_name)
        label_path = os.path.join(self.label_dir, file_name)

        # convert() returns a fully loaded copy, so the files can be closed
        # right away instead of waiting for garbage collection.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        # NOTE(review): "L" is 8-bit greyscale; label images stored as RGB or
        # 16-bit would have ids merged or clipped by this conversion - confirm
        # the mask format.
        with Image.open(label_path) as raw_label:
            label = raw_label.convert("L")

        if self.transform:
            image = self.transform(image)

        if hasattr(image, "shape"):
            # Array-like image, indexed as (channels, height, width).
            height, width = image.shape[1], image.shape[2]
        else:
            # Still a PIL image (no transform, or a PIL-to-PIL transform).
            width, height = image.size

        # Keep masks aligned with a resized image; nearest-neighbour sampling
        # avoids inventing label values that are not in the original map.
        if label.size != (width, height):
            label = label.resize((width, height), resample=Image.NEAREST)

        label = torch.from_numpy(np.array(label))

        return {
            "image": image,
            "height": height,
            "width": width,
            "instances": self._instances_from_label(label),
        }

In [None]:
class LeavesTrainer(m2f.train_net.Trainer):
    """Trainer that replaces the training data loader with ``LeavesDataset``."""

    @staticmethod
    def _keep_as_list(batch):
        """Collate function that hands the list of per-image dicts on unchanged.

        The default collate function would try to stack the per-sample
        tensors, which fails as soon as two images contain a different number
        of instances; detectron2 models take a list of dicts as input.
        """
        return batch

    @classmethod
    def build_train_loader(cls, cfg=None):
        """Build the training ``DataLoader`` over ``LeavesDataset``.

        Args:
            cfg: Config object passed in by the trainer when it calls this
                hook. It is accepted for signature compatibility and is
                currently unused; it defaults to ``None`` so that existing
                no-argument calls keep working.

        Returns:
            A shuffling ``DataLoader`` yielding lists of two sample dicts.
        """
        # Resize to a fixed square, convert to a float tensor and normalise
        # per channel with the given mean/std.
        transform = transforms.Compose([
            transforms.Resize((800, 800)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        dataset = LeavesDataset(IMAGE_DIR, INSTANCES_DIR, transform=transform)

        # NOTE(review): batch_size is hard-coded to 2 although the config cell
        # defines BATCH_SIZE = 8 - confirm which value is intended.
        # NOTE(review): this loader is exhausted after one pass over the data;
        # verify that the training loop copes with a finite loader.
        return DataLoader(
            dataset,
            batch_size=2,
            shuffle=True,
            collate_fn=cls._keep_as_list,
        )

In [None]:
def get_trainer(args):
    """Create a ``LeavesTrainer`` from parsed arguments and run training.

    Args:
        args: Argument namespace; ``args.resume`` is read here, and the whole
            namespace is forwarded to ``m2f.train_net.setup`` to build the
            config.

    Returns:
        Whatever ``trainer.train()`` returns.
    """
    # The trainer cannot be constructed without a config, so build it from the
    # arguments first (mirrors the entry point in Mask2Former's train_net).
    cfg = m2f.train_net.setup(args)
    trainer = LeavesTrainer(cfg)
    trainer.resume_or_load(resume=args.resume)
    return trainer.train()

In [None]:
# `launch` only forwards what is passed through `args=`; without it
# get_trainer would be called with no argument at all. The namespace is built
# from an explicit list because the kernel's own sys.argv is not meant for
# this parser.
launch_args = default_argument_parser().parse_args(
    ["--config-file", CONFIG, "--num-gpus", str(NUM_GPUS)]
)
launch(get_trainer, NUM_GPUS, args=(launch_args,))