### Mask2Former

In [1]:
# Benötigte Bibliotheken importieren

import requests
import os
import torch
from torchvision import transforms
from PIL import Image
from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation

  from .autonotebook import tqdm as notebook_tqdm


### 1. Modellvorbereitung

In [2]:
# Mask2FormerForUniversalSegmentation-Modell sowie Image Processor laden

processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-tiny-ade-semantic")
model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-tiny-ade-semantic")

  return func(*args, **kwargs)


In [3]:
# Bild laden

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)


In [4]:
# Verarbeitung mit dem Prozessor

inputs = processor(images=image, return_tensors="pt")


In [5]:
# Modellvorhersage
with torch.no_grad():
    outputs = model(**inputs)

In [6]:
# class_queries_logits: Enthält die Wahrscheinlichkeiten für die vorhergesagten Klassen.

# masks_queries_logits: Enthält die vorhergesagten Masken für jedes Pixel.

class_queries_logits = outputs.class_queries_logits
masks_queries_logits = outputs.masks_queries_logits

In [7]:
# Nachbearbeitung der Vorhersage
predicted_semantic_map = processor.post_process_semantic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]

: 

In [None]:
import matplotlib.pyplot as plt

# Visualisiere das Originalbild und die Segmentierungskarte
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.imshow(image)
plt.title("Original Image")

plt.subplot(1, 2, 2)
plt.imshow(predicted_semantic_map, cmap="jet")
plt.title("Predicted Segmentation")
plt.show()


### 2. Datenvorbereitung

In [7]:

# Transformationen für Bildverarbeitung
image_transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Transformationen für Maskenverarbeitung
mask_transform = transforms.Compose([
    transforms.Resize((512, 512), interpolation=Image.NEAREST),
    transforms.ToTensor()
])

def load_data(image_dir, mask_dir):
    images = sorted([os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.endswith('.jpg')])
    masks = sorted([os.path.join(mask_dir, f) for f in os.listdir(mask_dir) if f.endswith('.png')])
    return images, masks


  transforms.Resize((512, 512), interpolation=Image.NEAREST),


### 3. Training

In [None]:
# Funktion für das Training des Modells

from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
from torch import nn
from tqdm import tqdm

class SegmentationDataset(Dataset):
    def __init__(self, image_paths, mask_paths, image_transform, mask_transform):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.image_transform = image_transform
        self.mask_transform = mask_transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert("RGB")
        mask = Image.open(self.mask_paths[idx])
        return self.image_transform(image), self.mask_transform(mask).long()

# Daten laden
train_images, train_masks = load_data("train/images", "train/masks")
val_images, val_masks = load_data("val/images", "val/masks")

train_dataset = SegmentationDataset(train_images, train_masks, image_transform, mask_transform)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)

# Trainingsfunktion
def train_model(model, train_loader, processor, epochs=5, device="cuda"):
    model.to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for images, masks in tqdm(train_loader):
            images = images.to(device)
            masks = masks.squeeze(1).to(device)  # Binarisierte Labels
            optimizer.zero_grad()

            # Vorwärtsdurchlauf
            inputs = processor(images=images, return_tensors="pt").to(device)
            outputs = model(pixel_values=inputs["pixel_values"], labels={"mask_labels": masks})

            loss = outputs.loss
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(train_loader)}")
