In [1]:
pip install -q "pytorch-lightning<2.0.0"

[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
pip install -q transformers datasets roboflow

[0mNote: you may need to restart the kernel to use updated packages.


In [4]:
pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3
[0mNote: you may need to restart the kernel to use updated packages.


In [20]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
from pytorch_lightning.loggers import CSVLogger
from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import os
from PIL import Image
import numpy as np
import random
import evaluate

metric = evaluate.load("accuracy")

In [21]:
import roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="rWhzxDa8oLFF7F6zw3cG")
project = rf.workspace("cd-pq7yy").project("building-defects-xpjmz")
version = project.version(5)
dataset = version.download("png-mask-semantic")
                

loading Roboflow workspace...
loading Roboflow project...


In [22]:
class SemanticSegmentationDataset(Dataset):
    """Image (semantic) segmentation dataset."""

    def __init__(self, root_dir, feature_extractor):
        """
        Args:
            root_dir (string): Root directory of the dataset containing the images + annotations.
            feature_extractor (SegFormerFeatureExtractor): feature extractor to prepare images + segmentation maps.
        """
        self.root_dir = root_dir
        self.feature_extractor = feature_extractor
        
        # Load class mapping from CSV file (e.g., _classes.csv)
        self.classes_csv_file = os.path.join(self.root_dir, "_classes.csv")
        with open(self.classes_csv_file, 'r') as fid:
            data = [l.split(',') for i, l in enumerate(fid) if i != 0]
        self.id2label = {x[0]: x[1] for x in data}
        
        image_file_names = [f for f in os.listdir(self.root_dir) if '.jpg' in f]
        mask_file_names = [f for f in os.listdir(self.root_dir) if '.png' in f]
        
        self.images = sorted(image_file_names)
        self.masks = sorted(mask_file_names)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = Image.open(os.path.join(self.root_dir, self.images[idx]))
        segmentation_map = Image.open(os.path.join(self.root_dir, self.masks[idx]))

        # Convert segmentation map to numpy array (without ignoring any labels)
        segmentation_map = np.array(segmentation_map)

        # Apply feature extractor to both image and segmentation map
        encoded_inputs = self.feature_extractor(image, segmentation_map, return_tensors="pt")

        # Remove batch dimension
        for k, v in encoded_inputs.items():
            encoded_inputs[k].squeeze_()

        return encoded_inputs
    

In [23]:
import torch
import torch.nn.functional as F

class SegformerFinetuner(pl.LightningModule):
    
    def __init__(self, id2label, train_dataloader=None, val_dataloader=None, test_dataloader=None, metrics_interval=100):
        super(SegformerFinetuner, self).__init__()
        self.id2label = id2label
        self.metrics_interval = metrics_interval
        self.train_dl = train_dataloader
        self.val_dl = val_dataloader
        self.test_dl = test_dataloader
        
        self.num_classes = len(id2label.keys())
        self.label2id = {v:k for k,v in self.id2label.items()}
        
        # SegFormer 모델 로드
        self.model = SegformerForSemanticSegmentation.from_pretrained(
            "nvidia/segformer-b5-finetuned-cityscapes-1024-1024", 
            return_dict=False, 
            num_labels=self.num_classes,
            id2label=self.id2label,
            label2id=self.label2id,
            ignore_mismatched_sizes=True,
        )
        
        # 클래스 가중치 (4번 클래스에 높은 가중치 설정)
        self.class_weights = torch.ones(self.num_classes)  # 기본 가중치는 모두 1
        self.class_weights[4] = 10.0  # 4번 클래스에 가중치 10을 적용
        
        self.train_mean_iou = load_metric("mean_iou")
        self.val_mean_iou = load_metric("mean_iou")
        self.test_mean_iou = load_metric("mean_iou")
        
    def forward(self, images, masks):
        outputs = self.model(pixel_values=images, labels=masks)
        return outputs
    
    def training_step(self, batch, batch_nb):
        images, masks = batch['pixel_values'], batch['labels']
        outputs = self(images, masks)
        loss, logits = outputs[0], outputs[1]
        
        # Logits 업샘플링하여 원본 크기와 맞추기
        upsampled_logits = nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )

        predicted = upsampled_logits.argmax(dim=1)

        # 클래스 가중치를 적용한 cross-entropy 손실 계산
        loss = F.cross_entropy(upsampled_logits, masks, weight=self.class_weights.to(self.device))

        # IoU 계산
        self.train_mean_iou.add_batch(
            predictions=predicted.detach().cpu().numpy(), 
            references=masks.detach().cpu().numpy()
        )
        
        if batch_nb % self.metrics_interval == 0:
            metrics = self.train_mean_iou.compute(
                num_labels=self.num_classes, 
                ignore_index=255, 
                reduce_labels=False,
            )
            metrics = {'loss': loss, "mean_iou": metrics["mean_iou"], "mean_accuracy": metrics["mean_accuracy"]}
            
            for k,v in metrics.items():
                self.log(k, v)
            
            return metrics
        else:
            return {'loss': loss}
    
    def validation_step(self, batch, batch_nb):
        images, masks = batch['pixel_values'], batch['labels']
        outputs = self(images, masks)
        loss, logits = outputs[0], outputs[1]
        
        upsampled_logits = nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        predicted = upsampled_logits.argmax(dim=1)

        # Validation에서도 가중치를 적용한 손실 계산
        loss = F.cross_entropy(upsampled_logits, masks, weight=self.class_weights.to(self.device))

        # IoU 계산
        self.val_mean_iou.add_batch(
            predictions=predicted.detach().cpu().numpy(), 
            references=masks.detach().cpu().numpy()
        )
        
        return {'val_loss': loss}

    def validation_epoch_end(self, outputs):
        metrics = self.val_mean_iou.compute(
              num_labels=self.num_classes, 
              ignore_index=255, 
              reduce_labels=False,
          )
        
        avg_val_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        val_mean_iou = metrics["mean_iou"]
        val_mean_accuracy = metrics["mean_accuracy"]
        
        metrics = {"val_loss": avg_val_loss, "val_mean_iou": val_mean_iou, "val_mean_accuracy": val_mean_accuracy}
        for k,v in metrics.items():
            self.log(k, v)

        return metrics
    
    def test_step(self, batch, batch_nb):
        images, masks = batch['pixel_values'], batch['labels']
        outputs = self(images, masks)
        loss, logits = outputs[0], outputs[1]
        
        upsampled_logits = nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        predicted = upsampled_logits.argmax(dim=1)

        # Test에서도 가중치를 적용한 손실 계산
        loss = F.cross_entropy(upsampled_logits, masks, weight=self.class_weights.to(self.device))

        # IoU 계산
        self.test_mean_iou.add_batch(
            predictions=predicted.detach().cpu().numpy(), 
            references=masks.detach().cpu().numpy()
        )
            
        return {'test_loss': loss}
    
    def test_epoch_end(self, outputs):
        metrics = self.test_mean_iou.compute(
              num_labels=self.num_classes, 
              ignore_index=255, 
              reduce_labels=False,
          )
       
        avg_test_loss = torch.stack([x["test_loss"] for x in outputs]).mean()
        test_mean_iou = metrics["mean_iou"]
        test_mean_accuracy = metrics["mean_accuracy"]

        metrics = {"test_loss": avg_test_loss, "test_mean_iou": test_mean_iou, "test_mean_accuracy": test_mean_accuracy}
        for k,v in metrics.items():
            self.log(k,v)
        
        return metrics
    
    def configure_optimizers(self):
        return torch.optim.Adam([p for p in self.parameters() if p.requires_grad], lr=2e-05, eps=1e-08)
    
    def train_dataloader(self):
        return self.train_dl
    
    def val_dataloader(self):
        return self.val_dl
    
    def test_dataloader(self):
        return self.test_dl


In [24]:
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from transformers import SegformerForSemanticSegmentation

# IoU 계산 함수
def compute_iou(predictions, targets, num_classes):
    ious = []
    for cls in range(num_classes):
        pred_inds = predictions == cls
        target_inds = targets == cls
        intersection = torch.sum(pred_inds & target_inds)
        union = torch.sum(pred_inds | target_inds)
        if union == 0:
            ious.append(float('nan'))  # NaN 대신 기본값을 설정 가능
        else:
            ious.append(float(intersection) / float(union))
    return torch.tensor(ious).nanmean()  # NaN 제외 평균 IoU

# SegFormer fine-tuning 클래스
class SegformerFinetuner(pl.LightningModule):
    def __init__(self, id2label, train_dataloader=None, val_dataloader=None, test_dataloader=None, metrics_interval=100):
        super(SegformerFinetuner, self).__init__()
        self.id2label = id2label
        self.metrics_interval = metrics_interval
        self.train_dl = train_dataloader
        self.val_dl = val_dataloader
        self.test_dl = test_dataloader
        
        self.num_classes = len(id2label.keys())
        self.label2id = {v: k for k, v in self.id2label.items()}
        
        # Pretrained SegFormer 모델 로드
        self.model = SegformerForSemanticSegmentation.from_pretrained(
            "nvidia/segformer-b5-finetuned-cityscapes-1024-1024",
            num_labels=self.num_classes,
            id2label=self.id2label,
            label2id=self.label2id,
            ignore_mismatched_sizes=True
        )
        
        # 클래스 가중치 (4번 클래스에 가중치 부여)
        self.class_weights = torch.ones(self.num_classes)
        self.class_weights[4] = 10.0  # 클래스 4에 높은 가중치 부여

    def forward(self, images, masks):
        outputs = self.model(pixel_values=images, labels=masks)
        return outputs

    def training_step(self, batch, batch_nb):
        images, masks = batch['pixel_values'], batch['labels']
        
        outputs = self(images, masks)
        loss, logits = outputs[0], outputs[1]

        # Logits upsampling to match mask size
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        predicted = upsampled_logits.argmax(dim=1)

        # 가중치를 적용한 cross-entropy 손실 계산
        loss = F.cross_entropy(upsampled_logits, masks, weight=self.class_weights.to(self.device))
        
        # IoU 계산
        mean_iou = compute_iou(predicted, masks, self.num_classes)
        
        metrics = {'loss': loss, 'mean_iou': mean_iou}
        
        if batch_nb % self.metrics_interval == 0:
            for k, v in metrics.items():
                self.log(k, v)

        return metrics

    def validation_step(self, batch, batch_nb):
        images, masks = batch['pixel_values'], batch['labels']
        
        outputs = self(images, masks)
        loss, logits = outputs[0], outputs[1]

        # Logits upsampling to match mask size
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        predicted = upsampled_logits.argmax(dim=1)
        
        # Validation에서도 가중치 적용
        loss = F.cross_entropy(upsampled_logits, masks, weight=self.class_weights.to(self.device))
        
        # IoU 계산
        mean_iou = compute_iou(predicted, masks, self.num_classes)
        
        return {'val_loss': loss, 'val_mean_iou': mean_iou}

    def validation_epoch_end(self, outputs):
        avg_val_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        avg_val_mean_iou = torch.stack([x["val_mean_iou"] for x in outputs]).mean()

        metrics = {"val_loss": avg_val_loss, "val_mean_iou": avg_val_mean_iou}
        for k, v in metrics.items():
            self.log(k, v)

        return metrics

    def test_step(self, batch, batch_nb):
        images, masks = batch['pixel_values'], batch['labels']
        
        outputs = self(images, masks)
        loss, logits = outputs[0], outputs[1]

        # Logits upsampling to match mask size
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        predicted = upsampled_logits.argmax(dim=1)

        # Test에서도 가중치 적용
        loss = F.cross_entropy(upsampled_logits, masks, weight=self.class_weights.to(self.device))
        
        # IoU 계산
        mean_iou = compute_iou(predicted, masks, self.num_classes)
        
        return {'test_loss': loss, 'test_mean_iou': mean_iou}

    def test_epoch_end(self, outputs):
        avg_test_loss = torch.stack([x["test_loss"] for x in outputs]).mean()
        avg_test_mean_iou = torch.stack([x["test_mean_iou"] for x in outputs]).mean()

        metrics = {"test_loss": avg_test_loss, "test_mean_iou": avg_test_mean_iou}
        for k, v in metrics.items():
            self.log(k, v)

        return metrics

    def configure_optimizers(self):
        return torch.optim.Adam([p for p in self.parameters() if p.requires_grad], lr=2e-05, eps=1e-08)

    def train_dataloader(self):
        return self.train_dl

    def val_dataloader(self):
        return self.val_dl

    def test_dataloader(self):
        return self.test_dl


In [None]:
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from transformers import SegformerForSemanticSegmentation

# IoU 계산 함수
def compute_iou(predictions, targets, num_classes):
    ious = []
    for cls in range(num_classes):
        pred_inds = predictions == cls
        target_inds = targets == cls
        intersection = torch.sum(pred_inds & target_inds)
        union = torch.sum(pred_inds | target_inds)
        if union == 0:
            ious.append(float('nan'))  # NaN 대신 기본값을 설정 가능
        else:
            ious.append(float(intersection) / float(union))
    return torch.tensor(ious).nanmean()  # NaN 제외 평균 IoU

# SegFormer fine-tuning 클래스
class SegformerFinetuner(pl.LightningModule):
    def __init__(self, id2label, train_dataloader=None, val_dataloader=None, test_dataloader=None, metrics_interval=100):
        super(SegformerFinetuner, self).__init__()
        self.id2label = id2label
        self.metrics_interval = metrics_interval
        self.train_dl = train_dataloader
        self.val_dl = val_dataloader
        self.test_dl = test_dataloader
        
        self.num_classes = len(id2label.keys())
        self.label2id = {v: k for k, v in self.id2label.items()}
        
        # Pretrained SegFormer 모델 로드
        self.model = SegformerForSemanticSegmentation.from_pretrained(
            "nvidia/segformer-b5-finetuned-cityscapes-1024-1024",
            num_labels=self.num_classes,
            id2label=self.id2label,
            label2id=self.label2id,
            ignore_mismatched_sizes=True
        )
        
        # 클래스 가중치 (4번 클래스에 가중치 부여)
        self.class_weights = torch.ones(self.num_classes)
        self.class_weights[4] = 10.0  # 클래스 4에 높은 가중치 부여

    def forward(self, images, masks):
        outputs = self.model(pixel_values=images, labels=masks)
        return outputs

    def training_step(self, batch, batch_nb):
        images, masks = batch['pixel_values'], batch['labels']
        
        outputs = self(images, masks)
        loss, logits = outputs[0], outputs[1]

        # Logits upsampling to match mask size
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        predicted = upsampled_logits.argmax(dim=1)

        # 가중치를 적용한 cross-entropy 손실 계산
        loss = F.cross_entropy(upsampled_logits, masks, weight=self.class_weights.to(self.device))
        
        # IoU 계산
        mean_iou = compute_iou(predicted, masks, self.num_classes)
        
        metrics = {'loss': loss, 'mean_iou': mean_iou}
        
        if batch_nb % self.metrics_interval == 0:
            for k, v in metrics.items():
                self.log(k, v)

        return metrics

    def validation_step(self, batch, batch_nb):
        images, masks = batch['pixel_values'], batch['labels']
        
        outputs = self(images, masks)
        loss, logits = outputs[0], outputs[1]

        # Logits upsampling to match mask size
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        predicted = upsampled_logits.argmax(dim=1)
        
        # Validation에서도 가중치 적용
        loss = F.cross_entropy(upsampled_logits, masks, weight=self.class_weights.to(self.device))
        
        # IoU 계산
        mean_iou = compute_iou(predicted, masks, self.num_classes)
        
        return {'val_loss': loss, 'val_mean_iou': mean_iou}

    def validation_epoch_end(self, outputs):
        avg_val_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        avg_val_mean_iou = torch.stack([x["val_mean_iou"] for x in outputs]).mean()

        metrics = {"val_loss": avg_val_loss, "val_mean_iou": avg_val_mean_iou}
        for k, v in metrics.items():
            self.log(k, v)

        return metrics

    def test_step(self, batch, batch_nb):
        images, masks = batch['pixel_values'], batch['labels']
        
        outputs = self(images, masks)
        loss, logits = outputs[0], outputs[1]

        # Logits upsampling to match mask size
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        predicted = upsampled_logits.argmax(dim=1)

        # Test에서도 가중치 적용
        loss = F.cross_entropy(upsampled_logits, masks, weight=self.class_weights.to(self.device))
        
        # IoU 계산
        mean_iou = compute_iou(predicted, masks, self.num_classes)
        
        return {'test_loss': loss, 'test_mean_iou': mean_iou}

    def test_epoch_end(self, outputs):
        avg_test_loss = torch.stack([x["test_loss"] for x in outputs]).mean()
        avg_test_mean_iou = torch.stack([x["test_mean_iou"] for x in outputs]).mean()

        metrics = {"test_loss": avg_test_loss, "test_mean_iou": avg_test_mean_iou}
        for k, v in metrics.items():
            self.log(k, v)

        return metrics

    def configure_optimizers(self):
        return torch.optim.Adam([p for p in self.parameters() if p.requires_grad], lr=2e-05, eps=1e-08)

    def train_dataloader(self):
        return self.train_dl

    def val_dataloader(self):
        return self.val_dl

    def test_dataloader(self):
        return self.test_dl

In [25]:
feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b5-finetuned-cityscapes-1024-1024")
feature_extractor.do_reduce_labels = False
feature_extractor.size = 512

train_dataset = SemanticSegmentationDataset(f"{dataset.location}/train/", feature_extractor)
val_dataset = SemanticSegmentationDataset(f"{dataset.location}/valid/", feature_extractor)
test_dataset = SemanticSegmentationDataset(f"{dataset.location}/test/", feature_extractor)

batch_size = 4
num_workers = 0
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, num_workers=0)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, num_workers=0)

segformer_finetuner = SegformerFinetuner(
    train_dataset.id2label, 
    train_dataloader=train_dataloader, 
    val_dataloader=val_dataloader, 
    test_dataloader=test_dataloader, 
    metrics_interval=10,
)

The class SegformerFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use SegformerImageProcessor instead.
The following named arguments are not valid for `SegformerFeatureExtractor.__init__` and were ignored: 'feature_extractor_type'
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b5-finetuned-cityscapes-1024-1024 and are newly initialized because the shapes did not match:
- decode_head.classifier.weight: found shape torch.Size([19, 768, 1, 1]) in the checkpoint and torch.Size([5, 768, 1, 1]) in the model instantiated
- decode_head.classifier.bias: found shape torch.Size([19]) in the checkpoint and torch.Size([5]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [26]:
early_stop_callback = EarlyStopping(
    monitor="val_loss", 
    min_delta=0.00, 
    patience=1000, 
    verbose=False, 
    mode="min",
)

checkpoint_callback = ModelCheckpoint(save_top_k=1, monitor="val_loss")

trainer = pl.Trainer(
    gpus=[0],
    callbacks=[early_stop_callback, checkpoint_callback],
    max_epochs=2000,
    val_check_interval=len(train_dataloader),
)
trainer.fit(segformer_finetuner)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name  | Type                             | Params
-----------------------------------------------------------
0 | model | SegformerForSemanticSegmentation | 84.6 M
-----------------------------------------------------------
84.6 M    Trainable params
0         Non-trainable params
84.6 M    Total params
338.389   Total estimated model params size (MB)
The dataloader, val dataloader 0, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 20 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.


                                                                      

The validation_epoch_end should not return anything as of 9.1. To log, use self.log(...) or self.write(...) directly in the LightningModule
The dataloader, train dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 20 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.


Epoch 0:  87%|████████▋ | 13/15 [00:07<00:01,  1.71it/s, loss=1.45, v_num=62]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/2 [00:00<?, ?it/s][A
Epoch 0: 100%|██████████| 15/15 [00:07<00:00,  1.91it/s, loss=1.45, v_num=62]
Epoch 0: 100%|██████████| 15/15 [00:08<00:00,  1.85it/s, loss=1.45, v_num=62]
Epoch 1:  93%|█████████▎| 14/15 [00:07<00:00,  1.86it/s, loss=1.3, v_num=62] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/2 [00:00<?, ?it/s][A
Validating:  50%|█████     | 1/2 [00:00<00:00,  3.90it/s][A
Epoch 1: 100%|██████████| 15/15 [00:08<00:00,  1.86it/s, loss=1.3, v_num=62]
Epoch 2:  93%|█████████▎| 14/15 [00:07<00:00,  1.86it/s, loss=1.1, v_num=62] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/2 [00:00<?, ?it/s][A
Validating:  50%|█████     | 1/2 [00:00<00:00,  3.83it/s][A
Epoch 2: 100%|██████████| 15/15 [00:08<00:00,  1.87it/s, loss=1.1, v_num=62]
Epoch 3:  93%|█████████▎| 14/15 [00:07<00:00,  1.84it/s, loss=0.94, v_num

1

In [30]:
import torch
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# IoU 계산 함수
def compute_iou(predictions, targets, num_classes):
    ious = []
    for cls in range(num_classes):
        pred_inds = predictions == cls
        target_inds = targets == cls
        intersection = np.sum(pred_inds & target_inds)
        union = np.sum(pred_inds | target_inds)
        if union == 0:
            ious.append(float('nan'))  # NaN 대신 처리할 값을 넣을 수 있음
        else:
            ious.append(float(intersection) / float(union))
    return np.nanmean(ious)  # NaN을 제외한 평균 IoU

def compute_bbox(predictions, targets, num_classes):
    bbox = []
    for cls in range(num_classegs):
        pred_inds = predictions == cls
        target_inds = targets == cls
        intersection = np.sum

# 클래스별로 정밀도, 재현율, F1-score, 정확도 계산 함수
def compute_classwise_metrics(predictions, targets, num_classes):
    precision = precision_score(targets.flatten(), predictions.flatten(), average=None, labels=range(num_classes))
    recall = recall_score(targets.flatten(), predictions.flatten(), average=None, labels=range(num_classes))
    f1 = f1_score(targets.flatten(), predictions.flatten(), average=None, labels=range(num_classes))
    accuracy = accuracy_score(targets.flatten(), predictions.flatten())
    
    return precision, recall, f1, accuracy

# 모델 추론 및 성능 지표 계산 함수
def evaluate_model_on_test_data(dataloader, model, num_classes):
    iou_list = []
    precision_list, recall_list, f1_list = [], [], []
    
    for batch in dataloader:
        images, masks = batch['pixel_values'], batch['labels']
        
        with torch.no_grad():
            outputs = model(pixel_values=images)
        
        logits = outputs.logits
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        predicted_mask = upsampled_logits.argmax(dim=1).cpu().numpy()
        masks = masks.cpu().numpy()

        # IoU 계산
        iou = compute_iou(predicted_mask, masks, num_classes)
        iou_list.append(iou)

        # 정밀도, 재현율, F1-score 계산
        precision, recall, f1, accuracy = compute_classwise_metrics(predicted_mask, masks, num_classes)
        precision_list.append(precision)
        recall_list.append(recall)
        f1_list.append(f1)

    avg_iou = np.mean(iou_list)
    avg_precision = np.mean(precision_list, axis=0)
    avg_recall = np.mean(recall_list, axis=0)
    avg_f1 = np.mean(f1_list, axis=0)

    return avg_iou, avg_precision, avg_recall, avg_f1, accuracy

# 모델 성능 평가
num_classes = 3  # 클래스 개수
avg_iou, avg_precision, avg_recall, avg_f1, accuracy = evaluate_model_on_test_data(test_dataloader, segformer_finetuner.model, num_classes)

# 결과 출력
print(f"Average IoU: {avg_iou}")
for cls in range(num_classes):
    print(f"Class {cls}: Precision={avg_precision[cls]}, Recall={avg_recall[cls]}, F1-score={avg_f1[cls]}")
print(f"Overall Accuracy: {accuracy}")


Average IoU: 0.6944132681751422
Class 0: Precision=0.9531850477881771, Recall=0.9734592469018599, F1-score=0.9629384500206813
Class 1: Precision=0.9681683862325529, Recall=0.9659414177427499, F1-score=0.9670293697122072
Class 2: Precision=0.45286414045533063, Recall=0.29198419013087507, F1-score=0.3506919855022509
Overall Accuracy: 0.971771240234375


In [27]:
import torch
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# IoU 계산 함수
def compute_iou(predictions, targets, num_classes):
    ious = []
    for cls in range(num_classes):
        pred_inds = predictions == cls
        target_inds = targets == cls
        intersection = np.sum(pred_inds & target_inds)
        union = np.sum(pred_inds | target_inds)
        if union == 0:
            ious.append(float('nan'))  # NaN 대신 처리할 값을 넣을 수 있음
        else:
            ious.append(float(intersection) / float(union))
    return np.nanmean(ious)  # NaN을 제외한 평균 IoU

# 클래스별로 정밀도, 재현율, F1-score, 정확도 계산 함수
def compute_classwise_metrics(predictions, targets, num_classes):
    precision = precision_score(targets.flatten(), predictions.flatten(), average=None, labels=range(num_classes))
    recall = recall_score(targets.flatten(), predictions.flatten(), average=None, labels=range(num_classes))
    f1 = f1_score(targets.flatten(), predictions.flatten(), average=None, labels=range(num_classes))
    accuracy = accuracy_score(targets.flatten(), predictions.flatten())
    
    return precision, recall, f1, accuracy

# 모델 추론 및 성능 지표 계산 함수
def evaluate_model_on_test_data(dataloader, model, num_classes):
    iou_list = []
    precision_list, recall_list, f1_list = [], [], []
    
    for batch in dataloader:
        images, masks = batch['pixel_values'], batch['labels']
        
        with torch.no_grad():
            outputs = model(pixel_values=images)
        
        logits = outputs.logits
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        predicted_mask = upsampled_logits.argmax(dim=1).cpu().numpy()
        masks = masks.cpu().numpy()

        # IoU 계산
        iou = compute_iou(predicted_mask, masks, num_classes)
        iou_list.append(iou)

        # 정밀도, 재현율, F1-score 계산
        precision, recall, f1, accuracy = compute_classwise_metrics(predicted_mask, masks, num_classes)
        precision_list.append(precision)
        recall_list.append(recall)
        f1_list.append(f1)

    avg_iou = np.mean(iou_list)
    avg_precision = np.mean(precision_list, axis=0)
    avg_recall = np.mean(recall_list, axis=0)
    avg_f1 = np.mean(f1_list, axis=0)

    return avg_iou, avg_precision, avg_recall, avg_f1, accuracy

# 모델 성능 평가
num_classes = 3  # 클래스 개수
avg_iou, avg_precision, avg_recall, avg_f1, accuracy = evaluate_model_on_test_data(test_dataloader, segformer_finetuner.model, num_classes)

# 결과 출력
print(f"Average IoU: {avg_iou}")
for cls in range(num_classes):
    print(f"Class {cls}: Precision={avg_precision[cls]}, Recall={avg_recall[cls]}, F1-score={avg_f1[cls]}")
print(f"Overall Accuracy: {accuracy}")


Average IoU: 0.693860887914302
Class 0: Precision=0.9548586806346249, Recall=0.9691922411817924, F1-score=0.9615701796174623
Class 1: Precision=0.966367027682499, Recall=0.9677917193585088, F1-score=0.9670294447595243
Class 2: Precision=0.4664171203701878, Recall=0.280393006534769, F1-score=0.3486084151770625
Overall Accuracy: 0.9703025817871094


In [32]:
import torch
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix

# IoU 계산 함수
def compute_iou(predictions, targets, num_classes):
    ious = []
    for cls in range(num_classes):
        pred_inds = predictions == cls
        target_inds = targets == cls
        intersection = np.sum(pred_inds & target_inds)
        union = np.sum(pred_inds | target_inds)
        if union == 0:
            ious.append(float('nan'))  # NaN 대신 처리할 값을 넣을 수 있음
        else:
            ious.append(float(intersection) / float(union))
    return np.nanmean(ious)  # NaN을 제외한 평균 IoU

# 클래스별로 정밀도, 재현율, F1-score, 정확도, 혼동 행렬 계산 함수
def compute_classwise_metrics(predictions, targets, num_classes):
    precision = precision_score(targets.flatten(), predictions.flatten(), average=None, labels=range(num_classes))
    recall = recall_score(targets.flatten(), predictions.flatten(), average=None, labels=range(num_classes))
    f1 = f1_score(targets.flatten(), predictions.flatten(), average=None, labels=range(num_classes))
    accuracy = accuracy_score(targets.flatten(), predictions.flatten())
    
    conf_matrix = confusion_matrix(targets.flatten(), predictions.flatten(), labels=range(num_classes))
    
    # 각 클래스별 TP, TN, FP, FN 계산
    tp = np.diag(conf_matrix)
    fp = conf_matrix.sum(axis=0) - tp
    fn = conf_matrix.sum(axis=1) - tp
    tn = conf_matrix.sum() - (fp + fn + tp)
    
    return precision, recall, f1, accuracy, conf_matrix, tp, tn, fp, fn

# 모델 추론 및 성능 지표 계산 함수
def evaluate_model_on_test_data(dataloader, model, num_classes):
    iou_list = []
    precision_list, recall_list, f1_list = [], [], []
    conf_matrix_total = np.zeros((num_classes, num_classes), dtype=np.int64)
    tp_total = np.zeros(num_classes, dtype=np.int64)
    tn_total = np.zeros(num_classes, dtype=np.int64)
    fp_total = np.zeros(num_classes, dtype=np.int64)
    fn_total = np.zeros(num_classes, dtype=np.int64)
    
    for batch in dataloader:
        images, masks = batch['pixel_values'], batch['labels']
        
        with torch.no_grad():
            outputs = model(pixel_values=images)
        
        logits = outputs.logits
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        predicted_mask = upsampled_logits.argmax(dim=1).cpu().numpy()
        masks = masks.cpu().numpy()

        # IoU 계산
        iou = compute_iou(predicted_mask, masks, num_classes)
        iou_list.append(iou)

        # 정밀도, 재현율, F1-score, 혼동 행렬 및 TP, TN, FP, FN 계산
        precision, recall, f1, accuracy, conf_matrix, tp, tn, fp, fn = compute_classwise_metrics(predicted_mask, masks, num_classes)
        precision_list.append(precision)
        recall_list.append(recall)
        f1_list.append(f1)
        conf_matrix_total += conf_matrix
        tp_total += tp
        tn_total += tn
        fp_total += fp
        fn_total += fn

    avg_iou = np.mean(iou_list)
    avg_precision = np.mean(precision_list, axis=0)
    avg_recall = np.mean(recall_list, axis=0)
    avg_f1 = np.mean(f1_list, axis=0)

    return avg_iou, avg_precision, avg_recall, avg_f1, accuracy, conf_matrix_total, tp_total, tn_total, fp_total, fn_total

# 모델 성능 평가
num_classes = 3  # 클래스 개수
avg_iou, avg_precision, avg_recall, avg_f1, accuracy, conf_matrix_total, tp_total, tn_total, fp_total, fn_total = evaluate_model_on_test_data(test_dataloader, segformer_finetuner.model, num_classes)

# 결과 출력
print(f"Average IoU: {avg_iou}")
for cls in range(num_classes):
    print(f"Class {cls}: Precision={avg_precision[cls]}, Recall={avg_recall[cls]}, F1-score={avg_f1[cls]}")
    print(f"Class {cls}: TP={tp_total[cls]}, TN={tn_total[cls]}, FP={fp_total[cls]}, FN={fn_total[cls]}")
print(f"Overall Accuracy: {accuracy}")
print("Confusion Matrix:")
print(conf_matrix_total)

Average IoU: 0.6929824614653014
Class 0: Precision=0.9546017563167193, Recall=0.9676900236652587, F1-score=0.9606581992212875
Class 0: TP=507813, TN=757114, FP=34156, FN=11637
Class 1: Precision=0.9658325030830929, Recall=0.9662267996187743, F1-score=0.9659626385607725
Class 1: TP=718284, TN=532545, FP=26660, FN=33231
Class 2: Precision=0.4478691562040463, Recall=0.28945748569134233, F1-score=0.35009240821150095
Class 2: TP=15007, TN=1262165, FP=8800, FN=24748
Overall Accuracy: 0.9694137573242188
Confusion Matrix:
[[507813  11145    492]
 [ 24923 718284   8308]
 [  9233  15515  15007]]


In [34]:
# IoU 계산 함수 수정
def compute_iou(predictions, targets, num_classes):
    ious = []
    for cls in range(num_classes):
        pred_inds = predictions == cls
        target_inds = targets == cls
        intersection = torch.sum(pred_inds & target_inds)  # torch.sum으로 수정
        union = torch.sum(pred_inds | target_inds)  # torch.sum으로 수정
        if union == 0:
            ious.append(float('nan'))  # NaN 대신 기본값을 설정 가능
        else:
            ious.append(float(intersection) / float(union))
    return torch.tensor(ious).nanmean()  # NaN 제외 평균 IoU


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing:   0%|          | 0/2 [00:40<?, ?it/s]
Testing: 100%|██████████| 2/2 [00:00<00:00,  4.17it/s]
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': 0.20976848900318146, 'test_mean_iou': 0.6732882857322693}
--------------------------------------------------------------------------------


The testing_epoch_end should not return anything as of 9.1. To log, use self.log(...) or self.write(...) directly in the LightningModule


[{'test_loss': 0.20976848900318146, 'test_mean_iou': 0.6732882857322693}]

In [19]:
import torch
import numpy as np

# IoU 계산 함수 (변경 없음)
def compute_iou(predictions, targets, num_classes):
    ious = []
    for cls in range(num_classes):
        pred_inds = predictions == cls
        target_inds = targets == cls
        intersection = np.sum(pred_inds & target_inds)
        union = np.sum(pred_inds | target_inds)
        if union == 0:
            ious.append(float('nan'))  # NaN 대신 처리할 값을 넣을 수 있음
        else:
            ious.append(float(intersection) / float(union))
    return ious  # 각 클래스별 IoU 반환

# 모델 추론 및 성능 지표 계산 함수
def evaluate_model_on_test_data(dataloader, model, num_classes):
    iou_list = []
    accuracy_list = []
    
    # 모델의 디바이스 확인
    device = next(model.parameters()).device
    
    for batch in dataloader:
        images, masks = batch['pixel_values'], batch['labels']
        
        # 입력 텐서를 모델과 동일한 디바이스로 이동
        images = images.to(device)
        masks = masks.to(device)
        
        with torch.no_grad():
            outputs = model(pixel_values=images)
        
        logits = outputs.logits
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        predicted_mask = upsampled_logits.argmax(dim=1).cpu().numpy()
        masks = masks.cpu().numpy()

        # IoU 계산
        ious = compute_iou(predicted_mask, masks, num_classes)
        iou_list.append(ious)

        # 정확도 계산
        correct = np.sum(predicted_mask == masks)
        total = predicted_mask.size
        batch_accuracy = correct / total
        accuracy_list.append(batch_accuracy)

    # 각 클래스별 평균 IoU 계산
    avg_iou_per_class = np.nanmean(iou_list, axis=0)
    # 전체 클래스에 대한 Mean IoU 계산
    mean_iou = np.nanmean(avg_iou_per_class)
    # 평균 정확도 계산
    avg_accuracy = np.mean(accuracy_list)

    return avg_iou_per_class, mean_iou, avg_accuracy

# 모델 성능 평가
num_classes = 4  # 클래스 개수
avg_iou_per_class, mean_iou, avg_accuracy = evaluate_model_on_test_data(test_dataloader, segformer_finetuner.model, num_classes)

# 결과 출력
print(f"Mean IoU over all classes: {mean_iou}")
for cls in range(num_classes):
    print(f"Class {cls}: IoU={avg_iou_per_class[cls]}")
print(f"Overall Accuracy: {avg_accuracy}")


Mean IoU over all classes: 0.4952717983309641
Class 0: IoU=0.908493153715444
Class 1: IoU=0.91680833119267
Class 2: IoU=0.1300082943404557
Class 3: IoU=0.025777414075286414
Overall Accuracy: 0.9458799362182617


In [17]:
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# 클래스별로 정밀도, 재현율, F1-score, 정확도 계산 함수
def compute_classwise_metrics(predictions, targets, num_classes):
    precision = precision_score(targets.flatten(), predictions.flatten(), average=None, labels=range(num_classes))
    recall = recall_score(targets.flatten(), predictions.flatten(), average=None, labels=range(num_classes))
    f1 = f1_score(targets.flatten(), predictions.flatten(), average=None, labels=range(num_classes))

    # 클래스별 정확도 계산
    class_accuracies = []
    for cls in range(num_classes):
        true_positive = np.sum((predictions == cls) & (targets == cls))
        total_predicted_cls = np.sum(predictions == cls)
        total_actual_cls = np.sum(targets == cls)
        
        if total_actual_cls == 0:  # 타겟에 해당 클래스가 없는 경우
            class_accuracies.append(float('nan'))
        else:
            accuracy_cls = true_positive / total_actual_cls
            class_accuracies.append(accuracy_cls)

    return precision, recall, f1, class_accuracies

# 테스트 예시
predictions = np.array([[0, 1, 2], [2, 1, 0]])
targets = np.array([[0, 1, 1], [2, 0, 0]])

# 클래스 수는 3개라고 가정
num_classes = 3
precision, recall, f1, class_accuracies = compute_classwise_metrics(predictions, targets, num_classes)

# 결과 출력
for cls in range(num_classes):
    print(f"Class {cls}: Precision={precision[cls]}, Recall={recall[cls]}, F1-score={f1[cls]}, Accuracy={class_accuracies[cls]}")


Class 0: Precision=1.0, Recall=0.6666666666666666, F1-score=0.8, Accuracy=0.6666666666666666
Class 1: Precision=0.5, Recall=0.5, F1-score=0.5, Accuracy=0.5
Class 2: Precision=0.5, Recall=1.0, F1-score=0.6666666666666666, Accuracy=1.0


In [42]:
import torch

# Predict on a test image and overlay the mask on the original image
test_idx = 2
input_image_file = os.path.join(test_dataset.root_dir, test_dataset.images[test_idx])
input_image = Image.open(input_image_file)
test_batch = test_dataset[test_idx]
images, masks = test_batch['pixel_values'], test_batch['labels']
images = torch.unsqueeze(images, 0)
masks = torch.unsqueeze(masks, 0)

# 모델이 GPU에 할당되어 있으면, 입력 텐서도 GPU로 이동
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
images = images.to(device)
masks = masks.to(device)
segformer_finetuner.model = segformer_finetuner.model.to(device)

# 모델 추론
outputs = segformer_finetuner.model(images, masks)
    
loss, logits = outputs[0], outputs[1]

# Logits를 업샘플링하여 원본 이미지 크기와 일치시킴
upsampled_logits = torch.nn.functional.interpolate(
    logits, 
    size=masks.shape[-2:], 
    mode="bilinear", 
    align_corners=False
)
predicted_mask = upsampled_logits.argmax(dim=1).cpu().numpy()

# 마스크를 원본 이미지 위에 시각화
mask = prediction_to_vis(np.squeeze(masks.cpu()))  # CPU로 다시 이동
mask = mask.resize(input_image.size)
mask = mask.convert("RGBA")
input_image = input_image.convert("RGBA")
overlay_img = Image.blend(input_image, mask, 0.5)

# 각 클래스가 예측된 픽셀 수를 확인하는 코드
predicted_mask = upsampled_logits.argmax(dim=1).cpu().numpy().squeeze()
unique, counts = np.unique(predicted_mask, return_counts=True)
print(dict(zip(unique, counts)))  # 각 클래스별 픽셀 수 확인


{0: 144663, 1: 116506, 2: 975}


In [44]:
import os
import torch
import numpy as np
from PIL import Image

# 전체 데이터셋의 모든 이미지를 처리하는 함수
def run_inference_on_dataset(dataset):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    segformer_finetuner.model = segformer_finetuner.model.to(device)  # 모델을 GPU 또는 CPU로 이동

    for idx in range(len(dataset)):
        input_image_file = os.path.join(dataset.root_dir, dataset.images[idx])
        input_image = Image.open(input_image_file)

        # 이미지 전처리
        test_batch = dataset[idx]
        images, masks = test_batch['pixel_values'], test_batch['labels']
        images = torch.unsqueeze(images, 0).to(device)
        masks = torch.unsqueeze(masks, 0).to(device)

        # 모델 추론
        with torch.no_grad():
            outputs = segformer_finetuner.model(images, masks)
        
        loss, logits = outputs[0], outputs[1]

        # Logits를 원본 크기로 업샘플링
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        # 예측된 마스크
        predicted_mask = upsampled_logits.argmax(dim=1).cpu().numpy().squeeze()

        # 각 클래스별 픽셀 수
        unique, counts = np.unique(predicted_mask, return_counts=True)
        class_pixel_count = dict(zip(unique, counts))

        # 전체 픽셀 수 계산
        total_pixels = predicted_mask.size

        # 각 클래스가 차지하는 면적 비율 계산 (백분율)
        class_areas_percentage = {class_label: (count / total_pixels) * 100 for class_label, count in class_pixel_count.items()}

        # 출력
        print(f"Image file: {input_image_file}")
        for class_label, area_percentage in class_areas_percentage.items():
            print(f"Class {class_label}: {area_percentage:.2f}% of the image")
        print("-" * 40)

# 사용 예시: test_dataset에 있는 모든 이미지를 처리
run_inference_on_dataset(train_dataset)


Image file: /data/JW/segformer/building-defects-5/train/100_png.rf.46035115425a164f9588492209aef5c5.jpg
Class 0: 54.69% of the image
Class 1: 44.22% of the image
Class 2: 1.09% of the image
----------------------------------------
Image file: /data/JW/segformer/building-defects-5/train/10_png.rf.86ab7e0c1fad660e22def23cd41eed75.jpg
Class 0: 61.22% of the image
Class 1: 38.69% of the image
Class 2: 0.08% of the image
----------------------------------------
Image file: /data/JW/segformer/building-defects-5/train/11_png.rf.85f5e94165bd476391b5025c329cf98d.jpg
Class 0: 60.22% of the image
Class 1: 38.90% of the image
Class 2: 0.87% of the image
----------------------------------------
Image file: /data/JW/segformer/building-defects-5/train/12_png.rf.aff904ef91f6079b305ec9ef86a70525.jpg
Class 0: 38.01% of the image
Class 1: 61.68% of the image
Class 2: 0.31% of the image
----------------------------------------
Image file: /data/JW/segformer/building-defects-5/train/14_png.rf.ee4ce53362ab2

In [47]:
import os
import torch
import numpy as np
from PIL import Image

# 시각화를 위한 color map 설정 (배경, 건물, 크랙 등)
color_map = {
    0: (0, 0, 0),      # 배경
    1: (255, 255, 255),  # 건물
    2: (255, 0, 0),     # 크랙
}

# 예측된 마스크를 시각화하는 함수
def prediction_to_vis(prediction):
    vis_shape = prediction.shape + (3,)
    vis = np.zeros(vis_shape)
    for class_label, color in color_map.items():
        vis[prediction == class_label] = color
    return Image.fromarray(vis.astype(np.uint8))

# 전체 데이터셋의 모든 이미지를 처리하고 시각화하는 함수
def run_inference_on_dataset(dataset, output_folder):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    segformer_finetuner.model = segformer_finetuner.model.to(device)  # 모델을 GPU 또는 CPU로 이동

    # 결과 이미지를 저장할 폴더 생성
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for idx in range(len(dataset)):
        input_image_file = os.path.join(dataset.root_dir, dataset.images[idx])
        input_image = Image.open(input_image_file)

        # 이미지 전처리
        test_batch = dataset[idx]
        images, masks = test_batch['pixel_values'], test_batch['labels']
        images = torch.unsqueeze(images, 0).to(device)
        masks = torch.unsqueeze(masks, 0).to(device)

        # 모델 추론
        with torch.no_grad():
            outputs = segformer_finetuner.model(images, masks)
        
        loss, logits = outputs[0], outputs[1]

        # Logits를 원본 크기로 업샘플링
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )

        # 예측된 마스크
        predicted_mask = upsampled_logits.argmax(dim=1).cpu().numpy().squeeze()

        # 각 클래스별 픽셀 수
        unique, counts = np.unique(predicted_mask, return_counts=True)
        class_pixel_count = dict(zip(unique, counts))

        # 전체 픽셀 수 계산
        total_pixels = predicted_mask.size

        # 각 클래스가 차지하는 면적 비율 계산 (백분율)
        class_areas_percentage = {class_label: (count / total_pixels) * 100 for class_label, count in class_pixel_count.items()}

        # 출력 (클래스별 면적 비율)
        print(f"Image file: {input_image_file}")
        for class_label, area_percentage in class_areas_percentage.items():
            print(f"Class {class_label}: {area_percentage:.2f}% of the image")
        print("-" * 40)

        # 마스크 시각화
        mask = prediction_to_vis(predicted_mask)
        mask = mask.resize(input_image.size)
        mask = mask.convert("RGBA")

        # 원본 이미지에 마스크 오버레이
        input_image = input_image.convert("RGBA")
        overlay_img = Image.blend(input_image, mask, 0.5)

        # 오버레이 이미지를 RGB로 변환 후 저장
        overlay_img = overlay_img.convert("RGB")  # JPEG 형식으로 저장하기 위해 RGB로 변환
        output_image_file = os.path.join(output_folder, f"overlay_{dataset.images[idx]}")
        overlay_img.save(output_image_file, "JPEG")  # 저장 형식은 JPEG로 지정
        print(f"Saved overlay image: {output_image_file}")

# 사용 예시: test_dataset에 있는 모든 이미지를 처리하고 시각화하여 저장
output_folder = "v8_train_results"
run_inference_on_dataset(train_dataset, output_folder)


Image file: /data/JW/segformer/building-defects-5/train/100_png.rf.46035115425a164f9588492209aef5c5.jpg
Class 0: 54.70% of the image
Class 1: 44.17% of the image
Class 2: 1.13% of the image
----------------------------------------
Saved overlay image: v8_train_results/overlay_100_png.rf.46035115425a164f9588492209aef5c5.jpg
Image file: /data/JW/segformer/building-defects-5/train/10_png.rf.86ab7e0c1fad660e22def23cd41eed75.jpg
Class 0: 61.58% of the image
Class 1: 38.37% of the image
Class 2: 0.05% of the image
----------------------------------------
Saved overlay image: v8_train_results/overlay_10_png.rf.86ab7e0c1fad660e22def23cd41eed75.jpg
Image file: /data/JW/segformer/building-defects-5/train/11_png.rf.85f5e94165bd476391b5025c329cf98d.jpg
Class 0: 59.80% of the image
Class 1: 39.82% of the image
Class 2: 0.37% of the image
----------------------------------------
Saved overlay image: v8_train_results/overlay_11_png.rf.85f5e94165bd476391b5025c329cf98d.jpg
Image file: /data/JW/segforme

In [15]:
import os
import torch
import numpy as np
from PIL import Image

# 시각화를 위한 color map 설정 (배경, 건물, 크랙 등)
color_map = {
    0: (0, 0, 0),      # 배경
    1: (255, 255, 255),  # 건물
    2: (255, 0, 0),     # 크랙
    3: (255, 255, 0)
}

# 예측된 마스크를 시각화하는 함수
def prediction_to_vis(prediction):
    vis_shape = prediction.shape + (3,)
    vis = np.zeros(vis_shape)
    for class_label, color in color_map.items():
        vis[prediction == class_label] = color
    return Image.fromarray(vis.astype(np.uint8))

# 전체 데이터셋의 모든 이미지를 처리하고 크랙/건물 값을 계산하는 함수
def run_inference_and_sort_by_crack_building_ratio(dataset, output_folder):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    segformer_finetuner.model = segformer_finetuner.model.to(device)  # 모델을 GPU 또는 CPU로 이동

    # 결과 이미지를 저장할 폴더 생성
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    crack_building_data = []  # 각 이미지의 크랙/건물 비율을 저장할 리스트

    for idx in range(len(dataset)):
        input_image_file = os.path.join(dataset.root_dir, dataset.images[idx])
        input_image = Image.open(input_image_file)

        # 이미지 전처리
        test_batch = dataset[idx]
        images, masks = test_batch['pixel_values'], test_batch['labels']
        images = torch.unsqueeze(images, 0).to(device)
        masks = torch.unsqueeze(masks, 0).to(device)

        # 모델 추론
        with torch.no_grad():
            outputs = segformer_finetuner.model(images, masks)
        
        loss, logits = outputs[0], outputs[1]

        # Logits를 원본 크기로 업샘플링
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )

        # 예측된 마스크
        predicted_mask = upsampled_logits.argmax(dim=1).cpu().numpy().squeeze()

        # 각 클래스별 픽셀 수
        unique, counts = np.unique(predicted_mask, return_counts=True)
        class_pixel_count = dict(zip(unique, counts))

        # 전체 픽셀 수 계산
        total_pixels = predicted_mask.size

        # 각 클래스가 차지하는 면적 비율 계산 (백분율)
        class_areas_percentage = {class_label: (count / total_pixels) * 100 for class_label, count in class_pixel_count.items()}

        # 크랙 및 건물 비율 추출
        crack_percentage = class_areas_percentage.get(2, 0)  # 크랙(Class 2)의 면적 비율
        building_percentage = class_areas_percentage.get(1, 0)  # 건물(Class 1)의 면적 비율

        # 크랙 비율을 건물 비율로 나눈 값 계산 (건물 비율이 0인 경우, 크랙 비율만을 사용)
        if building_percentage > 0:
            crack_building_ratio = crack_percentage / building_percentage
        else:
            crack_building_ratio = crack_percentage  # 건물 비율이 0인 경우 크랙 비율만 저장

        # 데이터 저장 (이미지 경로와 크랙, 건물 비율, 크랙/건물 비율)
        crack_building_data.append((input_image_file, crack_percentage, building_percentage, crack_building_ratio))

        # 마스크 시각화
        mask = prediction_to_vis(predicted_mask)
        mask = mask.resize(input_image.size)
        mask = mask.convert("RGBA")

        # 원본 이미지에 마스크 오버레이
        input_image = input_image.convert("RGBA")
        overlay_img = Image.blend(input_image, mask, 0.5)

        # 오버레이 이미지를 RGB로 변환 후 저장
        overlay_img = overlay_img.convert("RGB")  # JPEG 형식으로 저장하기 위해 RGB로 변환
        output_image_file = os.path.join(output_folder, f"overlay_{dataset.images[idx]}")
        overlay_img.save(output_image_file, "JPEG")  # 저장 형식은 JPEG로 지정
        print(f"Saved overlay image: {output_image_file}")

    # 크랙/건물 비율 기준으로 정렬
    crack_building_data.sort(key=lambda x: x[3], reverse=True)

    # 크랙/건물 비율이 큰 순서대로 출력
    print("\nSorted images by crack/building ratio:")
    for image_file, crack_percentage, building_percentage, crack_building_ratio in crack_building_data:
        print(f"Image file: {image_file}")
        print(f"  Crack: {crack_percentage:.2f}%")
        print(f"  Building: {building_percentage:.2f}%")
        print(f"  Crack/Building Ratio: {crack_building_ratio:.2f}")
        print("-" * 40)

# 사용 예시: train_dataset에 있는 모든 이미지를 처리하고 시각화하여 저장
output_folder = "v10_train_results"
run_inference_and_sort_by_crack_building_ratio(train_dataset, output_folder)


Saved overlay image: v10_train_results/overlay_100_png.rf.d8f2e94abdc5f650c654c09b1b68ad30.jpg
Saved overlay image: v10_train_results/overlay_10_png.rf.52eac2e10a80ef4f76209f8534189e1d.jpg
Saved overlay image: v10_train_results/overlay_11_png.rf.8f862b1b17db64f3f32f6de6d18182c5.jpg
Saved overlay image: v10_train_results/overlay_14_png.rf.39eec1af9c8b796257cccef94b91db95.jpg
Saved overlay image: v10_train_results/overlay_17_png.rf.cf60558ac2f4fe01a627fa7a804c13a2.jpg
Saved overlay image: v10_train_results/overlay_18_png.rf.4db70baa22c42c03b348d641af065290.jpg
Saved overlay image: v10_train_results/overlay_19_png.rf.8b192bf14b513145a7d31e9ae00ea47f.jpg
Saved overlay image: v10_train_results/overlay_1_png.rf.525b761db2ab22ad1abd9358b745e15b.jpg
Saved overlay image: v10_train_results/overlay_20_png.rf.ed01e9764e4594fc03bde24dd3b36a67.jpg
Saved overlay image: v10_train_results/overlay_21_png.rf.f4038c2f5f74dd587ad877e40721bbd1.jpg
Saved overlay image: v10_train_results/overlay_23_png.rf.5c5

In [52]:
import os
import torch
import numpy as np
import pandas as pd
from PIL import Image

# 시각화를 위한 color map 설정 (배경, 건물, 크랙 등)
color_map = {
    0: (0, 0, 0),      # 배경
    1: (255, 255, 255),  # 건물
    2: (255, 0, 0),     # 크랙
}

# 예측된 마스크를 시각화하는 함수
def prediction_to_vis(prediction):
    vis_shape = prediction.shape + (3,)
    vis = np.zeros(vis_shape)
    for class_label, color in color_map.items():
        vis[prediction == class_label] = color
    return Image.fromarray(vis.astype(np.uint8))

# 전체 데이터셋의 모든 이미지를 처리하고 크랙/건물 값을 계산하는 함수
def run_inference_and_save_results(dataset, output_folder, csv_file):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    segformer_finetuner.model = segformer_finetuner.model.to(device)  # 모델을 GPU 또는 CPU로 이동

    # 결과 이미지를 저장할 폴더 생성
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    crack_building_data = []  # 각 이미지의 크랙/건물 비율을 저장할 리스트

    for idx in range(len(dataset)):
        input_image_file = os.path.join(dataset.root_dir, dataset.images[idx])
        input_image = Image.open(input_image_file)

        # 이미지 전처리
        test_batch = dataset[idx]
        images, masks = test_batch['pixel_values'], test_batch['labels']
        images = torch.unsqueeze(images, 0).to(device)
        masks = torch.unsqueeze(masks, 0).to(device)

        # 모델 추론
        with torch.no_grad():
            outputs = segformer_finetuner.model(images, masks)
        
        loss, logits = outputs[0], outputs[1]

        # Logits를 원본 크기로 업샘플링
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )

        # 예측된 마스크
        predicted_mask = upsampled_logits.argmax(dim=1).cpu().numpy().squeeze()

        # 각 클래스별 픽셀 수
        unique, counts = np.unique(predicted_mask, return_counts=True)
        class_pixel_count = dict(zip(unique, counts))

        # 전체 픽셀 수 계산
        total_pixels = predicted_mask.size

        # 각 클래스가 차지하는 면적 비율 계산 (백분율)
        class_areas_percentage = {class_label: (count / total_pixels) * 100 for class_label, count in class_pixel_count.items()}

        # 크랙 및 건물 비율 추출
        crack_percentage = class_areas_percentage.get(2, 0)  # 크랙(Class 2)의 면적 비율
        building_percentage = class_areas_percentage.get(1, 0)  # 건물(Class 1)의 면적 비율

        # 크랙 비율을 건물 비율로 나눈 값 계산 (건물 비율이 0인 경우, 크랙 비율만을 사용)
        if building_percentage > 0:
            crack_building_ratio = crack_percentage / building_percentage
        else:
            crack_building_ratio = crack_percentage  # 건물 비율이 0인 경우 크랙 비율만 저장

        # 데이터 저장 (이미지 경로와 크랙, 건물 비율, 크랙/건물 비율)
        crack_building_data.append({
            "image_file": input_image_file,
            "crack_percentage": crack_percentage,
            "building_percentage": building_percentage,
            "crack_building_ratio": crack_building_ratio
        })

        # 마스크 시각화
        mask = prediction_to_vis(predicted_mask)
        mask = mask.resize(input_image.size)
        mask = mask.convert("RGBA")

        # 원본 이미지에 마스크 오버레이
        input_image = input_image.convert("RGBA")
        overlay_img = Image.blend(input_image, mask, 0.5)

        # 오버레이 이미지를 RGB로 변환 후 저장
        overlay_img = overlay_img.convert("RGB")  # JPEG 형식으로 저장하기 위해 RGB로 변환
        output_image_file = os.path.join(output_folder, f"overlay_{dataset.images[idx]}")
        overlay_img.save(output_image_file, "JPEG")  # 저장 형식은 JPEG로 지정
        print(f"Saved overlay image: {output_image_file}")

    # 크랙/건물 비율 기준으로 정렬
    crack_building_data = sorted(crack_building_data, key=lambda x: x["crack_building_ratio"], reverse=True)

    # 결과를 CSV로 저장
    df = pd.DataFrame(crack_building_data)
    df.to_csv(csv_file, index=False)
    print(f"Saved results to {csv_file}")

    # 크랙/건물 비율이 큰 순서대로 출력
    print("\nSorted images by crack/building ratio:")
    for data in crack_building_data:
        print(f"Image file: {data['image_file']}")
        print(f"  Crack: {data['crack_percentage']:.2f}%")
        print(f"  Building: {data['building_percentage']:.2f}%")
        print(f"  Crack/Building Ratio: {data['crack_building_ratio']:.2f}")
        print("-" * 40)

# 사용 예시: train_dataset에 있는 모든 이미지를 처리하고 시각화하여 저장
output_folder = "v8_valid_results"
csv_file = "crack_building_valid_results.csv"
run_inference_and_save_results(val_dataset, output_folder, csv_file)


Saved overlay image: v8_valid_results/overlay_15_png.rf.9fa0b14d927f8c6ad297c519d089fb70.jpg
Saved overlay image: v8_valid_results/overlay_31_png.rf.149663a5a5700a26d04d81152de3bb9c.jpg
Saved overlay image: v8_valid_results/overlay_41_png.rf.2e847d7a04ea282592ff10765d0e5540.jpg
Saved overlay image: v8_valid_results/overlay_54_png.rf.0ada3808d554cad31927c6abeda78a6a.jpg
Saved overlay image: v8_valid_results/overlay_74_png.rf.b6d30b0fa74ec5a9af6074610409e20f.jpg
Saved overlay image: v8_valid_results/overlay_75_png.rf.27d7397d751a4cdc7f18d24d2bb652a6.jpg
Saved overlay image: v8_valid_results/overlay_89_png.rf.39b307ff85a9530f67961aab618c196f.jpg
Saved overlay image: v8_valid_results/overlay_9_png.rf.7d2f6157dbb5ac00e4b3ab9d3def3e37.jpg
Saved results to crack_building_valid_results.csv

Sorted images by crack/building ratio:
Image file: /data/JW/segformer/building-defects-5/valid/75_png.rf.27d7397d751a4cdc7f18d24d2bb652a6.jpg
  Crack: 4.19%
  Building: 52.67%
  Crack/Building Ratio: 0.08
-