In [1]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
from pytorch_lightning.loggers import CSVLogger
from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import os
from PIL import Image
import numpy as np
import random
import evaluate

metric = evaluate.load("accuracy")


2024-10-13 07:00:28.715638: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-13 07:00:28.719233: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-13 07:00:28.727232: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-13 07:00:28.740414: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-13 07:00:28.744237: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attemptin

In [17]:
import roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="rWhzxDa8oLFF7F6zw3cG")
project = rf.workspace("cd-pq7yy").project("building-defects-xpjmz")
version = project.version(2)
dataset = version.download("png-mask-semantic")

loading Roboflow workspace...
loading Roboflow project...


In [18]:
class SemanticSegmentationDataset(Dataset):
    """Image (semantic) segmentation dataset."""

    def __init__(self, root_dir, feature_extractor):
        """
        Args:
            root_dir (string): Root directory of the dataset containing the images + annotations.
            feature_extractor (SegFormerFeatureExtractor): feature extractor to prepare images + segmentation maps.
        """
        self.root_dir = root_dir
        self.feature_extractor = feature_extractor
        
        # Load class mapping from CSV file (e.g., _classes.csv)
        self.classes_csv_file = os.path.join(self.root_dir, "_classes.csv")
        with open(self.classes_csv_file, 'r') as fid:
            data = [l.split(',') for i, l in enumerate(fid) if i != 0]
        self.id2label = {x[0]: x[1] for x in data}
        
        image_file_names = [f for f in os.listdir(self.root_dir) if '.jpg' in f]
        mask_file_names = [f for f in os.listdir(self.root_dir) if '.png' in f]
        
        self.images = sorted(image_file_names)
        self.masks = sorted(mask_file_names)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = Image.open(os.path.join(self.root_dir, self.images[idx]))
        segmentation_map = Image.open(os.path.join(self.root_dir, self.masks[idx]))

        # Convert segmentation map to numpy array (without ignoring any labels)
        segmentation_map = np.array(segmentation_map)

        # Apply feature extractor to both image and segmentation map
        encoded_inputs = self.feature_extractor(image, segmentation_map, return_tensors="pt")

        # Remove batch dimension
        for k, v in encoded_inputs.items():
            encoded_inputs[k].squeeze_()

        return encoded_inputs
    

In [24]:
import torch
import torch.nn as nn

class_weights = torch.tensor([1.0, 1.0, 1.0, 9.0], dtype=torch.float32).to('cuda')  # 가중치 예시 (클래스 4 가중치 높게)

class SegformerFinetuner(pl.LightningModule):
    
    def __init__(self, id2label, train_dataloader=None, val_dataloader=None, test_dataloader=None, metrics_interval=100):
        super(SegformerFinetuner, self).__init__()
        self.id2label = id2label
        self.metrics_interval = metrics_interval
        self.train_dl = train_dataloader
        self.val_dl = val_dataloader
        self.test_dl = test_dataloader
        
        self.num_classes = len(id2label.keys())
        self.label2id = {v: k for k, v in self.id2label.items()}
        
        self.model = SegformerForSemanticSegmentation.from_pretrained(
            "nvidia/segformer-b5-finetuned-cityscapes-1024-1024", 
            return_dict=False, 
            num_labels=self.num_classes,
            id2label=self.id2label,
            label2id=self.label2id,
            ignore_mismatched_sizes=True,
        )
        
        self.train_mean_iou = load_metric("mean_iou")
        self.val_mean_iou = load_metric("mean_iou")
        self.test_mean_iou = load_metric("mean_iou")
        
        # 손실 함수 (클래스별 가중치 적용)
        self.loss_fn = nn.CrossEntropyLoss(weight=class_weights)
    
    def forward(self, images, masks):
        outputs = self.model(pixel_values=images, labels=masks)
        return outputs
    
    def training_step(self, batch, batch_nb):
        images, masks = batch['pixel_values'], batch['labels']
        
        outputs = self(images, masks)
        
        loss, logits = outputs[0], outputs[1]
        
        # CrossEntropyLoss에서 가중치 적용
        loss = self.loss_fn(logits, masks)
        
        upsampled_logits = nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )

        predicted = upsampled_logits.argmax(dim=1)

        self.train_mean_iou.add_batch(
            predictions=predicted.detach().cpu().numpy(), 
            references=masks.detach().cpu().numpy()
        )
        if batch_nb % self.metrics_interval == 0:

            metrics = self.train_mean_iou.compute(
                num_labels=self.num_classes, 
                ignore_index=255, 
                reduce_labels=False,
            )
            
            metrics = {'loss': loss, "mean_iou": metrics["mean_iou"], "mean_accuracy": metrics["mean_accuracy"]}
            
            for k, v in metrics.items():
                self.log(k, v)
            
            return metrics
        else:
            return {'loss': loss}
    
    def validation_step(self, batch, batch_nb):
        images, masks = batch['pixel_values'], batch['labels']
        
        outputs = self(images, masks)
        
        loss, logits = outputs[0], outputs[1]
        
        # CrossEntropyLoss에서 가중치 적용
        loss = self.loss_fn(logits, masks)
        
        upsampled_logits = nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        predicted = upsampled_logits.argmax(dim=1)
        
        self.val_mean_iou.add_batch(
            predictions=predicted.detach().cpu().numpy(), 
            references=masks.detach().cpu().numpy()
        )
        
        return {'val_loss': loss}
    
    def validation_epoch_end(self, outputs):
        metrics = self.val_mean_iou.compute(
            num_labels=self.num_classes, 
            ignore_index=255, 
            reduce_labels=False,
        )
        
        avg_val_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        val_mean_iou = metrics["mean_iou"]
        val_mean_accuracy = metrics["mean_accuracy"]
        
        metrics = {"val_loss": avg_val_loss, "val_mean_iou": val_mean_iou, "val_mean_accuracy": val_mean_accuracy}
        for k, v in metrics.items():
            self.log(k, v)

        return metrics
    
    def test_step(self, batch, batch_nb):
        images, masks = batch['pixel_values'], batch['labels']
        
        outputs = self(images, masks)
        
        loss, logits = outputs[0], outputs[1]
        
        # CrossEntropyLoss에서 가중치 적용
        loss = self.loss_fn(logits, masks)
        
        upsampled_logits = nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        predicted = upsampled_logits.argmax(dim=1)
        
        self.test_mean_iou.add_batch(
            predictions=predicted.detach().cpu().numpy(), 
            references=masks.detach().cpu().numpy()
        )
        
        return {'test_loss': loss}
    
    def test_epoch_end(self, outputs):
        metrics = self.test_mean_iou.compute(
            num_labels=self.num_classes, 
            ignore_index=255, 
            reduce_labels=False,
        )
       
        avg_test_loss = torch.stack([x["test_loss"] for x in outputs]).mean()
        test_mean_iou = metrics["mean_iou"]
        test_mean_accuracy = metrics["mean_accuracy"]

        metrics = {"test_loss": avg_test_loss, "test_mean_iou": test_mean_iou, "test_mean_accuracy": test_mean_accuracy}
        
        for k, v in metrics.items():
            self.log(k, v)
        
        return metrics
    
    def configure_optimizers(self):
        return torch.optim.Adam([p for p in self.parameters() if p.requires_grad], lr=2e-05, eps=1e-08)
    
    def train_dataloader(self):
        return self.train_dl
    
    def val_dataloader(self):
        return self.val_dl
    
    def test_dataloader(self):
        return self.test_dl


In [25]:
import torch

# 클래스별 가중치 설정 (예: 클래스 4번의 가중치를 높임)
class_weights = torch.tensor([1.0, 1.0, 1.0, 9.0], dtype=torch.float32).to('cuda')  # 가중치 설정

# IoU 계산 함수
def compute_iou(predictions, targets, num_classes):
    ious = []
    for cls in range(num_classes):
        pred_inds = predictions == cls
        target_inds = targets == cls
        intersection = torch.sum(pred_inds & target_inds)
        union = torch.sum(pred_inds | target_inds)
        if union == 0:
            ious.append(float('nan'))  # NaN 대신 다른 값으로 처리 가능
        else:
            ious.append(float(intersection) / float(union))
    return torch.tensor(ious).nanmean()  # NaN을 제외한 평균 IoU 계산

# SegFormer fine-tuning 클래스
class SegformerFinetuner(pl.LightningModule):
    def __init__(self, id2label, train_dataloader=None, val_dataloader=None, test_dataloader=None, metrics_interval=100):
        super(SegformerFinetuner, self).__init__()
        self.id2label = id2label
        self.metrics_interval = metrics_interval
        self.train_dl = train_dataloader
        self.val_dl = val_dataloader
        self.test_dl = test_dataloader
        
        self.num_classes = len(id2label.keys())
        self.label2id = {v: k for k, v in self.id2label.items()}
        
        # Pretrained SegFormer 모델 로드
        self.model = SegformerForSemanticSegmentation.from_pretrained(
            "nvidia/segformer-b5-finetuned-cityscapes-1024-1024",
            num_labels=self.num_classes,
            id2label=self.id2label,
            label2id=self.label2id,
            ignore_mismatched_sizes=True
        )

        # 가중치를 적용한 손실 함수 정의
        self.loss_fn = torch.nn.CrossEntropyLoss(weight=class_weights)

    def forward(self, images, masks):
        outputs = self.model(pixel_values=images, labels=masks)
        return outputs

    def training_step(self, batch, batch_nb):
        images, masks = batch['pixel_values'], batch['labels']
        
        outputs = self(images, masks)
        logits = outputs[1]

        # Logits upsampling to match mask size
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        # 가중치를 적용한 손실 함수 사용
        loss = self.loss_fn(upsampled_logits, masks)
        predicted = upsampled_logits.argmax(dim=1)
        
        # IoU 계산
        mean_iou = compute_iou(predicted, masks, self.num_classes)
        
        metrics = {'loss': loss, 'mean_iou': mean_iou}
        
        if batch_nb % self.metrics_interval == 0:
            for k, v in metrics.items():
                self.log(k, v)

        return metrics

    def validation_step(self, batch, batch_nb):
        images, masks = batch['pixel_values'], batch['labels']
        
        outputs = self(images, masks)
        logits = outputs[1]

        # Logits upsampling to match mask size
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        # 가중치를 적용한 손실 함수 사용
        loss = self.loss_fn(upsampled_logits, masks)
        predicted = upsampled_logits.argmax(dim=1)
        
        # IoU 계산
        mean_iou = compute_iou(predicted, masks, self.num_classes)
        
        return {'val_loss': loss, 'val_mean_iou': mean_iou}

    def validation_epoch_end(self, outputs):
        avg_val_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        avg_val_mean_iou = torch.stack([x["val_mean_iou"] for x in outputs]).mean()

        metrics = {"val_loss": avg_val_loss, "val_mean_iou": avg_val_mean_iou}
        for k, v in metrics.items():
            self.log(k, v)

        return metrics

    def test_step(self, batch, batch_nb):
        images, masks = batch['pixel_values'], batch['labels']
        
        outputs = self(images, masks)
        logits = outputs[1]

        # Logits upsampling to match mask size
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        # 가중치를 적용한 손실 함수 사용
        loss = self.loss_fn(upsampled_logits, masks)
        predicted = upsampled_logits.argmax(dim=1)
        
        # IoU 계산
        mean_iou = compute_iou(predicted, masks, self.num_classes)
        
        return {'test_loss': loss, 'test_mean_iou': mean_iou}

    def test_epoch_end(self, outputs):
        avg_test_loss = torch.stack([x["test_loss"] for x in outputs]).mean()
        avg_test_mean_iou = torch.stack([x["test_mean_iou"] for x in outputs]).mean()

        metrics = {"test_loss": avg_test_loss, "test_mean_iou": avg_test_mean_iou}
        for k, v in metrics.items():
            self.log(k, v)

        return metrics

    def configure_optimizers(self):
        return torch.optim.Adam([p for p in self.parameters() if p.requires_grad], lr=2e-05, eps=1e-08)

    def train_dataloader(self):
        return self.train_dl

    def val_dataloader(self):
        return self.val_dl

    def test_dataloader(self):
        return self.test_dl


In [26]:
feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b5-finetuned-cityscapes-1024-1024")
feature_extractor.do_reduce_labels = False
feature_extractor.size = 512

train_dataset = SemanticSegmentationDataset(f"{dataset.location}/train/", feature_extractor)
val_dataset = SemanticSegmentationDataset(f"{dataset.location}/valid/", feature_extractor)
test_dataset = SemanticSegmentationDataset(f"{dataset.location}/test/", feature_extractor)

batch_size = 4
num_workers = 0
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, num_workers=0)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, num_workers=0)

segformer_finetuner = SegformerFinetuner(
    train_dataset.id2label, 
    train_dataloader=train_dataloader, 
    val_dataloader=val_dataloader, 
    test_dataloader=test_dataloader, 
    metrics_interval=10,
)

The class SegformerFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use SegformerImageProcessor instead.
The following named arguments are not valid for `SegformerFeatureExtractor.__init__` and were ignored: 'feature_extractor_type'
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b5-finetuned-cityscapes-1024-1024 and are newly initialized because the shapes did not match:
- decode_head.classifier.weight: found shape torch.Size([19, 768, 1, 1]) in the checkpoint and torch.Size([5, 768, 1, 1]) in the model instantiated
- decode_head.classifier.bias: found shape torch.Size([19]) in the checkpoint and torch.Size([5]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [28]:
early_stop_callback = EarlyStopping(
    monitor="val_loss", 
    min_delta=0.00, 
    patience=10, 
    verbose=False, 
    mode="min",
)

checkpoint_callback = ModelCheckpoint(save_top_k=1, monitor="val_loss")

trainer = pl.Trainer(
    gpus=[1], 
    callbacks=[early_stop_callback, checkpoint_callback],
    max_epochs=500,
    val_check_interval=len(train_dataloader),
)
trainer.fit(segformer_finetuner)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name    | Type                             | Params
-------------------------------------------------------------
0 | model   | SegformerForSemanticSegmentation | 84.6 M
1 | loss_fn | CrossEntropyLoss                 | 0     
-------------------------------------------------------------
84.6 M    Trainable params
0         Non-trainable params
84.6 M    Total params
338.389   Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

RuntimeError: weight tensor should be defined either for all or no classes

In [23]:
import torch
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# IoU 계산 함수
def compute_iou(predictions, targets, num_classes):
    ious = []
    for cls in range(num_classes):
        pred_inds = predictions == cls
        target_inds = targets == cls
        intersection = np.sum(pred_inds & target_inds)
        union = np.sum(pred_inds | target_inds)
        if union == 0:
            ious.append(float('nan'))  # NaN 대신 처리할 값을 넣을 수 있음
        else:
            ious.append(float(intersection) / float(union))
    return np.nanmean(ious)  # NaN을 제외한 평균 IoU

# 클래스별로 정밀도, 재현율, F1-score, 정확도 계산 함수
def compute_classwise_metrics(predictions, targets, num_classes):
    precision = precision_score(targets.flatten(), predictions.flatten(), average=None, labels=range(num_classes))
    recall = recall_score(targets.flatten(), predictions.flatten(), average=None, labels=range(num_classes))
    f1 = f1_score(targets.flatten(), predictions.flatten(), average=None, labels=range(num_classes))
    accuracy = accuracy_score(targets.flatten(), predictions.flatten())
    
    return precision, recall, f1, accuracy

# 모델 추론 및 성능 지표 계산 함수
def evaluate_model_on_test_data(dataloader, model, num_classes):
    iou_list = []
    precision_list, recall_list, f1_list = [], [], []
    
    for batch in dataloader:
        images, masks = batch['pixel_values'], batch['labels']
        
        with torch.no_grad():
            outputs = model(pixel_values=images)
        
        logits = outputs.logits
        upsampled_logits = torch.nn.functional.interpolate(
            logits, 
            size=masks.shape[-2:], 
            mode="bilinear", 
            align_corners=False
        )
        
        predicted_mask = upsampled_logits.argmax(dim=1).cpu().numpy()
        masks = masks.cpu().numpy()

        # IoU 계산
        iou = compute_iou(predicted_mask, masks, num_classes)
        iou_list.append(iou)

        # 정밀도, 재현율, F1-score 계산
        precision, recall, f1, accuracy = compute_classwise_metrics(predicted_mask, masks, num_classes)
        precision_list.append(precision)
        recall_list.append(recall)
        f1_list.append(f1)

    avg_iou = np.mean(iou_list)
    avg_precision = np.mean(precision_list, axis=0)
    avg_recall = np.mean(recall_list, axis=0)
    avg_f1 = np.mean(f1_list, axis=0)

    return avg_iou, avg_precision, avg_recall, avg_f1, accuracy

# 모델 성능 평가
num_classes = 3  # 클래스 개수
avg_iou, avg_precision, avg_recall, avg_f1, accuracy = evaluate_model_on_test_data(test_dataloader, segformer_finetuner.model, num_classes)

# 결과 출력
print(f"Average IoU: {avg_iou}")
for cls in range(num_classes):
    print(f"Class {cls}: Precision={avg_precision[cls]}, Recall={avg_recall[cls]}, F1-score={avg_f1[cls]}")
print(f"Overall Accuracy: {accuracy}")


Average IoU: 0.6688463198737511
Class 0: Precision=0.9695340228732868, Recall=0.9449876859777527, F1-score=0.9570278151299586
Class 1: Precision=0.9443783142133068, Recall=0.949859765523102, F1-score=0.9469625617292355
Class 2: Precision=0.2722608188601273, Recall=0.38854969608744055, F1-score=0.3182928413574594
Overall Accuracy: 0.9479942321777344
