In [90]:
%matplotlib inline
import os

import cv2
import numpy as np

import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from transformers import SegformerFeatureExtractor
from torchvision import transforms

from utilities import AITEXPatchedSegmentation
from model_architectures import BinaryClassifier, MiniUNet


In [48]:
class SemanticSegmentationDataset(AITEXPatchedSegmentation):
    """Image (semantic) segmentation dataset."""

    def __init__(self, *args, feature_extractor, **kwargs,):
        super(SemanticSegmentationDataset, self).__init__(*args, **kwargs)
        self.feature_extractor = feature_extractor

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        
        image = self.defect_images[idx]
        mask = self.defect_masks[idx]

        encoded_inputs = self.feature_extractor(image.expand(3, 256, 256), mask, return_tensors="pt")

        for k,v in encoded_inputs.items():
          encoded_inputs[k].squeeze_()

        return encoded_inputs

In [91]:
# Define paths
root = os.path.abspath(os.path.join(os.getcwd(), ".."))
model_dir = os.path.join(root, "models")
data_dir = os.path.join(root, "data")
aitex_dir = os.path.join(data_dir, "aitex")
transform = transforms.Compose([])

data = SemanticSegmentationDataset(aitex_dir, feature_extractor=feature_extractor, transform=transform)

bs = 4
train = DataLoader(data, batch_size=bs)

In [92]:
feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
feature_extractor.do_reduce_labels = False
feature_extractor.size = 128



In [93]:
from transformers import SegformerForSemanticSegmentation

model = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/segformer-b0-finetuned-ade-512-512", 
    return_dict=False, 
    num_labels=1,
    # id2label=self.id2label,
    # label2id=self.label2id,
    ignore_mismatched_sizes=True,
)

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b0-finetuned-ade-512-512 and are newly initialized because the shapes did not match:
- decode_head.classifier.weight: found shape torch.Size([150, 256, 1, 1]) in the checkpoint and torch.Size([1, 256, 1, 1]) in the model instantiated
- decode_head.classifier.bias: found shape torch.Size([150]) in the checkpoint and torch.Size([1]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [86]:
from datasets import load_metric

train_mean_iou = load_metric("mean_iou")

x = data[0]['pixel_values'].reshape(1, 3, 128, 128)
y = data[0]['labels'].reshape(1, 128, 128)
outputs = model(pixel_values=x, labels=y)
loss, logits = outputs[0], outputs[1]

upsampled_logits = nn.functional.interpolate(
    logits, 
    size=y.shape[-2:], 
    mode="bilinear", 
    align_corners=False
)

predicted = upsampled_logits.argmax(dim=1)

train_mean_iou.add_batch(
    predictions=predicted.detach().cpu().numpy(), 
    references=y.detach().cpu().numpy()
)
train_mean_iou.compute(
    num_labels=1, 
    ignore_index=255, 
    reduce_labels=False,
)

{'mean_iou': 0.97845458984375,
 'mean_accuracy': 1.0,
 'overall_accuracy': 1.0,
 'per_category_iou': array([0.97845459]),
 'per_category_accuracy': array([1.])}

In [107]:
import torch
from torch import nn
from sklearn.metrics import accuracy_score
from tqdm import tqdm

# define optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=0.00006)
# move model to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
metric = load_metric("mean_iou")
model.train()
for epoch in range(200):  # loop over the dataset multiple times
   print("Epoch:", epoch)
   for batch in data:
      # get the inputs;
      pixel_values = batch["pixel_values"].reshape(1, 3, 128, 128).to(device)
      labels = batch["labels"].reshape(1, 128, 128).to(device)

      # zero the parameter gradients
      optimizer.zero_grad()

      # forward + backward + optimize
      outputs = model(pixel_values=pixel_values, labels=labels)
      loss, logits = outputs
      
      loss.backward()
      optimizer.step()

      # evaluate
      with torch.no_grad():
         upsampled_logits = nn.functional.interpolate(logits, size=labels.shape[-2:], mode="bilinear", align_corners=False)
         predicted = upsampled_logits.argmax(dim=1)
         
         # note that the metric expects predictions + labels as numpy arrays
         metric.add_batch(predictions=predicted.detach().cpu().numpy(), references=labels.detach().cpu().numpy())
         
   metrics = metric.compute(num_labels=1, 
                           ignore_index=255,
                           reduce_labels=False, # we've already reduced the labels before)
   )

   print("Loss:", loss.item())
   print("Mean_iou:", metrics["mean_iou"])
   print("Mean accuracy:", metrics["mean_accuracy"])

Epoch: 0
Loss: 0.04712343215942383
Mean_iou: 0.9598319388724662
Mean accuracy: 1.0
Epoch: 1
Loss: 0.056149400770664215
Mean_iou: 0.9598319388724662
Mean accuracy: 1.0
Epoch: 2
Loss: 0.057191986590623856
Mean_iou: 0.9598319388724662
Mean accuracy: 1.0
Epoch: 3
Loss: 0.0935225635766983
Mean_iou: 0.9598319388724662
Mean accuracy: 1.0
Epoch: 4
Loss: 0.04893092066049576
Mean_iou: 0.9598319388724662
Mean accuracy: 1.0
Epoch: 5
Loss: 0.05527281016111374
Mean_iou: 0.9598319388724662
Mean accuracy: 1.0
Epoch: 6
Loss: 0.05002467334270477
Mean_iou: 0.9598319388724662
Mean accuracy: 1.0
Epoch: 7
Loss: 0.06600642204284668
Mean_iou: 0.9598319388724662
Mean accuracy: 1.0
Epoch: 8
Loss: 0.0592881515622139
Mean_iou: 0.9598319388724662
Mean accuracy: 1.0
Epoch: 9
Loss: 0.051115769892930984
Mean_iou: 0.9598319388724662
Mean accuracy: 1.0
Epoch: 10
Loss: 0.05140042304992676
Mean_iou: 0.9598319388724662
Mean accuracy: 1.0
Epoch: 11
Loss: 0.05138517916202545
Mean_iou: 0.9598319388724662
Mean accuracy: 1.0
E

KeyboardInterrupt: 

In [100]:
len(train)

62