Based off of https://colab.research.google.com/drive/1_t3KvF3qg4IJfEhTuftFI1GSlscapNgf?usp=sharing

In [1]:
!pip install --upgrade transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.27.4-py3-none-any.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m36.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m54.0 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.3-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 KB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.3 tokenizers-0.13.2 transformers-4.27.4


In [2]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from torchvision import transforms

from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor
from transformers import AdamW

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
from PIL import Image
import os
import pandas as pd
import cv2
import albumentations as aug
import numpy as np
import glob
import re
import matplotlib.pyplot as plt 

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
numbers = re.compile(r'(\d+)')

def numericalSort(value):
    parts = numbers.split(value)
    parts[1::2] = map(int, parts[1::2])
    return parts

In [5]:
# Define your transformations
image_transform = transforms.Compose([
    transforms.ToTensor()
])

mask_transform = transforms.Compose([
    transforms.ToTensor()
  
])

class SemanticSegmentationDataset(Dataset):
    def __init__(self, image_paths, mask_paths, image_transform=image_transform, mask_transform=mask_transform):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.image_transform = image_transform
        self.mask_transform = mask_transform
      
        assert len(self.image_paths) == len(self.mask_paths), "Number of masks and images does not match :("
        
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        # Load image and mask

        assert os.path.basename(self.image_paths[idx]) == os.path.basename(self.mask_paths[idx]), "Image and mask don't match!"

        image = Image.open(self.image_paths[idx])
        image = image.convert('RGB')
        
        mask = Image.open(self.mask_paths[idx])
        mask = image.convert('L')

         # Apply transformations
        if self.image_transform:
             image = self.image_transform(image)  
             mask = self.mask_transform(mask)   

        feature_extractor = SegformerImageProcessor(align=False, reduce_zero_label=False)
        image_features = feature_extractor(image)
        image_tensor = torch.tensor(image_features.pixel_values)
        image_tensor = torch.squeeze(image_tensor, dim = 0)
        
        mask = torch.squeeze(mask, dim =0)
        mask = mask.long()
        
        #print(mask.shape)
        #print(image_tensor.shape)

        return image_tensor, mask
    


In [6]:
image_path = '/content/drive/MyDrive/Hops/512/Input'
mask_path =  '/content/drive/MyDrive/Hops/512/Target'

images = sorted(glob.glob(image_path + '/*.png'), key = numericalSort)
masks = sorted(glob.glob(mask_path + '/*.png'), key = numericalSort)

train_val_images, test_images, train_val_masks, test_masks = train_test_split(images, masks, test_size=0.2, random_state=42)
train_images, val_images, train_masks, val_masks = train_test_split(train_val_images, train_val_masks, test_size=0.25, random_state=42)

train_dataset = SemanticSegmentationDataset(train_images, train_masks, image_transform=image_transform, mask_transform=mask_transform)
val_dataset = SemanticSegmentationDataset(val_images, val_masks, image_transform=image_transform, mask_transform=mask_transform)
test_dataset = SemanticSegmentationDataset(test_images, test_masks, image_transform=image_transform, mask_transform=mask_transform)

batch_size = 4
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

print("Number of training examples:", len(train_dataloader.dataset))
print("Number of validation examples:", len(val_dataloader.dataset))
print("Number of test examples:", len(test_dataloader.dataset))

Number of training examples: 408
Number of validation examples: 136
Number of test examples: 136


In [7]:
batch = next(iter(train_dataloader))

  image_tensor = torch.tensor(image_features.pixel_values)


In [8]:
id2label = {0: 'background', 1: 'foreground'}
label2id = {'background': 0, 'foreground': 1}


model = SegformerForSemanticSegmentation.from_pretrained("nvidia/mit-b0", ignore_mismatched_sizes=True,
                                                         num_labels=len(id2label), id2label=id2label, label2id=label2id,
                                                         reshape_last_stage=True)

Downloading (…)lve/main/config.json:   0%|          | 0.00/70.0k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/14.4M [00:00<?, ?B/s]

Some weights of the model checkpoint at nvidia/mit-b0 were not used when initializing SegformerForSemanticSegmentation: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.batch_norm.bias', 'decode_head.linear_c.3.proj.bias', 'decode_head.classif

In [9]:
optimizer = AdamW(model.parameters(), lr=0.00006)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print("Model Initialized!")

Model Initialized!




In [10]:
torch.cuda.empty_cache()

for epoch in range(1,11):  # loop over the dataset multiple times
    print("Epoch:", epoch)
    pbar = tqdm(train_dataloader)
    accuracies = []
    losses = []
    val_accuracies = []
    val_losses = []
    model.train()
    for idx, batch in enumerate(pbar):
        # get the inputs;
        pixel_values = batch[0].to(device)
        
        labels = batch[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward
        outputs = model(pixel_values=pixel_values, labels=labels)

        # evaluate
        upsampled_logits = nn.functional.interpolate(outputs.logits, size=labels.shape[-2:], mode="bilinear", align_corners=False)
        predicted = upsampled_logits.argmax(dim=1)

        mask = (labels != 255) # we don't include the background class in the accuracy calculation
        pred_labels = predicted[mask].detach().cpu().numpy()
        true_labels = labels[mask].detach().cpu().numpy()
        accuracy = accuracy_score(pred_labels, true_labels)
        loss = outputs.loss
        accuracies.append(accuracy)
        losses.append(loss.item())
        pbar.set_postfix({'Batch': idx, 'Pixel-wise accuracy': sum(accuracies)/len(accuracies), 'Loss': sum(losses)/len(losses)})

        # backward + optimize
        loss.backward()
        optimizer.step()
    else:
         model.eval()
         with torch.no_grad():
             for idx, batch in enumerate(val_dataloader):
                 pixel_values = batch[0].to(device)
                 labels = batch[1].to(device)

                 outputs = model(pixel_values=pixel_values, labels=labels)
                 upsampled_logits = nn.functional.interpolate(outputs.logits, size=labels.shape[-2:], mode="bilinear", align_corners=False)
                 predicted = upsampled_logits.argmax(dim=1)

                 mask = (labels != 255) # we don't include the background class in the accuracy calculation
                 pred_labels = predicted[mask].detach().cpu().numpy()
                 true_labels = labels[mask].detach().cpu().numpy()
                 accuracy = accuracy_score(pred_labels, true_labels)
                 val_loss = outputs.loss
                 val_accuracies.append(accuracy)
                 val_losses.append(val_loss.item())

    print(f"Train Pixel-wise accuracy: {sum(accuracies)/len(accuracies)}\
         Train Loss: {sum(losses)/len(losses)}\
         Val Pixel-wise accuracy: {sum(val_accuracies)/len(val_accuracies)}\
         Val Loss: {sum(val_losses)/len(val_losses)}")

Epoch: 1


  0%|          | 0/102 [00:00<?, ?it/s]

Train Pixel-wise accuracy: 0.9313557381723442         Train Loss: 0.39529330458711176         Val Pixel-wise accuracy: 0.9959718760322122         Val Loss: 0.2025966293671552
Epoch: 2


  0%|          | 0/102 [00:00<?, ?it/s]

Train Pixel-wise accuracy: 0.9971135550854253         Train Loss: 0.15481856954740544         Val Pixel-wise accuracy: 0.9989668621736414         Val Loss: 0.09702389029895558
Epoch: 3


  0%|          | 0/102 [00:00<?, ?it/s]

Train Pixel-wise accuracy: 0.9974942581326354         Train Loss: 0.09080282459948577         Val Pixel-wise accuracy: 0.9989668621736414         Val Loss: 0.06196420464445563
Epoch: 4


  0%|          | 0/102 [00:00<?, ?it/s]

Train Pixel-wise accuracy: 0.997582239263198         Train Loss: 0.05936893389797678         Val Pixel-wise accuracy: 0.9989668621736414         Val Loss: 0.04278742784962935
Epoch: 5


  0%|          | 0/102 [00:00<?, ?it/s]

Train Pixel-wise accuracy: 0.9976047347573673         Train Loss: 0.0430760869354594         Val Pixel-wise accuracy: 0.9989668621736414         Val Loss: 0.032241197795990634
Epoch: 6


  0%|          | 0/102 [00:00<?, ?it/s]

Train Pixel-wise accuracy: 0.9976139816583371         Train Loss: 0.036830951003175155         Val Pixel-wise accuracy: 0.9989668621736414         Val Loss: 0.024947835625532794
Epoch: 7


  0%|          | 0/102 [00:00<?, ?it/s]

Train Pixel-wise accuracy: 0.9976160012039483         Train Loss: 0.03065206899362452         Val Pixel-wise accuracy: 0.9989668621736414         Val Loss: 0.01999157719204531
Epoch: 8


  0%|          | 0/102 [00:00<?, ?it/s]

Train Pixel-wise accuracy: 0.9976131495307473         Train Loss: 0.025955762211963825         Val Pixel-wise accuracy: 0.9989668621736414         Val Loss: 0.01610897043172051
Epoch: 9


  0%|          | 0/102 [00:00<?, ?it/s]

Train Pixel-wise accuracy: 0.9976160853516822         Train Loss: 0.022287590100484735         Val Pixel-wise accuracy: 0.9989668621736414         Val Loss: 0.014712042640894651
Epoch: 10


  0%|          | 0/102 [00:00<?, ?it/s]

Train Pixel-wise accuracy: 0.9976100080153522         Train Loss: 0.02041995896976076         Val Pixel-wise accuracy: 0.9989668621736414         Val Loss: 0.01421600385733387


In [12]:
model.save_pretrained("/content/drive/MyDrive/Hops/512/mit-b0")

In [19]:
from transformers import AutoConfig, AutoModel

config_path = '/content/drive/MyDrive/Hops/512/mit-b0/config.json'
config = AutoConfig.from_pretrained(config_path)

model_path = "/content/drive/MyDrive/Hops/512/mit-b0"
model = AutoModel.from_pretrained(model_path)

Some weights of the model checkpoint at /content/drive/MyDrive/Hops/512/mit-b0 were not used when initializing SegformerModel: ['decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.batch_norm.bias', 'decode_head.linear_c.3.proj.bias', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.batch_norm.running_mean', 'decode_head.linear_c.3.proj.weight', 'decode_head.batch_norm.weight', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.batch_norm.running_var', 'decode_head.linear_fuse.weight']
- This IS expected if you are initializing SegformerModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SegformerMo