In [29]:
import wandb
from pathlib import Path
import os
from tqdm.auto import tqdm
from PIL import Image
import numpy as np
import torch
from torch import nn
from datasets import load_dataset
from transformers import MaskFormerImageProcessor, MaskFormerForInstanceSegmentation
from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchinfo import summary
import torchvision.models as models
from tqdm.auto import tqdm
import torchmetrics
from torchmetrics import Accuracy
import json

In [3]:
os.environ['WANDB_NOTEBOOK_NAME']='EDA.ipynb'
os.environ["KMP_DUPLICATE_LIB_OK"]='TRUE'

In [3]:
Logging_Place= 'LOCAL'

if Logging_Place == 'LOCAL':
    os.environ['WANDB_BASE_URL']="http://10.24.1.19:8080"
    os.environ['WANDB_API_KEY']='local-96dd72cbb60b0155149e2f9dc985a636ffa77b28'
    ! wandb login --host=http://10.24.1.19:8080

elif Logging_Place =="CLOUD":
    os.environ['WANDB_BASE_URL']="https://api.wandb.ai"
    os.environ['WANDB_API_KEY']='be69ec2b537cb972b0106fabd867f0dc68c4468a'
    ! wandb login --host=https://api.wandb.ai

^C


wandb: Network error (HTTPError), entering retry loop.
wandb: W&B API key is configured. Use `wandb login --relogin` to force relogin


In [4]:
data_path=Path('COD10K-v2')
train_path=data_path / 'Train'
test_path= data_path / 'Test'

train_images_path= train_path / 'Images/Image'
train_labels_path= train_path / 'GT_Objects/GT_Object'

test_images_path= test_path / 'Images/Image'
test_labels_path= test_path / 'GT_Objects/GT_Object'

In [5]:
train_images_list=list(train_images_path.glob('*.jpg'))
print(train_images_list[0:5])
train_labels_list=list(train_labels_path.glob('*.png'))
print(train_labels_list[0:5])

[WindowsPath('COD10K-v2/Train/Images/Image/COD10K-CAM-1-Aquatic-1-BatFish-1.jpg'), WindowsPath('COD10K-v2/Train/Images/Image/COD10K-CAM-1-Aquatic-1-BatFish-3.jpg'), WindowsPath('COD10K-v2/Train/Images/Image/COD10K-CAM-1-Aquatic-1-BatFish-7.jpg'), WindowsPath('COD10K-v2/Train/Images/Image/COD10K-CAM-1-Aquatic-1-BatFish-8.jpg'), WindowsPath('COD10K-v2/Train/Images/Image/COD10K-CAM-1-Aquatic-1-BatFish-9.jpg')]
[WindowsPath('COD10K-v2/Train/GT_Objects/GT_Object/COD10K-CAM-1-Aquatic-1-BatFish-1.png'), WindowsPath('COD10K-v2/Train/GT_Objects/GT_Object/COD10K-CAM-1-Aquatic-1-BatFish-3.png'), WindowsPath('COD10K-v2/Train/GT_Objects/GT_Object/COD10K-CAM-1-Aquatic-1-BatFish-7.png'), WindowsPath('COD10K-v2/Train/GT_Objects/GT_Object/COD10K-CAM-1-Aquatic-1-BatFish-8.png'), WindowsPath('COD10K-v2/Train/GT_Objects/GT_Object/COD10K-CAM-1-Aquatic-1-BatFish-9.png')]


In [6]:
super_classes_list=[ i.stem.split('-')[3] for i in train_images_list]
super_classes_list=[*set(super_classes_list)]
print(super_classes_list)


['Aquatic', 'Terrestrial', 'Terrestial', 'Amphibian', 'Flying', 'Background', 'Other']


In [7]:
index=5
sub_classes_list=[]
for i in train_images_list:
    split_path=i.stem.split('-')
    count=len(split_path)
    if (index<count):
        sub_classes_list.append(split_path[index])
sub_classes_list=[*set(sub_classes_list)]
print(sub_classes_list)

['Pipefish', 'Caterpillar', 'Deer', 'SeaHorse', 'Mockingbird', 'Owl', 'Stingaree', 'Centipede', 'GhostPipefish', 'Bug', 'Flounder', 'Shrimp', 'Mantis', 'StarFish', 'Dog', 'Giraffe', 'FrogFish', 'Kangaroo', 'Fish', 'Turtle', 'Reccoon', 'Butterfly', 'Leopard', 'Indoor', 'Human', 'Grasshopper', 'Spider', 'Toad', 'Dragonfly', 'Bittern', 'Owlfly', 'Crab', 'Crocodile', 'Sky', 'Heron', 'ClownFish', 'Lizard', 'Lion', 'Snake', 'StickInsect', 'Gecko', 'Wolf', 'Sheep', 'Monkey', 'Beetle', 'Vegetation', 'Octopus', 'Katydid', 'Frog', 'Other', 'Cat', 'Sciuridae', 'Bee', 'BatFish', 'CrocodileFish', 'Cheetah', 'Cicada', 'Ocean', 'Pagurian', 'ScorpionFish', 'Bird', 'Moth', 'Sand', 'Bat', 'Ant', 'Duck', 'Chameleon', 'Worm', 'Tiger', 'LeafySeaDragon', 'Slug', 'Grouse', 'Frogmouth', 'Rabbit']


#####    Setting up dataloaders

In [21]:
class CamouflageDataset(Dataset):
    def __init__(
        self,
        root_dir: str,
        feature_extractor,
        split:str
    ):
        self.root_dir=Path(root_dir)
        if split=="train":
            self.data_dir=self.root_dir / "Train"
        elif split=="test":
            self.data_dir=self.root_dir /"Test"

        self.image_dir=self.data_dir / "Images/Image"
        self.gt_dir=self.data_dir / "GT_objects/GT_Object"

        self.feature_extractor=feature_extractor

        self.unsorted_image_list= list(self.image_dir.glob('*.jpg'))
        self.sorted_image_list=sorted(self.unsorted_image_list)

        self.unsorted_gt_list=list(self.gt_dir.glob('*.png'))
        self.sorted_gt_list=sorted(self.unsorted_gt_list)

        #print(self.sorted_image_list[0:5])
        #print(self.sorted_gt_list[0:5])

        assert (len(self.sorted_gt_list)==len(self.sorted_image_list)), "Number of images and ground truths must be same"

    def __getitem__(self, idx):
        img=Image.open(self.sorted_image_list[idx])
        gt=Image.open(self.sorted_gt_list[idx])

        encoded_input=self.feature_extractor(img,gt,return_tensors="pt")

        for k,v in encoded_input.items():
            encoded_input[k].squeeze_()
        return encoded_input
        

    def __len__(self):
        return len(self.sorted_image_list)

In [22]:
feature_extractor=SegformerImageProcessor()
train_dataset=CamouflageDataset(root_dir='COD10K-v2', feature_extractor= feature_extractor, split="train")
test_dataset=CamouflageDataset(root_dir='COD10K-v2', feature_extractor= feature_extractor, split="test")

In [23]:
encoded_input=train_dataset[0]
print(encoded_input["pixel_values"].shape)
print(encoded_input["labels"].shape)

torch.Size([3, 512, 512])
torch.Size([512, 512])


In [24]:
train_dataloader=DataLoader(train_dataset,batch_size=1, shuffle=True)
test_dataloader=DataLoader(test_dataset,batch_size=1, shuffle=False)

In [28]:
batch=next(iter(train_dataloader))
print(batch["pixel_values"].shape)
print(batch["labels"].shape)

torch.Size([1, 3, 512, 512])
torch.Size([1, 512, 512])


In [26]:
print(len(train_dataset))
print(len(test_dataset))

6000
4000


##### Model definitions

In [30]:
id2label=json.load(open('COD10K-v2.json', "r"))
id2label = {int(k): v for k, v in id2label.items()}
label2id = {v: k for k, v in id2label.items()}

In [31]:
print(id2label)
print(label2id)

{0: 'background', 1: 'camouflage object'}
{'background': 0, 'camouflage object': 1}


In [34]:
model = SegformerForSemanticSegmentation.from_pretrained("mit-b0",
                                                         num_labels=2, 
                                                         id2label=id2label, 
                                                         label2id=label2id
)

Some weights of the model checkpoint at mit-b0 were not used when initializing SegformerForSemanticSegmentation: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at mit-b0 and are newly initialized: ['decode_head.linear_c.3.proj.weight', 'decode_head.classifier.weight', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.linear_fuse.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.0.pr

#### Model train loop