In [105]:
from torchvision.datasets import Cityscapes
from torchvision import transforms
from torch.utils.data import DataLoader
import torchvision.transforms.functional as TF
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [66]:
from torchvision.datasets import Cityscapes
from torchvision import transforms
import torchvision.transforms.functional as TF
import torch
import numpy as np

class CityscapesDataset(Cityscapes):
    def __init__(self, root, split='train', mode='fine', target_type='semantic', shrinkToSize=None, 
                 cropHeight=512, cropWidth=1024, transform=None, target_transform=None):
        super(CityscapesDataset, self).__init__(root, split=split, mode=mode, target_type=target_type)
        
        #defining constants
        self.n_classes = 19
        self.void_classes = [0, 1, 2, 3, 4, 5, 6, 9, 10, 14, 15, 16, 18, 29, 30, -1]
        self.valid_classes = [ 7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33]
        self.class_names = [
            "road", "sidewalk", "building", "wall", "fence", "pole", "traffic_light", "traffic_sign",
            "vegetation", "terrain", "sky", "person", "rider", "car", "truck", "bus", "train", "motorcycle", 
            "bicycle",
        ]

        self.ignore_index = -1
        self.class_map = dict(zip(self.valid_classes, range(self.n_classes)))
        self.rev_class_map = dict(zip(range(self.n_classes), self.valid_classes))

        
        #defining transform params
        self.op_size = (cropHeight, cropWidth)
        self.transform = transform
        self.target_transform = target_transform

        
        if shrinkToSize is not None:
            self.images = self.images[:shrinkToSize]
            self.targets = self.targets[:shrinkToSize]
        
        return
    
    def __getitem__(self, index):
        img, lbl = super().__getitem__(index)
        img, lbl = self.transformImgMask(img, lbl)
        
        # remove void classes from label
        lbl = self.encode_segmap(lbl)
        
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            lbl = self.target_transform(lbl)
        
        return img, lbl
        
    def __len__(self):
        return len(self.images)
    
    def encode_segmap(self, mask):
        # Put all void classes to zero
        for _voidc in self.void_classes:
            mask[mask == _voidc] = self.ignore_index
        for _validc in self.valid_classes:
            mask[mask == _validc] = self.class_map[_validc]
        return mask
    
    def transformImgMask(self, image, mask):
        
        #crop only training images
        if self.split=='train':
            # Random Crop
            i, j, h, w = transforms.RandomCrop.get_params(image, output_size=self.op_size)
            image = TF.crop(image, i, j, h, w)
            mask = TF.crop(mask, i, j, h, w)
        
        # Implement other transforms like vertical and horizontal flips
        
        # Transform to Tensor
        image = self.transformToTensor(image, normalize=True)
        mask = self.transformToTensor(mask, normalize=False)
        return image, mask
    
    def transformToTensor(self, img, normalize=False):
        
        res = torch.from_numpy(np.array(img, np.int64, copy=False))
        res = res.view(img.size[1], img.size[0], len(img.getbands()))
        res = res.permute((2, 0, 1)).contiguous()
        if normalize:
            return res.float().div(255)
        return res

In [84]:
cd = CityscapesDataset("./data", split='val', target_type='semantic', shrinkToSize=50)

In [201]:
img, sem = cd[0]
# cd.transform(img, sem)

In [202]:
img.shape
sem.shape

torch.Size([3, 512, 1024])

In [89]:
bs=1

In [90]:
dl = DataLoader(cd, batch_size=bs, num_workers=0, shuffle=True)

In [4]:
len(cd)

100

In [103]:
transforms.RandomCrop.get_params(img, output_size=(512, 1024))

(498, 172, 512, 1024)

In [104]:
x = np.array([[1,2,3],[4,5,6]])
o = torch.from_numpy(x)
o

tensor([[1, 2, 3],
        [4, 5, 6]], dtype=torch.int32)

In [68]:
pic = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes()))
pic.shape

torch.Size([6291456])

In [66]:
pic

tensor([172, 206, 201,  ...,  55,  70,  63], dtype=torch.uint8)

In [174]:
# lbl = torch.from_numpy(sem).long()
# lbl.shape
pic2 = torch.from_numpy(np.array(sem, np.int32, copy=False))
pic2 = pic2.view(sem.size[1], sem.size[0], len(sem.getbands()))
pic2 = pic2.permute((2, 0, 1)).contiguous()
pic2.shape

torch.Size([1, 512, 1024])

In [94]:
u = np.unique(pic2)
len(u)

16

In [96]:
for i in range(len(u)):
    idx = u[i]
    print (cd.classes[idx].name)

unlabeled
ego vehicle
out of roi
static
road
sidewalk
building
pole
traffic sign
vegetation
terrain
sky
person
rider
car
bicycle


### Caluclate Weights for CrossEntropy Loss function

In [92]:
freq = np.zeros(cd.n_classes)
voids=0
p=0
for img, lbls in dl:
    
#     fig, ax = plt.subplots(1,1)
#     ax.imshow(np.transpose(img[0], [1,2,0]))
#     plt.show()
    
    if p==0:
        print(len(dl))
        print(lbls.shape)
    
    p+=1
    uniques, counts = np.unique(lbls, return_counts=True)
    
    for j,uni in enumerate(uniques):
        if uni==-1:
            voids+=counts[j]
        else:
            freq[uni] += counts[j]
    
    print("done ",p*bs)
print(freq)

50
torch.Size([1, 1, 1024, 2048])
done  1
done  2
done  3
done  4
done  5
done  6
done  7
done  8
done  9
done  10
done  11
done  12
done  13
done  14
done  15
done  16
done  17
done  18
done  19
done  20
done  21
done  22
done  23
done  24
done  25
done  26
done  27
done  28
done  29
done  30
done  31
done  32
done  33
done  34
done  35
done  36
done  37
done  38
done  39
done  40
done  41
done  42
done  43
done  44
done  45
done  46
done  47
done  48
done  49
done  50
[37077183.  5094904. 17139713.   343017.   953831.  2120990.   200740.
   536166. 15092411.   711004.  3520355.   751515.   167691.  5188745.
   708579.   302469.   101684.   211537.   298767.]


In [93]:
print("-1 ",voids,"\t unlabelled ")
for i in range(cd.n_classes):
    print(i," ",freq[i],"\t ",cd.classes[cd.rev_class_map[i]].name, " ", cd.class_names[i])


-1  14336299 	 unlabelled 
0   37077183.0 	  road   road
1   5094904.0 	  sidewalk   sidewalk
2   17139713.0 	  building   building
3   343017.0 	  wall   wall
4   953831.0 	  fence   fence
5   2120990.0 	  pole   pole
6   200740.0 	  traffic light   traffic_light
7   536166.0 	  traffic sign   traffic_sign
8   15092411.0 	  vegetation   vegetation
9   711004.0 	  terrain   terrain
10   3520355.0 	  sky   sky
11   751515.0 	  person   person
12   167691.0 	  rider   rider
13   5188745.0 	  car   car
14   708579.0 	  truck   truck
15   302469.0 	  bus   bus
16   101684.0 	  train   train
17   211537.0 	  motorcycle   motorcycle
18   298767.0 	  bicycle   bicycle


In [97]:
print("images scanned: "+str((np.sum(freq)+voids)/(1024*2048)))

images scanned: 50.0


In [109]:
max_val = np.max(freq)
weights = np.zeros([cd.n_classes, 2])

for i in range(cd.n_classes):
    weights[i][0] = i//1
    weights[i][1] = max_val/freq[i]

print(weights)

[[  0.           1.        ]
 [  1.           7.27730748]
 [  2.           2.16323243]
 [  3.         108.09138614]
 [  4.          38.8718578 ]
 [  5.          17.48107393]
 [  6.         184.70251569]
 [  7.          69.15243227]
 [  8.           2.45667727]
 [  9.          52.14764333]
 [ 10.          10.53222843]
 [ 11.          49.3365841 ]
 [ 12.         221.10419164]
 [ 13.           7.1456938 ]
 [ 14.          52.32611043]
 [ 15.         122.5817621 ]
 [ 16.         364.63143661]
 [ 17.         175.27516699]
 [ 18.         124.10066373]]


In [146]:
weight_df = pd.DataFrame(weights, columns=[ "class_id", "class_weight"])
class_names_df = pd.Series(cd.class_names)
weight_df = pd.concat((class_names_df.rename('class_name'), weight_df), axis=1)

In [147]:
weight_df.to_csv('class_weights.csv')

In [148]:
try1 = pd.read_csv('class_weights.csv')
try1.head()

Unnamed: 0.1,Unnamed: 0,class_name,class_id,class_weight
0,0,road,0.0,1.0
1,1,sidewalk,1.0,7.277307
2,2,building,2.0,2.163232
3,3,wall,3.0,108.091386
4,4,fence,4.0,38.871858


In [154]:
x = torch.Tensor(try1['class_weight'])
x.shape

torch.Size([19])