# Dataset

## Data path list Function

In [1]:
import os.path as osp

In [2]:
def make_datapath_list(rootpath):
    """Build image / annotation path lists for the segmentation train and val splits.

    File IDs are read from ``ImageSets/Segmentation/train.txt`` and
    ``ImageSets/Segmentation/val.txt`` under ``rootpath``; each ID is expanded
    to ``JPEGImages/<id>.jpg`` and ``SegmentationClass/<id>.png``.

    Args:
        rootpath: Dataset root directory. A trailing path separator is optional.

    Returns:
        Tuple ``(train_img_list, train_anno_list, val_img_list, val_anno_list)``
        of lists of path strings, in the order the IDs appear in the ID files.

    Raises:
        OSError: If one of the ID files cannot be opened.
    """
    # Templates: '%s' is replaced by the file ID.
    imgpath_template = osp.join(rootpath, 'JPEGImages', '%s.jpg')
    annopath_template = osp.join(rootpath, 'SegmentationClass', '%s.png')

    def _read_split(split_name):
        """Return (image paths, annotation paths) for one split's ID file."""
        # Join component-wise so rootpath works with or without a trailing separator.
        id_file = osp.join(rootpath, 'ImageSets', 'Segmentation', split_name + '.txt')
        img_list = []
        anno_list = []
        # The context manager closes the file even if reading fails part-way.
        with open(id_file) as id_stream:
            for line in id_stream:
                file_id = line.strip()
                if not file_id:
                    # Blank lines would otherwise produce bogus '.jpg' / '.png' paths.
                    continue
                img_list.append(imgpath_template % file_id)
                anno_list.append(annopath_template % file_id)
        return img_list, anno_list

    train_img_list, train_anno_list = _read_split('train')
    val_img_list, val_anno_list = _read_split('val')

    return train_img_list, train_anno_list, val_img_list, val_anno_list

In [3]:
# Root directory handed to make_datapath_list.
rootpath = './data/VOCdevkit/VOC2012/'

# Collect the image / annotation path lists for both splits.
(train_img_list, train_anno_list,
 val_img_list, val_anno_list) = make_datapath_list(rootpath=rootpath)

# Quick check: dump the training image and annotation paths, one list per line.
print(train_img_list, train_anno_list, sep='\n')

['./data/VOCdevkit/VOC2012/JPEGImages/2007_000032.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000039.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000063.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000068.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000121.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000170.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000241.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000243.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000250.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000256.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000333.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000363.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000364.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000392.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000480.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000504.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000515.jpg', './data/VOCdevkit/VOC2012/JPEGImages/2007_000528.jpg', './data/V

In [4]:
from utils.data_augumentation import Compose, Scale, RandomRotation, RandomMirror, Resize, Normalize_Tensor

## Data Transform Class

In [5]:
class DataTransform():
    """Callable that applies a phase-specific pipeline to an image / annotation pair.

    Two ``Compose`` pipelines are built at construction time and stored in
    ``self.data_transform`` under the keys ``'train'`` and ``'val'``:

    * ``'train'``: Scale, RandomRotation, RandomMirror, Resize, Normalize_Tensor
    * ``'val'``:   Resize, Normalize_Tensor

    Args:
        input_size: Passed to ``Resize``.
        color_mean: Passed to ``Normalize_Tensor`` as its first argument.
        color_std: Passed to ``Normalize_Tensor`` as its second argument.
    """

    def __init__(self, input_size, color_mean, color_std):
        # Training pipeline: augmentation steps followed by resize + normalisation.
        train_pipeline = Compose([
            Scale(scale=[.5, 1.5]),
            RandomRotation(angle=[-10, 10]),
            RandomMirror(),
            Resize(input_size),
            Normalize_Tensor(color_mean, color_std),
        ])
        # Validation pipeline: resize + normalisation only.
        val_pipeline = Compose([
            Resize(input_size),
            Normalize_Tensor(color_mean, color_std),
        ])
        self.data_transform = {'train': train_pipeline, 'val': val_pipeline}

    def __call__(self, phase, img, anno_class_img):
        """Run the pipeline registered for ``phase`` ('train' or 'val') on the pair."""
        pipeline = self.data_transform[phase]
        return pipeline(img, anno_class_img)

## VOCDataset Class

In [6]:
import torch.utils.data as data
from PIL import Image

In [7]:
class VOCDataset(data.Dataset):
    """Dataset that pairs image files with their annotation images.

    Args:
        img_list: Sequence of image file paths.
        anno_list: Sequence of annotation file paths, index-aligned with ``img_list``.
        phase: Phase key forwarded to ``transform`` (the notebook uses 'train' / 'val').
        transform: Callable invoked as ``transform(phase, img, anno_class_img)``.
    """

    def __init__(self, img_list, anno_list, phase, transform):
        self.img_list, self.anno_list = img_list, anno_list
        self.phase, self.transform = phase, transform

    def __len__(self):
        """Number of samples, taken from the image list."""
        return len(self.img_list)

    def __getitem__(self, index):
        """Return the transformed ``(img, anno_class_img)`` pair at ``index``."""
        return self.pull_item(index)

    def pull_item(self, index):
        """Open the image and annotation at ``index`` and apply the transform."""
        img = Image.open(self.img_list[index])
        anno_class_img = Image.open(self.anno_list[index])

        img, anno_class_img = self.transform(self.phase, img, anno_class_img)
        return img, anno_class_img

In [8]:
# Per-channel statistics handed to DataTransform
# (presumably the usual ImageNet RGB mean / std - confirm).
color_mean = (0.485, 0.456, 0.406)
color_std = (0.229, 0.224, 0.225)

# Training split: uses the 'train' pipeline of DataTransform.
train_dataset = VOCDataset(
    img_list=train_img_list,
    anno_list=train_anno_list,
    phase='train',
    transform=DataTransform(input_size=475, color_mean=color_mean, color_std=color_std),
)

# Validation split: uses the 'val' pipeline of DataTransform.
val_dataset = VOCDataset(
    img_list=val_img_list,
    anno_list=val_anno_list,
    phase='val',
    transform=DataTransform(input_size=475, color_mean=color_mean, color_std=color_std),
)

# DataLoader

In [9]:
from PIL import Image

In [10]:
batch_size = 8

# Only the training loader shuffles; validation keeps dataset order.
train_dataloader = data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

# Phase name -> loader lookup.
dataloaders_dict = dict(train=train_dataloader, val=val_dataloader)

In [11]:
# Pull the first validation batch and print the size of each tensor.
batch_iterator = iter(dataloaders_dict['val'])
imges, anno_class_imges = next(batch_iterator)
print(imges.size(), anno_class_imges.size(), sep='\n')

torch.Size([8, 3, 475, 475])
torch.Size([8, 475, 475])


# PSPNet

## PSPNet Class
**Feature Module** + **Pyramid Pooling Module** + **Decoder** (+ **AuxLoss Module**)

In [15]:
import torch.nn as nn

In [18]:
class PSPNet(nn.Module):
    """PSPNet: feature module, pyramid pooling module, decoder and auxiliary head.

    Args:
        n_classes: Number of output classes, forwarded to the decoder and the
            auxiliary head.

    ``forward`` returns the tuple ``(output, output_aux)``: the decoder output
    and the auxiliary output computed from the ``feature_dilated_res_1`` features.
    """

    def __init__(self, n_classes):
        super(PSPNet, self).__init__()

        block_config = [3, 4, 6, 3]  # resnet50
        img_size = 475
        img_size_8 = 60  # height / width handed to PyramidPooling

        # Feature module
        self.feature_conv = FeatureMap_convolution()
        self.feature_res_1 = ResidualBlockPSP(
            n_blocks=block_config[0], in_channels=128, mid_channels=64, out_channels=256, stride=1, dilation=1)
        # out_channels must be 512 so it matches in_channels of feature_dilated_res_1.
        self.feature_res_2 = ResidualBlockPSP(
            n_blocks=block_config[1], in_channels=256, mid_channels=128, out_channels=512, stride=2, dilation=1)
        self.feature_dilated_res_1 = ResidualBlockPSP(
            n_blocks=block_config[2], in_channels=512, mid_channels=256, out_channels=1024, stride=1, dilation=2)
        self.feature_dilated_res_2 = ResidualBlockPSP(
            n_blocks=block_config[3], in_channels=1024, mid_channels=512, out_channels=2048, stride=1, dilation=4)

        # Pyramid pooling module
        self.pyramid_pooling = PyramidPooling(
            in_channels=2048, pool_sizes=[6, 3, 2, 1], height=img_size_8, width=img_size_8)

        # Decoder
        # NOTE(review): in_channels=1024 is kept from the original; it must equal the
        # channel count PyramidPooling emits, which is not visible here - confirm.
        self.decode_feature = DecodePSPFeature(
            in_channels=1024, height=img_size, width=img_size, n_classes=n_classes)

        # Auxiliary head: forward() calls self.aux on the 1024-channel output of
        # feature_dilated_res_1, so it has to be created here.
        # NOTE(review): AuxiliaryPSPlayers is not defined in the visible part of the
        # file - confirm its name and constructor signature.
        self.aux = AuxiliaryPSPlayers(
            in_channels=1024, height=img_size, width=img_size, n_classes=n_classes)

    def forward(self, x):
        x = self.feature_conv(x)
        x = self.feature_res_1(x)
        x = self.feature_res_2(x)
        x = self.feature_dilated_res_1(x)

        # Auxiliary branch taps the features before the last residual stage.
        output_aux = self.aux(x)

        x = self.feature_dilated_res_2(x)
        x = self.pyramid_pooling(x)
        output = self.decode_feature(x)

        return (output, output_aux)

## Feature Module
 **FeatureMap_convolution** + 4 * **ResidualBlockPSP**

### FeatureMap_convolution Class
3 * **conv2DBatchNormRelu** + **MaxPooling**

#### conv2DBatchNormRelu Class

In [19]:
class conv2DBatchNormRelu(nn.Module):
    """Conv2d followed by BatchNorm2d and ReLU.

    Args:
        in_channels: Input channels of the convolution.
        out_channels: Output channels of the convolution and of the batch norm.
        kernel_size, stride, padding, dilation: Forwarded to ``nn.Conv2d``.
        bias: Whether the convolution has a bias term.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, bias):
        super(conv2DBatchNormRelu, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias)
        self.batchnorm = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)  # in-place to save memory

    # Must be named ``forward`` (not ``__forward``) so nn.Module.__call__ finds it.
    def forward(self, x):
        x = self.conv(x)
        x = self.batchnorm(x)
        outputs = self.relu(x)

        return outputs

#### FeatureMap_convolution Class

In [21]:
class FeatureMap_convolution(nn.Module):
    """Network stem: three conv2DBatchNormRelu layers followed by max pooling.

    Channel progression is 3 -> 64 -> 64 -> 128. The first convolution and the
    max pool both use stride 2.
    """

    def __init__(self):
        super(FeatureMap_convolution, self).__init__()

        # 3*Conv + Pooling
        # The first two arguments (in / out channels) are passed positionally.
        self.cbnr_1 = conv2DBatchNormRelu(
            3, 64, kernel_size=3, stride=2, padding=1, dilation=1, bias=False)
        self.cbnr_2 = conv2DBatchNormRelu(
            64, 64, kernel_size=3, stride=1, padding=1, dilation=1, bias=False)
        self.cbnr_3 = conv2DBatchNormRelu(
            64, 128, kernel_size=3, stride=1, padding=1, dilation=1, bias=False)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        # 3*Conv + Pooling
        x = self.cbnr_1(x)
        x = self.cbnr_2(x)
        x = self.cbnr_3(x)
        outputs = self.maxpool(x)

        return outputs

### ResidualBlockPSP Class
bottleNeckPSP + bottleNeckIdentifyPSP * (n - 1), where n is the total number of blocks (3 to 6 in the ResNet-50 configuration)

#### ResidualBlockPSP Class

In [None]:
class ResidualBlockPSP(nn.Sequential):
    """Sequential stack of one bottleNeckPSP and ``n_blocks - 1`` bottleNeckIdentifyPSP.

    Sub-modules are registered as ``block1`` ... ``block<n_blocks>``. The
    ``forward`` pass is inherited from ``nn.Sequential``.
    """

    def __init__(self, n_blocks, in_channels, mid_channels, out_channels, stride, dilation):
        super().__init__()

        # The first block changes the channel count from in_channels to out_channels.
        first_block = bottleNeckPSP(in_channels, mid_channels, out_channels, stride, dilation)
        self.add_module("block1", first_block)

        # Remaining blocks keep out_channels; they are numbered from 2.
        for block_no in range(2, n_blocks + 1):
            identity_block = bottleNeckIdentifyPSP(out_channels, mid_channels, stride, dilation)
            self.add_module("block" + str(block_no), identity_block)

#### conv2DBatchNorm Class

In [22]:
class conv2DBatchNorm(nn.Module):
    """Conv2d followed by BatchNorm2d (no activation).

    Args:
        in_channels: Input channels of the convolution.
        out_channels: Output channels of the convolution and of the batch norm.
        kernel_size, stride, padding, dilation: Forwarded to ``nn.Conv2d``.
        bias: Whether the convolution has a bias term.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, bias):
        # super() needs the instance; without it nn.Module is never initialised
        # and registering sub-modules below fails.
        super(conv2DBatchNorm, self).__init__()
        # Honour the caller's hyper-parameters instead of hard-coded 1x1 / stride 1,
        # so a strided skip connection is actually strided.
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias)
        self.batchnorm = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        x = self.conv(x)
        outputs = self.batchnorm(x)

        return outputs

#### bottleNeckPSP Class
Skip connection has Conv2D+BatchNorm layers

In [23]:
class bottleNeckPSP(nn.Module):
    """Bottleneck block whose skip connection is a Conv2d + BatchNorm projection.

    Main path: 1x1 conv (in -> mid), 3x3 conv (mid -> mid, with ``stride`` and
    ``dilation``), 1x1 conv (mid -> out). The skip path is a 1x1 projection
    (in -> out) created with the same ``stride``. The two are summed and passed
    through ReLU.

    Args:
        in_channels: Channels of the input.
        mid_channels: Channels inside the bottleneck.
        out_channels: Channels of the output.
        stride: Stride of the 3x3 convolution and of the skip projection.
        dilation: Dilation (and padding) of the 3x3 convolution.
    """

    def __init__(self, in_channels, mid_channels, out_channels, stride, dilation):
        super(bottleNeckPSP, self).__init__()

        # Class names are conv2DBatchNormRelu / conv2DBatchNorm (capital D).
        self.cbr_1 = conv2DBatchNormRelu(in_channels, mid_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)
        # padding == dilation for the 3x3 kernel.
        self.cbr_2 = conv2DBatchNormRelu(mid_channels, mid_channels, kernel_size=3, stride=stride, padding=dilation, dilation=dilation, bias=False)
        self.cb_3 = conv2DBatchNorm(mid_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)

        # Skip connection
        self.cb_residual = conv2DBatchNorm(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, dilation=1, bias=False)

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        # Skip connection (computed from the untouched input)
        res = self.cb_residual(x)

        x = self.cbr_1(x)
        x = self.cbr_2(x)
        x = self.cb_3(x)

        outputs = self.relu(res + x)

        return outputs

In [24]:
class bottleNeckIdentifyPSP(nn.Module):
    """Bottleneck block with an identity skip connection.

    Main path: 1x1 conv (in -> mid), 3x3 conv (mid -> mid, stride 1, with
    ``dilation``), 1x1 conv (mid -> in). The input is added back unchanged and
    the sum is passed through ReLU.

    Args:
        in_channels: Channels of the input and of the output.
        mid_channels: Channels inside the bottleneck.
        stride: Accepted for signature parity with the caller; not used, every
            convolution here has stride 1.
        dilation: Dilation (and padding) of the 3x3 convolution.
    """

    def __init__(self, in_channels, mid_channels, stride, dilation):
        super(bottleNeckIdentifyPSP, self).__init__()

        self.cbr_1 = conv2DBatchNormRelu(in_channels, mid_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)
        self.cbr_2 = conv2DBatchNormRelu(mid_channels, mid_channels, kernel_size=3, stride=1, padding=dilation, dilation=dilation, bias=False)
        # Project back to in_channels so the result can be added to the identity skip.
        self.cb_3 = conv2DBatchNorm(mid_channels, in_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        # Skip connection
        res = x

        x = self.cbr_1(x)
        x = self.cbr_2(x)
        x = self.cb_3(x)

        outputs = self.relu(res + x)

        return outputs