<a href="https://colab.research.google.com/github/lkforward/flower/blob/master/unet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

!ls

Mounted at /content/gdrive
code  gdrive  sample_data


In [26]:
!pip install albumentations==0.3.2

Collecting albumentations==0.3.2
[?25l  Downloading https://files.pythonhosted.org/packages/ad/34/e1da4fab7282d732a6cef827c7e5fb1efa1f02c3ba1bff4a0ace2daf6639/albumentations-0.3.2.tar.gz (79kB)
[K     |████                            | 10kB 16.7MB/s eta 0:00:01[K     |████████▏                       | 20kB 1.7MB/s eta 0:00:01[K     |████████████▎                   | 30kB 2.5MB/s eta 0:00:01[K     |████████████████▍               | 40kB 1.7MB/s eta 0:00:01[K     |████████████████████▌           | 51kB 2.1MB/s eta 0:00:01[K     |████████████████████████▋       | 61kB 2.5MB/s eta 0:00:01[K     |████████████████████████████▊   | 71kB 2.9MB/s eta 0:00:01[K     |████████████████████████████████| 81kB 2.5MB/s 
Collecting opencv-python-headless
[?25l  Downloading https://files.pythonhosted.org/packages/1f/dc/b250f03ab68068033fd2356428c1357431d8ebc6a26405098e0f27c94f7a/opencv_python_headless-4.1.1.26-cp36-cp36m-manylinux1_x86_64.whl (22.1MB)
[K     |██████████████████████████

In [0]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import torchvision
import torchvision.transforms as transforms
import torch
from torch.utils.data import TensorDataset, DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import cv2

In [0]:
import pandas as pd
import numpy as np
import os

import albumentations as albu
from albumentations import torch as AT

# Load the U-Net Model Implementation

In [5]:
# #Clone the code into google drive: 

# Codebase 1:
# !git clone https://github.com/vlievin/Unet.git /content/gdrive/My\ Drive/kaggle_cloud/code_myunet

# Codebase 2:
# !git clone https://github.com/lyakaap/Kaggle-Carvana-3rd-Place-Solution.git /content/gdrive/My\ Drive/kaggle_cloud/code_3rdplace_unet

Cloning into '/content/gdrive/My Drive/kaggle_cloud/code_3rdplace_unet'...
remote: Enumerating objects: 57, done.[K
remote: Total 57 (delta 0), reused 0 (delta 0), pack-reused 57[K
Unpacking objects: 100% (57/57), done.


In [6]:
# !ln -sfn /content/gdrive/My\ Drive/kaggle_cloud/code_myunet code
# !ln -sfn /content/gdrive/My\ Drive/kaggle_cloud/code_3rdplace_unet code
!ls code

losses.py	    model.py	      network.png  train.py
make_submission.py  model_pytorch.py  README.md


In [0]:
# #If you want to view the code from a pop-up window: 
# %pycat code/unet.py
# %pycat code/model_pytorch.py

In [0]:
# %%writefile code/model_pytorch.py

import torch
import torch.nn as nn
import torch.nn.functional as F


class ConvActivation(nn.Module):

    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1, padding=1, dilation=1,
                 activation=nn.ReLU(inplace=True)):
        super(ConvActivation, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size,
                              stride, padding, dilation)
        self.activation = activation

    def forward(self, x):
        x = self.conv(x)
        x = self.activation(x)
        return x


class ConvBNActivation(nn.Module):

    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1, padding=1, dilation=1,
                 activation=nn.ReLU(inplace=True)):
        super(ConvBNActivation, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size,
                              stride, padding, dilation)
        self.bn = nn.BatchNorm2d(out_channels)
        self.activation = activation

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.activation(x)
        return x


class ConvBlock(nn.Module):

    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1, padding=1, dilation=1,
                 batch_norm=False, activation=nn.ReLU(inplace=True)):
        super(ConvBlock, self).__init__()
        conv = ConvBNActivation if batch_norm else ConvActivation
        self.block = nn.Sequential(
            conv(in_channels, out_channels, kernel_size,
                 stride, padding, dilation, activation),
            conv(out_channels, out_channels, kernel_size,
                 stride, padding, dilation, activation)
        )

    def forward(self, x):
        out = self.block(x)
        return out


class UpBlockWithSkip(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1, padding=1, dilation=1, up_mode='deconv',
                 batch_norm=False, activation=nn.ReLU(inplace=True)):
        assert up_mode in ('deconv', 'biupconv', 'nnupconv')
        super(UpBlockWithSkip, self).__init__()

        if up_mode == 'deconv':
            self.up = nn.ConvTranspose2d(
                in_channels, out_channels,
                kernel_size=4, stride=2, padding=1)
        elif up_mode == 'biupconv':
            self.up = nn.Sequential(
                nn.Upsample(mode='bilinear', scale_factor=2,
                            align_corners=False),
                nn.Conv2d(in_channels, out_channels, kernel_size=1)
            )
        elif up_mode == 'nnupconv':
            self.up = nn.Sequential(
                nn.Upsample(mode='nearest', scale_factor=2,
                            align_corners=False),
                nn.Conv2d(in_channels, out_channels, kernel_size=1)
            )

        self.conv_block = ConvBlock(
            out_channels * 2, out_channels, kernel_size,
            stride, padding, dilation, batch_norm, activation)

    def forward(self, x, bridge):
        up = self.up(x)
        out = torch.cat([up, bridge], 1)
        out = self.conv_block(out)

        return out


class DilatedUNet(nn.Module):
    def __init__(self, in_channels=3, classes=1, depth=3,
                 first_channels=44, padding=1,
                 bottleneck_depth=6, bottleneck_type='cascade',
                 batch_norm=False, up_mode='deconv',
                 activation=nn.ReLU(inplace=True)):

        assert bottleneck_type in ('cascade', 'parallel')
        super(DilatedUNet, self).__init__()

        self.depth = depth
        self.bottleneck_type = bottleneck_type

        conv = ConvBNActivation if batch_norm else ConvActivation

        prev_channels = in_channels
        self.down_path = nn.ModuleList()
        for i in range(depth):
            self.down_path.append(
                ConvBlock(prev_channels, first_channels * 2**i, 3,
                          padding=padding, batch_norm=batch_norm,
                          activation=activation))
            prev_channels = first_channels * 2**i

        self.bottleneck_path = nn.ModuleList()
        for i in range(bottleneck_depth):
            bneck_in = prev_channels if i == 0 else prev_channels * 2
            self.bottleneck_path.append(
                conv(bneck_in, prev_channels * 2, 3,
                     dilation=2**i, padding=2**i, activation=activation))

        prev_channels *= 2

        self.up_path = nn.ModuleList()
        for i in reversed(range(depth)):
            self.up_path.append(
                UpBlockWithSkip(prev_channels, first_channels * 2**i, 3,
                                up_mode=up_mode, padding=padding,
                                batch_norm=batch_norm,
                                activation=activation))
            prev_channels = first_channels * 2**i

        self.last = nn.Conv2d(prev_channels, classes, kernel_size=1)

    def forward(self, x):
        bridges = []
        for i, down in enumerate(self.down_path):
            x = down(x)
            bridges.append(x)
            x = F.avg_pool2d(x, 2)

        dilated_layers = []
        for i, bneck in enumerate(self.bottleneck_path):
            if self.bottleneck_type == 'cascade':
                x = bneck(x)
                dilated_layers.append(x.unsqueeze(-1))
            elif self.bottleneck_type == 'parallel':
                dilated_layers.append(bneck(x.unsqueeze(-1)))
        x = torch.cat(dilated_layers, dim=-1)
        x = torch.sum(x, dim=-1)

        for i, up in enumerate(self.up_path):
            x = up(x, bridges[-i-1])

        return self.last(x)

In [17]:
# net = UNet(in_channels=3, out_channels=4, num_hidden_features=[64, 128, 256], 
#            num_dilated_convs=2, n_resblocks=2, 
#            dropout_min=0., dropout_max=0.2)

net = DilatedUNet(in_channels=3, classes=4, depth=5)

print(net)

DilatedUNet(
  (down_path): ModuleList(
    (0): ConvBlock(
      (block): Sequential(
        (0): ConvActivation(
          (conv): Conv2d(3, 44, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (activation): ReLU(inplace=True)
        )
        (1): ConvActivation(
          (conv): Conv2d(44, 44, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (activation): ReLU(inplace=True)
        )
      )
    )
    (1): ConvBlock(
      (block): Sequential(
        (0): ConvActivation(
          (conv): Conv2d(44, 88, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (activation): ReLU(inplace=True)
        )
        (1): ConvActivation(
          (conv): Conv2d(88, 88, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (activation): ReLU(inplace=True)
        )
      )
    )
    (2): ConvBlock(
      (block): Sequential(
        (0): ConvActivation(
          (conv): Conv2d(88, 176, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (act

In [18]:
input = torch.randn(16, 3, 320, 320)
output = net(input)
print(output)

tensor([[[[ 0.0226,  0.0278,  0.0194,  ...,  0.0214,  0.0171,  0.0257],
          [ 0.0236,  0.0196,  0.0312,  ...,  0.0274,  0.0282,  0.0258],
          [ 0.0210,  0.0256,  0.0272,  ...,  0.0295,  0.0239,  0.0250],
          ...,
          [ 0.0208,  0.0270,  0.0110,  ...,  0.0273,  0.0222,  0.0279],
          [ 0.0267,  0.0278,  0.0314,  ...,  0.0371,  0.0219,  0.0300],
          [ 0.0270,  0.0299,  0.0247,  ...,  0.0242,  0.0230,  0.0287]],

         [[-0.1230, -0.1218, -0.1299,  ..., -0.1290, -0.1358, -0.1313],
          [-0.1323, -0.1300, -0.1306,  ..., -0.1283, -0.1337, -0.1271],
          [-0.1315, -0.1158, -0.1212,  ..., -0.1350, -0.1235, -0.1312],
          ...,
          [-0.1283, -0.1232, -0.1355,  ..., -0.1260, -0.1171, -0.1308],
          [-0.1209, -0.1230, -0.1258,  ..., -0.1160, -0.1266, -0.1274],
          [-0.1280, -0.1287, -0.1275,  ..., -0.1268, -0.1234, -0.1243]],

         [[ 0.0389,  0.0436,  0.0301,  ...,  0.0441,  0.0437,  0.0436],
          [ 0.0440,  0.0477,  

In [20]:
output.min()

tensor(-0.1624, grad_fn=<MinBackward1>)

In [21]:
output.max()

tensor(0.0825, grad_fn=<MaxBackward1>)

# Load Data
In smp, the involved preprocessings include: normalization and resizing. We can simply normalize the data by dividing them by 255. 

In [0]:
# Load data

# Preprocess

# Convert into pytorch format "DataLoader"

In [0]:
path = '/content/gdrive/My Drive/kaggle_cloud/data'

def get_data(path, n_samples=None):
  """
  Read the taining data information, including image id and labels. 
  """
  train = pd.read_csv(f'{path}/train.csv')
  sub = pd.read_csv(f'{path}/sample_submission.csv')
  print("Reading the training csv...")
  print(train.columns)
  print(train.shape)

  n_train = len(os.listdir(f'{path}/train_images'))
  print("Reading the training images...")
  print(f'There are {n_train} images in the original train dataset')

  if n_samples: 
    train = train.iloc[:n_samples, :]
    print(f'Use {n_samples} images within the training dataset.')


  train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
  train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])

  sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[1])
  sub['im_id'] = sub['Image_Label'].apply(lambda x: x.split('_')[0])

  return train, sub


def make_mask(df: pd.DataFrame, image_name: str='img.jpg', shape: tuple = (1400, 2100)):
    """
    Create mask based on df, image name and shape.

    [OUTPUTS]:
    masks: an array with shape (shape[0], shape[1], 4).
      Mask for each class labels.
    """
    encoded_masks = df.loc[df['im_id'] == image_name, 'EncodedPixels']
    masks = np.zeros((shape[0], shape[1], 4), dtype=np.float32)

    for idx, label in enumerate(encoded_masks.values):
        if label is not np.nan:
            mask = rle_decode(label)
            masks[:, :, idx] = mask
            
    return masks

def get_img(x, folder: str='train_images'):
    """
    Return image based on image name and folder.
    """
    data_folder = f"{path}/{folder}"
    image_path = os.path.join(data_folder, x)
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img


def rle_decode(mask_rle: str = '', shape: tuple = (1400, 2100)):
    '''
    Decode rle encoded mask.
    
    :param mask_rle: run-length as string formatted (start length)
    :param shape: (height, width) of array to return 
    Returns numpy array, 1 - mask, 0 - background
    '''
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape, order='F')

def mask2rle(img):
    '''
    Convert mask to rle.
    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formated
    '''
    pixels= img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)


def get_training_augmentation():
    """
    Define the preprocessing for the training data. 
    """
    train_transform = [
        albu.HorizontalFlip(p=0.5),
        albu.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=0.5, border_mode=0),
        albu.GridDistortion(p=0.5),
        albu.OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5),
        # albu.Resize(320, 640)
        albu.Resize(160, 320)
    ]
    return albu.Compose(train_transform)

def get_validation_augmentation():
    """Add paddings to make image shape divisible by 32"""
    test_transform = [
        # albu.Resize(320, 640)
        albu.Resize(160, 320)
    ]
    return albu.Compose(test_transform)

def split_data(train, sub):
  """
  Split the training dataset into train/valid datasets, and use all the data in 
  the submission dataset as test data. 

  [OUTPUTS]:
  train_ids/valid_ids/test_ids: array of image ids(str). 
  """
  train_labels = train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label']
  id_mask_count = train_labels.apply(lambda x: x.split('_')[0]).value_counts()
  id_mask_count = id_mask_count.reset_index().rename(columns={'index': 'img_id', 'Image_Label': 'count'})
  train_ids, valid_ids = train_test_split(id_mask_count['img_id'].values,
                                          random_state=42,
                                          stratify=id_mask_count['count'],
                                          test_size=0.1)
 
  # Alternatively, we can use sub['im_id'] directly. 
  test_ids = sub['Image_Label'].apply(lambda x: x.split('_')[0]).drop_duplicates().values
  
  return train_ids, valid_ids, test_ids


# Convert the augmenttation into standdard transform
# Add others transforms: toTensor (the range of value has been converted to [0, 1] in 
# toTensor)

class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        image, mask = sample['image'], sample['mask']

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C X H X W
        image = image.transpose((2, 0, 1))
        return {'image': torch.from_numpy(image),
                'landmarks': torch.from_numpy(mask)}


class CloudDataset(Dataset):
    def __init__(self, df: pd.DataFrame = None, datatype: str = 'train', img_ids: np.array = None,
                 transforms = albu.Compose([albu.HorizontalFlip(),AT.ToTensor()]),
                 preprocessing=None):
        """
        [INPUTS]:
        df: a pandas dataframe. 
          The image information dataframe, obtained from function "get_data()".
        datatype: string. 
          Whether it is 'train' or 'test'. 
        
        """
        self.df = df
        if datatype != 'test':
            self.data_folder = f"{path}/train_images"
        else:
            self.data_folder = f"{path}/test_images"
        self.img_ids = img_ids
        self.transforms = transforms
        self.preprocessing = preprocessing

    def __getitem__(self, idx):
        image_name = self.img_ids[idx]
        mask = make_mask(self.df, image_name)
        image_path = os.path.join(self.data_folder, image_name)
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask']

        # if self.preprocessing:
        #     preprocessed = self.preprocessing(image=img, mask=mask)
        #     img = preprocessed['image']
        #     mask = preprocessed['mask']
        if self.preprocessing:
            img = self.preprocessing(img)
            mask = self.preprocessing(mask)

        return img, mask

    def __len__(self):
        return len(self.img_ids)

In [46]:
train, sub = get_data(path)
train_ids, valid_ids, test_ids = split_data(train, sub)

num_workers = 0
bs = 16
train_dataset = CloudDataset(df=train, datatype='train', img_ids=train_ids, 
                             transforms = get_training_augmentation(), 
                             preprocessing=transforms.ToTensor())

valid_dataset = CloudDataset(df=train, datatype='valid', img_ids=valid_ids, 
                             transforms = get_validation_augmentation(), 
                             preprocessing=transforms.ToTensor())

train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, 
                          num_workers=num_workers)
valid_loader = DataLoader(valid_dataset, batch_size=bs, shuffle=False, 
                          num_workers=num_workers)

Reading the training csv...
Index(['Image_Label', 'EncodedPixels'], dtype='object')
(22184, 2)
Reading the training images...
There are 5546 images in the original train dataset


In [52]:
for i in range(len(train_dataset)):
    img, mask = train_dataset[i]

    print(i, img.size(), mask.size())

    if i == 3:
        break

0 torch.Size([3, 160, 320]) torch.Size([4, 160, 320])
1 torch.Size([3, 160, 320]) torch.Size([4, 160, 320])
2 torch.Size([3, 160, 320]) torch.Size([4, 160, 320])
3 torch.Size([3, 160, 320]) torch.Size([4, 160, 320])


# Model Training

# Model Prediction