In [1]:
import chainer
from chainer import cuda
from chainer import Variable
import chainer.functions as F
import chainer.links as L

In [366]:
import numpy

from chainer import cuda
from chainer.functions.pooling import pooling_2d
from chainer.utils import conv

# Bind the cuDNN helper modules only when Chainer reports cuDNN as enabled.
# When it is not enabled, `cudnn` and `libcudnn` are left undefined, so any
# code referencing them must itself be guarded by `cuda.cudnn_enabled`.
if cuda.cudnn_enabled:
    cudnn = cuda.cudnn
    libcudnn = cudnn.cudnn


class MaxPooling2D(pooling_2d.Pooling2D):

    """Max pooling over a set of 2d planes that also emits argmax indexes.

    Unlike a plain max pooling function, ``forward`` yields three arrays:

    * the pooled feature map,
    * ``indexes``: for every output pixel, the flat position
      (``kx + kw * ky``) of the maximum inside its pooling window,
    * a length-2 ``numpy`` array ``[h, w]`` holding the input spatial size.

    The last two are what ``Upsampling2D`` needs to undo the pooling.

    NOTE: the cuDNN code path of the base class is intentionally not used.
    It returns only the pooled map (no indexes), which breaks callers that
    unpack three outputs. ``use_cudnn`` is still accepted by the constructor
    for interface compatibility but has no effect on forward/backward here.
    """

    def forward_cpu(self, x):
        """Pool ``x[0]`` on the CPU; returns ``(y, indexes, [h, w])``."""
        n, c, h, w = x[0].shape
        # Padding is filled with -inf so padded cells can never win the max.
        col = conv.im2col_cpu(
            x[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
            pval=-float('inf'), cover_all=self.cover_all)
        n, c, kh, kw, out_h, out_w = col.shape
        col = col.reshape(n, c, kh * kw, out_h, out_w)

        # We select maximum twice, since the implementation using numpy.choose
        # hits its bug when kh * kw >= 32.
        self.indexes = col.argmax(axis=2)
        y = col.max(axis=2)
        # `numpy` (not `np`) is the name imported by this notebook.
        return y, self.indexes, numpy.array([h, w])

    def forward_gpu(self, x):
        """Pool ``x[0]`` on the GPU; returns ``(y, indexes, [h, w])``.

        Always uses the custom elementwise kernel so that ``indexes`` is
        produced (see the class docstring for why cuDNN is bypassed).
        """
        n, c, h, w = x[0].shape
        y_h = conv.get_conv_outsize(
            h, self.kh, self.sy, self.ph, self.cover_all)
        y_w = conv.get_conv_outsize(
            w, self.kw, self.sx, self.pw, self.cover_all)
        y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype)
        self.indexes = cuda.cupy.empty((n, c, y_h, y_w), dtype=numpy.int32)

        cuda.elementwise(
            'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,'
            'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw',
            'T out, S indexes',
            '''
               int c0    = i / (out_h * out_w);
               int out_y = i / out_w % out_h;
               int out_x = i % out_w;
               int in_y_0 = max(0, out_y * sy - ph);
               int in_y_1 = min(h, out_y * sy + kh - ph);
               int in_x_0 = max(0, out_x * sx - pw);
               int in_x_1 = min(w, out_x * sx + kw - pw);

               T maxval = in[in_x_0 + w * (in_y_0 + h * c0)];
               int argmax_y = in_y_0;
               int argmax_x = in_x_0;
               for (int y = in_y_0; y < in_y_1; ++y) {
                 int offset_y = w * (y + h * c0);
                 for (int x = in_x_0; x < in_x_1; ++x) {
                   float v = in[x + offset_y];
                   if (maxval < v) {
                     maxval   = v;
                     argmax_y = y;
                     argmax_x = x;
                   }
                 }
               }
               out = maxval;

               int argmax_ky = argmax_y + ph - out_y * sy;
               int argmax_kx = argmax_x + pw - out_x * sx;
               indexes = argmax_kx + kw * argmax_ky;
            ''', 'max_pool_fwd')(x[0].reduced_view(),
                                 h, w, y_h, y_w, self.kh, self.kw,
                                 self.sy, self.sx, self.ph, self.pw,
                                 y, self.indexes)
        # The size is returned as a host-side numpy array, as before.
        return y, self.indexes, numpy.array([h, w])

    def backward_cpu(self, x, gy):
        """Route ``gy[0]`` back to the argmax positions recorded in forward.

        Only the gradient of the pooled map (``gy[0]``) is used; the index
        and size outputs carry no gradient.
        """
        n, c, out_h, out_w = gy[0].shape
        h, w = x[0].shape[2:]
        gcol = numpy.zeros(
            (n, c, self.kh, self.kw, out_h, out_w), dtype=x[0].dtype)

        # TODO(beam2d): Make it fast
        # gcol_r is a view with the flattened window axis first, so
        # gcol_r[k][n, c, y, x] addresses window slot k of one output pixel.
        gcol_r = numpy.rollaxis(gcol.reshape(n, c, -1, out_h, out_w), 2)
        for i in numpy.ndindex(n, c, out_h, out_w):
            gcol_r[self.indexes[i]][i] = gy[0][i]

        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
        return gx,

    def backward_gpu(self, x, gy):
        """GPU counterpart of ``backward_cpu`` using a custom kernel.

        Relies on ``self.indexes`` written by ``forward_gpu``; the cuDNN
        backward is not used because the cuDNN forward is never taken.
        """
        n, c, h, w = x[0].shape
        y_h, y_w = gy[0].shape[2:]
        gx = cuda.cupy.empty_like(x[0])

        cuda.elementwise(
            'raw T gy, raw S indexes, int32 h, int32 w,'
            'int32 out_h, int32 out_w, int32 kh, int32 kw,'
            'int32 sy, int32 sx, int32 ph, int32 pw',
            'T gx',
            '''
               int c0 = i / (h * w);
               int y  = i / w % h + ph;
               int x  = i % w + pw;
               int out_y_0 = max(0,     (y - kh + sy) / sy);
               int out_y_1 = min(out_h, (y      + sy) / sy);
               int out_x_0 = max(0,     (x - kw + sx) / sx);
               int out_x_1 = min(out_w, (x      + sx) / sx);

               T val = 0;
               for (int out_y = out_y_0; out_y < out_y_1; ++out_y) {
                 int ky = y - out_y * sy;
                 for (int out_x = out_x_0; out_x < out_x_1; ++out_x) {
                   int kx = x - out_x * sx;
                   int offset = out_x + out_w * (out_y + out_h * c0);
                   if (indexes[offset] == kx + kw * ky) {
                     val = val + gy[offset];
                   }
                 }
               }
               gx = val;
            ''',
            'max_pool_bwd')(gy[0].reduced_view(), self.indexes.reduced_view(),
                            h, w, y_h, y_w, self.kh, self.kw,
                            self.sy, self.sx, self.ph, self.pw,
                            gx)
        return gx,

    def create_pool_desc(self):
        """Build a cuDNN max-pooling descriptor from this function's geometry.

        Kept for compatibility with the base-class interface; requires the
        module-level ``cudnn``/``libcudnn`` aliases, which exist only when
        cuDNN is enabled.
        """
        return cudnn.create_pooling_descriptor(
            (self.kh, self.kw), (self.sy, self.sx), (self.ph, self.pw),
            libcudnn.CUDNN_POOLING_MAX)


def max_pooling_2d(x, ksize, stride=None, pad=0, cover_all=True,
                   use_cudnn=True):
    """Spatial max pooling that also reports where each maximum came from.

    Thin functional wrapper: builds a :class:`MaxPooling2D` from the given
    geometry and applies it to ``x``.

    Args:
        x (~chainer.Variable): Input variable.
        ksize (int or pair of ints): Size of pooling window. ``ksize=k`` and
            ``ksize=(k, k)`` are equivalent.
        stride (int or pair of ints or None): Stride of pooling applications.
            ``stride=s`` and ``stride=(s, s)`` are equivalent. If ``None`` is
            specified, then it uses same stride as the pooling window size.
        pad (int or pair of ints): Spatial padding width for the input array.
            ``pad=p`` and ``pad=(p, p)`` are equivalent.
        cover_all (bool): If ``True``, all spatial locations are pooled into
            some output pixels. It may make the output size larger.
        use_cudnn (bool): Forwarded unchanged to :class:`MaxPooling2D`.

    Returns:
        Whatever applying :class:`MaxPooling2D` to ``x`` yields. The
        function's own forward implementations emit three arrays (pooled
        map, argmax index inside each window, and the input ``[h, w]``), so
        callers in this notebook unpack three variables.

    """
    pooling = MaxPooling2D(ksize, stride, pad, cover_all, use_cudnn)
    return pooling(x)

In [490]:
import numpy

from chainer import cuda
from chainer.functions.pooling import pooling_2d
from chainer.utils import conv
from chainer.utils import type_check

class Upsampling2D(pooling_2d.Pooling2D):

    """Upsampling for SegNet.

    Inverse of an index-recording max pooling: every input value is written
    back to the window slot named by ``indices`` and all other slots of that
    window are zero. ``indices`` holds, per input pixel, the flat slot
    ``kx + kw * ky`` inside a ``kh x kw`` window.

    The scatter/gather is done with a broadcast mask instead of a Python
    loop over every element, so it is a handful of array operations for
    both numpy and cupy inputs.
    """

    def __init__(self, indices, ksize, stride=None, pad=0,
                 outsize=None, cover_all=True):
        """Store the pooling geometry, the argmax ``indices`` and ``outsize``.

        ``outsize`` is an optional ``(height, width)`` pair for the output;
        when omitted it is derived from the input size in ``forward``.
        """
        super(Upsampling2D, self).__init__(ksize, stride, pad, cover_all)
        self.outh, self.outw = (None, None) if outsize is None else outsize
        self.indices = indices

    def check_type_forward(self, in_types):
        """Require one 4-d float input consistent with ``outsize`` if given."""
        n_in = in_types.size()
        type_check.expect(n_in == 1)
        x_type = in_types[0]

        type_check.expect(
            x_type.dtype.kind == 'f',
            x_type.ndim == 4,
        )

        # Pooling an image of the requested output size must give back
        # exactly the input's spatial size.
        if self.outh is not None:
            expected_h = conv.get_conv_outsize(
                self.outh, self.kh, self.sy, self.ph, cover_all=self.cover_all)
            type_check.expect(x_type.shape[2] == expected_h)
        if self.outw is not None:
            expected_w = conv.get_conv_outsize(
                self.outw, self.kw, self.sx, self.pw, cover_all=self.cover_all)
            type_check.expect(x_type.shape[3] == expected_w)

    def _slot_mask(self, xp):
        """Boolean (n, c, kh*kw, h, w) mask, True at each pixel's chosen slot."""
        slots = xp.arange(self.kh * self.kw).reshape(1, 1, -1, 1, 1)
        return slots == self.indices[:, :, None]

    def forward(self, x):
        """Scatter ``x[0]`` into its recorded window slots and fold to an image."""
        n, c, h, w = x[0].shape
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(
                h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(
                w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
        xp = cuda.get_array_module(*x)

        # col[n, c, k, i, j] = x[n, c, i, j] if k == indices[n, c, i, j]
        # else 0. `where` (rather than multiplying by the mask) keeps
        # non-finite inputs from leaking NaNs into the unselected slots.
        values = x[0][:, :, None]
        col = xp.where(self._slot_mask(xp), values, xp.zeros_like(values))
        col = col.reshape(n, c, self.kh, self.kw, h, w)

        if isinstance(x[0], cuda.ndarray):
            y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)
        else:
            y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)
        return y,

    def backward(self, x, gy):
        """Gather, for each input pixel, the gradient at its recorded slot."""
        n, c, h, w = x[0].shape
        xp = cuda.get_array_module(gy[0])
        if isinstance(gy[0], cuda.ndarray):
            gcol = conv.im2col_gpu(
                gy[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
                cover_all=self.cover_all)
        else:
            gcol = conv.im2col_cpu(
                gy[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
                cover_all=self.cover_all)
        gcol = gcol.reshape(n, c, -1, h, w)

        # Exactly one slot per pixel is selected, so summing the masked
        # window axis picks gcol[n, c, indices[n, c, i, j], i, j].
        picked = xp.where(self._slot_mask(xp), gcol, xp.zeros_like(gcol))
        gx = picked.sum(axis=2)
        if gx.dtype != x[0].dtype:
            # The gradient takes the dtype of the input, as before.
            gx = gx.astype(x[0].dtype)
        return gx,
    
def up_sampling_2d(x, indices, ksize, stride=None, pad=0, outsize=None, cover_all=True):
    """Functional wrapper around :class:`Upsampling2D`.

    Args:
        x: Input variable to upsample.
        indices: Per-pixel window slot indexes, e.g. the ``.data`` of the
            index output of ``max_pooling_2d``.
        ksize, stride, pad, cover_all: Pooling geometry, passed through to
            :class:`Upsampling2D`.
        outsize: Optional ``(height, width)`` of the output.

    Returns:
        The result of applying :class:`Upsampling2D` to ``x``.
    """
    upsampler = Upsampling2D(indices, ksize, stride, pad, outsize, cover_all)
    return upsampler(x)

In [565]:
class SegNet(chainer.Chain):

    """Encoder-decoder segmentation network built from conv/BN/ReLU stages.

    Stages 1-5 form the encoder (each followed by a 2x2, stride-2 index
    recording max pooling); stages 6-10 form the decoder (each preceded by
    an upsampling that reuses the matching encoder stage's indexes and
    input size, in reverse order). Links are registered as
    ``conv<stage>_<i>`` and ``bn_conv<stage>_<i>``.
    """

    def __init__(self):
        NUM_CLASS = 21
        # (stage number, [(in_channels, out_channels) of each conv in it])
        stage_channels = [
            # ENCODER
            (1, [(3, 64), (64, 64)]),
            (2, [(64, 128), (128, 128)]),
            (3, [(128, 256), (256, 256), (256, 256)]),
            (4, [(256, 512), (512, 512), (512, 512)]),
            (5, [(512, 512), (512, 512), (512, 512)]),
            # DECODER
            (6, [(512, 512), (512, 512), (512, 512)]),
            (7, [(512, 512), (512, 512), (512, 256)]),
            (8, [(256, 256), (256, 256), (256, 128)]),
            (9, [(128, 128), (128, 64)]),
            (10, [(64, 64), (64, NUM_CLASS)]),
        ]

        links = {}
        for stage, channels in stage_channels:
            # All 3x3 convolutions of the stage first, then one batch
            # normalization per convolution sized to its output channels.
            for idx, (n_in, n_out) in enumerate(channels, 1):
                links['conv%d_%d' % (stage, idx)] = L.Convolution2D(
                    n_in, n_out, 3, stride=1, pad=1)
            for idx, (_, n_out) in enumerate(channels, 1):
                links['bn_conv%d_%d' % (stage, idx)] = L.BatchNormalization(
                    n_out)

        super(SegNet, self).__init__(**links)
        self.train = True

    def _conv_bn_relu_stage(self, h, stage, depth):
        """Apply conv -> batch norm -> ReLU ``depth`` times for ``stage``."""
        in_test_mode = not self.train
        for idx in range(1, depth + 1):
            conv_link = getattr(self, 'conv%d_%d' % (stage, idx))
            bn_link = getattr(self, 'bn_conv%d_%d' % (stage, idx))
            h = F.relu(bn_link(conv_link(h), test=in_test_mode))
        return h

    def __call__(self, x, t=None):
        """Run the network on ``x``.

        Returns the softmax cross entropy against ``t`` when ``self.train``
        is true, otherwise the softmax of the final feature map.
        """
        h = x

        # ENCODING: remember each pooling's indexes and pre-pool size.
        pool_records = []
        for stage, depth in ((1, 2), (2, 2), (3, 3), (4, 3), (5, 3)):
            h = self._conv_bn_relu_stage(h, stage, depth)
            h, ind, size = max_pooling_2d(h, 2, stride=2)
            pool_records.append((ind, size))

        # DECODING: undo the poolings last-in, first-out.
        for stage, depth in ((6, 3), (7, 3), (8, 3), (9, 2), (10, 2)):
            ind, size = pool_records.pop()
            h = up_sampling_2d(h, indices=ind.data, ksize=2, stride=2,
                               outsize=size.data)
            h = self._conv_bn_relu_stage(h, stage, depth)

        if not self.train:
            return F.softmax(h)
        return F.softmax_cross_entropy(h, t)

In [589]:
def copy_model(src, dst):
    """Transfer parameters from ``src`` to the same-named children of ``dst``.

    For every child link of ``src`` that ``dst`` also has under the same
    name and with the same type, parameters are transferred when their
    names and shapes agree pairwise; otherwise the child is skipped and a
    message is printed. Child chains are additionally processed
    recursively.

    Args:
        src (chainer.link.Chain): Model to read parameters from.
        dst (chainer.link.Chain): Model to write parameters into.

    NOTE: the destination parameter's ``data`` is rebound to the *same*
    array object as the source (no copy is made), so in-place changes to
    one model's parameters are visible in the other.
    """
    assert isinstance(src, chainer.link.Chain)
    assert isinstance(dst, chainer.link.Chain)
    for child in src.children():
        if child.name not in dst.__dict__:
            continue
        dst_child = dst[child.name]
        if type(child) != type(dst_child):
            continue
        if isinstance(child, chainer.link.Chain):
            copy_model(child, dst_child)
        if isinstance(child, chainer.link.Link):
            # Compare parameters pairwise; zip stops at the shorter list.
            match = True
            for a, b in zip(child.namedparams(), dst_child.namedparams()):
                if a[0] != b[0]:
                    match = False
                    break
                if a[1].data.shape != b[1].data.shape:
                    match = False
                    break
            if not match:
                # Parenthesized single-argument print behaves the same on
                # Python 2 and is valid syntax on Python 3.
                print('Ignore %s because of parameter mismatch' % child.name)
                continue
            for a, b in zip(child.namedparams(), dst_child.namedparams()):
                b[1].data = a[1].data
            print('Copy %s' % child.name)

In [598]:
segnet.train = True

In [599]:
y = segnet(x, t)

In [596]:
x = Variable(numpy.random.random((1, 3, 256, 256)).astype('f'))
t = Variable(numpy.zeros((1, 256, 256), dtype=np.int32))

In [404]:
%matplotlib inline
import pylab as plt