In [1]:
import sys
import numpy as np

sys.path.append('../')

import cupy
from mandala import cuda
from mandala import Node
from mandala import Variable

from mandala.autodiff import autodiff
from mandala.autodiff import layer
from mandala.autodiff import initializers

In [2]:
from mandala import cuda


# The helpers below are adapted from Chainer's convolution utilities.
def get_conv_outsize(size, k, s, p, cover_all=False, d=1):
    """Return the output length of a convolution along one axis.

    Args:
        size: Input length along the axis.
        k: Kernel length.
        s: Stride.
        p: Padding applied to each side of the axis.
        cover_all: If true, ``s - 1`` is added before the floor division so
            that a trailing, partially covered window is counted as well.
        d: Dilation factor; the kernel spans ``k + (k - 1) * (d - 1)``
            input elements.

    Returns:
        The number of kernel positions along the axis.
    """
    dilated_kernel = k + (k - 1) * (d - 1)
    span = size + p * 2 - dilated_kernel
    if cover_all:
        span = span + s - 1
    return span // s + 1


def im2col(img, kh, kw, sy, sx, ph, pw, pval=0, cover_all=False,
           dy=1, dx=1, out_h=None, out_w=None):
    """Gather the sliding kernel windows of an image batch into a 6-D array.

    Args:
        img: 4-D array whose shape is unpacked as ``(n, c, h, w)``.
        kh, kw: Kernel height and width.
        sy, sx: Vertical and horizontal stride.
        ph, pw: Vertical and horizontal padding.
        pval: Constant value used to fill the padded border.
        cover_all: Forwarded to ``get_conv_outsize`` when an output size
            has to be computed.
        dy, dx: Vertical and horizontal dilation.
        out_h, out_w: Output height and width; each is computed with
            ``get_conv_outsize`` when left as ``None``.

    Returns:
        An array of shape ``(n, c, kh, kw, out_h, out_w)`` created by the
        same array module as ``img``; entry ``[:, :, j, i]`` holds the
        padded image sampled at kernel offset ``(j * dy, i * dx)`` with
        strides ``(sy, sx)``.
    """
    xp = cuda.get_array_module(img)
    n, c, h, w = img.shape
    if out_h is None:
        out_h = get_conv_outsize(h, kh, sy, ph, cover_all, dy)
    if out_w is None:
        out_w = get_conv_outsize(w, kw, sx, pw, cover_all, dx)

    # The trailing side of each spatial axis receives (stride - 1) extra
    # padded elements on top of the requested padding.
    pad_width = ((0, 0), (0, 0), (ph, ph + sy - 1), (pw, pw + sx - 1))
    padded = xp.pad(img, pad_width,
                    mode='constant', constant_values=(pval,))
    col = xp.ndarray((n, c, kh, kw, out_h, out_w), dtype=padded.dtype)

    for ky in range(kh):
        top = ky * dy
        rows = slice(top, top + sy * out_h, sy)
        for kx in range(kw):
            left = kx * dx
            cols = slice(left, left + sx * out_w, sx)
            col[:, :, ky, kx, :, :] = padded[:, :, rows, cols]

    return col

In [3]:
import numpy


def _pair(x):
    if hasattr(x, '__getitem__'):
        return x
    return x, x


class Convolution2D(layer.Layer):
    """2-D convolution layer owning a filter ``W`` and an optional bias ``b``.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        ksize: Kernel size; an int or an indexable pair.
        stride: Stride; an int or an indexable pair.
        pad: Padding; an int or an indexable pair.
        nobias: If true, no bias is created and ``b`` is ``None``.
        cover_all: Stored and forwarded to the convolution function.
        dilate: Dilation; an int or an indexable pair.
    """

    def __init__(self, in_ch, out_ch, ksize, stride=1, pad=0,
                 nobias=False, cover_all=False, dilate=1):
        # _pair duplicates scalars and passes indexable values through.
        self.ksize = _pair(ksize)
        self.stride = _pair(stride)
        self.pad = _pair(pad)
        self.dilate = _pair(dilate)
        self.cover_all = cover_all

        # Filter shape is (out_ch, in_ch, <kernel dims>).
        weight_shape = (out_ch, in_ch) + tuple(self.ksize)
        self.W = Variable(initializers.HeNormal(weight_shape))
        self.b = (None if nobias
                  else Variable(numpy.zeros(out_ch, dtype=numpy.float32)))

    def __call__(self, x):
        """Apply the convolution to ``x`` via ``Convolution2DFunction``."""
        inputs = [x, self.W, self.b]
        options = {
            'ksize': self.ksize,
            'stride': self.stride,
            'pad': self.pad,
            'dilate': self.dilate,
            'cover_all': self.cover_all,
        }
        return Convolution2DFunction()(inputs, options)

In [4]:
class Convolution2DFunction(autodiff.AutoDiff):
    """AutoDiff wrapper around ``convolution_2d_forward``.

    Only the forward direction is available; ``backward`` is a stub.
    """

    def forward(self, xs, kargs):
        """Return a ``Node`` built from ``convolution_2d_forward``.

        Args:
            xs: Positional inputs handed to the ``Node``.
            kargs: Keyword options handed to the ``Node``.
        """
        return Node(convolution_2d_forward, xs, kargs)

    def backward(self, xs, gy):
        """Not implemented yet; always raises ``NotImplementedError``."""
        raise NotImplementedError()

In [12]:
def convolution_2d_forward(x, W, b, ksize, stride, pad,
                           dilate=(1, 1), cover_all=None):
    """Compute a 2-D convolution with an im2col + tensordot formulation.

    Args:
        x: Input array; ``im2col`` unpacks its shape as ``(n, c, h, w)``.
        W: Filter array whose axes 1-3 are contracted against the
            ``(c, kh, kw)`` axes of the unfolded input.
        b: Bias added along the last axis of the contraction result
            (the output-channel axis), or ``None`` for no bias.
        ksize: ``(kh, kw)`` kernel size.
        stride: ``(sy, sx)`` stride.
        pad: ``(ph, pw)`` padding.
        dilate: ``(dy, dx)`` dilation.
        cover_all: Forwarded to ``im2col``; a falsy value (including the
            default ``None``) selects the plain floor-division output size.

    Returns:
        The convolution result with the output-channel axis moved to
        position 1, i.e. laid out as ``(n, out_ch, out_h, out_w)``.
    """
    xp = cuda.get_array_module(x)

    kh, kw = ksize
    sy, sx = stride
    ph, pw = pad
    # Every pair is ordered (vertical, horizontal). The dilation used to be
    # unpacked as ``dx, dy``, which swapped the axes for asymmetric values.
    dy, dx = dilate

    # col has shape (n, c, kh, kw, out_h, out_w).
    col = im2col(x, kh, kw, sy, sx, ph, pw,
                 cover_all=cover_all, dy=dy, dx=dx)
    # Contract (c, kh, kw) of col with axes 1-3 of W:
    # the result is (n, out_h, out_w, out_ch).
    y = xp.tensordot(col, W, ((1, 2, 3), (1, 2, 3)))
    if b is not None:
        # b broadcasts over the trailing output-channel axis.
        y += b
    # Move the channel axis forward: (n, out_ch, out_h, out_w).
    y = xp.rollaxis(y, 3, 1)

    return y


def convolution_2d_backward(x, W, b, ):
    """Unfinished stub, presumably meant to hold the convolution gradient.

    TODO: implement. For now the function only resolves the array module of
    ``x`` and then falls off the end, implicitly returning ``None``; ``W``
    and ``b`` are not used.
    """
    xp = cuda.get_array_module(x)
    

In [None]:
    def forward_cpu(self, inputs):
        """Run the CPU computation on ``inputs``, unpacked as ``(x, gy)``.

        Both inputs are marked as retained. ``gy`` is copied into a
        contiguous array when it is non-contiguous and has a unit-length
        axis. The work is then handed to the grouped implementation when
        ``self.groups > 1`` and to ``_forward_cpu_core`` otherwise.
        """
        self.retain_inputs((0, 1))
        x, gy = inputs

        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        is_contiguous = gy.flags.c_contiguous or gy.flags.f_contiguous
        if not is_contiguous and 1 in gy.shape:
            gy = numpy.ascontiguousarray(gy)

        if self.groups > 1:
            return self._forward_grouped_convolution(x, gy)
        return self._forward_cpu_core(x, gy)

    def _forward_cpu_core(self, x, gy):
        if self._use_ideep:
            return self._forward_ideep(x, gy)

        col = conv.im2col_cpu(
            x, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all, dy=self.dy, dx=self.dx)
        gW = numpy.tensordot(gy, col, ((0, 2, 3), (0, 4, 5))
                             ).astype(self.W_dtype, copy=False)
        return gW,

In [23]:
# A 3x3 convolution layer taking 3 input channels to 10 output channels.
conv = Convolution2D(in_ch=3, out_ch=10, ksize=3)
# to_cpu() is not defined in this notebook; presumably inherited from
# layer.Layer to keep the parameters as host (NumPy) arrays.
conv.to_cpu()

In [24]:
# Use NumPy as the array module and build an all-zero float32 input batch
# of shape (2, 3, 10, 10).
xp = numpy
x = xp.zeros((2, 3, 10, 10), dtype=xp.float32)

In [25]:
# Sanity check: run the layer on the zero batch and display the dtype of the
# output's `.data`.
conv(x).data.dtype

dtype('float32')