In [1]:
import sys
import numpy as np

sys.path.append('../')

import cupy
from mandala import cuda
from mandala import Node
from mandala import Variable

from mandala.autodiff import autodiff
from mandala.autodiff import layer
from mandala.autodiff import initializers

In [65]:
from mandala import cuda


# modified from chainer.
def get_conv_outsize(size, k, s, p, cover_all=False, d=1):
    dk = k + (k - 1) * (d - 1)
    if cover_all:
        return (size + p * 2 - dk + s - 1) // s + 1
    else:
        return (size + p * 2 - dk) // s + 1


def im2col(img, kh, kw, sy, sx, ph, pw, pval=0, cover_all=False,
           dy=1, dx=1, out_h=None, out_w=None):
    xp = cuda.get_array_module(img)
    n, c, h, w = img.shape
    if out_h is None:
        out_h = get_conv_outsize(h, kh, sy, ph, cover_all, dy)
    if out_w is None:
        out_w = get_conv_outsize(w, kw, sx, pw, cover_all, dx)

    img = xp.pad(img,
                 ((0, 0), (0, 0), (ph, ph + sy - 1), (pw, pw + sx - 1)),
                  mode='constant', constant_values=(pval,))
    col = xp.ndarray((n, c, kh, kw, out_h, out_w), dtype=img.dtype)

    for j in range(kh):
        jdy = j * dy
        j_lim = jdy + sy * out_h
        for i in range(kw):
            idx = i * dx
            i_lim = idx + sx * out_w
            col[:, :, j, i, :, :] = img[:, :, jdy:j_lim:sy, idx:i_lim:sx]

    return col


def col2im(col, sy, sx, ph, pw, h, w, dy=1, dx=1):
    xp = cuda.get_array_module(col)

    n, c, kh, kw, out_h, out_w = col.shape
    img = xp.zeros((n, c, h + 2 * ph + sy - 1, w + 2 * pw + sx - 1),
                    dtype=col.dtype)
    for j in range(kh):
        jdy = j * dy
        j_lim = jdy + sy * out_h
        for i in range(kw):
            idx = i * dx
            i_lim = idx + sx * out_w
            img[:, :, jdy:j_lim:sy, idx:i_lim:sx] += col[:, :, j, i]
    return img[:, :, ph:h + ph, pw:w + pw]

In [66]:
import chainer.links as L

In [67]:
import numpy


def _pair(x):
    if hasattr(x, '__getitem__'):
        return x
    return x, x


class Convolution2D(layer.Layer):
    def __init__(self, in_ch, out_ch, ksize, stride=1, pad=0,
                 nobias=False, cover_all=False, dilate=1):

        self.config = {
            'ksize': _pair(ksize),
            'stride': _pair(stride),
            'pad': _pair(pad),
            'dilate': _pair(dilate),
            'cover_all': cover_all
        }

        kh, kw = self.config['ksize']
        self.W = Variable(
            initializers.HeNormal((out_ch, in_ch, kh, kw)))
        if nobias:
            self.b = None
        else:
            self.b = Variable(
                numpy.zeros(out_ch, dtype=numpy.float32))

    def __call__(self, x):
        args = [x, self.W, self.b]
        kwargs = self.config
        return Convolution2DFunction()(args, kwargs)

In [68]:
class Convolution2DFunction(autodiff.AutoDiff):

    def forward(self, xs):
        x, W, b = xs

        # im2col
        kh, kw = self.kwargs['ksize']
        sy, sx = self.kwargs['stride']
        ph, pw = self.kwargs['pad']
        dy, dx = self.kwargs['dilate']
        cover_all = self.kwargs['cover_all']

        kwargs ={
            'kw': kh, 'kh': kw,
            'sy': sy, 'sx': sx,
            'ph': ph, 'pw': pw,
            'dy': dy, 'dx': dx,
            'cover_all': cover_all
        }
        col = Node(im2col, [x], kwargs)
        y = Node(convolution_2d_forward, [x, W, b, col])
        return y

    def backward(self, xs, gy):
        x, W, b, col = xs

        kwargs ={
            'stride': self.kwargs['stride'],
            'pad': self.kwargs['pad'],
            'size': 0,
        }

        gx = Node(convolution_2d_backward_x, [x, W, gy], kwargs)
        gW = Node(convolution_2d_backward_W, [col, gy])
        if b is None:
            gb = None
        else:
            gb = Node(convolution_2d_backward_b, [gy])
        return gx, gW, gb, None

In [69]:
def convolution_2d_forward(x, W, b, col):
    xp = cuda.get_array_module(col)

    y = xp.tensordot(col, W, ((1, 2, 3), (1, 2, 3)))
    if b is not None:
        y += b
    y = xp.rollaxis(y, 3, 1)

    return y


def convolution_2d_backward_x(x, W, gy, stride, pad, size):
    xp = cuda.get_array_module(W)

    sy, sx = stride
    ph, pw = pad
    _, _, h, w = x.shape

    gcol = xp.tensordot(W, gy, (0, 1))
    gcol = xp.rollaxis(gcol, 3)
    gx = col2im(gcol, sy, sx, ph, pw, h, w)
    return gx


def convolution_2d_backward_W(col, gy):
    xp = cuda.get_array_module(col)
    gW = xp.tensordot(gy, col, ((0, 2, 3), (0, 4, 5)))
    return gW


def convolution_2d_backward_b(gy):
    gb = gy.sum(axis=(0, 2, 3))
    return gb

In [70]:
y = Node(lambda x: x, [1], retain_data=True)

In [71]:
y.kwargs

{}

In [72]:
conv = Convolution2D(3, 10, 3)
conv.to_gpu()

In [73]:
xp = cupy
x = Variable(xp.ones((2, 3, 10, 10), dtype=np.float32))

In [74]:
h = x ** 2

In [75]:
y = conv(h)

In [76]:
y.data

array([[[[ 1.76885784,  1.76885784,  1.76885784, ...,  1.76885784,
           1.76885784,  1.76885784],
         [ 1.76885784,  1.76885784,  1.76885784, ...,  1.76885784,
           1.76885784,  1.76885784],
         [ 1.76885784,  1.76885784,  1.76885784, ...,  1.76885784,
           1.76885784,  1.76885784],
         ..., 
         [ 1.76885784,  1.76885784,  1.76885784, ...,  1.76885784,
           1.76885784,  1.76885784],
         [ 1.76885784,  1.76885784,  1.76885784, ...,  1.76885784,
           1.76885784,  1.76885784],
         [ 1.76885784,  1.76885784,  1.76885784, ...,  1.76885784,
           1.76885784,  1.76885784]],

        [[-0.34409964, -0.34409964, -0.34409964, ..., -0.34409964,
          -0.34409964, -0.34409964],
         [-0.34409964, -0.34409964, -0.34409964, ..., -0.34409964,
          -0.34409964, -0.34409964],
         [-0.34409964, -0.34409964, -0.34409964, ..., -0.34409964,
          -0.34409964, -0.34409964],
         ..., 
         [-0.34409964, -0.344099

In [77]:
y.backward(gy=Variable(y.data))

In [78]:
h.grad.data

array([[[[ 0.86240691,  0.56204802,  1.1105324 ,  1.1105324 ,  1.1105324 ,
           1.1105324 ,  1.1105324 ,  1.1105324 ,  0.24812549,  0.54848439],
         [ 0.44067636, -0.03594735,  0.90218687,  0.90218687,  0.90218687,
           0.90218687,  0.90218687,  0.90218687,  0.46151045,  0.93813419],
         [-0.17230448,  0.48268104,  2.26339912,  2.26339912,  2.26339912,
           2.26339912,  2.26339912,  2.26339912,  2.43570352,  1.78071809],
         [-0.17230448,  0.48268104,  2.26339912,  2.26339912,  2.26339912,
           2.26339912,  2.26339912,  2.26339912,  2.43570352,  1.78071809],
         [-0.17230448,  0.48268104,  2.26339912,  2.26339912,  2.26339912,
           2.26339912,  2.26339912,  2.26339912,  2.43570352,  1.78071809],
         [-0.17230448,  0.48268104,  2.26339912,  2.26339912,  2.26339912,
           2.26339912,  2.26339912,  2.26339912,  2.43570352,  1.78071809],
         [-0.17230448,  0.48268104,  2.26339912,  2.26339912,  2.26339912,
           2.263399

In [79]:
conv.W.grad.data

array([[[[ 226.41395569,  226.41395569,  226.41395569],
         [ 226.41395569,  226.41395569,  226.41395569],
         [ 226.41395569,  226.41395569,  226.41395569]],

        [[ 226.41395569,  226.41395569,  226.41395569],
         [ 226.41395569,  226.41395569,  226.41395569],
         [ 226.41395569,  226.41395569,  226.41395569]],

        [[ 226.41395569,  226.41395569,  226.41395569],
         [ 226.41395569,  226.41395569,  226.41395569],
         [ 226.41395569,  226.41395569,  226.41395569]]],


       [[[ -44.04478073,  -44.04478073,  -44.04478073],
         [ -44.04478073,  -44.04478073,  -44.04478073],
         [ -44.04478073,  -44.04478073,  -44.04478073]],

        [[ -44.04478073,  -44.04478073,  -44.04478073],
         [ -44.04478073,  -44.04478073,  -44.04478073],
         [ -44.04478073,  -44.04478073,  -44.04478073]],

        [[ -44.04478073,  -44.04478073,  -44.04478073],
         [ -44.04478073,  -44.04478073,  -44.04478073],
         [ -44.04478073,  -44.044780

In [5]:
def hoge(**kwargs):
    return kwargs.get('x')

In [6]:
hoge(x=1, y=2)

1

In [12]:
{}.get('x', 3)

3

In [101]:
def hoge(kwargs={}, **kwargs):
    return fuga(**kwargs, **kwargs)

SyntaxError: duplicate argument 'kwargs' in function definition (<ipython-input-101-2fc7e1c57ed0>, line 1)

In [100]:
def fuga(x):
    return x

In [98]:
hoge(x=1)

TypeError: fuga() missing 1 required positional argument: 'x'

In [97]:
def hoge(kwargs={}, **kwargs2):
    return fuga(**kwargs)