In [1]:
import sys
import numpy as np

sys.path.append('../')

import cupy
from mandala import cuda
from mandala import Node
from mandala import Variable

from mandala.autodiff import autodiff
from mandala.autodiff import layer
from mandala.autodiff import initializers
from mandala.autodiff.utils_conv import im2col
from mandala.autodiff.utils_conv import col2im
from mandala.autodiff.utils_conv import get_conv_outsize

from mandala.autodiff.convolution_2d import Convolution2D

In [2]:
get_conv_outsize?

[0;31mSignature:[0m [0mget_conv_outsize[0m[0;34m([0m[0msize[0m[0;34m,[0m [0mk[0m[0;34m,[0m [0ms[0m[0;34m,[0m [0mp[0m[0;34m,[0m [0mcover_all[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m [0md[0m[0;34m=[0m[0;36m1[0m[0;34m)[0m[0;34m[0m[0m
[0;31mDocstring:[0m <no docstring>
[0;31mFile:[0m      ~/mandala/mandala/autodiff/utils_conv.py
[0;31mType:[0m      function


In [70]:
import numpy
import cupy

from mandala import cuda
from mandala import Node
from mandala import Variable
from mandala.autodiff import autodiff
from mandala.autodiff import layer
from mandala.autodiff import initializers
from mandala.autodiff.utils_conv import im2col
from mandala.autodiff.utils_conv import col2im
from mandala.autodiff.utils_conv import get_conv_outsize


if cuda.cudnn_enabled:
    _cudnn_version = cupy.cuda.cudnn.getVersion()


def _forward_cudnn(x, W, b, sy, sx, ph, pw, cover_all, dy, dx):
    xp = cuda.get_array_module(x)

    out_c, _, kh, kw = W.shape
    n, _, h, w = x.shape

    pad = (ph, pw)
    stride = (sy, sx)
    dilation = (dy, dx)

    out_h = get_conv_outsize(
        h, kh, sy, ph, cover_all=cover_all, d=dy)
    out_w = get_conv_outsize(
        w, kw, sx, pw, cover_all=cover_all, d=dx)

    y = xp.empty((n, out_c, out_h, out_w), dtype=x.dtype)
    cupy.cudnn.convolution_forward(
        x, W, b, y, pad, stride, dilation, 1,
        auto_tune=True, tensor_core='auto')
    return y


def _forward(x, W, b, sy, sx, ph, pw, cover_all, dy, dx, cls):
    xp = cuda.get_array_module(x)

    col = im2col(x, kh, kw, sy, sx, ph, pw,
                 cover_all=cover_all, dy=dy, dx=dx)
    cls.col = col

    y = xp.tensordot(col, W, ((1, 2, 3), (1, 2, 3)))
    if b is not None:
        y += b
    y = xp.rollaxis(y, 3, 1)
    return y

def _backward_W(col, gy):
    xp = cuda.get_array_module(col)
    gW = xp.tensordot(gy, col, ((0, 2, 3), (0, 4, 5)))
    return gW


def _backward_x(x, W, gy, sy, sx, ph, pw):
    xp = cuda.get_array_module(W)

    _, _, h, w = x.shape
    gcol = xp.tensordot(W, gy, (0, 1))
    gcol = xp.rollaxis(gcol, 3)
    gx = col2im(gcol, sy, sx, ph, pw, h, w)
    return gx


def convolution_2d_forward(x, W, b, sy, sx, ph, pw,
                           cover_all, dy, dx, cls):
    xp = cuda.get_array_module(x)

    use_cudnn = (
        xp == cupy
        and x.dtype == W.dtype
        and not cover_all
        and ((dy == 1 and dx == 1) or _cudnn_version >= 6000)
    )

    if use_cudnn:
        y = _forward_cudnn(
            x, W, b, sy, sx, ph, pw, cover_all, dy, dx)
    else:
        y = _forward(
            x, W, b, sy, sx, ph, pw, cover_all, dy, dx, cls)
    cls.used_cudnn = use_cudnn
    return y


def convolution_2d_backward_x(x, W, gy, sy, sx, ph, pw, cls):
    if cls.used_cudnn:
        pass
    else:
        gx = _backward_x(x, W, gy, sy, sx, ph, pw)
    return gx


def convolution_2d_backward_W(col, gy, cls):
    if cls.used_cudnn:
        pass
    else:
        gW = _backward_W(col, gy)
    return gW


def convolution_2d_backward_b(gy):
    gb = gy.sum(axis=(0, 2, 3))
    return gb


class Convolution2DFunction(autodiff.AutoDiff):

    def __init__(self, stride=1, pad=0, cover_all=False,
                 dilate=1):
        self.sy, self.sx = _pair(stride)
        self.ph, self.pw = _pair(pad)
        self.dy, self.dx = _pair(dilate)
        self.cover_all = cover_all

    def forward(self, xs):
        y = Node(convolution_2d_forward, xs,
                 sy=self.sy, sx=self.sx, ph=self.ph, pw=self.pw,
                 cover_all=self.cover_all, dy=self.dy, dx=self.dx,
                 cls=self)
        return y

    def backward(self, xs, gy):
        x, W, b = xs
        col = self.col

        gx = Node(convolution_2d_backward_x, [x, W, gy],
                  sy=self.sy, sx=self.sx, ph=self.ph, pw=self.pw)
        gW = Node(convolution_2d_backward_W, [col, gy])
        if b is None:
            gb = None
        else:
            gb = Node(convolution_2d_backward_b, [gy])
        return gx, gW, gb


def _pair(x):
    if hasattr(x, '__getitem__'):
        return x
    return x, x


class Convolution2D(layer.Layer):
    def __init__(self, in_ch, out_ch, ksize, stride=1, pad=0,
                 nobias=False, cover_all=False, dilate=1):

        self.config = {
            'ksize': _pair(ksize),
            'stride': _pair(stride),
            'pad': _pair(pad),
            'dilate': _pair(dilate),
            'cover_all': cover_all
        }

        kh, kw = self.config['ksize']
        self.W = Variable(
            initializers.HeNormal((out_ch, in_ch, kh, kw)))
        if nobias:
            self.b = None
        else:
            self.b = Variable(
                numpy.zeros(out_ch, dtype=numpy.float32))

    def __call__(self, x):
        xs = [x, self.W, self.b]

        stride = self.config['stride']
        pad = self.config['pad']
        cover_all = self.config['cover_all']
        dilate = self.config['dilate']

        return Convolution2DFunction(
            stride, pad, cover_all, dilate)(xs)


In [54]:
conv = Convolution2D(3, 10, 3)
conv.to_gpu()

In [55]:
xp = cupy
x = Variable(xp.ones((2, 3, 10, 10), dtype=np.float32))

In [56]:
y = conv(x)

In [57]:
cuda.cudnn_enabled = True

In [65]:
y

<mandala.nodecore.Node at 0x7ff0f6dd8978>

In [66]:
y.backward(gy=Variable(y.data))

AttributeError: 'Convolution2DFunction' object has no attribute 'col'

In [8]:
cupy.cuda.cudnn.getVersion()

6021

In [24]:
from chainer import configuration

In [26]:
auto_tune = configuration.config.autotune
tensor_core = configuration.config.use_cudnn_tensor_core

In [27]:
auto_tune

False

In [28]:
tensor_core

'auto'

In [59]:
cupy.cudnn.convolution_backward_data?

[0;31mDocstring:[0m convolution_backward_data(ndarray W, ndarray x, ndarray b, ndarray y, tuple pad, tuple stride, tuple dilation, int groups, *, bool deterministic, bool auto_tune, str tensor_core)
[0;31mType:[0m      builtin_function_or_method


In [60]:
cupy.cudnn.convolution_backward_filter?

[0;31mDocstring:[0m convolution_backward_filter(ndarray x, ndarray gy, ndarray gW, tuple pad, tuple stride, tuple dilation, int groups, *, bool deterministic, bool auto_tune, str tensor_core)
[0;31mType:[0m      builtin_function_or_method


In [68]:
cupy.cudnn.convolution_forward?

[0;31mDocstring:[0m convolution_forward(ndarray x, ndarray W, ndarray b, ndarray y, tuple pad, tuple stride, tuple dilation, int groups, *, bool auto_tune, str tensor_core)
[0;31mType:[0m      builtin_function_or_method


In [2]:
from mandala.autodiff import reshape

In [3]:
reshape.install_node_reshape()

In [4]:
x = Variable(np.zeros(12))

In [5]:
x.reshape(3, 4).data

array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])