In [1]:
import torch

In [2]:
#devito set up
from abc import ABC, abstractmethod
from devito import Operator, Function
from numpy import array
import numpy as np
from devito import Grid, Function, dimensions, Eq, Inc
import sympy
class Layer(ABC):
    """Abstract base for a layer whose computation is expressed with Devito.

    A concrete layer implements `_allocate` (build the output buffer) and
    `equations` (describe how that buffer is filled). The constructor stores
    the input and immediately allocates the output buffer as `self._R`.
    """

    def __init__(self, input_data):
        # Store the input first: _allocate() is free to read it.
        self._input_data = input_data
        self._R = self._allocate()

    # Annotations in this class are quoted so they are never evaluated when
    # the class body runs; they serve purely as documentation / for checkers.
    @abstractmethod
    def _allocate(self) -> "Function":
        """Return the Function object that holds the layer's output."""

    def execute(self) -> "tuple[Operator, np.ndarray]":
        """Build the operator for this layer.

        Returns
        -------
        tuple
            ``(op, data)`` where ``op`` is the Operator built from
            `equations()` and ``data`` is the data of the output buffer.
            The operator is not applied here; the caller runs it.
        """
        op = Operator(self.equations())
        # Deliberate bare attribute access: per Devito's Operator API,
        # reading `cfunction` JIT-compiles the operator, so compilation
        # happens here rather than on the first apply().
        op.cfunction

        return (op, self._R.data)

    @abstractmethod
    def equations(self) -> list:
        """Return the list of equations that the operator is built from."""

In [3]:
# Random integer batch laid out as (batch, channel, rows, columns), values in
# [0, 60). A privately seeded generator keeps the data - and every output
# derived from it - identical across kernel restarts without touching the
# global torch RNG state.
batch_image = torch.randint(0, 60, (2, 3, 3, 3),
                            generator=torch.Generator().manual_seed(0))

In [4]:
class Subsampling_4d(Layer):
    """Subsampling (pooling) layer over a 4-D batch.

    A window of ``kernel_size`` slides over the two trailing axes of
    ``feature_map`` and ``function`` reduces every window to one value.

    Parameters
    ----------
    kernel_size : tuple of int
        ``(height, width)`` of the window.
    feature_map : array-like
        Input indexed as (batch, channel, rows, columns). It must expose
        ``.shape`` and be convertible with ``np.asarray``.
    function : callable
        Receives the list of symbolic window entries and returns the reduced
        expression, e.g. ``lambda w: sympy.Max(*w)``.
    stride : tuple of int, optional
        ``(row step, column step)`` of the window. Defaults to ``(1, 1)``.
    padding : tuple of int, optional
        Number of zero rows / zero columns added on each side of every
        feature map. Defaults to ``(0, 0)``.
    activation : callable, optional
        Applied to each reduced expression (after the bias is added).
    bias : optional
        Added to each reduced expression. Defaults to 0.
    """

    def __init__(self, kernel_size, feature_map, function,
                 stride=(1, 1), padding=(0, 0), activation=None,
                 bias=0):
        # All sizes are expressed as (batch, channel, rows, columns).
        # TODO: validate kernel_size / feature_map / stride / padding.

        self._kernel_size = kernel_size
        self._function = function
        self._activation = activation
        self._bias = bias

        self._stride = stride
        self._padding = padding

        # Must come last: Layer.__init__ calls _allocate(), which reads the
        # attributes assigned above.
        super().__init__(input_data=feature_map)

    def _allocate(self):
        """Create the padded input buffer ``B`` and return the output ``R``.

        ``B`` (kept as ``self._B``) holds the input surrounded by the
        requested zero padding; ``R`` has one entry per window position.
        """
        batch, channels, in_height, in_width = self._input_data.shape
        pad_rows, pad_cols = self._padding
        stride_rows, stride_cols = self._stride
        kernel_height, kernel_width = self._kernel_size

        map_height = in_height + 2 * pad_rows
        map_width = in_width + 2 * pad_cols

        a, b, c, d = dimensions('a b c d')
        gridB = Grid(shape=(batch, channels, map_height, map_width),
                     dimensions=(a, b, c, d))
        B = Function(name='B', grid=gridB, space_order=0)

        e, f, g, h = dimensions('e f g h')
        gridR = Grid(shape=(batch, channels,
                            (map_height - kernel_height + stride_rows)
                            // stride_rows,
                            (map_width - kernel_width + stride_cols)
                            // stride_cols),
                     dimensions=(e, f, g, h))
        # Echo the output grid so its shape is visible when the layer is built.
        print(gridR)
        R = Function(name='R', grid=gridR, space_order=0)

        # Zero-pad only the two spatial axes and copy the whole batch into B
        # in one assignment.
        B.data[:] = np.pad(np.asarray(self._input_data),
                           ((0, 0), (0, 0),
                            (pad_rows, pad_rows), (pad_cols, pad_cols)),
                           mode='constant')

        self._B = B
        return R

    def equations(self):
        """Return one ``Eq`` per (image, channel) pair.

        Each equation assigns ``R[image, channel, c, d]`` the reduction of
        the window of ``B`` starting at ``(stride * c, stride * d)``, plus
        the bias, passed through the activation when one was given.
        """
        # The symbolic indices are B's two spatial dimensions; they index
        # both B and R below, exactly as the equations are meant to be built.
        row_dim, col_dim = self._B.dimensions[2:]
        kernel_height, kernel_width = self._kernel_size
        stride_rows, stride_cols = self._stride
        images = self._input_data.shape[0]
        channels = self._input_data.shape[1]

        equation_sum = []
        for image in range(images):
            for channel in range(channels):
                window = [self._B[image, channel,
                                  stride_rows * row_dim + i,
                                  stride_cols * col_dim + j]
                          for i in range(kernel_height)
                          for j in range(kernel_width)]
                rhs = self._function(window) + self._bias
                # The activation has to wrap the expression *before* the
                # equation is created, otherwise it never reaches the operator.
                if self._activation is not None:
                    rhs = self._activation(rhs)
                equation_sum.append(
                    Eq(self._R[image, channel, row_dim, col_dim], rhs))
        return equation_sum
# Max-pool the random batch with a 2x2 window, keeping the default stride
# and padding.
Sample_obj4 = Subsampling_4d(
    kernel_size=(2, 2),
    feature_map=batch_image,
    function=lambda window: sympy.Max(*window),
)
# execute() hands back (operator, output buffer): run the operator, then
# show the buffer's shape as the cell result.
tup4 = Sample_obj4.execute()
tup4[0].apply()
tup4[1].shape

Grid[extent=(1.0, 1.0, 1.0, 1.0), shape=(2, 3, 2, 2), dimensions=(e, f, g, h)]


Operator `Kernel` run in 0.01 s


(2, 3, 2, 2)

In [5]:
# Display the symbolic equations built by the layer: one per (image, channel) pair.
Sample_obj4.equations()

[Eq(R[0, 0, c, d], Max(B[0, 0, c, d], B[0, 0, c, d + 1], B[0, 0, c + 1, d], B[0, 0, c + 1, d + 1])),
 Eq(R[0, 1, c, d], Max(B[0, 1, c, d], B[0, 1, c, d + 1], B[0, 1, c + 1, d], B[0, 1, c + 1, d + 1])),
 Eq(R[0, 2, c, d], Max(B[0, 2, c, d], B[0, 2, c, d + 1], B[0, 2, c + 1, d], B[0, 2, c + 1, d + 1])),
 Eq(R[1, 0, c, d], Max(B[1, 0, c, d], B[1, 0, c, d + 1], B[1, 0, c + 1, d], B[1, 0, c + 1, d + 1])),
 Eq(R[1, 1, c, d], Max(B[1, 1, c, d], B[1, 1, c, d + 1], B[1, 1, c + 1, d], B[1, 1, c + 1, d + 1])),
 Eq(R[1, 2, c, d], Max(B[1, 2, c, d], B[1, 2, c, d + 1], B[1, 2, c + 1, d], B[1, 2, c + 1, d + 1]))]

In [7]:
# tup4[0] is the operator returned by execute(); printing it shows the C code generated for it.
print(tup4[0])

#define _POSIX_C_SOURCE 200809L
#include "stdlib.h"
#include "math.h"
#include "sys/time.h"
#include "xmmintrin.h"
#include "pmmintrin.h"
#include "omp.h"

struct dataobj
{
  void *restrict data;
  int * size;
  int * npsize;
  int * dsize;
  int * hsize;
  int * hofs;
  int * oofs;
} ;

struct profiler
{
  double section0;
} ;


int Kernel(struct dataobj *restrict B_vec, struct dataobj *restrict R_vec, const int c_M, const int c_m, const int d_M, const int d_m, struct profiler * timers)
{
  float (*restrict B)[B_vec->size[1]][B_vec->size[2]][B_vec->size[3]] __attribute__ ((aligned (64))) = (float (*)[B_vec->size[1]][B_vec->size[2]][B_vec->size[3]]) B_vec->data;
  float (*restrict R)[R_vec->size[1]][R_vec->size[2]][R_vec->size[3]] __attribute__ ((aligned (64))) = (float (*)[R_vec->size[1]][R_vec->size[2]][R_vec->size[3]]) R_vec->data;

  /* Flush denormal numbers to zero in hardware */
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
 