In [3]:
import numpy as np
import tensorflow as tf
print(tf.__version__)

def arr_diff(x, y):
    """Return the sum of squared element-wise differences between two arrays.

    Both arguments must expose a ``.shape`` attribute and the two shapes must
    be equal, otherwise an ``AssertionError`` is raised. The arrays are
    flattened before the comparison, so the result is a single scalar.
    """
    assert list(x.shape) == list(y.shape)
    # Flatten both operands so the inner product below yields one number.
    delta = np.ravel(x) - np.ravel(y)
    return delta @ delta

2.4.1


# DepthwiseConv2D

In [7]:
# Random input batch and depthwise filter used to trace the TensorFlow op.
x = np.random.randn(3, 32, 29, 7)
w = np.random.randn(5, 5, 7, 2)

@tf.function
def tf_depthwise_conv_2d(x, w):
    """Reference 2-D depthwise convolution: unit strides and 'SAME' padding."""
    y = tf.nn.depthwise_conv2d(input=x, filter=w, strides=[1, 1, 1, 1], padding='SAME')
    return y

# Call the function defined in this cell. The previous code called
# `tf_depthwise_conv`, a name that is not defined anywhere in the notebook and
# therefore raised NameError on a fresh kernel.
y = tf_depthwise_conv_2d(x, w)
print(x.shape, w.shape, y.shape)

# Trace a float32 concrete function for these shapes and dump its MLIR form.
fun = tf_depthwise_conv_2d.get_concrete_function(
    tf.TensorSpec(x.shape, tf.dtypes.float32),
    tf.TensorSpec(w.shape, tf.dtypes.float32),
)
mlir = tf.mlir.experimental.convert_function(fun)
print(mlir)

(3, 32, 29, 7) (5, 5, 7, 2) (3, 32, 29, 14)


module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, producer = 561 : i32}} {
  func @__inference_tf_depthwise_conv_22(%arg0: tensor<3x32x29x7xf32> {tf._user_specified_name = "x"}, %arg1: tensor<5x5x7x2xf32> {tf._user_specified_name = "w"}) -> tensor<3x32x29x14xf32> attributes {tf.entry_function = {control_outputs = "", inputs = "x,w", outputs = "identity_RetVal"}} {
    %0 = "tf.DepthwiseConv2dNative"(%arg0, %arg1) {data_format = "NHWC", device = "", dilations = [1, 1, 1, 1], explicit_paddings = [], padding = "SAME", strides = [1, 1, 1, 1]} : (tensor<3x32x29x7xf32>, tensor<5x5x7x2xf32>) -> tensor<3x32x29x14xf32>
    %1 = "tf.Identity"(%0) {device = ""} : (tensor<3x32x29x14xf32>) -> tensor<3x32x29x14xf32>
    return %1 : tensor<3x32x29x14xf32>
  }
}


In [9]:
class MyDepthwiseConv2DDescriptor:
    """Shape descriptor and reference implementations of a 2-D depthwise convolution.

    The descriptor is built from *shapes* only; the actual tensors are passed
    to ``compute``.

    Args:
        x: input shape ``(xB, xH, xW, xC)``.
        w: filter shape ``(kH, kW, xC, cMult)``.
        strides: sequence of 4 ints; the first and last entries must be 1,
            the two middle ones are the height and width strides.
        padding: ``'SAME'``, ``'VALID'`` or an explicit list
            ``[[0, 0], [pad_top, pad_bottom], [pad_left, pad_right], [0, 0]]``.

    Attributes:
        padding: always the explicit 4x2 list once ``__init__`` has run.
        output_shape: ``[xB, yH, yW, xC * cMult]``.

    Raises:
        AssertionError: on malformed strides, explicit padding or mismatching
            channel counts.
        Exception: if ``padding`` is a string other than 'SAME' / 'VALID'.
    """

    def __init__(self, x, w, strides, padding):
        self.x = x
        self.w = w
        self.strides = strides
        self.padding = padding
        self.output_shape = None

        # Check strides
        assert len(strides) == 4
        assert strides[0] == 1
        assert strides[3] == 1

        # Compute real paddings
        self.get_explicit_padding()
        print('Paddings:', self.padding)

        # Compute output shape
        self.get_output_shape()
        print('Output shape:', self.output_shape)

    @staticmethod
    def _same_padding(in_size, k_size, stride):
        """Return ``[before, after]`` 'SAME' padding for one spatial axis."""
        remainder = in_size % stride
        total = max(k_size - (stride if remainder == 0 else remainder), 0)
        before = total // 2
        # When the total is odd the extra element goes after the data.
        return [before, total - before]

    def get_explicit_padding(self):
        """Replace ``self.padding`` by its explicit 4x2 list form."""
        padding = self.padding

        if not isinstance(padding, str):
            # explicit padding
            # [[0, 0], [pad_top, pad_bottom], [pad_left, pad_right], [0, 0]]
            assert len(padding) == 4
            for pair in padding: assert len(pair) == 2
            for amount in padding[0]: assert amount == 0
            for amount in padding[3]: assert amount == 0
            pad_h = [padding[1][0], padding[1][1]]
            pad_w = [padding[2][0], padding[2][1]]

        elif padding == 'SAME':
            pad_h = self._same_padding(self.x[1], self.w[0], self.strides[1])
            pad_w = self._same_padding(self.x[2], self.w[1], self.strides[2])

        elif padding == 'VALID':
            pad_h = [0, 0]
            pad_w = [0, 0]

        else:
            raise Exception('Unknown padding')

        self.padding = [[0, 0], pad_h, pad_w, [0, 0]]

    def get_output_shape(self):
        """Compute ``self.output_shape`` from the shapes, strides and explicit padding."""
        pad_top, pad_bottom = self.padding[1]
        pad_left, pad_right = self.padding[2]
        sh, sw = self.strides[1:-1]
        xB, xHeight, xWidth, xC = self.x
        kHeight, kWidth, _, cMult = self.w

        # Input and filter must agree on the number of input channels.
        assert self.x[3] == self.w[2]

        # Padded input dimensions
        xPHeight = xHeight + pad_top + pad_bottom
        xPWidth = xWidth + pad_left + pad_right

        yHeight = (xPHeight - kHeight) // sh + 1
        yWidth = (xPWidth - kWidth) // sw + 1
        yC = xC * cMult

        self.output_shape = [xB, yHeight, yWidth, yC]

    # compute using tf version directly
    def compute_tf(self, x, w):
        """Run ``tf.nn.depthwise_conv2d`` on the already padded ``x`` (no extra padding)."""
        y = tf.nn.depthwise_conv2d(input=x, filter=w, strides=self.strides, padding=[[0,0],[0,0],[0,0],[0,0]])
        return y.numpy()

    # compute using conv2d operation
    def compute_with_conv2d(self, x, w):
        """Compute the result with one ``tf.nn.conv2d`` per input channel.

        A different convolution with ``cMult`` filters is applied to every
        channel of the (already padded) input and the per-channel results are
        concatenated along the last axis.
        """
        xC = self.x[3]

        res = []
        for c in range(xC):
            subX = x[:, :, :, c:c+1]  # (xB, xH, xW, xC) => (xB, xH, xW, 1)
            subW = w[:, :, c:c+1, :]  # (kH, kW, xC, cMult) => (kH, kW, 1, cMult)
            y = tf.nn.conv2d(input=subX, filters=subW, strides=self.strides, padding=[[0,0],[0,0],[0,0],[0,0]])
            res.append(y.numpy())

        return np.concatenate(res, axis=-1)

    # pad input tensor using paddings infos
    def pad_input(self, x):
        """Return ``x`` padded (``np.pad`` defaults) according to ``self.padding``."""
        return np.pad(x, self.padding)

    # 7 for loop using depthwise conv2d formula
    def compute_naive(self, x, w):
        """Compute the result on the already padded ``x`` with 7 nested loops."""
        sh, sw = self.strides[1:-1]
        xB, xC = self.x[0], self.x[3]
        kHeight, kWidth, _, cMult = self.w
        yHeight, yWidth, yC = self.output_shape[1:]

        res = np.zeros((xB, yHeight, yWidth, yC))

        for b in range(xB):
            for i in range(yHeight):
                for j in range(yWidth):
                    for k in range(xC):
                        for q in range(cMult):
                            val = 0
                            for di in range(kHeight):
                                for dj in range(kWidth):
                                    val += x[b, sh*i + di, sw*j + dj, k] * w[di, dj, k, q]
                            # Output channels are grouped by input channel.
                            res[b, i, j, k * cMult + q] = val

        return res

    # flatten and extend the input to run depthwise conv2d as a matmul
    # this is the same transformation than for conv2d
    # turned from (xB, xH, xW, xC) into (xB*yH*yW, kH*kW*xC)
    def matmul_flatten_x(self, x):
        """Unfold the padded input into one row of ``kH*kW*xC`` values per output pixel."""
        sh, sw = self.strides[1:-1]
        xB, xC = self.x[0], self.x[3]
        kHeight, kWidth = self.w[0], self.w[1]
        yHeight, yWidth = self.output_shape[1:3]

        # divide 1 DIM into 3 to build the mat more easily
        new_x = np.zeros((xB, yHeight, yWidth, kHeight*kWidth*xC))

        for b in range(xB):
            for i in range(yHeight):
                for j in range(yWidth):
                    # extract x slice used in the sum of naive implem to compute y[b, i, j, :]
                    new_x[b, i, j] = x[b, sh*i:sh*i+kHeight, sw*j:sw*j+kWidth, :].ravel()

        return new_x.reshape(xB * yHeight * yWidth, kHeight * kWidth * xC)

    # flatten and extend the filter to run depthwise conv2d as a matmul
    # turned from (kH, kW, xC, cMult) into (kH * kW * xC, xC * cMult)
    # It returns a sparse matrix: each column holds only kH*kW non-zero
    # candidates out of kH*kW*xC rows, i.e. a fraction (xC-1)/xC of it is 0s.
    # That's because every channel of the input gets multiplied by different filters
    # So only the right ones are passed
    def matmul_flatten_k(self, w):
        """Expand the filter into the ``(kH*kW*xC, xC*cMult)`` matmul operand."""
        kHeight, kWidth, _, cMult = self.w
        xC = self.x[3]

        # divide into many dims to build more easily
        new_w = np.zeros((kHeight*kWidth, xC, xC, cMult))

        for k in range(xC):
            for q in range(cMult):
                # Only the "diagonal" (input channel k -> output group k) is filled.
                new_w[:, k, k, q] = w[:, :, k, q].ravel()

        return new_w.reshape(kHeight*kWidth*xC, xC*cMult)

    # Compute by turning the conv into a matmul op
    def compute_matmul(self, x, w):
        """Compute the result on the already padded ``x`` as a single matrix product."""
        xB = self.x[0]
        yHeight, yWidth, yC = self.output_shape[1:]

        # (xB, xH, xW, xC) -> (xB*yH*yW, kH*kW*xC)
        x = self.matmul_flatten_x(x)

        # (kH, kW, xC, cMult) -> (kH * kW * xC, xC * cMult)
        w = self.matmul_flatten_k(w)

        # [(xB*yH*yW, kH*kW*xC), (kH * kW * xC, xC * cMult)] -> (xB*yH*yW, xC * cMult)
        y = np.matmul(x, w)

        # (xB*yH*yW, xC * cMult) -> (xB, yH, yW, yC)
        return y.reshape(xB, yHeight, yWidth, yC)

    def compute(self, x, w, method='matmul'):
        """Pad ``x`` and run the depthwise convolution with the chosen backend.

        Args:
            x: input array whose shape equals the descriptor's input shape.
            w: filter array whose shape equals the descriptor's filter shape.
            method: one of ``'matmul'`` (default, the previous hard-coded
                behaviour), ``'naive'``, ``'conv2d'`` or ``'tf'``.

        Returns:
            Array of shape ``self.output_shape``.

        Raises:
            AssertionError: if an array shape does not match the descriptor.
            ValueError: if ``method`` is not a known backend.
        """
        assert list(x.shape) == list(self.x)
        assert list(w.shape) == list(self.w)

        backends = {
            'tf': self.compute_tf,
            'conv2d': self.compute_with_conv2d,
            'naive': self.compute_naive,
            'matmul': self.compute_matmul,
        }
        if method not in backends:
            raise ValueError('Unknown method %r, expected one of %s' % (method, sorted(backends)))

        x = self.pad_input(x)
        y = backends[method](x, w)

        assert list(y.shape) == list(self.output_shape)
        return y
        
        
def my_depthwise_conv2d(x, w, strides, padding):
    """Run the NumPy 2-D depthwise convolution of ``x`` by filter ``w``.

    A ``MyDepthwiseConv2DDescriptor`` is built from the two array shapes, the
    strides and the padding, then used to compute and return the result.
    """
    descriptor = MyDepthwiseConv2DDescriptor(x.shape, w.shape, strides, padding)
    result = descriptor.compute(x, w)
    return result

# Random test case: input of shape (3, 13, 17, 7) and filter of shape (5, 5, 7, 2).
x = np.random.randn(3, 13, 17, 7)
w = np.random.randn(5, 5, 7, 2)

# Reference result from the TensorFlow op versus the NumPy re-implementation.
y_ref = tf_depthwise_conv_2d(x, w).numpy()
y = my_depthwise_conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')

# Sum of squared differences between both results (expected to be ~0).
print(arr_diff(y_ref, y))

Paddings: [[0, 0], [2, 2], [2, 2], [0, 0]]
Output shape: [3, 13, 17, 14]
4.7139007055828086e-27


## DepthwiseConv3D

In [41]:
@tf.function
def tf_nn_depthwise_conv3d(input, filter, strides, padding):
    """Depthwise 3-D convolution assembled from one ``tf.nn.conv3d`` per input channel.

    ``input`` is sliced along its last (5th) axis and ``filter`` along its 4th
    axis, one channel at a time:

        input  (..., xC)        => (..., 1)
        filter (..., xC, cMult) => (..., 1, cMult)

    Each slice pair is convolved with the given ``strides`` / ``padding`` and
    the per-channel outputs are concatenated along the last axis.
    """
    per_channel = [
        tf.nn.conv3d(
            input=input[:, :, :, :, c:c+1],
            filters=filter[:, :, :, c:c+1, :],
            strides=strides,
            padding=padding,
        )
        for c in range(input.shape[-1])
    ]
    return tf.concat(per_channel, axis=-1)

@tf.function
def tf_depthwise_conv_3d(x, w):
    """Reference 3-D depthwise convolution: unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1, 1]
    return tf_nn_depthwise_conv3d(input=x, filter=w, strides=unit_strides, padding='SAME')

# Random 5-D input of shape (3, 13, 17, 6, 7) and filter of shape (5, 5, 2, 7, 2).
x = np.random.randn(3, 13, 17, 6, 7)
w = np.random.randn(5, 5, 2, 7, 2)

# Run the reference implementation once and show the resulting shape.
y = tf_depthwise_conv_3d(x, w)
print(y.shape)

# Trace a float32 concrete function for these shapes and dump its MLIR form.
fun = tf_depthwise_conv_3d.get_concrete_function(
    tf.TensorSpec(x.shape, tf.dtypes.float32),
    tf.TensorSpec(w.shape, tf.dtypes.float32),
)
mlir = tf.mlir.experimental.convert_function(fun)
print(mlir)

(3, 13, 17, 6, 14)


module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, producer = 561 : i32}} {
  func @__inference_tf_depthwise_conv_3d_1163(%arg0: tensor<3x13x17x6x7xf32> {tf._user_specified_name = "x"}, %arg1: tensor<5x5x2x7x2xf32> {tf._user_specified_name = "w"}) -> tensor<3x13x17x6x14xf32> attributes {tf.entry_function = {control_outputs = "", inputs = "x,w", outputs = "identity_RetVal"}} {
    %0 = "tf.PartitionedCall"(%arg0, %arg1) {_collective_manager_ids = [], _read_only_resource_inputs = [], config = "", config_proto = "\0A\07\0A\03CPU\10\01\0A\07\0A\03GPU\10\002\02J\008\01\82\01\00", device = "", executor_type = "", f = @__inference_tf_nn_depthwise_conv3d_11600} : (tensor<3x13x17x6x7xf32>, tensor<5x5x2x7x2xf32>) -> tensor<3x13x17x6x14xf32>
    %1 = "tf.Identity"(%0) {device = ""} : (tensor<3x13x17x6x14xf32>) -> tensor<3x13x17x6x14xf32>
    return %1 : tensor<3x13x17x6x14xf32>
  }
  func @__inference_tf_nn_depthwise_conv3d_11600(%arg0: tensor<3x13x

In [42]:
class MyDepthwiseConv3DDescriptor:
    """Shape descriptor and reference implementations of a 3-D depthwise convolution.

    The descriptor is built from *shapes* only; the actual tensors are passed
    to ``compute``.

    Args:
        x: input shape ``(xB, xD, xH, xW, xC)``.
        w: filter shape ``(kD, kH, kW, xC, cMult)``.
        strides: sequence of 5 ints; the first and last entries must be 1,
            the three middle ones are the depth, height and width strides.
        padding: ``'SAME'``, ``'VALID'`` or an explicit list
            ``[[0, 0], [before_d, after_d], [before_h, after_h], [before_w, after_w], [0, 0]]``.

    Attributes:
        padding: always the explicit 5x2 list once ``__init__`` has run.
        output_shape: ``[xB, yD, yH, yW, xC * cMult]``.

    Raises:
        AssertionError: on malformed strides, explicit padding or mismatching
            channel counts.
        Exception: if ``padding`` is a string other than 'SAME' / 'VALID'.
    """

    def __init__(self, x, w, strides, padding):
        self.x = x
        self.w = w
        self.strides = strides
        self.padding = padding
        self.output_shape = None

        # Check strides
        assert len(strides) == 5
        assert strides[0] == 1
        assert strides[-1] == 1

        # Compute real paddings
        self.get_explicit_padding()
        print('Paddings:', self.padding)

        # Compute output shape
        self.get_output_shape()
        print('Output shape:', self.output_shape)

    @staticmethod
    def _same_padding(in_size, k_size, stride):
        """Return ``[before, after]`` 'SAME' padding for one spatial axis."""
        remainder = in_size % stride
        total = max(k_size - (stride if remainder == 0 else remainder), 0)
        before = total // 2
        # When the total is odd the extra element goes after the data.
        return [before, total - before]

    def get_explicit_padding(self):
        """Replace ``self.padding`` by its explicit 5x2 list form."""
        padding = self.padding

        if not isinstance(padding, str):
            # explicit padding
            # [[0, 0], [before_d, after_d], [before_h, after_h], [before_w, after_w], [0, 0]]
            assert len(padding) == 5
            for pair in padding: assert len(pair) == 2
            for amount in padding[0]: assert amount == 0
            for amount in padding[-1]: assert amount == 0
            spatial = [[padding[axis][0], padding[axis][1]] for axis in (1, 2, 3)]

        elif padding == 'SAME':
            # Axes 1..3 of the input pair with axes 0..2 of the filter.
            spatial = [
                self._same_padding(self.x[axis], self.w[axis - 1], self.strides[axis])
                for axis in (1, 2, 3)
            ]

        elif padding == 'VALID':
            spatial = [[0, 0], [0, 0], [0, 0]]

        else:
            raise Exception('Unknown padding')

        self.padding = [[0, 0]] + spatial + [[0, 0]]

    def get_output_shape(self):
        """Compute ``self.output_shape`` from the shapes, strides and explicit padding."""
        pad_b0, pad_a0 = self.padding[1]
        pad_b1, pad_a1 = self.padding[2]
        pad_b2, pad_a2 = self.padding[3]
        sd, sh, sw = self.strides[1:-1]
        xB, xDepth, xHeight, xWidth, xC = self.x
        kDepth, kHeight, kWidth, _, cMult = self.w

        # Input and filter must agree on the number of input channels.
        assert self.x[4] == self.w[3]

        # Padded input dimensions
        xPDepth = xDepth + pad_b0 + pad_a0
        xPHeight = xHeight + pad_b1 + pad_a1
        xPWidth = xWidth + pad_b2 + pad_a2

        yDepth = (xPDepth - kDepth) // sd + 1
        yHeight = (xPHeight - kHeight) // sh + 1
        yWidth = (xPWidth - kWidth) // sw + 1
        yC = xC * cMult

        self.output_shape = [xB, yDepth, yHeight, yWidth, yC]

    # compute using tf version directly
    def compute_tf(self, x, w):
        """Run ``tf_nn_depthwise_conv3d`` on the already padded ``x`` with 'VALID' padding."""
        y = tf_nn_depthwise_conv3d(input=x, filter=w, strides=self.strides, padding='VALID')
        return y.numpy()

    # compute using conv3d operation
    def compute_with_conv3d(self, x, w):
        """Compute the result with one ``tf.nn.conv3d`` per input channel.

        A different convolution with ``cMult`` filters is applied to every
        channel of the (already padded) input and the per-channel results are
        concatenated along the last axis.
        """
        xC = self.x[4]

        res = []
        for c in range(xC):
            subX = x[:, :, :, :, c:c+1]  # (xB, xD, xH, xW, xC) => (xB, xD, xH, xW, 1)
            subW = w[:, :, :, c:c+1, :]  # (kD, kH, kW, xC, cMult) => (kD, kH, kW, 1, cMult)
            y = tf.nn.conv3d(input=subX, filters=subW, strides=self.strides, padding='VALID')
            res.append(y.numpy())

        return np.concatenate(res, axis=-1)

    # pad input tensor using paddings infos
    def pad_input(self, x):
        """Return ``x`` padded (``np.pad`` defaults) according to ``self.padding``."""
        return np.pad(x, self.padding)

    # 9 for loop using depthwise conv3d formula
    def compute_naive(self, x, w):
        """Compute the result on the already padded ``x`` with 9 nested loops."""
        sd, sh, sw = self.strides[1:-1]
        xB, xC = self.x[0], self.x[4]
        kDepth, kHeight, kWidth, _, cMult = self.w
        yDepth, yHeight, yWidth, yC = self.output_shape[1:]

        res = np.zeros((xB, yDepth, yHeight, yWidth, yC))

        for b in range(xB):
            for h in range(yDepth):
                for i in range(yHeight):
                    for j in range(yWidth):
                        for k in range(xC):
                            for q in range(cMult):
                                val = 0
                                for dh in range(kDepth):
                                    for di in range(kHeight):
                                        for dj in range(kWidth):
                                            val += x[b, sd*h + dh, sh*i + di, sw*j + dj, k] * w[dh, di, dj, k, q]
                                # Output channels are grouped by input channel.
                                res[b, h, i, j, k * cMult + q] = val

        return res

    # flatten and extend the input to run depthwise conv3d as a matmul
    # this is the same transformation than for conv3d
    # turned from (xB, xD, xH, xW, xC) into (xB*yD*yH*yW, kD*kH*kW*xC)
    def matmul_flatten_x(self, x):
        """Unfold the padded input into one row of ``kD*kH*kW*xC`` values per output voxel."""
        sd, sh, sw = self.strides[1:-1]
        xB, xC = self.x[0], self.x[4]
        kDepth, kHeight, kWidth = self.w[0], self.w[1], self.w[2]
        yDepth, yHeight, yWidth = self.output_shape[1:4]

        # divide 1 DIM into 4 to build the mat more easily
        new_x = np.zeros((xB, yDepth, yHeight, yWidth, kDepth*kHeight*kWidth*xC))

        for b in range(xB):
            for h in range(yDepth):
                for i in range(yHeight):
                    for j in range(yWidth):
                        # extract x slice used in the sum of naive implem to compute y[b, h, i, j, :]
                        new_x[b, h, i, j] = x[b, sd*h:sd*h+kDepth, sh*i:sh*i+kHeight, sw*j:sw*j+kWidth, :].ravel()

        return new_x.reshape(xB * yDepth * yHeight * yWidth, kDepth * kHeight * kWidth * xC)

    # flatten and extend the filter to run depthwise conv3d as a matmul
    # turned from (kD, kH, kW, xC, cMult) into (kD * kH * kW * xC, xC * cMult)
    # It returns a sparse matrix: each column holds only kD*kH*kW non-zero
    # candidates out of kD*kH*kW*xC rows, i.e. a fraction (xC-1)/xC of it is 0s.
    # That's because every channel of the input gets multiplied by different filters
    # So only the right ones are passed
    def matmul_flatten_k(self, w):
        """Expand the filter into the ``(kD*kH*kW*xC, xC*cMult)`` matmul operand."""
        kDepth, kHeight, kWidth, _, cMult = self.w
        xC = self.x[4]

        # divide into many dims to build more easily
        new_w = np.zeros((kDepth*kHeight*kWidth, xC, xC, cMult))

        for k in range(xC):
            for q in range(cMult):
                # Only the "diagonal" (input channel k -> output group k) is filled.
                new_w[:, k, k, q] = w[:, :, :, k, q].ravel()

        return new_w.reshape(kDepth*kHeight*kWidth*xC, xC*cMult)

    # Compute by turning the conv into a matmul op
    def compute_matmul(self, x, w):
        """Compute the result on the already padded ``x`` as a single matrix product."""
        xB = self.x[0]
        yDepth, yHeight, yWidth, yC = self.output_shape[1:]

        # (xB, xD, xH, xW, xC) -> (xB*yD*yH*yW, kD*kH*kW*xC)
        x = self.matmul_flatten_x(x)

        # (kD, kH, kW, xC, cMult) -> (kD * kH * kW * xC, xC * cMult)
        w = self.matmul_flatten_k(w)

        # [(xB*yD*yH*yW, kD*kH*kW*xC), (kD * kH * kW * xC, xC * cMult)] -> (xB*yD*yH*yW, xC * cMult)
        y = np.matmul(x, w)

        # (xB*yD*yH*yW, xC * cMult) -> (xB, yD, yH, yW, yC)
        return y.reshape(xB, yDepth, yHeight, yWidth, yC)

    def compute(self, x, w, method='matmul'):
        """Pad ``x`` and run the depthwise convolution with the chosen backend.

        Args:
            x: input array whose shape equals the descriptor's input shape.
            w: filter array whose shape equals the descriptor's filter shape.
            method: one of ``'matmul'`` (default, the previous hard-coded
                behaviour), ``'naive'``, ``'conv3d'`` or ``'tf'``.

        Returns:
            Array of shape ``self.output_shape``.

        Raises:
            AssertionError: if an array shape does not match the descriptor.
            ValueError: if ``method`` is not a known backend.
        """
        assert list(x.shape) == list(self.x)
        assert list(w.shape) == list(self.w)

        backends = {
            'tf': self.compute_tf,
            'conv3d': self.compute_with_conv3d,
            'naive': self.compute_naive,
            'matmul': self.compute_matmul,
        }
        if method not in backends:
            raise ValueError('Unknown method %r, expected one of %s' % (method, sorted(backends)))

        x = self.pad_input(x)
        y = backends[method](x, w)

        assert list(y.shape) == list(self.output_shape)
        return y
        
        
def my_depthwise_conv3d(x, w, strides, padding):
    """Run the NumPy 3-D depthwise convolution of ``x`` by filter ``w``.

    A ``MyDepthwiseConv3DDescriptor`` is built from the two array shapes, the
    strides and the padding, then used to compute and return the result.
    """
    descriptor = MyDepthwiseConv3DDescriptor(x.shape, w.shape, strides, padding)
    result = descriptor.compute(x, w)
    return result

# Random test case: input of shape (3, 13, 17, 6, 7) and filter of shape (5, 5, 2, 7, 2).
x = np.random.randn(3, 13, 17, 6, 7)
w = np.random.randn(5, 5, 2, 7, 2)

# Reference result from the TensorFlow-based version versus the NumPy re-implementation.
y_ref = tf_depthwise_conv_3d(x, w).numpy()
y = my_depthwise_conv3d(x, w, strides=[1, 1, 1, 1, 1], padding='SAME')

# Sum of squared differences between both results (expected to be ~0).
print(arr_diff(y_ref, y))

Paddings: [[0, 0], [2, 2], [2, 2], [0, 1], [0, 0]]
Output shape: [3, 13, 17, 6, 14]
1.1480670837803592e-25
