In [1]:
import numpy as np
import time

In [2]:
def get_input_shape(input_matrix_shape, filter_matrix_shape):
    matrix_dim = len(input_matrix_shape)
    filter_dim = len(filter_matrix_shape)
    
    if matrix_dim == 3:
        input_depth, input_height, input_width = input_matrix_shape
    else:
        input_height, input_width = input_matrix_shape
        input_depth = 1
    
    if filter_dim == 4:
        filter_depth = filter_matrix_shape[0]
        filter_height = filter_matrix_shape[2]
        filter_width = filter_matrix_shape[3]
    elif filter_dim == 3:
        filter_depth, filter_height, filter_width = filter_matrix_shape
    else:
        filter_height, filter_width = filter_matrix_shape
        filter_depth = 1
    
    return input_depth, input_height, input_width, filter_depth, filter_height, filter_width

### Функция свертки

In [4]:
def conv2d(input_matrix, filter_matrix, stride=1):
    input_depth, input_height, input_width, \
        filter_depth, filter_height, filter_width = get_input_shape(input_matrix.shape, filter_matrix.shape)
    
    output_height = (input_height - filter_height) // stride + 1
    output_width = (input_width - filter_width) // stride + 1
    output_depth = input_depth - filter_depth + 1
    
    output_matrix = np.zeros((output_depth, output_height, output_width))
    
    for d in range(output_depth):
        for i in range(0, output_height, stride):
            for j in range(0, output_width, stride):
                if input_depth != 1:
                    frame = input_matrix[d:d+filter_depth, i:i+filter_height, j:j+filter_width]
                else:
                    frame = input_matrix[i:i+filter_height, j:j+filter_width]
                output_matrix[d, i // stride, j // stride] = np.sum(frame * filter_matrix)
    
    return output_matrix


### Depthwise - separable сверточный слой

In [6]:
def depthwise_conv2d(input_matrix, filter_matrix, stride=1):
    input_depth, input_height, input_width, \
        filter_depth, filter_height, filter_width = get_input_shape(input_matrix.shape, filter_matrix.shape)
    
    output_height = (input_height - filter_height) // stride + 1
    output_width = (input_width - filter_width) // stride + 1
    
    output_matrix = np.zeros((input_depth, output_height, output_width))
    
    for d in range(input_depth):
        for i in range(0, output_height, stride):
            for j in range(0, output_width, stride):
                frame = input_matrix[d, i:i+filter_height, j:j+filter_width]
                output_matrix[d, i // stride, j // stride] = np.sum(frame * filter_matrix)
    
    return output_matrix

In [7]:
def pointwise_conv2d(input_matrix, filter_matrix, stride=1):
    input_depth, input_height, input_width, \
        filter_depth, filter_height, filter_width = get_input_shape(input_matrix.shape, filter_matrix.shape)
    
    output_depth = filter_depth
    output_height = (input_height - filter_height) // stride + 1
    output_width = (input_width - filter_width) // stride + 1
    
    output_matrix = np.zeros((output_depth, output_height, output_width))
    
    for m in range(output_depth):
        for i in range(0, output_height, stride):
            for j in range(0, output_width, stride):
                for d in range(input_depth):
                    output_matrix[m, i, j] += input_matrix[d, i * stride, j * stride] * filter_matrix[m, d, 0, 0]
    
    return output_matrix

In [8]:
def depthwise_separable_conv2d(input_matrix, depthwise_filter, pointwise_filter, stride=1):
    depthwise_output = depthwise_conv2d(input_matrix, depthwise_filter, stride)
    output_matrix = pointwise_conv2d(depthwise_output, pointwise_filter, stride)
    return output_matrix

### Сравнение результатов

In [10]:
input_matrix = np.random.rand(16, 256, 256)
conv_filter = np.random.rand(3, 3, 3)
depthwise_filter = np.random.rand(1, 3, 3)
pointwise_filter = np.random.rand(1, 16, 1, 1)
stride = 1

start_time = time.perf_counter()
conv_output = conv2d(input_matrix, conv_filter, stride=stride)
conv_time = time.perf_counter() - start_time

start_time = time.perf_counter()
separable_output = depthwise_separable_conv2d(input_matrix, depthwise_filter, pointwise_filter, stride=stride)
separable_time = time.perf_counter() - start_time

In [11]:
print("Direct convolutional 2d time:", conv_time)
print("Depthwise - separable convolution time:", separable_time)

Direct convolutional 2d time: 8.10463140000138
Depthwise - separable convolution time: 9.902731300000596


<br>Ожидаемого ускорения не просходит из-за того, что функция np.sum() в NumPy оптимизирована для быстрого вычисления суммы элементов массива<br>