Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,7 @@ jobs:
# Custom operator tests
PYTHON_EXECUTABLE=python bash backends/vulkan/test/custom_ops/build_and_run.sh add
./cmake-out/backends/vulkan/test/custom_ops/q8csw_linear
./cmake-out/backends/vulkan/test/custom_ops/q8csw_conv2d

nxp-build-test:
name: nxp-build-test
Expand Down
94 changes: 94 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/col2im.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#version 450 core

#define PRECISION ${PRECISION}
#define VEC4_T ${texel_load_type(DTYPE, OUTPUT_STORAGE)}
#define T ${texel_load_component_type(DTYPE, OUTPUT_STORAGE)}

$if OUTPUT_STORAGE == "buffer":
#define OUTPUT_BUFFER
$if INPUT_STORAGE == "buffer":
#define INPUT_BUFFER

// Tile extents handled by a single shader invocation. TILE_M bounds the row
// loop in load_matrix_tile() below.
// NOTE(review): TILE_N, TILE_K and the *4 variants are not referenced in this
// file; presumably the included headers consume them, with the *4 values being
// the same extents counted in groups of 4 - confirm.
#define TILE_M4 1
#define TILE_N4 1
#define TILE_K4 1

#define TILE_M 4
#define TILE_N 4
#define TILE_K 4

${define_required_extensions(DTYPE)}

layout(std430) buffer;

#include "conv2d_common.glslh"

// Destination image written by this shader.
// NOTE(review): the "w" / "r" arguments look like access qualifiers
// (write-only / read-only) - confirm against the codegen helper.
${layout_declare_tensor(B, "w", "t_output", DTYPE, OUTPUT_STORAGE, is_scalar_array=False)}
// Source matrix read by load_matrix_tile().
${layout_declare_tensor(B, "r", "t_input", DTYPE, INPUT_STORAGE, is_scalar_array=False)}

// Sizes of the convolution output image
${layout_declare_ubo(B, "ivec4", "output_sizes")}
// Sizes of the convolution input image
${layout_declare_ubo(B, "ivec4", "input_sizes")}
// Sizes of the im2col matrix of the convolution output
// main() compares the column offset against .x and the row offset against .y.
${layout_declare_ubo(B, "ivec4", "matrix_sizes")}

// Convolution parameters; the Conv2DParams struct comes from
// conv2d_common.glslh, included above.
${layout_declare_ubo(B, "Conv2DParams", "conv2d_params")}

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

#include "conv2d_fp_im2col_block_store.glslh"

#ifdef INPUT_BUFFER

/*
 * Buffer-input variant. Fills `tile` with TILE_M consecutive rows of the
 * input matrix starting at row `m_start`, reading one t_input element per row
 * at column block `n4`. The buffer is addressed as row * N4 + n4, i.e. with
 * N4 elements per matrix row.
 */
void load_matrix_tile(
    out FPOutTile tile,
    const int n4,
    const int m_start,
    const int N4) {
  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
    const int elem_idx = (m_start + row) * N4 + n4;
    tile.data[row][0] = t_input[elem_idx];
  }
}

#else // INPUT_TEXTURE

/*
 * Texture-input variant. Fills `tile` with TILE_M consecutive rows of the
 * input matrix starting at row `m_start`, fetching the texel at
 * (n4, row, 0) for each row. `N4` is unused here; it is kept so both variants
 * share one signature.
 */
void load_matrix_tile(
    out FPOutTile tile,
    const int n4,
    const int m_start,
    const int N4) {
  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
    const ivec3 texel_pos = ivec3(n4, m_start + row, 0);
    tile.data[row][0] = texelFetch(t_input, texel_pos, 0);
  }
}

#endif // INPUT_BUFFER

/*
 * Entry point. Each invocation copies one 4 wide x 4 high block of the input
 * matrix into the output image.
 */
void main() {
  // Block coordinates of this invocation, in units of 4 matrix elements.
  const int n4 = int(gl_GlobalInvocationID.x);
  const int m4 = int(gl_GlobalInvocationID.y);

  // Element offsets of the block's first column and first row.
  const int n = mul_4(n4);
  const int m = mul_4(m4);

  // Invocations whose block starts outside the matrix have nothing to do.
  const bool block_in_matrix = n < matrix_sizes.x && m < matrix_sizes.y;
  if (!block_in_matrix) {
    return;
  }

  // Row pitch of the input matrix, counted in groups of 4 columns.
  const int N4 = div_4(matrix_sizes.x);

  FPOutTile tile;
  load_matrix_tile(tile, n4, m, N4);
  write_im2col_tile_as_image(tile, n4, m);
}
19 changes: 19 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/col2im.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Shader variant specification for the col2im shader template.
col2im:
# Default values for the template parameters (DTYPE, OUTPUT_STORAGE,
# INPUT_STORAGE) referenced by col2im.glsl.
parameter_names_with_default_values:
DTYPE: float
OUTPUT_STORAGE: texture3d
INPUT_STORAGE: buffer
# NOTE(review): presumably every shader variant below is generated once per
# DTYPE value listed here - confirm against the shader codegen.
generate_variant_forall:
DTYPE:
- VALUE: half
- VALUE: float
shader_variants:
# Uses the defaults above: texture3d output, buffer input.
- NAME: col2im_texture3d_buffer
# Overrides INPUT_STORAGE so the input is also a texture3d.
- NAME: col2im_texture3d_texture3d
INPUT_STORAGE: texture3d
51 changes: 51 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/conv2d_common.glslh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#ifndef CONV2D_COMMON_GLSLH
#define CONV2D_COMMON_GLSLH

#include "common.glslh"

// Parameters describing a 2D convolution. Shaders bind an instance of this
// struct as a uniform named conv2d_params.
// NOTE(review): presumably mirrors a host-side struct, in which case field
// order and types must stay in sync with it - confirm before reordering.
struct Conv2DParams {
// Per-axis (x, y) convolution window settings.
ivec2 kernel_size;
ivec2 stride;
ivec2 padding;
ivec2 dilation;
int groups;
int out_channels_per_group;
// Number of input channels in each group; the im2col index helpers use it to
// split a column index into channel and kernel-window position.
int in_channels_per_group;
// NOTE(review): the K fields appear to describe the im2col matrix width, per
// group and in total, with "logical" being the unpadded value and "K4" the
// count in groups of 4 - confirm against the host code that fills them in.
int logical_K_per_group;
int K_per_group;
int K4_per_group;
int logical_K;
int K;
int K4;
};

#ifdef DEBUG_MODE

/*
 * Debug helper (only compiled when DEBUG_MODE is defined): prints every field
 * of `params` via debugPrintfEXT, one field per line.
 *
 * The trailing logical_K / K / K4 lines cover the struct fields that were
 * previously missing from the dump, so the output now reflects the full
 * Conv2DParams contents.
 */
void printConv2DParams(const Conv2DParams params) {
  debugPrintfEXT("Conv2DParams: \\n");
  debugPrintfEXT(
      " kernel_size: %d, %d\\n", params.kernel_size.x, params.kernel_size.y);
  debugPrintfEXT(" stride: %d, %d\\n", params.stride.x, params.stride.y);
  debugPrintfEXT(" padding: %d, %d\\n", params.padding.x, params.padding.y);
  debugPrintfEXT(" dilation: %d, %d\\n", params.dilation.x, params.dilation.y);
  debugPrintfEXT(" groups: %d\\n", params.groups);
  debugPrintfEXT(
      " out_channels_per_group: %d\\n", params.out_channels_per_group);
  debugPrintfEXT(
      " in_channels_per_group: %d\\n", params.in_channels_per_group);
  debugPrintfEXT(" logical_K_per_group: %d\\n", params.logical_K_per_group);
  debugPrintfEXT(" K_per_group: %d\\n", params.K_per_group);
  debugPrintfEXT(" K4_per_group: %d\\n", params.K4_per_group);
  debugPrintfEXT(" logical_K: %d\\n", params.logical_K);
  debugPrintfEXT(" K: %d\\n", params.K);
  debugPrintfEXT(" K4: %d\\n", params.K4);
}

#endif // DEBUG_MODE

#endif // CONV2D_COMMON_GLSLH
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#ifndef CONV2D_FP_IM2COL_BLOCK
#define CONV2D_FP_IM2COL_BLOCK

/*
* Defines utilities to convert between (col, row) indices of an im2col matrix
* and 4-dimension tensor indices of image tensors.
*
* Requires:
* - output_sizes to be defined in the shader layout, corresponding to the sizes
* of the output image of the convolution op.
* - input_sizes to be defined in the shader layout, corresponding to the sizes
* of the input image of the convolution op.
* - conv2d_params to be defined in the shader layout
*/

#extension GL_EXT_control_flow_attributes : require

#include "common.glslh"
#include "conv2d_common.glslh"

// Position of an element in the im2col matrix.
struct Im2ColMatrixIdx {
// Flattened row index; unwrap_m() decomposes it into an output image position.
int row;
// Column index; im2col_tidx_to_output_tidx() uses it as the channel index.
int col;
// Relevant for grouped convolution. This indicates the column index relative
// to the first column in the group.
int col_idx_in_group;
// Index of the group; scaled by in_channels_per_group to find the group's
// first input channel.
int group_idx;
};

/*
 * Decomposes the flattened im2col row index `m` into a position in the
 * convolution output image: x in component 0, y in component 1 and the
 * outermost index in component 3. The channel component (2) is zeroed and is
 * expected to be filled in by the caller.
 */
void unwrap_m(out TensorIndex4D out_tidx_base, const int m) {
  const int out_w = output_sizes.x;
  const int out_h = output_sizes.y;

  out_tidx_base.data = ivec4(
      m % out_w,
      (m / out_w) % out_h,
      0, // channels: placeholder, set later on
      m / (out_h * out_w));
}

/*
 * Converts an im2col matrix position to the matching index in the output
 * image: the row supplies the spatial / outermost components via unwrap_m()
 * and the column supplies the channel.
 */
void im2col_tidx_to_output_tidx(
    out TensorIndex4D output_tidx,
    const Im2ColMatrixIdx im2col_tidx) {
  // Fills components 0, 1 and 3, and zeroes the channel component.
  unwrap_m(output_tidx, im2col_tidx.row);
  // The matrix column is the channel index.
  output_tidx.data[2] = im2col_tidx.col;
}

/*
 * Maps a position in the im2col matrix to the 4D index of the input image
 * element it is drawn from, taking grouped convolution into account.
 *
 * The row selects the output position (see unwrap_m). The column index within
 * the group is split, fastest-varying first, into: channel within the group,
 * kernel x offset, kernel y offset. The returned x / y are not clamped, so
 * they can lie outside the input image when padding is non-zero.
 */
void im2col_idx_to_input_tidx(
    out TensorIndex4D input_tidx,
    const Im2ColMatrixIdx im2col_idx) {
  TensorIndex4D out_pos;
  unwrap_m(out_pos, im2col_idx.row);

  const int ic_per_group = conv2d_params.in_channels_per_group;
  const int col_in_group = im2col_idx.col_idx_in_group;

  // Position inside the convolution window, derived from the column index
  // relative to the start of the group.
  const int channel_within_group = col_in_group % ic_per_group;
  const ivec2 kernel_pos = ivec2(
      (col_in_group / ic_per_group) % conv2d_params.kernel_size.x,
      col_in_group / (ic_per_group * conv2d_params.kernel_size.x));

  // Absolute input channel: first channel of the group plus the offset.
  const int channel_idx =
      im2col_idx.group_idx * ic_per_group + channel_within_group;

  // Input coordinates reached from the output position through stride,
  // padding and dilation, computed for x and y together.
  const ivec2 input_xy = out_pos.data.xy * conv2d_params.stride -
      conv2d_params.padding + kernel_pos * conv2d_params.dilation;

  input_tidx.data = ivec4(input_xy, channel_idx, out_pos.data.w);
}

// 4x4 block of the im2col matrix, stored as four VEC4_T vectors.
// NOTE(review): presumably each array entry holds one row of the block (four
// adjacent columns) - confirm against the load/store helpers.
struct FPIm2ColBlock {
VEC4_T data[4];
};

#endif // CONV2D_FP_IM2COL_BLOCK
Loading
Loading