Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions bench/ConvUnifiedBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ vector<conv_param_t<3>> shapes_3d = {
// MB, IC, OC, {IT, IH, IW}, G, {KT, KH, KW}, {stride_t, stride_h, stride_w},
// {pad_prev, pad_h_top, pad_w_left, pad_next, pad_h_bottom, pad_w_right}
// Regular
conv_param_t<3>(1, 64, 64, {32, 56, 56}, 1, {3, 3, 3}, {1, 1, 1}, {1, 1, 1, 1, 1, 1}),
conv_param_t<3>(1, 64, 64, {8, 14, 14}, 1, {3, 3, 3}, {1, 1, 1}, {1, 1, 1, 1, 1, 1}),
// Depthwise
conv_param_t<3>(1, 64, 64, {32, 56, 56}, 64, {3, 3, 3}, {1, 1, 1}, {1, 1, 1, 1, 1, 1})
conv_param_t<3>(1, 64, 64, {8, 14, 14}, 64, {3, 3, 3}, {1, 1, 1}, {1, 1, 1, 1, 1, 1})
};

template <int SPATIAL_DIM, typename Acc_t>
Expand Down Expand Up @@ -109,6 +109,9 @@ void performance_test(const vector<conv_param_t<SPATIAL_DIM>>& shapes) {
aligned_vector<int8_t> Bint8(
kernel_dim * conv_p.IC * (conv_p.OC / conv_p.G));

aligned_vector<int8_t> Bint8_tr(
kernel_dim * conv_p.IC * (conv_p.OC / conv_p.G));

int im_out_dim = accumulate(
conv_p.OUT_DIM.begin(), conv_p.OUT_DIM.end(), 1, multiplies<int>());
aligned_vector<int32_t> Cint32_ref(conv_p.MB * im_out_dim * conv_p.OC);
Expand All @@ -131,14 +134,14 @@ void performance_test(const vector<conv_param_t<SPATIAL_DIM>>& shapes) {
randFill(C_multiplier, 0.1234f / 2, 0.1234f * 3 / 2);
int32_t C_zero_point = 5;

aligned_vector<float> Bfp32(Bint8.begin(), Bint8.end());

// reference implementation
// conv_ref expects weights to be in G (R S C/G) K/G
transposeConvWeights<SPATIAL_DIM>(conv_p, Bint8.data(), Bint8_tr.data());
conv_ref(
conv_p,
Aint8.data(),
Aint8_zero_point,
Bint8.data(),
Bint8_tr.data(),
Cint32_ref.data());

// matrix dimensions after im2col
Expand All @@ -161,7 +164,7 @@ void performance_test(const vector<conv_param_t<SPATIAL_DIM>>& shapes) {
KDimPerGroup,
OC_per_G,
OC_per_G,
Bint8.data() + g * KDimPerGroup * OC_per_G,
Bint8_tr.data() + g * KDimPerGroup * OC_per_G,
Bint8_zero_point.data(),
col_offsets.data() + g * OC_per_G,
conv_p.OC);
Expand Down
6 changes: 6 additions & 0 deletions include/fbgemm/Fbgemm.h
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,12 @@ class FBGEMM_API PackWeightsForConv {
return W_gconv_packed_;
}

/**
 * @brief Unpack the packed weight matrix into origin_buf (used during
 * serialization to recover the original weight matrix).
 */
void unpack(T* origin_buf);

private:
// Packed weights if we use im2col based convolution implementation
std::shared_ptr<PackBMatrix<T, accT>> W_im2col_packed_;
Expand Down
21 changes: 18 additions & 3 deletions src/PackWeightsForConv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,18 @@ PackWeightsForConv<SPATIAL_DIM, T, accT>::PackWeightsForConv(
W_dw_3D_packed_ = nullptr;
W_gconv_packed_ =
std::make_shared<PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>>(
matrix_op_t::NoTranspose, conv_p, sdata, nullptr);
matrix_op_t::Transpose, conv_p, sdata, nullptr);
break;
}
case optimized_conv_t::im2col: {
int NDim = conv_p.OC / conv_p.G;
int KDim = conv_p.K[0] * conv_p.K[1] * conv_p.IC;
W_im2col_packed_ = std::make_shared<PackBMatrix<T, accT>>(
matrix_op_t::NoTranspose,
matrix_op_t::Transpose,
KDim,
NDim,
sdata,
NDim,
KDim / conv_p.G,
nullptr,
conv_p.G,
blocking_params);
Expand All @@ -65,6 +65,21 @@ PackWeightsForConv<SPATIAL_DIM, T, accT>::PackWeightsForConv(
} // switch
}

template <int SPATIAL_DIM, typename T, typename accT>
void PackWeightsForConv<SPATIAL_DIM, T, accT>::unpack(T* origin_buf) {
  // Delegate to whichever packing backend currently owns the weights.
  // Exactly one of the four members is expected to be non-null (set by the
  // constructor based on the chosen convolution implementation); the checks
  // below preserve the original precedence order.
  if (W_dw_2D_packed_) {
    W_dw_2D_packed_->unpack(origin_buf);
    return;
  }
  if (W_dw_3D_packed_) {
    W_dw_3D_packed_->unpack(origin_buf);
    return;
  }
  if (W_gconv_packed_) {
    W_gconv_packed_->unpack(origin_buf);
    return;
  }
  if (W_im2col_packed_) {
    W_im2col_packed_->unpack(origin_buf);
    return;
  }
  // Reaching this point means the constructor failed to set up any backend.
  assert(false && "At least one packed weights object should exist");
}

template class PackWeightsForConv<2, int8_t, int32_t>;
template class PackWeightsForConv<3, int8_t, int32_t>;

Expand Down
54 changes: 40 additions & 14 deletions src/RefImplementations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,7 @@ void cblas_sgemm_ref(
int ldb,
float beta,
float* Cfp32,
int ldc
) {
int ldc) {
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
float sum = 0;
Expand All @@ -204,7 +203,6 @@ void cblas_sgemm_ref(
}
}


void row_offsets_u8acc32_ref(
int M,
int K,
Expand Down Expand Up @@ -542,21 +540,49 @@ void transposeConvWeights(
const conv_param_t<SPATIAL_DIM>& conv_p,
const std::int8_t* src,
std::int8_t* dest) {
assert(SPATIAL_DIM == 2 && "Only 2D supported currently");
int R = conv_p.K[0];
int S = conv_p.K[1];
int G = conv_p.G;
int IC_per_G = conv_p.IC / conv_p.G;
int OC_per_G = conv_p.OC / conv_p.G;

// Transforms weights from G K/G (R S C/G) to G (R S C/G) K/G format.
for (int r = 0; r < R; ++r) {
for (int s = 0; s < S; ++s) {
for (int k = 0; k < OC_per_G; ++k) {
for (int g = 0; g < G; ++g) {
for (int c = 0; c < IC_per_G; ++c) {
dest[(((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k] =
src[(((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c];
assert(
(SPATIAL_DIM == 3 || SPATIAL_DIM == 2) &&
"Only 2D and 3D convolutions are supported");
if (SPATIAL_DIM == 2) {
int R = conv_p.K[0];
int S = conv_p.K[1];
// Transforms weights from G K/G (R S C/G) to G (R S C/G) K/G format.
for (int r = 0; r < R; ++r) {
for (int s = 0; s < S; ++s) {
for (int k = 0; k < OC_per_G; ++k) {
for (int g = 0; g < G; ++g) {
for (int c = 0; c < IC_per_G; ++c) {
dest[(((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k] =
src[(((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c];
}
}
}
}
}
} else {
// Transforms weights from G K/G (T R S C/G) to G (T R S C/G) K/G format.
int T = conv_p.K[0];
int R = conv_p.K[1];
int S = conv_p.K[2];
for (int t = 0; t < T; ++t) {
for (int r = 0; r < R; ++r) {
for (int s = 0; s < S; ++s) {
for (int k = 0; k < OC_per_G; ++k) {
for (int g = 0; g < G; ++g) {
for (int c = 0; c < IC_per_G; ++c) {
dest
[((((g * T + t) * R + r) * S + s) * IC_per_G + c) *
OC_per_G +
k] =
src[((((g * OC_per_G + k) * T + t) * R + r) * S + s) *
IC_per_G +
c];
}
}
}
}
}
Expand Down
63 changes: 60 additions & 3 deletions test/UniConvPackingTest.cc → test/UniConvTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ using namespace fbgemm;
namespace {

// tuple represents MB, IC, OC, IT, IH, IW, KH/KW, stride, pad
class convPackingTest
class uniConvTest
: public testing::TestWithParam<
tuple<int, int, int, int, int, int, int, int, int, int>> {};

}; // namespace

INSTANTIATE_TEST_CASE_P(
InstantiationName,
convPackingTest,
uniConvTest,
::testing::Combine(
::testing::ValuesIn({1, 2}), // MB
::testing::ValuesIn({16, 32}), // IC
Expand All @@ -47,7 +47,7 @@ INSTANTIATE_TEST_CASE_P(
/**
* Test for conv packing
*/
TEST_P(convPackingTest, packingTest) {
TEST_P(uniConvTest, packingTest) {
int MB, IC, OC, IT, IH, IW, G, kernel, stride, pad;
tie(MB, IC, OC, IT, IH, IW, G, kernel, stride, pad) = GetParam();

Expand Down Expand Up @@ -146,3 +146,60 @@ TEST_P(convPackingTest, packingTest) {
}
}
}

/**
* Test for packing/unpacking
*/
/**
 * Test for packing/unpacking: pack the weights for a 2D and a 3D convolution,
 * unpack them again, and verify the round trip reproduces the original data.
 */
TEST_P(uniConvTest, packUnpackTest) {
  int MB, IC, OC, IT, IH, IW, G, kernel, stride, pad;
  tie(MB, IC, OC, IT, IH, IW, G, kernel, stride, pad) = GetParam();

  // ---- 2D convolution ----
  conv_param_t<2> conv_p_2d(
      MB,
      IC,
      OC,
      {IH, IW},
      G,
      {kernel, kernel},
      {stride, stride},
      {pad, pad, pad, pad});

  int kernel_dim_2d = kernel * kernel;

  aligned_vector<int8_t> Bint8_2d(
      kernel_dim_2d * conv_p_2d.IC * (conv_p_2d.OC / conv_p_2d.G));
  aligned_vector<int8_t> Bint8_2d_unpacked(
      kernel_dim_2d * conv_p_2d.IC * (conv_p_2d.OC / conv_p_2d.G));

  // Fill the weights with distinct deterministic values: an all-zero buffer
  // (the vector's default contents) would make the round-trip check pass
  // trivially even if pack/unpack permuted or dropped elements.
  for (size_t i = 0; i < Bint8_2d.size(); ++i) {
    Bint8_2d[i] = static_cast<int8_t>(static_cast<int>(i % 255) - 127);
  }

  PackWeightsForConv<2> packedB_2D(conv_p_2d, Bint8_2d.data());

  packedB_2D.unpack(Bint8_2d_unpacked.data());

  ASSERT_EQ(Bint8_2d, Bint8_2d_unpacked)
      << "Original and unpacked data elements are not the same [2D]";

  // ---- 3D convolution ----
  conv_param_t<3> conv_p_3d(
      MB,
      IC,
      OC,
      {IT, IH, IW},
      G,
      {kernel, kernel, kernel},
      {stride, stride, stride},
      {pad, pad, pad, pad, pad, pad});

  int kernel_dim_3d = kernel * kernel * kernel;

  aligned_vector<int8_t> Bint8_3d(
      kernel_dim_3d * conv_p_3d.IC * (conv_p_3d.OC / conv_p_3d.G));

  aligned_vector<int8_t> Bint8_3d_unpacked(
      kernel_dim_3d * conv_p_3d.IC * (conv_p_3d.OC / conv_p_3d.G));

  // Same non-trivial fill for the 3D weights (see comment above).
  for (size_t i = 0; i < Bint8_3d.size(); ++i) {
    Bint8_3d[i] = static_cast<int8_t>(static_cast<int>(i % 255) - 127);
  }

  PackWeightsForConv<3> packedB_3D(conv_p_3d, Bint8_3d.data());

  packedB_3D.unpack(Bint8_3d_unpacked.data());

  ASSERT_EQ(Bint8_3d, Bint8_3d_unpacked)
      << "Original and unpacked data elements are not the same [3D]";
}