From bee229da7d7e8ab5909f5ac039b3d13b9d055dc7 Mon Sep 17 00:00:00 2001
From: Daya Khudia
Date: Mon, 15 Jul 2019 14:22:12 -0700
Subject: [PATCH 1/2] Assume input weights to be in transposed format for
 convUnified

Differential Revision: D16186932

fbshipit-source-id: 244fc2d614aeb4d768201a553efeb3058fe9efeb
---
 bench/ConvUnifiedBenchmark.cc | 15 ++++++----
 src/PackWeightsForConv.cc     |  6 ++--
 src/RefImplementations.cc     | 54 ++++++++++++++++++++++++++---------
 3 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/bench/ConvUnifiedBenchmark.cc b/bench/ConvUnifiedBenchmark.cc
index 59079c7b66..88f40f537a 100644
--- a/bench/ConvUnifiedBenchmark.cc
+++ b/bench/ConvUnifiedBenchmark.cc
@@ -42,9 +42,9 @@ vector<conv_param_t<3>> shapes_3d = {
   // MB, IC, OC, {IT, IH, IW}, G, {KT, KH, KW}, {stride_t, stride_h, stride_w},
   // {pad_prev, pad_h_top, pad_w_left, pad_next, pad_h_bottom, pad_w_right}
   // Regular
-  conv_param_t<3>(1, 64, 64, {32, 56, 56}, 1, {3, 3, 3}, {1, 1, 1}, {1, 1, 1, 1, 1, 1}),
+  conv_param_t<3>(1, 64, 64, {8, 14, 14}, 1, {3, 3, 3}, {1, 1, 1}, {1, 1, 1, 1, 1, 1}),
   // Depthwise
-  conv_param_t<3>(1, 64, 64, {32, 56, 56}, 64, {3, 3, 3}, {1, 1, 1}, {1, 1, 1, 1, 1, 1})
+  conv_param_t<3>(1, 64, 64, {8, 14, 14}, 64, {3, 3, 3}, {1, 1, 1}, {1, 1, 1, 1, 1, 1})
 };
 
 template <int SPATIAL_DIM>
 void performance_test(const vector<conv_param_t<SPATIAL_DIM>>& shapes) {
@@ -109,6 +109,9 @@ void performance_test(const vector<conv_param_t<SPATIAL_DIM>>& shapes) {
   aligned_vector<int8_t> Bint8(
       kernel_dim * conv_p.IC * (conv_p.OC / conv_p.G));
 
+  aligned_vector<int8_t> Bint8_tr(
+      kernel_dim * conv_p.IC * (conv_p.OC / conv_p.G));
+
   int im_out_dim = accumulate(
       conv_p.OUT_DIM.begin(), conv_p.OUT_DIM.end(), 1, multiplies<int>());
   aligned_vector<int32_t> Cint32_ref(conv_p.MB * im_out_dim * conv_p.OC);
@@ -131,14 +134,14 @@ void performance_test(const vector<conv_param_t<SPATIAL_DIM>>& shapes) {
   randFill(C_multiplier, 0.1234f / 2, 0.1234f * 3 / 2);
   int32_t C_zero_point = 5;
 
-  aligned_vector<float> Bfp32(Bint8.begin(), Bint8.end());
-
   // reference implementation
+  // conv_ref expects weights to be in G (R S C/G) K/G
+  transposeConvWeights(conv_p, Bint8.data(), Bint8_tr.data());
   conv_ref(
       conv_p,
       Aint8.data(),
       Aint8_zero_point,
-      Bint8.data(),
+      Bint8_tr.data(),
       Cint32_ref.data());
 
   // matrix dimensions after im2col
@@ -161,7 +164,7 @@ void performance_test(const vector<conv_param_t<SPATIAL_DIM>>& shapes) {
         KDimPerGroup,
         OC_per_G,
         OC_per_G,
-        Bint8.data() + g * KDimPerGroup * OC_per_G,
+        Bint8_tr.data() + g * KDimPerGroup * OC_per_G,
         Bint8_zero_point.data(),
         col_offsets.data() + g * OC_per_G,
         conv_p.OC);
diff --git a/src/PackWeightsForConv.cc b/src/PackWeightsForConv.cc
index c81114494d..78379af520 100644
--- a/src/PackWeightsForConv.cc
+++ b/src/PackWeightsForConv.cc
@@ -42,18 +42,18 @@ PackWeightsForConv<SPATIAL_DIM, T, accT>::PackWeightsForConv(
       W_dw_3D_packed_ = nullptr;
       W_gconv_packed_ =
           std::make_shared<PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>>(
-              matrix_op_t::NoTranspose, conv_p, sdata, nullptr);
+              matrix_op_t::Transpose, conv_p, sdata, nullptr);
       break;
     }
     case optimized_conv_t::im2col: {
       int NDim = conv_p.OC / conv_p.G;
       int KDim = conv_p.K[0] * conv_p.K[1] * conv_p.IC;
       W_im2col_packed_ = std::make_shared<PackBMatrix<T, accT>>(
-          matrix_op_t::NoTranspose,
+          matrix_op_t::Transpose,
           KDim,
           NDim,
           sdata,
-          NDim,
+          KDim / conv_p.G,
           nullptr,
           conv_p.G,
           blocking_params);
diff --git a/src/RefImplementations.cc b/src/RefImplementations.cc
index b4b0c2b5e2..e3c0eac7c9 100644
--- a/src/RefImplementations.cc
+++ b/src/RefImplementations.cc
@@ -181,8 +181,7 @@ void cblas_sgemm_ref(
     int ldb,
     float beta,
     float* Cfp32,
-    int ldc
-    ) {
+    int ldc) {
   for (int i = 0; i < m; ++i) {
     for (int j = 0; j < n; ++j) {
       float sum = 0;
@@ -204,7 +203,6 @@ void cblas_sgemm_ref(
     }
   }
 }
-
 void row_offsets_u8acc32_ref(
     int M,
     int K,
@@ -542,21 +540,49 @@ void transposeConvWeights(
     const conv_param_t<SPATIAL_DIM>& conv_p,
     const std::int8_t* src,
     std::int8_t* dest) {
-  assert(SPATIAL_DIM == 2 && "Only 2D supported currently");
-  int R = conv_p.K[0];
-  int S = conv_p.K[1];
   int G = conv_p.G;
   int IC_per_G = conv_p.IC / conv_p.G;
   int OC_per_G = conv_p.OC / conv_p.G;
-  // Transforms weights from G K/G (R S C/G) to G (R S C/G) K/G format.
-  for (int r = 0; r < R; ++r) {
-    for (int s = 0; s < S; ++s) {
-      for (int k = 0; k < OC_per_G; ++k) {
-        for (int g = 0; g < G; ++g) {
-          for (int c = 0; c < IC_per_G; ++c) {
-            dest[(((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k] =
-                src[(((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c];
+  assert(
+      (SPATIAL_DIM == 3 || SPATIAL_DIM == 2) &&
+      "Only 2D and 3D convolutions are supported");
+  if (SPATIAL_DIM == 2) {
+    int R = conv_p.K[0];
+    int S = conv_p.K[1];
+    // Transforms weights from G K/G (R S C/G) to G (R S C/G) K/G format.
+    for (int r = 0; r < R; ++r) {
+      for (int s = 0; s < S; ++s) {
+        for (int k = 0; k < OC_per_G; ++k) {
+          for (int g = 0; g < G; ++g) {
+            for (int c = 0; c < IC_per_G; ++c) {
+              dest[(((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k] =
+                  src[(((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c];
+            }
+          }
+        }
+      }
+    }
+  } else {
+    // Transforms weights from G K/G (T R S C/G) to G (T R S C/G) K/G format.
+    int T = conv_p.K[0];
+    int R = conv_p.K[1];
+    int S = conv_p.K[2];
+    for (int t = 0; t < T; ++t) {
+      for (int r = 0; r < R; ++r) {
+        for (int s = 0; s < S; ++s) {
+          for (int k = 0; k < OC_per_G; ++k) {
+            for (int g = 0; g < G; ++g) {
+              for (int c = 0; c < IC_per_G; ++c) {
+                dest
+                    [((((g * T + t) * R + r) * S + s) * IC_per_G + c) *
+                         OC_per_G +
+                     k] =
+                    src[((((g * OC_per_G + k) * T + t) * R + r) * S + s) *
+                            IC_per_G +
+                        c];
+              }
+            }
+          }
         }
       }
     }
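
[Editor's note] To make the layout change above concrete: after this patch the unified interface takes weights in G K/G (R S C/G) order, and transposeConvWeights produces the G (R S C/G) K/G order that conv_ref expects. The standalone sketch below replays the same 2D index arithmetic from the patch on a tiny made-up shape; the dimension values and the spot-check are illustrative and are not part of the patch.

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  // Made-up dimensions; only the index math mirrors the 2D branch above.
  const int G = 2, OC_per_G = 2, IC_per_G = 3, R = 3, S = 3;
  std::vector<std::int8_t> src(G * OC_per_G * R * S * IC_per_G);
  for (int i = 0; i < static_cast<int>(src.size()); ++i) {
    src[i] = static_cast<std::int8_t>(i); // 108 elements, fits in int8_t
  }
  std::vector<std::int8_t> dest(src.size());

  // Transforms weights from G K/G (R S C/G) to G (R S C/G) K/G format.
  for (int r = 0; r < R; ++r) {
    for (int s = 0; s < S; ++s) {
      for (int k = 0; k < OC_per_G; ++k) {
        for (int g = 0; g < G; ++g) {
          for (int c = 0; c < IC_per_G; ++c) {
            dest[(((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k] =
                src[(((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c];
          }
        }
      }
    }
  }

  // Spot-check one element: the value stored at the source offset for
  // (g=1, k=1, r=2, s=0, c=1) must land at the transposed destination offset.
  const int g = 1, k = 1, r = 2, s = 0, c = 1;
  const int src_idx = (((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c;
  const int dst_idx = (((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k;
  assert(dest[dst_idx] == static_cast<std::int8_t>(src_idx));
  return 0;
}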
From 2346ccb9b6456cb9a0a35fc14b4248302674d07e Mon Sep 17 00:00:00 2001
From: Daya Khudia
Date: Mon, 15 Jul 2019 14:22:54 -0700
Subject: [PATCH 2/2] unpack through unified convolution interface (#105)

Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/105

Support for calling unpack through the unified interface for packed
convolution weights.

Reviewed By: jianyuh

Differential Revision: D16190534

fbshipit-source-id: 28e1b95c7642c1cf9ed3d8935f56c740f9b44bcd
---
 include/fbgemm/Fbgemm.h                       |  6 ++
 src/PackWeightsForConv.cc                     | 15 +++++
 .../{UniConvPackingTest.cc => UniConvTest.cc} | 63 ++++++++++++++++++-
 3 files changed, 81 insertions(+), 3 deletions(-)
 rename test/{UniConvPackingTest.cc => UniConvTest.cc} (75%)

diff --git a/include/fbgemm/Fbgemm.h b/include/fbgemm/Fbgemm.h
index 9ee25b5824..302af516f2 100644
--- a/include/fbgemm/Fbgemm.h
+++ b/include/fbgemm/Fbgemm.h
@@ -597,6 +597,12 @@ class FBGEMM_API PackWeightsForConv {
     return W_gconv_packed_;
   }
 
+  /**
+   * @brief Unpack the packed weight matrix into origin_buf (used for
+   * serialization to recover the original weight matrix).
+   */
+  void unpack(T* origin_buf);
+
  private:
   // Packed weights if we use im2col based convolution implementation
   std::shared_ptr<PackBMatrix<T, accT>> W_im2col_packed_;
diff --git a/src/PackWeightsForConv.cc b/src/PackWeightsForConv.cc
index 78379af520..e16843c89a 100644
--- a/src/PackWeightsForConv.cc
+++ b/src/PackWeightsForConv.cc
@@ -65,6 +65,21 @@ PackWeightsForConv<SPATIAL_DIM, T, accT>::PackWeightsForConv(
   } // switch
 }
 
+template <int SPATIAL_DIM, typename T, typename accT>
+void PackWeightsForConv<SPATIAL_DIM, T, accT>::unpack(T* origin_buf) {
+  if (W_dw_2D_packed_) {
+    W_dw_2D_packed_->unpack(origin_buf);
+  } else if (W_dw_3D_packed_) {
+    W_dw_3D_packed_->unpack(origin_buf);
+  } else if (W_gconv_packed_) {
+    W_gconv_packed_->unpack(origin_buf);
+  } else if (W_im2col_packed_) {
+    W_im2col_packed_->unpack(origin_buf);
+  } else {
+    assert(false && "At least one packed weights object should exist");
+  }
+}
+
 template class PackWeightsForConv<2, int8_t, int32_t>;
 template class PackWeightsForConv<3, int8_t, int32_t>;
diff --git a/test/UniConvPackingTest.cc b/test/UniConvTest.cc
similarity index 75%
rename from test/UniConvPackingTest.cc
rename to test/UniConvTest.cc
index 77552af0df..2b110dde73 100644
--- a/test/UniConvPackingTest.cc
+++ b/test/UniConvTest.cc
@@ -23,7 +23,7 @@ using namespace fbgemm;
 
 namespace {
 
 // tuple represents MB, IC, OC, IT, IH, IW, G, KH/KW, stride, pad
-class convPackingTest
+class uniConvTest
     : public testing::TestWithParam<
           tuple<int, int, int, int, int, int, int, int, int, int>> {};
 
 INSTANTIATE_TEST_CASE_P(
     InstantiationName,
-    convPackingTest,
+    uniConvTest,
     ::testing::Combine(
         ::testing::ValuesIn({1, 2}), // MB
         ::testing::ValuesIn({16, 32}), // IC
@@ -47,7 +47,7 @@ INSTANTIATE_TEST_CASE_P(
 /**
  * Test for conv packing
  */
-TEST_P(convPackingTest, packingTest) {
+TEST_P(uniConvTest, packingTest) {
   int MB, IC, OC, IT, IH, IW, G, kernel, stride, pad;
   tie(MB, IC, OC, IT, IH, IW, G, kernel, stride, pad) = GetParam();
 
@@ -146,3 +146,60 @@ TEST_P(convPackingTest, packingTest) {
     }
   }
 }
+
+/**
+ * Test for packing/unpacking
+ */
+TEST_P(uniConvTest, packUnpackTest) {
+  int MB, IC, OC, IT, IH, IW, G, kernel, stride, pad;
+  tie(MB, IC, OC, IT, IH, IW, G, kernel, stride, pad) = GetParam();
+
+  conv_param_t<2> conv_p_2d(
+      MB,
+      IC,
+      OC,
+      {IH, IW},
+      G,
+      {kernel, kernel},
+      {stride, stride},
+      {pad, pad, pad, pad});
+
+  int kernel_dim_2d = kernel * kernel;
+
+  aligned_vector<int8_t> Bint8_2d(
+      kernel_dim_2d * conv_p_2d.IC * (conv_p_2d.OC / conv_p_2d.G));
+  aligned_vector<int8_t> Bint8_2d_unpacked(
+      kernel_dim_2d * conv_p_2d.IC * (conv_p_2d.OC / conv_p_2d.G));
+
+  PackWeightsForConv<2> packedB_2D(conv_p_2d, Bint8_2d.data());
+
+  packedB_2D.unpack(Bint8_2d_unpacked.data());
+
+  ASSERT_EQ(Bint8_2d, Bint8_2d_unpacked)
+      << "Original and unpacked data elements are not the same [2D]";
+
+  conv_param_t<3> conv_p_3d(
+      MB,
+      IC,
+      OC,
+      {IT, IH, IW},
+      G,
+      {kernel, kernel, kernel},
+      {stride, stride, stride},
+      {pad, pad, pad, pad, pad, pad});
+
+  int kernel_dim_3d = kernel * kernel * kernel;
+
+  aligned_vector<int8_t> Bint8_3d(
+      kernel_dim_3d * conv_p_3d.IC * (conv_p_3d.OC / conv_p_3d.G));
+
+  aligned_vector<int8_t> Bint8_3d_unpacked(
+      kernel_dim_3d * conv_p_3d.IC * (conv_p_3d.OC / conv_p_3d.G));
+
+  PackWeightsForConv<3> packedB_3D(conv_p_3d, Bint8_3d.data());
+
+  packedB_3D.unpack(Bint8_3d_unpacked.data());
+
+  ASSERT_EQ(Bint8_3d, Bint8_3d_unpacked)
+      << "Original and unpacked data elements are not the same [3D]";
+}
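
[Editor's note] The two patches combine as follows for a caller: patch 1 makes PackWeightsForConv consume weights in the transposed G K/G (R S C/G) layout, and patch 2 adds the unpack entry point to recover them. Below is a minimal caller-side sketch mirroring packUnpackTest above. It assumes FBGEMM is built and on the include path; the shape values and the use of plain std::vector (the tests use fbgemm's aligned_vector) are illustrative choices, not taken from the patches.

#include <cstdint>
#include <vector>

#include "fbgemm/Fbgemm.h"

using namespace fbgemm;

int main() {
  // Arbitrary example shape: MB=1, IC=16, OC=16, 14x14 input, G=1,
  // 3x3 kernel, stride 1, pad 1.
  conv_param_t<2> conv_p(
      1, 16, 16, {14, 14}, 1, {3, 3}, {1, 1}, {1, 1, 1, 1});

  // Weights, supplied in the transposed G K/G (R S C/G) layout that the
  // unified interface assumes after patch 1.
  std::vector<std::int8_t> weights(
      conv_p.K[0] * conv_p.K[1] * conv_p.IC * (conv_p.OC / conv_p.G), 1);
  std::vector<std::int8_t> unpacked(weights.size());

  // Pack through the unified interface; the constructor selects the
  // internal packing (im2col, groupwise, or depthwise) for this shape.
  PackWeightsForConv<2> packedB(conv_p, weights.data());

  // New in patch 2: recover the original weight matrix, e.g. before
  // serializing a model.
  packedB.unpack(unpacked.data());
  return 0;
}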