Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion bench/Im2ColFusedRequantizeAcc16Benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,6 @@ void performance_test() {
KDim,
nullptr,
1,
Aint8_zero_point,
row_offset_buf.data());

fbgemmPacked(
Expand Down
1 change: 0 additions & 1 deletion bench/Im2ColFusedRequantizeAcc32Benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,6 @@ void performance_test() {
KDim,
nullptr,
1,
Aint8_zero_point,
row_offset_buf.data());

fbgemmPacked(
Expand Down
3 changes: 1 addition & 2 deletions bench/PackedFloatInOutBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,7 @@ void performance_test() {
Bint8.data(),
n,
nullptr,
1,
Bint8_zero_point);
1);

DoNothing<float, float> doNothingObj{};
ReQuantizeForFloat<false> outputProcObj(
Expand Down
4 changes: 1 addition & 3 deletions bench/PackedRequantizeAcc16Benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,7 @@ void performance_test() {
Aint8.data(),
k,
nullptr,
1,
Aint8_zero_point);
1);
PackAWithRowOffset<uint8_t, int16_t> packAWithRowOffset(
matrix_op_t::NoTranspose,
m,
Expand All @@ -319,7 +318,6 @@ void performance_test() {
k,
nullptr,
1,
Aint8_zero_point,
row_offset_buf.data());

// no-op output process objects
Expand Down
4 changes: 1 addition & 3 deletions bench/PackedRequantizeAcc32Benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,7 @@ void performance_test() {
Bint8.data(),
n,
nullptr,
1,
Bint8_zero_point);
1);

ttot = 0.0;
runType = "FBGEMM_i8_acc32";
Expand Down Expand Up @@ -261,7 +260,6 @@ void performance_test() {
k,
nullptr,
1,
Aint8_zero_point,
row_offset_buf.data());

DoNothing<> doNothingObj{};
Expand Down
23 changes: 6 additions & 17 deletions include/fbgemm/Fbgemm.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,12 @@ class PackMatrix {
* dimension A.rows by B.cols*B.groups .
* A.groups must be same as B.groups, A.groups must divide
* A.cols, and B.groups must divide B.rows and C.cols.
* @param zero_pt the quantized value that maps to 0.0f floating-point number.
*/
PackMatrix(
std::int32_t rows,
std::int32_t cols,
inpType* pmat,
int groups = 1,
std::int32_t zero_pt = 0);
int groups = 1);

/**
* @return true usually when the matrix is constant matrix (e.g., weight
Expand Down Expand Up @@ -276,13 +274,6 @@ class PackMatrix {
: (numPackedCols() % blockColSize());
}

/**
* @return the quantized value that maps to 0.0f floating-point number
*/
std::int32_t zeroPoint() const {
return zero_pt_;
}

inpType* buf_;
std::int32_t brow_; ///< the number of rows in each block
std::int32_t bcol_; ///< the number of columns in each block
Expand All @@ -293,7 +284,6 @@ class PackMatrix {
private:
std::int32_t nrows_, ncols_;
int G_;
std::int32_t zero_pt_;
block_type_t packedBlock_; ///< The block in the source matrix just packed
std::int32_t last_brow_, last_bcol_;
};
Expand All @@ -320,8 +310,7 @@ class PackAMatrix final : public PackMatrix<PackAMatrix<T, accT>, T, accT> {
const inpType* smat,
std::int32_t ld,
inpType* pmat = nullptr,
int groups = 1,
std::int32_t zero_pt = 0);
int groups = 1);

/**
* Activation matrices are not constant so cannot amortize the cost of
Expand Down Expand Up @@ -391,8 +380,7 @@ class PackBMatrix final : public PackMatrix<PackBMatrix<T, accT>, T, accT> {
const inpType* smat,
std::int32_t ld,
inpType* pmat = nullptr,
int groups = 1,
std::int32_t zero_pt = 0);
int groups = 1);

/**
* Weight matrices are usually constant so worth pre-packing.
Expand Down Expand Up @@ -468,7 +456,7 @@ class PackAWithIm2Col

PackAWithIm2Col() = delete; // no default constructor
/**
* TODO: Currently only groups == 1 supported
* @param zero_pt the quantized value that maps to 0.0f floating-point number.
*/
PackAWithIm2Col(
const conv_param_t<SPATIAL_DIM>& conv_param,
Expand Down Expand Up @@ -523,6 +511,7 @@ class PackAWithIm2Col
private:
const conv_param_t<SPATIAL_DIM>& conv_p_;
const T* sdata_;
std::int32_t zero_pt_;
std::int32_t* row_offset_;
bool rowOffsetAllocatedHere;
std::int32_t row_interleave_B_;
Expand Down Expand Up @@ -551,7 +540,6 @@ class PackAWithRowOffset final
std::uint32_t ld,
inpType* pmat = nullptr,
int groups = 1,
std::int32_t zero_pt = 0,
std::int32_t* row_offset = nullptr);

/**
Expand Down Expand Up @@ -693,6 +681,7 @@ class PackAWithQuantRowOffset final
const float* smat_;
std::int32_t ld_;
float scale_;
std::int32_t zero_pt_;
std::int32_t* row_offset_;
bool rowOffsetAllocatedHere;
std::int32_t row_interleave_B_;
Expand Down
2 changes: 0 additions & 2 deletions include/fbgemm/QuantUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
#include <cstdint>
#include <limits>

#include <cpuinfo.h>

namespace fbgemm {

// Copied from gemmlowp
Expand Down
10 changes: 2 additions & 8 deletions src/PackAMatrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,8 @@ PackAMatrix<T, accT>::PackAMatrix(
const T* smat,
int32_t ld,
inpType* pmat,
int groups,
std::int32_t zero_pt)
: PackMatrix<PackAMatrix<T, accT>, T, accT>(
nRow,
nCol,
pmat,
groups,
zero_pt),
int groups)
: PackMatrix<PackAMatrix<T, accT>, T, accT>(nRow, nCol, pmat, groups),
trans_(trans),
smat_(smat),
ld_(ld) {
Expand Down
10 changes: 5 additions & 5 deletions src/PackAWithIm2Col.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ PackAWithIm2Col<T, accT, SPATIAL_DIM>::PackAWithIm2Col(
std::multiplies<int>()) *
conv_p.IC,
pmat,
conv_p.G,
zero_pt),
conv_p.G),
conv_p_(conv_p),
sdata_(sdata) {
sdata_(sdata),
zero_pt_(zero_pt) {
static_assert(
SPATIAL_DIM == 2 || SPATIAL_DIM == 3, "unsupported conv dimension ");
if (cpuinfo_has_x86_avx512f()) {
Expand Down Expand Up @@ -187,7 +187,7 @@ void PackAWithIm2Col<T, accT, SPATIAL_DIM>::pack(const block_type_t& block) {
std::memset(
out + (i - block.row_start) * BaseType::blockColSize() +
(j_blk_start - block.col_start),
BaseType::zeroPoint(),
zero_pt_,
sizeof(T) * (j_blk_end - j_blk_start));
} else {
std::memcpy(
Expand Down Expand Up @@ -239,7 +239,7 @@ void PackAWithIm2Col<T, accT, SPATIAL_DIM>::pack(const block_type_t& block) {
&out
[(i - block.row_start) * BaseType::blockColSize() +
(j_blk_start - block.col_start)],
BaseType::zeroPoint(),
zero_pt_,
sizeof(T) * (j_blk_end - j_blk_start));
} else {
std::memcpy(
Expand Down
8 changes: 4 additions & 4 deletions src/PackAWithQuantRowOffset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ PackAWithQuantRowOffset<T, accT>::PackAWithQuantRowOffset(
nRow,
nCol,
pmat,
groups,
zero_pt),
groups),
trans_(trans),
smat_(smat),
ld_(ld),
scale_(scale),
zero_pt_(zero_pt),
row_offset_(row_offset) {
rowOffsetAllocatedHere = false;

Expand Down Expand Up @@ -158,7 +158,7 @@ void PackAWithQuantRowOffset<T, accT>::pack(const block_type_t& block) {
for (; j < block.col_size / VLEN * VLEN; j += VLEN) {
__m256 val_v = _mm256_loadu_ps(smat_temp + i * ld_temp + j);
__m256 transformed_v = _mm256_fmadd_ps(
val_v, inverse_scale_v, _mm256_set1_ps(BaseType::zeroPoint()));
val_v, inverse_scale_v, _mm256_set1_ps(zero_pt_));
__m256 clipped_v = _mm256_max_ps(
_mm256_set1_ps(std::numeric_limits<uint8_t>::min()),
_mm256_min_ps(
Expand All @@ -180,7 +180,7 @@ void PackAWithQuantRowOffset<T, accT>::pack(const block_type_t& block) {
#endif
for (; j < block.col_size; ++j) {
float val = smat_temp[i * ld_temp + j];
float transformed = val / scale_ + BaseType::zeroPoint();
float transformed = val / scale_ + zero_pt_;
float clipped = std::min<float>(
std::max<float>(transformed, std::numeric_limits<uint8_t>::min()),
std::numeric_limits<uint8_t>::max());
Expand Down
4 changes: 1 addition & 3 deletions src/PackAWithRowOffset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,12 @@ PackAWithRowOffset<T, accT>::PackAWithRowOffset(
uint32_t ld,
inpType* pmat,
int groups,
int32_t zero_pt,
int32_t* row_offset)
: PackMatrix<PackAWithRowOffset<T, accT>, T, accT>(
nRow,
nCol,
pmat,
groups,
zero_pt),
groups),
trans_(trans),
smat_(smat),
ld_(ld),
Expand Down
13 changes: 3 additions & 10 deletions src/PackBMatrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,8 @@ PackBMatrix<T, accT>::PackBMatrix(
const T* smat,
int32_t ld,
inpType* pmat,
int groups,
std::int32_t zero_pt)
: PackMatrix<PackBMatrix<T, accT>, T, accT>(
nRow,
nCol,
pmat,
groups,
zero_pt),
int groups)
: PackMatrix<PackBMatrix<T, accT>, T, accT>(nRow, nCol, pmat, groups),
trans_(trans),
smat_(smat),
ld_(ld) {
Expand Down Expand Up @@ -162,8 +156,7 @@ bool PackBMatrix<T, accT>::metaEquals(const PackBMatrix<T, accT>& that) const {
BaseType::blockCols() != that.blockCols() ||
BaseType::numPackedRows() != that.numPackedRows() ||
BaseType::numPackedCols() != that.numPackedCols() ||
BaseType::zeroPoint() != that.zeroPoint() || trans_ != that.trans_ ||
BaseType::numGroups() != that.numGroups() ||
trans_ != that.trans_ || BaseType::numGroups() != that.numGroups() ||
row_interleave_ != that.row_interleave_) {
return false;
}
Expand Down
5 changes: 2 additions & 3 deletions src/PackMatrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ PackMatrix<PT, inpType, accType>::PackMatrix(
int32_t rows,
int32_t cols,
inpType* buf,
int groups,
int32_t zero_pt)
: buf_(buf), nrows_(rows), ncols_(cols), G_(groups), zero_pt_(zero_pt) {
int groups)
: buf_(buf), nrows_(rows), ncols_(cols), G_(groups) {
bufAllocatedHere_ = false;
if (!cpuinfo_initialize()) {
throw std::runtime_error("Failed to initialize cpuinfo!");
Expand Down
1 change: 1 addition & 0 deletions src/QuantUtils.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "fbgemm/QuantUtils.h"

#include <cpuinfo.h>
#include <immintrin.h>

#include "fbgemm/Fbgemm.h"
Expand Down
3 changes: 1 addition & 2 deletions test/Im2ColFusedRequantizeTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -418,8 +418,7 @@ static void Im2col3DTest() {
Bint8.data(),
NDim,
nullptr,
conv_p.G,
Bint8_zero_point);
conv_p.G);

#ifdef _OPENMP
#pragma omp parallel
Expand Down
12 changes: 3 additions & 9 deletions test/PackedRequantizeAcc16Test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,7 @@ TEST_P(fbgemmu8s8acc16test, Test) {
Bint8.data(),
(btrans == matrix_op_t::Transpose) ? k : n,
nullptr,
groups,
Bint8_zero_point);
groups);

#ifdef _OPENMP
#pragma omp parallel
Expand All @@ -218,7 +217,6 @@ TEST_P(fbgemmu8s8acc16test, Test) {
k,
nullptr,
groups,
Aint8_zero_point,
row_offset_buf.data());

DoNothing<> doNothingObj{};
Expand Down Expand Up @@ -446,8 +444,7 @@ TEST_P(fbgemmu8s8acc16test, SpMDMTest) {
Bint8.data(),
(btrans == matrix_op_t::Transpose) ? k : n,
nullptr,
groups,
Bint8_zero_point);
groups);

#ifdef _OPENMP
#pragma omp parallel
Expand All @@ -465,7 +462,6 @@ TEST_P(fbgemmu8s8acc16test, SpMDMTest) {
k,
nullptr,
groups,
Aint8_zero_point,
row_offset_buf.data());

// spmdm -> requantization -> nothing
Expand Down Expand Up @@ -634,8 +630,7 @@ TEST_P(fbgemmu8s8acc16test, NoRequantizeTest) {
Bint8.data(),
(btrans == matrix_op_t::Transpose) ? k : n,
nullptr,
groups,
Bint8_zero_point);
groups);

#ifdef _OPENMP
#pragma omp parallel
Expand All @@ -653,7 +648,6 @@ TEST_P(fbgemmu8s8acc16test, NoRequantizeTest) {
k,
nullptr,
groups,
Aint8_zero_point,
row_offset_buf.data());

// DoNothing<> doNothingObj{};
Expand Down
7 changes: 2 additions & 5 deletions test/PackedRequantizeTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,7 @@ TEST_P(fbgemmu8s8acc32test, Test) {
Bint8.data(),
(btrans == matrix_op_t::Transpose) ? k : n,
nullptr,
groups,
Bint8_zero_point);
groups);

#ifdef _OPENMP
#pragma omp parallel
Expand All @@ -238,7 +237,6 @@ TEST_P(fbgemmu8s8acc32test, Test) {
k,
nullptr,
groups,
Aint8_zero_point,
row_offset_buf.data());

DoNothing<> doNothingObj{};
Expand Down Expand Up @@ -400,8 +398,7 @@ TEST_P(fbgemmu8s8acc32test, TestFloatInputOutput) {
Bint8.data(),
(btrans == matrix_op_t::Transpose) ? k : n,
nullptr,
groups,
Bint8_zero_point);
groups);

#ifdef _OPENMP
#pragma omp parallel
Expand Down