Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sgemm和openblas结果不一致,导致推理结果相差很远 #14

Closed
kismit opened this issue Aug 22, 2022 · 2 comments
Closed

sgemm和openblas结果不一致,导致推理结果相差很远 #14

kismit opened this issue Aug 22, 2022 · 2 comments

Comments

@kismit
Copy link

kismit commented Aug 22, 2022

如题, 使用EMLL sgemm计算的结果和Openblas cblas_sgemm计算的结果有较小差异,但是会导致模型推理结果不正常, 然而使用EMLL 动态量化再s8s32gemm计算最后反量化的方式, 推理结果是正常的, 这两者间的差异是什么?
代码如下:

  // Quantization strategy selector for emll_sgemm.
  enum QuantType
  {
    NO_QUANT = 0,   // run the float32 sgemm kernel directly
    SYMMETRIC = 1,  // int8 symmetric quantization + s8s32gemm + dequantize
    ASYMMETRIC = 2  // uint8 asymmetric quantization + u8u32gemm + bias + dequantize
  };

inline int emll_s8s32gemm(bool transpose_a, bool transpose_b,
                            dim_t m, dim_t n, dim_t k,
                            const int8_t *a,
                            const int8_t *b,
                            float beta,
                            int32_t *c)
  {
    // Row-major C = op(A) * op(B) through EMLL's kernel: the operands are
    // passed in swapped order (B first, A second) and the transpose flags
    // follow their operands, matching the original four-way dispatch:
    //   (!ta, !tb) -> (0,0), (ta, !tb) -> (0,1),
    //   (!ta,  tb) -> (1,0), (ta,  tb) -> (1,1).
    const int flag_b = transpose_b ? 1 : 0;
    const int flag_a = transpose_a ? 1 : 0;
    return s8s32gemm(flag_b, flag_a, b, a, c, n, m, k, beta, 0);
  }

  inline int emll_u8u32gemm(bool transpose_a, bool transpose_b,
                            dim_t m, dim_t n, dim_t k,
                            const uint8_t *a,
                            const uint8_t *b,
                            float beta,
                            uint32_t *c)
  {
    // Unsigned 8->32 bit GEMM wrapper; identical operand-swap convention to
    // emll_s8s32gemm: B before A, first flag = transpose_b, second = transpose_a.
    const int flag_b = transpose_b ? 1 : 0;
    const int flag_a = transpose_a ? 1 : 0;
    return u8u32gemm(flag_b, flag_a, b, a, c, n, m, k, beta, 0);
  }

  int emll_sgemm(bool transpose_a, bool transpose_b,
                 dim_t m, dim_t n, dim_t k,
                 float alpha,
                 const float *a,
                 const float *b,
                 float beta,
                 float *c,
                 QuantType quant_type)
  {
    int status;

    // EMLL kernels have no alpha parameter, so alpha != 1 is folded into a
    // scaled temporary copy of A.
    float *a_f = nullptr;
    if (alpha != 1.0f)
    {
      a_f = static_cast<float *>(allocator.allocate(m * k * sizeof(float)));
      cpu::parallel_for(0, m * k, cpu::GRAIN_SIZE / 2, [&](dim_t begin, dim_t end) {
        for (dim_t i = begin; i < end; ++i)
        {
          a_f[i] = static_cast<float>(alpha * a[i]);
        }
      });
    }

    // Source matrix for A in every path below (scaled copy if alpha != 1).
    const float *a_src = (a_f != nullptr) ? a_f : a;

    // EMLL operand-swap convention (row-major result): B first, A second,
    // first transpose flag = transpose_b, second = transpose_a.
    const int flag_b = transpose_b ? 1 : 0;
    const int flag_a = transpose_a ? 1 : 0;

    if (quant_type == QuantType::NO_QUANT)
    {
      // BUG FIX (EMLL issue #14): EMLL's sgemm computes beta * C + A * B and
      // reads C even when beta == 0.  Uninitialized bytes in C that decode as
      // INF/NAN then poison the result (0 * NAN == NAN), which made the float
      // path diverge from OpenBLAS.  Zero-fill C before the call when beta == 0.
      if (beta == 0.0f)
      {
        for (dim_t i = 0; i < m * n; ++i)
        {
          c[i] = 0.0f;
        }
      }
      status = sgemm(flag_b, flag_a, b, a_src, c, n, m, k, beta, 0);
    }
    else if (quant_type == QuantType::SYMMETRIC)
    {
      // Symmetric int8 quantization of both operands, int32 GEMM, then
      // dequantization by the product of the two scales.
      int8_t *const a_s = static_cast<int8_t *>(allocator.allocate(m * k * sizeof(int8_t)));
      int8_t *const b_s = static_cast<int8_t *>(allocator.allocate(n * k * sizeof(int8_t)));
      int32_t *const c_qs = static_cast<int32_t *>(allocator.allocate(m * n * sizeof(int32_t)));

      float scale_a, scale_b;

      quantize_symmetric_f32_s8(a_src, a_s, &scale_a, m * k, 0, -1);
      quantize_symmetric_f32_s8(b, b_s, &scale_b, n * k, 0, -1);

      status = emll_s8s32gemm(transpose_a, transpose_b,
                              m, n, k, a_s, b_s, beta, c_qs);
      if (status != 0)
      {
        // Fixed copy-paste error: this branch calls s8s32gemm, not u8u32gemm.
        fprintf(stderr, "s8s32gemm returns error code %d\n", status);
      }
      else
      {
        dequantize_symmetric_f32_s32(c_qs, c, scale_a * scale_b, m * n);
      }

      allocator.free(a_s);
      allocator.free(b_s);
      allocator.free(c_qs);
    }
    else // ASYMMETRIC
    {
      // Asymmetric uint8 quantization: the int32 GEMM result must be
      // bias-corrected for the two zero points before dequantization.
      uint8_t *const a_u = static_cast<uint8_t *>(allocator.allocate(m * k * sizeof(uint8_t)));
      uint8_t *const b_u = static_cast<uint8_t *>(allocator.allocate(n * k * sizeof(uint8_t)));
      int32_t *const c_qu = static_cast<int32_t *>(allocator.allocate(m * n * sizeof(int32_t)));

      uint32_t *const a_sum = (uint32_t *)(allocator.allocate(m * sizeof(uint32_t)));
      uint32_t *const b_sum = (uint32_t *)(allocator.allocate(n * sizeof(uint32_t)));

      float scale_a, scale_b;
      uint8_t zero_point_a, zero_point_b;

      quantize_asymmetric_f32_u8(a_src, a_u, &zero_point_a, &scale_a, m * k, 0, -1);
      quantize_asymmetric_f32_u8(b, b_u, &zero_point_b, &scale_b, n * k, 0, -1);

      status = emll_u8u32gemm(transpose_a, transpose_b,
                              m, n, k, a_u, b_u, beta, (uint32_t*)c_qu);

      if (status != 0)
      {
        fprintf(stderr, "u8u32gemm returns error code %d\n", status);
      }
      else
      {
        /* sum row/col of source matrices (along K dim) */
        u8u32_sum(a_u, (uint32_t*)(a_sum), m, k, 0);
        u8u32_sum(b_u, (uint32_t*)(b_sum), k, n, 1);
        /* bias the result of 8->32 bit GEMM for the zero points */
        bias_int32_t(c_qu,
                     (int32_t)zero_point_a * (int32_t)zero_point_b * (int32_t)k,
                     (int32_t *)(a_sum), -(int32_t)zero_point_b,
                     (int32_t *)(b_sum), -(int32_t)zero_point_a, m, n);
        /* dequantize the result: dequant(input, output, scale, length) */
        dequantize_symmetric_f32_s32(c_qu, c, scale_a * scale_b, m * n);
      }

      allocator.free(a_u);
      allocator.free(b_u);
      allocator.free(c_qu);
      allocator.free(a_sum);
      allocator.free(b_sum);
    }

    if (a_f != nullptr)
    {
      allocator.free(a_f);
    }

    return status;
  }


@netease-youdao
Copy link
Owner

netease-youdao commented Sep 5, 2022

您好,可以先看一下emll_sgemm后的c元素中有没有NAN,如果有的话可能是传入的参数c指向了未初始化的内存(其中元素被解析为浮点数时可能出现INF或NAN,和零相乘得到NAN,则乘法结束后c的部分元素成为NAN),我们会在后续的版本修复这个问题。如需尽快解决,可以尝试在调用emll_sgemm之前对目标矩阵进行全部填零。

std::fill(c, c + m * n, 0.0f);

若不是以上的问题,那请问方便给一下不量化和量化的结果之间的相似度(余弦和欧式)吗?

@kismit
Copy link
Author

kismit commented Sep 9, 2022

根据这个修改问题已解决,非常感谢!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants