Skip to content

Commit

Permalink
Fix example bugs and refine code.
Browse files Browse the repository at this point in the history
1. Thanks to @rofirrim for the patch.
2. Two examples pass in spike (riscv-isa-sim) 958dcd.
  • Loading branch information
zakk0610 committed Jun 23, 2020
1 parent 006b57d commit fa898df
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 26 deletions.
8 changes: 4 additions & 4 deletions rvv_saxpy.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ float output[N] = {
0.2484350696132857};

void saxpy_golden(size_t n, const float a, const float *x, float *y) {
for (size_t i; i < n; ++i) {
for (size_t i = 0; i < n; ++i) {
y[i] = a * x[i] + y[i];
}
}
Expand All @@ -55,11 +55,11 @@ void saxpy_vec(size_t n, const float a, const float *x, float *y) {
vfloat32m8_t vx, vy;

for (; (l = vsetvl_e32m8(n)) > 0; n -= l) {
vx = vle_v_f32m8(x);
vx = vle32_v_f32m8(x);
x += l;
vy = vle_v_f32m8(y);
vy = vle32_v_f32m8(y);
vy = vfmacc_vf_f32m8(vy, a, vx);
vse_v_f32m8 (y, vy);
vse32_v_f32m8 (y, vy);
y += l;
}
}
Expand Down
45 changes: 23 additions & 22 deletions rvv_sgemm.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,35 +53,37 @@ void sgemm_golden() {
for (size_t i = 0; i < MLEN; ++i)
for (size_t j = 0; j < NLEN; ++j)
for (size_t k = 0; k < KLEN; ++k)
c_array[i * NLEN + j] += a_array[i * KLEN + k] * b_array[j + k * NLEN];
golden_array[i * NLEN + j] += a_array[i * KLEN + k] * b_array[j + k * NLEN];
}


// reference https://github.com/riscv/riscv-v-spec/blob/master/example/sgemm.S
// c += a*b (alpha=1, no transpose on input matrices)
// matrices stored in C row-major order
void sgemm_vec(size_t size_m, size_t size_n, size_t size_k,
const float *a, // m * k matrix
size_t lda,
const float *b, // k * n matrix
size_t ldb,
float *c, // m * n matrix
size_t ldc) {
int i, j, k;
size_t vl;
vfloat32m1_t vec_c;
for (int i = 0; i < size_m; ++i) {
j = size_n;
const float *bnp = b;
float *cnp = c;
for (; vl = vsetvl_e32m1(j); j -= vl) {
const float *akp = a;
const float *bkp = bnp;
vec_c = *(vfloat32m1_t *)cnp;
for (k = 0; k < size_k; ++k) {
vec_c = vfmacc_vf_f32m1(vec_c, *akp, *(vfloat32m1_t *)bkp);
bkp += ldb;
akp++;
for (int m = 0; m < size_m; ++m) {
const float *b_n_ptr = b;
float *c_n_ptr = c;
for (int c_n_count = size_n; (vl = vsetvl_e32m1(c_n_count )); c_n_count -= vl) {
const float *a_k_ptr = a;
const float *b_k_ptr = b_n_ptr;
vfloat32m1_t acc = vle32_v_f32m1(c_n_ptr);
for (size_t k = 0; k < size_k; ++k) {
vfloat32m1_t b_n_data = vle32_v_f32m1(b_k_ptr);
acc = vfmacc_vf_f32m1(acc, *a_k_ptr, b_n_data);
b_k_ptr += ldb;
a_k_ptr++;
}
*(vfloat32m1_t *)cnp = vec_c;
cnp += vl;
bnp += vl;
vse32_v_f32m1(c_n_ptr, acc);
c_n_ptr += vl;
b_n_ptr += vl;
}
a += lda;
c += ldc;
Expand All @@ -98,18 +100,17 @@ int fp_eq(float reference, float actual, float relErr)
int main() {
// golden
memcpy(golden_array, b_array, OUTPUT_LEN * sizeof(float));
sgemm_golden(MLEN, NLEN, KLEN, a_array, KLEN, b_array, NLEN, golden_array, NLEN);
sgemm_golden();
// vector
memcpy(c_array, b_array, OUTPUT_LEN * sizeof(float));
sgemm_vec(MLEN, NLEN, KLEN, a_array, KLEN, b_array, NLEN, c_array, NLEN);

int pass = 1;
for (int i = 0; i < OUTPUT_LEN; i++) {
if (!fp_eq(golden_array[i], c_array[i], 1e-6)) {
printf("failed, %f=!%f\n", golden_array[i], c_array[i]);
if (!fp_eq(golden_array[i], c_array[i], 1e-5)) {
printf("index %d failed, %f=!%f\n", i, golden_array[i], c_array[i]);
pass = 0;
}
printf("%f,%f\n",golden_array[i], c_array[i]);
}
if (pass)
printf("passed\n");
Expand Down

0 comments on commit fa898df

Please sign in to comment.