Skip to content

Commit

Permalink
it should work now
Browse files Browse the repository at this point in the history
  • Loading branch information
ducnguyen-sb committed Sep 12, 2023
1 parent 4747317 commit bd59550
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 34 deletions.
34 changes: 17 additions & 17 deletions src/sig/falcon/pqclean_falcon-1024_aarch64/poly_int.c
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ int PQCLEAN_FALCON1024_AARCH64_poly_int16_to_int8(int8_t G[FALCON_N], const int1
// Total SIMD registers: 32
int16x8x4_t a, f; // 8
int16x8x4_t d0, d1; // 8
uint16x8x4_t c0, c1; // 8
uint16x8x4_t c0, c1, x0, x1; // 16
uint16x8x2_t e; // 2
int8x16x4_t g; // 4
int16x8_t neon_127, neon__127, neon_q_2, neon__q_2; // 4
Expand Down Expand Up @@ -361,30 +361,30 @@ int PQCLEAN_FALCON1024_AARCH64_poly_int16_to_int8(int8_t G[FALCON_N], const int1
c1.val[3] = vcgtq_s16(a.val[3], neon_127);

// -127 > f ? 1 : 0
d0.val[0] = vcgtq_s16(neon__127, f.val[0]);
d0.val[1] = vcgtq_s16(neon__127, f.val[1]);
d0.val[2] = vcgtq_s16(neon__127, f.val[2]);
d0.val[3] = vcgtq_s16(neon__127, f.val[3]);
x0.val[0] = vcgtq_s16(neon__127, f.val[0]);
x0.val[1] = vcgtq_s16(neon__127, f.val[1]);
x0.val[2] = vcgtq_s16(neon__127, f.val[2]);
x0.val[3] = vcgtq_s16(neon__127, f.val[3]);
// f > 127 ? 1 : 0
d1.val[0] = vcgtq_s16(f.val[0], neon_127);
d1.val[1] = vcgtq_s16(f.val[1], neon_127);
d1.val[2] = vcgtq_s16(f.val[2], neon_127);
d1.val[3] = vcgtq_s16(f.val[3], neon_127);
x1.val[0] = vcgtq_s16(f.val[0], neon_127);
x1.val[1] = vcgtq_s16(f.val[1], neon_127);
x1.val[2] = vcgtq_s16(f.val[2], neon_127);
x1.val[3] = vcgtq_s16(f.val[3], neon_127);

c0.val[0] = vorrq_u16(c0.val[0], c1.val[0]);
c0.val[1] = vorrq_u16(c0.val[1], c1.val[1]);
c0.val[2] = vorrq_u16(c0.val[2], c1.val[2]);
c0.val[3] = vorrq_u16(c0.val[3], c1.val[3]);

d0.val[0] = vorrq_u16(d0.val[0], d1.val[0]);
d0.val[1] = vorrq_u16(d0.val[1], d1.val[1]);
d0.val[2] = vorrq_u16(d0.val[2], d1.val[2]);
d0.val[3] = vorrq_u16(d0.val[3], d1.val[3]);
x0.val[0] = vorrq_u16(x0.val[0], x1.val[0]);
x0.val[1] = vorrq_u16(x0.val[1], x1.val[1]);
x0.val[2] = vorrq_u16(x0.val[2], x1.val[2]);
x0.val[3] = vorrq_u16(x0.val[3], x1.val[3]);

c0.val[0] = vorrq_u16(c0.val[0], d0.val[0]);
c0.val[2] = vorrq_u16(c0.val[2], d0.val[2]);
c0.val[1] = vorrq_u16(c0.val[1], d0.val[1]);
c0.val[3] = vorrq_u16(c0.val[3], d0.val[3]);
c0.val[0] = vorrq_u16(c0.val[0], x0.val[0]);
c0.val[1] = vorrq_u16(c0.val[1], x0.val[1]);
c0.val[2] = vorrq_u16(c0.val[2], x0.val[2]);
c0.val[3] = vorrq_u16(c0.val[3], x0.val[3]);

c0.val[0] = vorrq_u16(c0.val[0], c0.val[2]);
c0.val[1] = vorrq_u16(c0.val[1], c0.val[3]);
Expand Down
34 changes: 17 additions & 17 deletions src/sig/falcon/pqclean_falcon-512_aarch64/poly_int.c
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ int PQCLEAN_FALCON512_AARCH64_poly_int16_to_int8(int8_t G[FALCON_N], const int16
// Total SIMD registers: 32
int16x8x4_t a, f; // 8
int16x8x4_t d0, d1; // 8
uint16x8x4_t c0, c1; // 8
uint16x8x4_t c0, c1, x0, x1; // 16
uint16x8x2_t e; // 2
int8x16x4_t g; // 4
int16x8_t neon_127, neon__127, neon_q_2, neon__q_2; // 4
Expand Down Expand Up @@ -361,30 +361,30 @@ int PQCLEAN_FALCON512_AARCH64_poly_int16_to_int8(int8_t G[FALCON_N], const int16
c1.val[3] = vcgtq_s16(a.val[3], neon_127);

// -127 > f ? 1 : 0
d0.val[0] = vcgtq_s16(neon__127, f.val[0]);
d0.val[1] = vcgtq_s16(neon__127, f.val[1]);
d0.val[2] = vcgtq_s16(neon__127, f.val[2]);
d0.val[3] = vcgtq_s16(neon__127, f.val[3]);
x0.val[0] = vcgtq_s16(neon__127, f.val[0]);
x0.val[1] = vcgtq_s16(neon__127, f.val[1]);
x0.val[2] = vcgtq_s16(neon__127, f.val[2]);
x0.val[3] = vcgtq_s16(neon__127, f.val[3]);
// f > 127 ? 1 : 0
d1.val[0] = vcgtq_s16(f.val[0], neon_127);
d1.val[1] = vcgtq_s16(f.val[1], neon_127);
d1.val[2] = vcgtq_s16(f.val[2], neon_127);
d1.val[3] = vcgtq_s16(f.val[3], neon_127);
x1.val[0] = vcgtq_s16(f.val[0], neon_127);
x1.val[1] = vcgtq_s16(f.val[1], neon_127);
x1.val[2] = vcgtq_s16(f.val[2], neon_127);
x1.val[3] = vcgtq_s16(f.val[3], neon_127);

c0.val[0] = vorrq_u16(c0.val[0], c1.val[0]);
c0.val[1] = vorrq_u16(c0.val[1], c1.val[1]);
c0.val[2] = vorrq_u16(c0.val[2], c1.val[2]);
c0.val[3] = vorrq_u16(c0.val[3], c1.val[3]);

d0.val[0] = vorrq_u16(d0.val[0], d1.val[0]);
d0.val[1] = vorrq_u16(d0.val[1], d1.val[1]);
d0.val[2] = vorrq_u16(d0.val[2], d1.val[2]);
d0.val[3] = vorrq_u16(d0.val[3], d1.val[3]);
x0.val[0] = vorrq_u16(x0.val[0], x1.val[0]);
x0.val[1] = vorrq_u16(x0.val[1], x1.val[1]);
x0.val[2] = vorrq_u16(x0.val[2], x1.val[2]);
x0.val[3] = vorrq_u16(x0.val[3], x1.val[3]);

c0.val[0] = vorrq_u16(c0.val[0], d0.val[0]);
c0.val[2] = vorrq_u16(c0.val[2], d0.val[2]);
c0.val[1] = vorrq_u16(c0.val[1], d0.val[1]);
c0.val[3] = vorrq_u16(c0.val[3], d0.val[3]);
c0.val[0] = vorrq_u16(c0.val[0], x0.val[0]);
c0.val[1] = vorrq_u16(c0.val[1], x0.val[1]);
c0.val[2] = vorrq_u16(c0.val[2], x0.val[2]);
c0.val[3] = vorrq_u16(c0.val[3], x0.val[3]);

c0.val[0] = vorrq_u16(c0.val[0], c0.val[2]);
c0.val[1] = vorrq_u16(c0.val[1], c0.val[3]);
Expand Down

0 comments on commit bd59550

Please sign in to comment.