1616* Arguments: - unsigned char *r: pointer to output byte array (of length KYBER_POLYCOMPRESSEDBYTES)
1717* - const poly *a: pointer to input polynomial to be serialized
1818*************************************************/
/* Review note: this span is a rendered diff of poly_compress, not plain C.
 * A line number followed by - marks the removed implementation, a line
 * number followed by + marks its replacement, and two fused numbers mark
 * unchanged context.
 *
 * Purpose (replacement lines): reduce every coefficient of a to 4 bits
 * (KYBER_POLYCOMPRESSEDBYTES == 128) or 5 bits (== 160) and pack eight
 * reduced values at a time into 4 or 5 bytes of r. The replacement keeps
 * the old rounding formula as a comment and evaluates it with a multiply
 * and a shift instead of the / KYBER_Q operator. The replacement signature
 * also takes a as pointer to const. */
19- void poly_compress (unsigned char * r , poly * a )
19+ void poly_compress (unsigned char * r , const poly * a )
2020{
21+ unsigned int i ,j ;
22+ int16_t u ;
23+ uint32_t d0 ;
2124 uint8_t t [8 ];
22- int i ,j ,k = 0 ;
2325
2426#if (KYBER_POLYCOMPRESSEDBYTES == 128 )
25- for (i = 0 ;i < KYBER_N ;i += 8 )
26- {
27- for (j = 0 ;j < 8 ;j ++ )
28- t [j ] = ((((uint32_t )a -> coeffs [i + j ] << 4 ) + KYBER_Q /2 ) / KYBER_Q ) & 15 ;
29-
30- r [k ] = t [0 ] | (t [1 ] << 4 );
31- r [k + 1 ] = t [2 ] | (t [3 ] << 4 );
32- r [k + 2 ] = t [4 ] | (t [5 ] << 4 );
33- r [k + 3 ] = t [6 ] | (t [7 ] << 4 );
34- k += 4 ;
/* Replacement loop: i counts groups of eight coefficients, so the
 * coefficient index is 8 * i + j. */
27+ for (i = 0 ;i < KYBER_N /8 ;i ++ ) {
28+ for (j = 0 ;j < 8 ;j ++ ) {
29+ // map to positive standard representatives
30+ u = a -> coeffs [8 * i + j ];
/* (u >> 15) is all ones for negative u when >> on a signed value is an
 * arithmetic shift (implementation-defined in C), so KYBER_Q is added
 * only to negative coefficients. */
31+ u += (u >> 15 ) & KYBER_Q ;
32+ /* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */
/* Division-free evaluation of the formula in the comment above: add a
 * rounding offset, multiply by a fixed constant, shift right by 28.
 * d0 is uint32_t, so the product is taken modulo 2^32 and the shift
 * leaves its top 4 bits.
 * NOTE(review): 80635 looks like floor(2^28 / 3329); confirm that
 * KYBER_Q is 3329 in this build. */
33+ d0 = u << 4 ;
34+ d0 += 1665 ;
35+ d0 *= 80635 ;
36+ d0 >>= 28 ;
37+ t [j ] = d0 & 0xf ;
38+ }
39+
/* Two 4-bit values per output byte, low nibble first. r itself is
 * advanced by 4, replacing the running index k of the removed lines. */
40+ r [0 ] = t [0 ] | (t [1 ] << 4 );
41+ r [1 ] = t [2 ] | (t [3 ] << 4 );
42+ r [2 ] = t [4 ] | (t [5 ] << 4 );
43+ r [3 ] = t [6 ] | (t [7 ] << 4 );
44+ r += 4 ;
3545 }
3646#elif (KYBER_POLYCOMPRESSEDBYTES == 160 )
37- for (i = 0 ;i < KYBER_N ;i += 8 )
38- {
39- for (j = 0 ;j < 8 ;j ++ )
40- t [j ] = ((((uint32_t )a -> coeffs [i + j ] << 5 ) + KYBER_Q /2 ) / KYBER_Q ) & 31 ;
41-
42- r [k ] = t [0 ] | (t [1 ] << 5 );
43- r [k + 1 ] = (t [1 ] >> 3 ) | (t [2 ] << 2 ) | (t [3 ] << 7 );
44- r [k + 2 ] = (t [3 ] >> 1 ) | (t [4 ] << 4 );
45- r [k + 3 ] = (t [4 ] >> 4 ) | (t [5 ] << 1 ) | (t [6 ] << 6 );
46- r [k + 4 ] = (t [6 ] >> 2 ) | (t [7 ] << 3 );
47- k += 5 ;
/* Same structure as the 128-byte branch, with 5-bit results. */
47+ for (i = 0 ;i < KYBER_N /8 ;i ++ ) {
48+ for (j = 0 ;j < 8 ;j ++ ) {
49+ // map to positive standard representatives
50+ u = a -> coeffs [8 * i + j ];
51+ u += (u >> 15 ) & KYBER_Q ;
52+ /* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */
/* Multiply-and-shift form of the formula in the comment above; the
 * shift by 27 leaves the top 5 bits of the 32-bit product.
 * NOTE(review): 40318 looks like 2^27 / 3329 rounded up; confirm
 * against the value of KYBER_Q. */
53+ d0 = u << 5 ;
54+ d0 += 1664 ;
55+ d0 *= 40318 ;
56+ d0 >>= 27 ;
57+ t [j ] = d0 & 0x1f ;
58+ }
59+
/* Eight 5-bit values are spread over five bytes; values that straddle a
 * byte boundary contribute their low bits to one byte and their high
 * bits to the next. */
60+ r [0 ] = (t [0 ] >> 0 ) | (t [1 ] << 5 );
61+ r [1 ] = (t [1 ] >> 3 ) | (t [2 ] << 2 ) | (t [3 ] << 7 );
62+ r [2 ] = (t [3 ] >> 1 ) | (t [4 ] << 4 );
63+ r [3 ] = (t [4 ] >> 4 ) | (t [5 ] << 1 ) | (t [6 ] << 6 );
64+ r [4 ] = (t [6 ] >> 2 ) | (t [7 ] << 3 );
65+ r += 5 ;
4866 }
4967#else
/* Any other output size is rejected at compile time; the replacement
 * message no longer lists 96. */
50- #error "KYBER_POLYCOMPRESSEDBYTES needs to be in {96, 128, 160}"
68+ #error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}"
5169#endif
5270}
5371
@@ -107,13 +125,24 @@ void poly_decompress(poly *r, const unsigned char *a)
107125**************************************************/
108126void poly_packcompress (unsigned char * r , poly * a , int i ) {
109127 int j , k ;
128+ uint64_t d0 ;
110129
111130#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352 ))
112131 uint16_t t [8 ];
113132
114133 for (j = 0 ;j < KYBER_N /8 ;j ++ ) {
115- for (k = 0 ;k < 8 ;k ++ )
116- t [k ] = ((((uint32_t )a -> coeffs [8 * j + k ] << 11 ) + KYBER_Q /2 ) / KYBER_Q ) & 0x7ff ;
134+ for (k = 0 ;k < 8 ;k ++ ) {
135+ t [k ] = a -> coeffs [8 * j + k ];
136+ t [k ] += ((int16_t )t [k ] >> 15 ) & KYBER_Q ;
137+ /* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */
138+ d0 = t [k ];
139+ d0 <<= 11 ;
140+ d0 += 1664 ;
141+ d0 *= 645084 ;
142+ d0 >>= 31 ;
143+ t [k ] = d0 & 0x7ff ;
144+ }
145+
117146
118147 r [352 * i + 11 * j + 0 ] = t [0 ] & 0xff ;
119148 r [352 * i + 11 * j + 1 ] = (t [0 ] >> 8 ) | ((t [1 ] & 0x1f ) << 3 );
@@ -131,9 +160,17 @@ void poly_packcompress(unsigned char *r, poly *a, int i) {
131160 uint16_t t [4 ];
132161
133162 for (j = 0 ; j < KYBER_N / 4 ; j ++ ) {
134- for (k = 0 ; k < 4 ; k ++ )
135- t [k ] = ((((uint32_t )a -> coeffs [4 * j + k ] << 10 ) + KYBER_Q / 2 ) / KYBER_Q ) & 0x3ff ;
136-
163+ for (k = 0 ;k < 4 ;k ++ ) {
164+ t [k ] = a -> coeffs [4 * j + k ];
165+ t [k ] += ((int16_t )t [k ] >> 15 ) & KYBER_Q ;
166+ /* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */
167+ d0 = t [k ];
168+ d0 <<= 10 ;
169+ d0 += 1665 ;
170+ d0 *= 1290167 ;
171+ d0 >>= 32 ;
172+ t [k ] = d0 & 0x3ff ;
173+ }
137174 r [320 * i + 5 * j + 0 ] = t [0 ] & 0xff ;
138175 r [320 * i + 5 * j + 1 ] = (t [0 ] >> 8 ) | ((t [1 ] & 0x3f ) << 2 );
139176 r [320 * i + 5 * j + 2 ] = ((t [1 ] >> 6 ) | ((t [2 ] & 0x0f ) << 4 )) & 0xff ;
@@ -194,14 +231,24 @@ void poly_unpackdecompress(poly *r, const unsigned char *a, int i) {
194231**************************************************/
/* Review note: this span is a rendered diff of cmp_poly_compress. A line
 * number followed by - marks removed code, a line number followed by +
 * marks its replacement, two fused numbers mark unchanged context, and the
 * @@ lines are hunk headers where the diff elides unchanged lines.
 *
 * Purpose: recompute the compressed encoding of a group by group and
 * compare it with the bytes in r without storing it.
 *   r - bytes to compare against (read only)
 *   a - polynomial whose compressed form is recomputed
 * Returns rc: every visible comparison ORs the XOR of an expected and a
 * recomputed byte into rc, so rc is 0 only when all of them match. */
195232int cmp_poly_compress (const unsigned char * r , poly * a ) {
196233 unsigned char rc = 0 ;
234+ int16_t u ;
235+ uint32_t d0 ;
197236 uint8_t t [8 ];
198237 int i , j , k = 0 ;
199238
200239#if (KYBER_POLYCOMPRESSEDBYTES == 128 )
/* i is a coefficient offset here: it advances by 8 up to KYBER_N. */
201240 for (i = 0 ; i < KYBER_N ; i += 8 ) {
202- for (j = 0 ; j < 8 ; j ++ )
203- t [j ] = ((((uint32_t )a -> coeffs [i + j ] << 4 ) + KYBER_Q / 2 ) / KYBER_Q ) & 15 ;
204-
241+ for (j = 0 ;j < 8 ;j ++ ) {
242+ // map to positive standard representatives
/* Index with i + j: the outer loop already steps i by 8, so scaling i
 * by 8 again would read beyond the coefficients being compared. */
243+ u = a -> coeffs [i + j ];
/* (u >> 15) is all ones for negative u when >> on a signed value is an
 * arithmetic shift (implementation-defined in C), so KYBER_Q is added
 * only to negative coefficients. */
244+ u += (u >> 15 ) & KYBER_Q ;
245+ /* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */
/* Multiply-and-shift form of the formula in the comment above, so no
 * / KYBER_Q operator is executed. d0 is uint32_t; the shift by 28
 * leaves the top 4 bits of the 32-bit product.
 * NOTE(review): 80635 looks like floor(2^28 / 3329); confirm that
 * KYBER_Q is 3329 in this build. */
246+ d0 = u << 4 ;
247+ d0 += 1665 ;
248+ d0 *= 80635 ;
249+ d0 >>= 28 ;
250+ t [j ] = d0 & 0xf ;
251+ }
/* Accumulate differences instead of branching on them. */
205252 rc |= r [k ] ^ (t [0 ] | (t [1 ] << 4 ));
206253 rc |= r [k + 1 ] ^ (t [2 ] | (t [3 ] << 4 ));
207254 rc |= r [k + 2 ] ^ (t [4 ] | (t [5 ] << 4 ));
@@ -211,8 +258,18 @@ int cmp_poly_compress(const unsigned char *r, poly *a) {
211258#elif (KYBER_POLYCOMPRESSEDBYTES == 160 )
/* Same loop shape as the 128-byte branch: i advances by 8. */
212259 for (i = 0 ;i < KYBER_N ;i += 8 )
213260 {
214- for (j = 0 ;j < 8 ;j ++ )
215- t [j ] = ((((uint32_t )a -> coeffs [i + j ] << 5 ) + KYBER_Q /2 ) / KYBER_Q ) & 31 ;
261+ for (j = 0 ;j < 8 ;j ++ ) {
262+ // map to positive standard representatives
/* Index with i + j for the same reason as in the 128-byte branch. */
263+ u = a -> coeffs [i + j ];
264+ u += (u >> 15 ) & KYBER_Q ;
265+ /* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */
/* Multiply-and-shift form of the formula in the comment above; the
 * shift by 27 leaves the top 5 bits of the 32-bit product.
 * NOTE(review): 40318 looks like 2^27 / 3329 rounded up; confirm
 * against the value of KYBER_Q. */
266+ d0 = u << 5 ;
267+ d0 += 1664 ;
268+ d0 *= 40318 ;
269+ d0 >>= 27 ;
270+ t [j ] = d0 & 0x1f ;
271+ }
272+
216273
217274 rc |= r [k ] ^ (t [0 ] | (t [1 ] << 5 ));
218275 rc |= r [k + 1 ] ^ ((t [1 ] >> 3 ) | (t [2 ] << 2 ) | (t [3 ] << 7 ));
@@ -222,7 +279,7 @@ int cmp_poly_compress(const unsigned char *r, poly *a) {
222279 k += 5 ;
223280 }
224281#else
/* Any other output size is rejected at compile time; the replacement
 * message no longer lists 96. */
225- #error "KYBER_POLYCOMPRESSEDBYTES needs to be in {96, 128, 160}"
282+ #error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}"
226283#endif
227284 return rc ;
228285}
@@ -241,13 +298,23 @@ int cmp_poly_compress(const unsigned char *r, poly *a) {
241298int cmp_poly_packcompress (const unsigned char * r , poly * a , int i ) {
242299 unsigned char rc = 0 ;
243300 int j , k ;
301+ uint64_t d0 ;
244302
245303#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352 ))
246304 uint16_t t [8 ];
247305 for (j = 0 ;j < KYBER_N /8 ;j ++ )
248306 {
249- for (k = 0 ;k < 8 ;k ++ )
250- t [k ] = ((((uint32_t )a -> coeffs [8 * j + k ] << 11 ) + KYBER_Q /2 ) / KYBER_Q ) & 0x7ff ;
307+ for (k = 0 ;k < 8 ;k ++ ) {
308+ t [k ] = a -> coeffs [8 * j + k ];
309+ t [k ] += ((int16_t )t [k ] >> 15 ) & KYBER_Q ;
310+ /* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */
311+ d0 = t [k ];
312+ d0 <<= 11 ;
313+ d0 += 1664 ;
314+ d0 *= 645084 ;
315+ d0 >>= 31 ;
316+ t [k ] = d0 & 0x7ff ;
317+ }
251318
252319 rc |= r [352 * i + 11 * j + 0 ] ^ (t [0 ] & 0xff );
253320 rc |= r [352 * i + 11 * j + 1 ] ^ ((t [0 ] >> 8 ) | ((t [1 ] & 0x1f ) << 3 ));
@@ -264,8 +331,17 @@ int cmp_poly_packcompress(const unsigned char *r, poly *a, int i) {
264331#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320 ))
265332 uint16_t t [4 ];
266333 for (j = 0 ; j < KYBER_N / 4 ; j ++ ) {
267- for (k = 0 ; k < 4 ; k ++ )
268- t [k ] = ((((uint32_t )a -> coeffs [4 * j + k ] << 10 ) + KYBER_Q / 2 ) / KYBER_Q ) & 0x3ff ;
334+ for (k = 0 ;k < 4 ;k ++ ) {
335+ t [k ] = a -> coeffs [4 * j + k ];
336+ t [k ] += ((int16_t )t [k ] >> 15 ) & KYBER_Q ;
337+ /* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */
338+ d0 = t [k ];
339+ d0 <<= 10 ;
340+ d0 += 1665 ;
341+ d0 *= 1290167 ;
342+ d0 >>= 32 ;
343+ t [k ] = d0 & 0x3ff ;
344+ }
269345
270346 rc |= r [320 * i + 5 * j + 0 ] ^ (t [0 ] & 0xff );
271347 rc |= r [320 * i + 5 * j + 1 ] ^ ((t [0 ] >> 8 ) | ((t [1 ] & 0x3f ) << 2 ));
0 commit comments