|
1 | 1 | /* |
2 | | - * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. |
| 2 | + * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. |
3 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | 4 | * |
5 | 5 | * This code is free software; you can redistribute it and/or modify it |
@@ -158,110 +158,131 @@ private static void lanes2Bytes(long[] m, byte[] s) { |
158 | 158 | } |
159 | 159 | } |
160 | 160 |
|
161 | | - /** |
162 | | - * Step mapping Theta as defined in section 3.2.1 . |
163 | | - */ |
164 | | - private static long[] smTheta(long[] a) { |
165 | | - long c0 = a[0]^a[5]^a[10]^a[15]^a[20]; |
166 | | - long c1 = a[1]^a[6]^a[11]^a[16]^a[21]; |
167 | | - long c2 = a[2]^a[7]^a[12]^a[17]^a[22]; |
168 | | - long c3 = a[3]^a[8]^a[13]^a[18]^a[23]; |
169 | | - long c4 = a[4]^a[9]^a[14]^a[19]^a[24]; |
170 | | - long d0 = c4 ^ Long.rotateLeft(c1, 1); |
171 | | - long d1 = c0 ^ Long.rotateLeft(c2, 1); |
172 | | - long d2 = c1 ^ Long.rotateLeft(c3, 1); |
173 | | - long d3 = c2 ^ Long.rotateLeft(c4, 1); |
174 | | - long d4 = c3 ^ Long.rotateLeft(c0, 1); |
175 | | - for (int y = 0; y < a.length; y += DM) { |
176 | | - a[y] ^= d0; |
177 | | - a[y+1] ^= d1; |
178 | | - a[y+2] ^= d2; |
179 | | - a[y+3] ^= d3; |
180 | | - a[y+4] ^= d4; |
181 | | - } |
182 | | - return a; |
183 | | - } |
184 | | - |
185 | | - /** |
186 | | - * Merged Step mapping Rho (section 3.2.2) and Pi (section 3.2.3). |
187 | | - * for performance. Optimization is achieved by precalculating |
188 | | - * shift constants for the following loop |
189 | | - * int xNext, yNext; |
190 | | - * for (int t = 0, x = 1, y = 0; t <= 23; t++, x = xNext, y = yNext) { |
191 | | - * int numberOfShift = ((t + 1)*(t + 2)/2) % 64; |
192 | | - * a[y][x] = Long.rotateLeft(a[y][x], numberOfShift); |
193 | | - * xNext = y; |
194 | | - * yNext = (2 * x + 3 * y) % DM; |
195 | | - * } |
196 | | - * and with inplace permutation. |
197 | | - */ |
198 | | - private static long[] smPiRho(long[] a) { |
199 | | - long tmp = Long.rotateLeft(a[10], 3); |
200 | | - a[10] = Long.rotateLeft(a[1], 1); |
201 | | - a[1] = Long.rotateLeft(a[6], 44); |
202 | | - a[6] = Long.rotateLeft(a[9], 20); |
203 | | - a[9] = Long.rotateLeft(a[22], 61); |
204 | | - a[22] = Long.rotateLeft(a[14], 39); |
205 | | - a[14] = Long.rotateLeft(a[20], 18); |
206 | | - a[20] = Long.rotateLeft(a[2], 62); |
207 | | - a[2] = Long.rotateLeft(a[12], 43); |
208 | | - a[12] = Long.rotateLeft(a[13], 25); |
209 | | - a[13] = Long.rotateLeft(a[19], 8); |
210 | | - a[19] = Long.rotateLeft(a[23], 56); |
211 | | - a[23] = Long.rotateLeft(a[15], 41); |
212 | | - a[15] = Long.rotateLeft(a[4], 27); |
213 | | - a[4] = Long.rotateLeft(a[24], 14); |
214 | | - a[24] = Long.rotateLeft(a[21], 2); |
215 | | - a[21] = Long.rotateLeft(a[8], 55); |
216 | | - a[8] = Long.rotateLeft(a[16], 45); |
217 | | - a[16] = Long.rotateLeft(a[5], 36); |
218 | | - a[5] = Long.rotateLeft(a[3], 28); |
219 | | - a[3] = Long.rotateLeft(a[18], 21); |
220 | | - a[18] = Long.rotateLeft(a[17], 15); |
221 | | - a[17] = Long.rotateLeft(a[11], 10); |
222 | | - a[11] = Long.rotateLeft(a[7], 6); |
223 | | - a[7] = tmp; |
224 | | - return a; |
225 | | - } |
226 | | - |
227 | | - /** |
228 | | - * Step mapping Chi as defined in section 3.2.4. |
229 | | - */ |
230 | | - private static long[] smChi(long[] a) { |
231 | | - for (int y = 0; y < a.length; y+=DM) { |
232 | | - long ay0 = a[y]; |
233 | | - long ay1 = a[y+1]; |
234 | | - long ay2 = a[y+2]; |
235 | | - long ay3 = a[y+3]; |
236 | | - long ay4 = a[y+4]; |
237 | | - a[y] = ay0 ^ ((~ay1) & ay2); |
238 | | - a[y+1] = ay1 ^ ((~ay2) & ay3); |
239 | | - a[y+2] = ay2 ^ ((~ay3) & ay4); |
240 | | - a[y+3] = ay3 ^ ((~ay4) & ay0); |
241 | | - a[y+4] = ay4 ^ ((~ay0) & ay1); |
242 | | - } |
243 | | - return a; |
244 | | - } |
245 | | - |
246 | | - /** |
247 | | - * Step mapping Iota as defined in section 3.2.5. |
248 | | - */ |
249 | | - private static long[] smIota(long[] a, int rndIndex) { |
250 | | - a[0] ^= RC_CONSTANTS[rndIndex]; |
251 | | - return a; |
252 | | - } |
253 | | - |
254 | 161 | /** |
255 | 162 | * The function Keccak as defined in section 5.2 with |
256 | 163 | * width b = 1600 (rate r = b - c) and capacity c = (digest length x 2). |
257 | 164 | */ |
258 | 165 | private void keccak() { |
259 | 166 | // convert the 200-byte state into 25 lanes |
260 | 167 | bytes2Lanes(state, lanes); |
| 168 | + |
| 169 | + long a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12; |
| 170 | + long a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24; |
| 171 | + // move data into local variables |
| 172 | + a0 = lanes[0]; a1 = lanes[1]; a2 = lanes[2]; a3 = lanes[3]; a4 = lanes[4]; |
| 173 | + a5 = lanes[5]; a6 = lanes[6]; a7 = lanes[7]; a8 = lanes[8]; a9 = lanes[9]; |
| 174 | + a10 = lanes[10]; a11 = lanes[11]; a12 = lanes[12]; a13 = lanes[13]; a14 = lanes[14]; |
| 175 | + a15 = lanes[15]; a16 = lanes[16]; a17 = lanes[17]; a18 = lanes[18]; a19 = lanes[19]; |
| 176 | + a20 = lanes[20]; a21 = lanes[21]; a22 = lanes[22]; a23 = lanes[23]; a24 = lanes[24]; |
| 177 | + |
261 | 178 | // process the lanes through step mappings |
262 | 179 | for (int ir = 0; ir < NR; ir++) { |
263 | | - smIota(smChi(smPiRho(smTheta(lanes))), ir); |
| 180 | + // Step mapping Theta as defined in section 3.2.1. |
| 181 | + long c0 = a0^a5^a10^a15^a20; |
| 182 | + long c1 = a1^a6^a11^a16^a21; |
| 183 | + long c2 = a2^a7^a12^a17^a22; |
| 184 | + long c3 = a3^a8^a13^a18^a23; |
| 185 | + long c4 = a4^a9^a14^a19^a24; |
| 186 | + long d0 = c4 ^ Long.rotateLeft(c1, 1); |
| 187 | + long d1 = c0 ^ Long.rotateLeft(c2, 1); |
| 188 | + long d2 = c1 ^ Long.rotateLeft(c3, 1); |
| 189 | + long d3 = c2 ^ Long.rotateLeft(c4, 1); |
| 190 | + long d4 = c3 ^ Long.rotateLeft(c0, 1); |
| 191 | + a0 ^= d0; a1 ^= d1; a2 ^= d2; a3 ^= d3; a4 ^= d4; |
| 192 | + a5 ^= d0; a6 ^= d1; a7 ^= d2; a8 ^= d3; a9 ^= d4; |
| 193 | + a10 ^= d0; a11 ^= d1; a12 ^= d2; a13 ^= d3; a14 ^= d4; |
| 194 | + a15 ^= d0; a16 ^= d1; a17 ^= d2; a18 ^= d3; a19 ^= d4; |
| 195 | + a20 ^= d0; a21 ^= d1; a22 ^= d2; a23 ^= d3; a24 ^= d4; |
| 196 | + |
| 197 | + /** |
| 198 | + * Merged Step mapping Rho (section 3.2.2) and Pi (section 3.2.3) |
| 199 | + * for performance. Optimization is achieved by precalculating |
| 200 | + * shift constants for the following loop |
| 201 | + * int xNext, yNext; |
| 202 | + * for (int t = 0, x = 1, y = 0; t <= 23; t++, x = xNext, y = yNext) { |
| 203 | + * int numberOfShift = ((t + 1)*(t + 2)/2) % 64; |
| 204 | + * a[y][x] = Long.rotateLeft(a[y][x], numberOfShift); |
| 205 | + * xNext = y; |
| 206 | + * yNext = (2 * x + 3 * y) % DM; |
| 207 | + * } |
| 208 | + * and with in-place permutation. |
| 209 | + */ |
| 210 | + long ay = Long.rotateLeft(a10, 3); |
| 211 | + a10 = Long.rotateLeft(a1, 1); |
| 212 | + a1 = Long.rotateLeft(a6, 44); |
| 213 | + a6 = Long.rotateLeft(a9, 20); |
| 214 | + a9 = Long.rotateLeft(a22, 61); |
| 215 | + a22 = Long.rotateLeft(a14, 39); |
| 216 | + a14 = Long.rotateLeft(a20, 18); |
| 217 | + a20 = Long.rotateLeft(a2, 62); |
| 218 | + a2 = Long.rotateLeft(a12, 43); |
| 219 | + a12 = Long.rotateLeft(a13, 25); |
| 220 | + a13 = Long.rotateLeft(a19, 8); |
| 221 | + a19 = Long.rotateLeft(a23, 56); |
| 222 | + a23 = Long.rotateLeft(a15, 41); |
| 223 | + a15 = Long.rotateLeft(a4, 27); |
| 224 | + a4 = Long.rotateLeft(a24, 14); |
| 225 | + a24 = Long.rotateLeft(a21, 2); |
| 226 | + a21 = Long.rotateLeft(a8, 55); |
| 227 | + a8 = Long.rotateLeft(a16, 45); |
| 228 | + a16 = Long.rotateLeft(a5, 36); |
| 229 | + a5 = Long.rotateLeft(a3, 28); |
| 230 | + a3 = Long.rotateLeft(a18, 21); |
| 231 | + a18 = Long.rotateLeft(a17, 15); |
| 232 | + a17 = Long.rotateLeft(a11, 10); |
| 233 | + a11 = Long.rotateLeft(a7, 6); |
| 234 | + a7 = ay; |
| 235 | + |
| 236 | + // Step mapping Chi as defined in section 3.2.4. |
| 237 | + long tmp0 = a0; |
| 238 | + long tmp1 = a1; |
| 239 | + long tmp2 = a2; |
| 240 | + long tmp3 = a3; |
| 241 | + long tmp4 = a4; |
| 242 | + a0 = tmp0 ^ ((~tmp1) & tmp2); |
| 243 | + a1 = tmp1 ^ ((~tmp2) & tmp3); |
| 244 | + a2 = tmp2 ^ ((~tmp3) & tmp4); |
| 245 | + a3 = tmp3 ^ ((~tmp4) & tmp0); |
| 246 | + a4 = tmp4 ^ ((~tmp0) & tmp1); |
| 247 | + |
| 248 | + tmp0 = a5; tmp1 = a6; tmp2 = a7; tmp3 = a8; tmp4 = a9; |
| 249 | + a5 = tmp0 ^ ((~tmp1) & tmp2); |
| 250 | + a6 = tmp1 ^ ((~tmp2) & tmp3); |
| 251 | + a7 = tmp2 ^ ((~tmp3) & tmp4); |
| 252 | + a8 = tmp3 ^ ((~tmp4) & tmp0); |
| 253 | + a9 = tmp4 ^ ((~tmp0) & tmp1); |
| 254 | + |
| 255 | + tmp0 = a10; tmp1 = a11; tmp2 = a12; tmp3 = a13; tmp4 = a14; |
| 256 | + a10 = tmp0 ^ ((~tmp1) & tmp2); |
| 257 | + a11 = tmp1 ^ ((~tmp2) & tmp3); |
| 258 | + a12 = tmp2 ^ ((~tmp3) & tmp4); |
| 259 | + a13 = tmp3 ^ ((~tmp4) & tmp0); |
| 260 | + a14 = tmp4 ^ ((~tmp0) & tmp1); |
| 261 | + |
| 262 | + tmp0 = a15; tmp1 = a16; tmp2 = a17; tmp3 = a18; tmp4 = a19; |
| 263 | + a15 = tmp0 ^ ((~tmp1) & tmp2); |
| 264 | + a16 = tmp1 ^ ((~tmp2) & tmp3); |
| 265 | + a17 = tmp2 ^ ((~tmp3) & tmp4); |
| 266 | + a18 = tmp3 ^ ((~tmp4) & tmp0); |
| 267 | + a19 = tmp4 ^ ((~tmp0) & tmp1); |
| 268 | + |
| 269 | + tmp0 = a20; tmp1 = a21; tmp2 = a22; tmp3 = a23; tmp4 = a24; |
| 270 | + a20 = tmp0 ^ ((~tmp1) & tmp2); |
| 271 | + a21 = tmp1 ^ ((~tmp2) & tmp3); |
| 272 | + a22 = tmp2 ^ ((~tmp3) & tmp4); |
| 273 | + a23 = tmp3 ^ ((~tmp4) & tmp0); |
| 274 | + a24 = tmp4 ^ ((~tmp0) & tmp1); |
| 275 | + |
| 276 | + // Step mapping Iota as defined in section 3.2.5. |
| 277 | + a0 ^= RC_CONSTANTS[ir]; |
264 | 278 | } |
| 279 | + |
| 280 | + lanes[0] = a0; lanes[1] = a1; lanes[2] = a2; lanes[3] = a3; lanes[4] = a4; |
| 281 | + lanes[5] = a5; lanes[6] = a6; lanes[7] = a7; lanes[8] = a8; lanes[9] = a9; |
| 282 | + lanes[10] = a10; lanes[11] = a11; lanes[12] = a12; lanes[13] = a13; lanes[14] = a14; |
| 283 | + lanes[15] = a15; lanes[16] = a16; lanes[17] = a17; lanes[18] = a18; lanes[19] = a19; |
| 284 | + lanes[20] = a20; lanes[21] = a21; lanes[22] = a22; lanes[23] = a23; lanes[24] = a24; |
| 285 | + |
265 | 286 | // convert the resulting 25 lanes back into 200-byte state |
266 | 287 | lanes2Bytes(lanes, state); |
267 | 288 | } |
|
0 commit comments