Skip to content

Commit c74b8f4

Browse files
author
Boris Ulasevich
committed
8275914: SHA3: changing java implementation to help C2 create high-performance code
Reviewed-by: ascarpino, phh
1 parent a18beb4 commit c74b8f4

File tree

1 file changed

+116
-95
lines changed
  • src/java.base/share/classes/sun/security/provider

1 file changed

+116
-95
lines changed

src/java.base/share/classes/sun/security/provider/SHA3.java

Lines changed: 116 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -158,110 +158,131 @@ private static void lanes2Bytes(long[] m, byte[] s) {
158158
}
159159
}
160160

161-
/**
162-
* Step mapping Theta as defined in section 3.2.1 .
163-
*/
164-
private static long[] smTheta(long[] a) {
165-
long c0 = a[0]^a[5]^a[10]^a[15]^a[20];
166-
long c1 = a[1]^a[6]^a[11]^a[16]^a[21];
167-
long c2 = a[2]^a[7]^a[12]^a[17]^a[22];
168-
long c3 = a[3]^a[8]^a[13]^a[18]^a[23];
169-
long c4 = a[4]^a[9]^a[14]^a[19]^a[24];
170-
long d0 = c4 ^ Long.rotateLeft(c1, 1);
171-
long d1 = c0 ^ Long.rotateLeft(c2, 1);
172-
long d2 = c1 ^ Long.rotateLeft(c3, 1);
173-
long d3 = c2 ^ Long.rotateLeft(c4, 1);
174-
long d4 = c3 ^ Long.rotateLeft(c0, 1);
175-
for (int y = 0; y < a.length; y += DM) {
176-
a[y] ^= d0;
177-
a[y+1] ^= d1;
178-
a[y+2] ^= d2;
179-
a[y+3] ^= d3;
180-
a[y+4] ^= d4;
181-
}
182-
return a;
183-
}
184-
185-
/**
186-
* Merged Step mapping Rho (section 3.2.2) and Pi (section 3.2.3).
187-
* for performance. Optimization is achieved by precalculating
188-
* shift constants for the following loop
189-
* int xNext, yNext;
190-
* for (int t = 0, x = 1, y = 0; t <= 23; t++, x = xNext, y = yNext) {
191-
* int numberOfShift = ((t + 1)*(t + 2)/2) % 64;
192-
* a[y][x] = Long.rotateLeft(a[y][x], numberOfShift);
193-
* xNext = y;
194-
* yNext = (2 * x + 3 * y) % DM;
195-
* }
196-
* and with inplace permutation.
197-
*/
198-
private static long[] smPiRho(long[] a) {
199-
long tmp = Long.rotateLeft(a[10], 3);
200-
a[10] = Long.rotateLeft(a[1], 1);
201-
a[1] = Long.rotateLeft(a[6], 44);
202-
a[6] = Long.rotateLeft(a[9], 20);
203-
a[9] = Long.rotateLeft(a[22], 61);
204-
a[22] = Long.rotateLeft(a[14], 39);
205-
a[14] = Long.rotateLeft(a[20], 18);
206-
a[20] = Long.rotateLeft(a[2], 62);
207-
a[2] = Long.rotateLeft(a[12], 43);
208-
a[12] = Long.rotateLeft(a[13], 25);
209-
a[13] = Long.rotateLeft(a[19], 8);
210-
a[19] = Long.rotateLeft(a[23], 56);
211-
a[23] = Long.rotateLeft(a[15], 41);
212-
a[15] = Long.rotateLeft(a[4], 27);
213-
a[4] = Long.rotateLeft(a[24], 14);
214-
a[24] = Long.rotateLeft(a[21], 2);
215-
a[21] = Long.rotateLeft(a[8], 55);
216-
a[8] = Long.rotateLeft(a[16], 45);
217-
a[16] = Long.rotateLeft(a[5], 36);
218-
a[5] = Long.rotateLeft(a[3], 28);
219-
a[3] = Long.rotateLeft(a[18], 21);
220-
a[18] = Long.rotateLeft(a[17], 15);
221-
a[17] = Long.rotateLeft(a[11], 10);
222-
a[11] = Long.rotateLeft(a[7], 6);
223-
a[7] = tmp;
224-
return a;
225-
}
226-
227-
/**
228-
* Step mapping Chi as defined in section 3.2.4.
229-
*/
230-
private static long[] smChi(long[] a) {
231-
for (int y = 0; y < a.length; y+=DM) {
232-
long ay0 = a[y];
233-
long ay1 = a[y+1];
234-
long ay2 = a[y+2];
235-
long ay3 = a[y+3];
236-
long ay4 = a[y+4];
237-
a[y] = ay0 ^ ((~ay1) & ay2);
238-
a[y+1] = ay1 ^ ((~ay2) & ay3);
239-
a[y+2] = ay2 ^ ((~ay3) & ay4);
240-
a[y+3] = ay3 ^ ((~ay4) & ay0);
241-
a[y+4] = ay4 ^ ((~ay0) & ay1);
242-
}
243-
return a;
244-
}
245-
246-
/**
247-
* Step mapping Iota as defined in section 3.2.5.
248-
*/
249-
private static long[] smIota(long[] a, int rndIndex) {
250-
a[0] ^= RC_CONSTANTS[rndIndex];
251-
return a;
252-
}
253-
254161
/**
255162
* The function Keccak as defined in section 5.2 with
256163
* width b = 1600 and capacity c = (digest length x 2).
257164
*/
258165
private void keccak() {
259166
// convert the 200-byte state into 25 lanes
260167
bytes2Lanes(state, lanes);
168+
169+
long a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12;
170+
long a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24;
171+
// move data into local variables
172+
a0 = lanes[0]; a1 = lanes[1]; a2 = lanes[2]; a3 = lanes[3]; a4 = lanes[4];
173+
a5 = lanes[5]; a6 = lanes[6]; a7 = lanes[7]; a8 = lanes[8]; a9 = lanes[9];
174+
a10 = lanes[10]; a11 = lanes[11]; a12 = lanes[12]; a13 = lanes[13]; a14 = lanes[14];
175+
a15 = lanes[15]; a16 = lanes[16]; a17 = lanes[17]; a18 = lanes[18]; a19 = lanes[19];
176+
a20 = lanes[20]; a21 = lanes[21]; a22 = lanes[22]; a23 = lanes[23]; a24 = lanes[24];
177+
261178
// process the lanes through step mappings
262179
for (int ir = 0; ir < NR; ir++) {
263-
smIota(smChi(smPiRho(smTheta(lanes))), ir);
180+
// Step mapping Theta as defined in section 3.2.1.
181+
long c0 = a0^a5^a10^a15^a20;
182+
long c1 = a1^a6^a11^a16^a21;
183+
long c2 = a2^a7^a12^a17^a22;
184+
long c3 = a3^a8^a13^a18^a23;
185+
long c4 = a4^a9^a14^a19^a24;
186+
long d0 = c4 ^ Long.rotateLeft(c1, 1);
187+
long d1 = c0 ^ Long.rotateLeft(c2, 1);
188+
long d2 = c1 ^ Long.rotateLeft(c3, 1);
189+
long d3 = c2 ^ Long.rotateLeft(c4, 1);
190+
long d4 = c3 ^ Long.rotateLeft(c0, 1);
191+
a0 ^= d0; a1 ^= d1; a2 ^= d2; a3 ^= d3; a4 ^= d4;
192+
a5 ^= d0; a6 ^= d1; a7 ^= d2; a8 ^= d3; a9 ^= d4;
193+
a10 ^= d0; a11 ^= d1; a12 ^= d2; a13 ^= d3; a14 ^= d4;
194+
a15 ^= d0; a16 ^= d1; a17 ^= d2; a18 ^= d3; a19 ^= d4;
195+
a20 ^= d0; a21 ^= d1; a22 ^= d2; a23 ^= d3; a24 ^= d4;
196+
197+
/**
198+
* Merged Step mapping Rho (section 3.2.2) and Pi (section 3.2.3)
199+
* for performance. Optimization is achieved by precalculating
200+
* shift constants for the following loop
201+
* int xNext, yNext;
202+
* for (int t = 0, x = 1, y = 0; t <= 23; t++, x = xNext, y = yNext) {
203+
* int numberOfShift = ((t + 1)*(t + 2)/2) % 64;
204+
* a[y][x] = Long.rotateLeft(a[y][x], numberOfShift);
205+
* xNext = y;
206+
* yNext = (2 * x + 3 * y) % DM;
207+
* }
208+
* and with in-place permutation.
209+
*/
210+
long ay = Long.rotateLeft(a10, 3);
211+
a10 = Long.rotateLeft(a1, 1);
212+
a1 = Long.rotateLeft(a6, 44);
213+
a6 = Long.rotateLeft(a9, 20);
214+
a9 = Long.rotateLeft(a22, 61);
215+
a22 = Long.rotateLeft(a14, 39);
216+
a14 = Long.rotateLeft(a20, 18);
217+
a20 = Long.rotateLeft(a2, 62);
218+
a2 = Long.rotateLeft(a12, 43);
219+
a12 = Long.rotateLeft(a13, 25);
220+
a13 = Long.rotateLeft(a19, 8);
221+
a19 = Long.rotateLeft(a23, 56);
222+
a23 = Long.rotateLeft(a15, 41);
223+
a15 = Long.rotateLeft(a4, 27);
224+
a4 = Long.rotateLeft(a24, 14);
225+
a24 = Long.rotateLeft(a21, 2);
226+
a21 = Long.rotateLeft(a8, 55);
227+
a8 = Long.rotateLeft(a16, 45);
228+
a16 = Long.rotateLeft(a5, 36);
229+
a5 = Long.rotateLeft(a3, 28);
230+
a3 = Long.rotateLeft(a18, 21);
231+
a18 = Long.rotateLeft(a17, 15);
232+
a17 = Long.rotateLeft(a11, 10);
233+
a11 = Long.rotateLeft(a7, 6);
234+
a7 = ay;
235+
236+
// Step mapping Chi as defined in section 3.2.4.
237+
long tmp0 = a0;
238+
long tmp1 = a1;
239+
long tmp2 = a2;
240+
long tmp3 = a3;
241+
long tmp4 = a4;
242+
a0 = tmp0 ^ ((~tmp1) & tmp2);
243+
a1 = tmp1 ^ ((~tmp2) & tmp3);
244+
a2 = tmp2 ^ ((~tmp3) & tmp4);
245+
a3 = tmp3 ^ ((~tmp4) & tmp0);
246+
a4 = tmp4 ^ ((~tmp0) & tmp1);
247+
248+
tmp0 = a5; tmp1 = a6; tmp2 = a7; tmp3 = a8; tmp4 = a9;
249+
a5 = tmp0 ^ ((~tmp1) & tmp2);
250+
a6 = tmp1 ^ ((~tmp2) & tmp3);
251+
a7 = tmp2 ^ ((~tmp3) & tmp4);
252+
a8 = tmp3 ^ ((~tmp4) & tmp0);
253+
a9 = tmp4 ^ ((~tmp0) & tmp1);
254+
255+
tmp0 = a10; tmp1 = a11; tmp2 = a12; tmp3 = a13; tmp4 = a14;
256+
a10 = tmp0 ^ ((~tmp1) & tmp2);
257+
a11 = tmp1 ^ ((~tmp2) & tmp3);
258+
a12 = tmp2 ^ ((~tmp3) & tmp4);
259+
a13 = tmp3 ^ ((~tmp4) & tmp0);
260+
a14 = tmp4 ^ ((~tmp0) & tmp1);
261+
262+
tmp0 = a15; tmp1 = a16; tmp2 = a17; tmp3 = a18; tmp4 = a19;
263+
a15 = tmp0 ^ ((~tmp1) & tmp2);
264+
a16 = tmp1 ^ ((~tmp2) & tmp3);
265+
a17 = tmp2 ^ ((~tmp3) & tmp4);
266+
a18 = tmp3 ^ ((~tmp4) & tmp0);
267+
a19 = tmp4 ^ ((~tmp0) & tmp1);
268+
269+
tmp0 = a20; tmp1 = a21; tmp2 = a22; tmp3 = a23; tmp4 = a24;
270+
a20 = tmp0 ^ ((~tmp1) & tmp2);
271+
a21 = tmp1 ^ ((~tmp2) & tmp3);
272+
a22 = tmp2 ^ ((~tmp3) & tmp4);
273+
a23 = tmp3 ^ ((~tmp4) & tmp0);
274+
a24 = tmp4 ^ ((~tmp0) & tmp1);
275+
276+
// Step mapping Iota as defined in section 3.2.5.
277+
a0 ^= RC_CONSTANTS[ir];
264278
}
279+
280+
lanes[0] = a0; lanes[1] = a1; lanes[2] = a2; lanes[3] = a3; lanes[4] = a4;
281+
lanes[5] = a5; lanes[6] = a6; lanes[7] = a7; lanes[8] = a8; lanes[9] = a9;
282+
lanes[10] = a10; lanes[11] = a11; lanes[12] = a12; lanes[13] = a13; lanes[14] = a14;
283+
lanes[15] = a15; lanes[16] = a16; lanes[17] = a17; lanes[18] = a18; lanes[19] = a19;
284+
lanes[20] = a20; lanes[21] = a21; lanes[22] = a22; lanes[23] = a23; lanes[24] = a24;
285+
265286
// convert the resulting 25 lanes back into 200-byte state
266287
lanes2Bytes(lanes, state);
267288
}

0 commit comments

Comments
 (0)