Skip to content

Commit 49cf332

Browse files
Dmitry Chuyko authored and Paul Hohensee committed
8251216: Implement MD5 intrinsics on AArch64
Reviewed-by: phh, luhenry
Backport-of: 088b244ec6d9393a1fcd2233fa5b4cf46f9ae0dd
1 parent 9378f26 commit 49cf332

File tree

4 files changed

+198
-6
lines changed

4 files changed

+198
-6
lines changed

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3223,6 +3223,194 @@ class StubGenerator: public StubCodeGenerator {
32233223
return start;
32243224
}
32253225

3226+
// Arguments:
3227+
//
3228+
// Inputs:
3229+
// c_rarg0 - byte[] source+offset
3230+
// c_rarg1 - int[] SHA.state
3231+
// c_rarg2 - int offset
3232+
// c_rarg3 - int limit
3233+
//
3234+
address generate_md5_implCompress(bool multi_block, const char *name) {
3235+
__ align(CodeEntryAlignment);
3236+
StubCodeMark mark(this, "StubRoutines", name);
3237+
address start = __ pc();
3238+
3239+
Register buf = c_rarg0;
3240+
Register state = c_rarg1;
3241+
Register ofs = c_rarg2;
3242+
Register limit = c_rarg3;
3243+
Register a = r4;
3244+
Register b = r5;
3245+
Register c = r6;
3246+
Register d = r7;
3247+
Register rscratch3 = r10;
3248+
Register rscratch4 = r11;
3249+
3250+
Label keys;
3251+
Label md5_loop;
3252+
3253+
__ BIND(md5_loop);
3254+
3255+
// Save hash values for addition after rounds
3256+
__ ldrw(a, Address(state, 0));
3257+
__ ldrw(b, Address(state, 4));
3258+
__ ldrw(c, Address(state, 8));
3259+
__ ldrw(d, Address(state, 12));
3260+
3261+
#define FF(r1, r2, r3, r4, k, s, t) \
3262+
__ eorw(rscratch3, r3, r4); \
3263+
__ movw(rscratch2, t); \
3264+
__ andw(rscratch3, rscratch3, r2); \
3265+
__ addw(rscratch4, r1, rscratch2); \
3266+
__ ldrw(rscratch1, Address(buf, k*4)); \
3267+
__ eorw(rscratch3, rscratch3, r4); \
3268+
__ addw(rscratch3, rscratch3, rscratch1); \
3269+
__ addw(rscratch3, rscratch3, rscratch4); \
3270+
__ rorw(rscratch2, rscratch3, 32 - s); \
3271+
__ addw(r1, rscratch2, r2);
3272+
3273+
#define GG(r1, r2, r3, r4, k, s, t) \
3274+
__ eorw(rscratch2, r2, r3); \
3275+
__ ldrw(rscratch1, Address(buf, k*4)); \
3276+
__ andw(rscratch3, rscratch2, r4); \
3277+
__ movw(rscratch2, t); \
3278+
__ eorw(rscratch3, rscratch3, r3); \
3279+
__ addw(rscratch4, r1, rscratch2); \
3280+
__ addw(rscratch3, rscratch3, rscratch1); \
3281+
__ addw(rscratch3, rscratch3, rscratch4); \
3282+
__ rorw(rscratch2, rscratch3, 32 - s); \
3283+
__ addw(r1, rscratch2, r2);
3284+
3285+
#define HH(r1, r2, r3, r4, k, s, t) \
3286+
__ eorw(rscratch3, r3, r4); \
3287+
__ movw(rscratch2, t); \
3288+
__ addw(rscratch4, r1, rscratch2); \
3289+
__ ldrw(rscratch1, Address(buf, k*4)); \
3290+
__ eorw(rscratch3, rscratch3, r2); \
3291+
__ addw(rscratch3, rscratch3, rscratch1); \
3292+
__ addw(rscratch3, rscratch3, rscratch4); \
3293+
__ rorw(rscratch2, rscratch3, 32 - s); \
3294+
__ addw(r1, rscratch2, r2);
3295+
3296+
#define II(r1, r2, r3, r4, k, s, t) \
3297+
__ movw(rscratch3, t); \
3298+
__ ornw(rscratch2, r2, r4); \
3299+
__ addw(rscratch4, r1, rscratch3); \
3300+
__ ldrw(rscratch1, Address(buf, k*4)); \
3301+
__ eorw(rscratch3, rscratch2, r3); \
3302+
__ addw(rscratch3, rscratch3, rscratch1); \
3303+
__ addw(rscratch3, rscratch3, rscratch4); \
3304+
__ rorw(rscratch2, rscratch3, 32 - s); \
3305+
__ addw(r1, rscratch2, r2);
3306+
3307+
// Round 1
3308+
FF(a, b, c, d, 0, 7, 0xd76aa478)
3309+
FF(d, a, b, c, 1, 12, 0xe8c7b756)
3310+
FF(c, d, a, b, 2, 17, 0x242070db)
3311+
FF(b, c, d, a, 3, 22, 0xc1bdceee)
3312+
FF(a, b, c, d, 4, 7, 0xf57c0faf)
3313+
FF(d, a, b, c, 5, 12, 0x4787c62a)
3314+
FF(c, d, a, b, 6, 17, 0xa8304613)
3315+
FF(b, c, d, a, 7, 22, 0xfd469501)
3316+
FF(a, b, c, d, 8, 7, 0x698098d8)
3317+
FF(d, a, b, c, 9, 12, 0x8b44f7af)
3318+
FF(c, d, a, b, 10, 17, 0xffff5bb1)
3319+
FF(b, c, d, a, 11, 22, 0x895cd7be)
3320+
FF(a, b, c, d, 12, 7, 0x6b901122)
3321+
FF(d, a, b, c, 13, 12, 0xfd987193)
3322+
FF(c, d, a, b, 14, 17, 0xa679438e)
3323+
FF(b, c, d, a, 15, 22, 0x49b40821)
3324+
3325+
// Round 2
3326+
GG(a, b, c, d, 1, 5, 0xf61e2562)
3327+
GG(d, a, b, c, 6, 9, 0xc040b340)
3328+
GG(c, d, a, b, 11, 14, 0x265e5a51)
3329+
GG(b, c, d, a, 0, 20, 0xe9b6c7aa)
3330+
GG(a, b, c, d, 5, 5, 0xd62f105d)
3331+
GG(d, a, b, c, 10, 9, 0x02441453)
3332+
GG(c, d, a, b, 15, 14, 0xd8a1e681)
3333+
GG(b, c, d, a, 4, 20, 0xe7d3fbc8)
3334+
GG(a, b, c, d, 9, 5, 0x21e1cde6)
3335+
GG(d, a, b, c, 14, 9, 0xc33707d6)
3336+
GG(c, d, a, b, 3, 14, 0xf4d50d87)
3337+
GG(b, c, d, a, 8, 20, 0x455a14ed)
3338+
GG(a, b, c, d, 13, 5, 0xa9e3e905)
3339+
GG(d, a, b, c, 2, 9, 0xfcefa3f8)
3340+
GG(c, d, a, b, 7, 14, 0x676f02d9)
3341+
GG(b, c, d, a, 12, 20, 0x8d2a4c8a)
3342+
3343+
// Round 3
3344+
HH(a, b, c, d, 5, 4, 0xfffa3942)
3345+
HH(d, a, b, c, 8, 11, 0x8771f681)
3346+
HH(c, d, a, b, 11, 16, 0x6d9d6122)
3347+
HH(b, c, d, a, 14, 23, 0xfde5380c)
3348+
HH(a, b, c, d, 1, 4, 0xa4beea44)
3349+
HH(d, a, b, c, 4, 11, 0x4bdecfa9)
3350+
HH(c, d, a, b, 7, 16, 0xf6bb4b60)
3351+
HH(b, c, d, a, 10, 23, 0xbebfbc70)
3352+
HH(a, b, c, d, 13, 4, 0x289b7ec6)
3353+
HH(d, a, b, c, 0, 11, 0xeaa127fa)
3354+
HH(c, d, a, b, 3, 16, 0xd4ef3085)
3355+
HH(b, c, d, a, 6, 23, 0x04881d05)
3356+
HH(a, b, c, d, 9, 4, 0xd9d4d039)
3357+
HH(d, a, b, c, 12, 11, 0xe6db99e5)
3358+
HH(c, d, a, b, 15, 16, 0x1fa27cf8)
3359+
HH(b, c, d, a, 2, 23, 0xc4ac5665)
3360+
3361+
// Round 4
3362+
II(a, b, c, d, 0, 6, 0xf4292244)
3363+
II(d, a, b, c, 7, 10, 0x432aff97)
3364+
II(c, d, a, b, 14, 15, 0xab9423a7)
3365+
II(b, c, d, a, 5, 21, 0xfc93a039)
3366+
II(a, b, c, d, 12, 6, 0x655b59c3)
3367+
II(d, a, b, c, 3, 10, 0x8f0ccc92)
3368+
II(c, d, a, b, 10, 15, 0xffeff47d)
3369+
II(b, c, d, a, 1, 21, 0x85845dd1)
3370+
II(a, b, c, d, 8, 6, 0x6fa87e4f)
3371+
II(d, a, b, c, 15, 10, 0xfe2ce6e0)
3372+
II(c, d, a, b, 6, 15, 0xa3014314)
3373+
II(b, c, d, a, 13, 21, 0x4e0811a1)
3374+
II(a, b, c, d, 4, 6, 0xf7537e82)
3375+
II(d, a, b, c, 11, 10, 0xbd3af235)
3376+
II(c, d, a, b, 2, 15, 0x2ad7d2bb)
3377+
II(b, c, d, a, 9, 21, 0xeb86d391)
3378+
3379+
#undef FF
3380+
#undef GG
3381+
#undef HH
3382+
#undef II
3383+
3384+
// write hash values back in the correct order
3385+
__ ldrw(rscratch1, Address(state, 0));
3386+
__ addw(rscratch1, rscratch1, a);
3387+
__ strw(rscratch1, Address(state, 0));
3388+
3389+
__ ldrw(rscratch2, Address(state, 4));
3390+
__ addw(rscratch2, rscratch2, b);
3391+
__ strw(rscratch2, Address(state, 4));
3392+
3393+
__ ldrw(rscratch3, Address(state, 8));
3394+
__ addw(rscratch3, rscratch3, c);
3395+
__ strw(rscratch3, Address(state, 8));
3396+
3397+
__ ldrw(rscratch4, Address(state, 12));
3398+
__ addw(rscratch4, rscratch4, d);
3399+
__ strw(rscratch4, Address(state, 12));
3400+
3401+
if (multi_block) {
3402+
__ add(buf, buf, 64);
3403+
__ add(ofs, ofs, 64);
3404+
__ cmp(ofs, limit);
3405+
__ br(Assembler::LE, md5_loop);
3406+
__ mov(c_rarg0, ofs); // return ofs
3407+
}
3408+
3409+
__ ret(lr);
3410+
3411+
return start;
3412+
}
3413+
32263414
// Arguments:
32273415
//
32283416
// Inputs:
@@ -7476,6 +7664,10 @@ class StubGenerator: public StubCodeGenerator {
74767664
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt();
74777665
}
74787666

7667+
if (UseMD5Intrinsics) {
7668+
StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress");
7669+
StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB");
7670+
}
74797671
if (UseSHA1Intrinsics) {
74807672
StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
74817673
StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");

src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ static bool returns_to_call_stub(address return_pc) {
3636

3737
enum platform_dependent_constants {
3838
code_size1 = 19000, // simply increase if too small (assembler will crash if too small)
39-
code_size2 = 32000 // simply increase if too small (assembler will crash if too small)
39+
code_size2 = 45000 // simply increase if too small (assembler will crash if too small)
4040
};
4141

4242
class aarch64 {

src/hotspot/cpu/aarch64/vm_version_aarch64.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -300,9 +300,8 @@ void VM_Version::initialize() {
300300
FLAG_SET_DEFAULT(UseFMA, true);
301301
}
302302

303-
if (UseMD5Intrinsics) {
304-
warning("MD5 intrinsics are not available on this CPU");
305-
FLAG_SET_DEFAULT(UseMD5Intrinsics, false);
303+
if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
304+
UseMD5Intrinsics = true;
306305
}
307306

308307
if (_features & (CPU_SHA1 | CPU_SHA2 | CPU_SHA3 | CPU_SHA512)) {

test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,11 @@ public class IntrinsicPredicates {
6060
};
6161

6262
public static final BooleanSupplier MD5_INSTRUCTION_AVAILABLE
63-
= // x86 variants
63+
= new OrPredicate(new CPUSpecificPredicate("aarch64.*", null, null),
64+
// x86 variants
6465
new OrPredicate(new CPUSpecificPredicate("amd64.*", null, null),
6566
new OrPredicate(new CPUSpecificPredicate("i386.*", null, null),
66-
new CPUSpecificPredicate("x86.*", null, null)));
67+
new CPUSpecificPredicate("x86.*", null, null))));
6768

6869
public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE
6970
= new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null),

0 commit comments

Comments
 (0)