Skip to content
This repository was archived by the owner on Feb 2, 2023. It is now read-only.

Commit 2802620

Browse files
author
Paul Hohensee
committed
8251216: Implement MD5 intrinsics on AArch64
Reviewed-by: simonis Backport-of: 088b244ec6d9393a1fcd2233fa5b4cf46f9ae0dd
1 parent 7bf914d commit 2802620

File tree

4 files changed

+198
-6
lines changed

4 files changed

+198
-6
lines changed

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2920,6 +2920,194 @@ class StubGenerator: public StubCodeGenerator {
29202920
return start;
29212921
}
29222922

2923+
// Emits the stub for the MD5 block transform (MD5 per the function name and
// the FF/GG/HH/II round structure below) and returns the stub's entry address.
// With multi_block == true the emitted code loops over consecutive 64-byte
// blocks and hands the updated offset back in c_rarg0.
//
// Arguments:
2924+
//
2925+
// Inputs:
2926+
// c_rarg0 - byte[] source+offset
2927+
// c_rarg1 - int[] state: four 32-bit words at byte offsets 0, 4, 8 and 12
//           (this comment originally read "SHA.state" - presumably a leftover
//           from a SHA stub; TODO confirm it should say MD5.state)
2928+
// c_rarg2 - int offset (only read by the emitted code when multi_block is true)
2929+
// c_rarg3 - int limit (only read by the emitted code when multi_block is true)
2930+
//
// Returns (from this C++ function): the address of the first emitted instruction.
2931+
address generate_md5_implCompress(bool multi_block, const char *name) {
2932+
__ align(CodeEntryAlignment);
2933+
StubCodeMark mark(this, "StubRoutines", name);
2934+
// Entry point of the generated code; this is the value returned below.
address start = __ pc();
2935+
2936+
// Incoming arguments, named after their roles.
Register buf = c_rarg0;
2937+
Register state = c_rarg1;
2938+
Register ofs = c_rarg2;
2939+
Register limit = c_rarg3;
2940+
// Working copies of the four state words.
Register a = r4;
2941+
Register b = r5;
2942+
Register c = r6;
2943+
Register d = r7;
2944+
// Two extra temporaries local to this stub. rscratch1 and rscratch2 are also
// used below but are not declared in this function, so they must come from
// the enclosing scope.
Register rscratch3 = r10;
2945+
Register rscratch4 = r11;
2946+
2947+
// NOTE(review): 'keys' is never bound or referenced anywhere in this function.
Label keys;
2948+
Label md5_loop;
2949+
2950+
// Top of the per-block code; the only branch back to it is emitted in the
// multi_block case at the end of the function.
__ BIND(md5_loop);
2951+
2952+
// Load the four state words into the working registers a..d. The stored
// state is left untouched here; it is re-read and added to after the rounds.
2953+
__ ldrw(a, Address(state, 0));
2954+
__ ldrw(b, Address(state, 4));
2955+
__ ldrw(c, Address(state, 8));
2956+
__ ldrw(d, Address(state, 12));
2957+
2958+
// Step macros. In all four, the parameters r1..r4 are preprocessor
// placeholders (they do not denote the machine registers r4 etc. used above),
// k is the index of the 32-bit message word read from buf (byte offset k*4),
// s is the left-rotate amount (emitted as a right rotate by 32 - s) and t is
// the additive constant. Each macro clobbers rscratch1..rscratch4 and leaves
// its result in r1.
//
// FF: r1 = r2 + rotl32(r1 + (((r3 ^ r4) & r2) ^ r4) + buf[k] + t, s)
#define FF(r1, r2, r3, r4, k, s, t) \
2959+
__ eorw(rscratch3, r3, r4); \
2960+
__ movw(rscratch2, t); \
2961+
__ andw(rscratch3, rscratch3, r2); \
2962+
__ addw(rscratch4, r1, rscratch2); \
2963+
__ ldrw(rscratch1, Address(buf, k*4)); \
2964+
__ eorw(rscratch3, rscratch3, r4); \
2965+
__ addw(rscratch3, rscratch3, rscratch1); \
2966+
__ addw(rscratch3, rscratch3, rscratch4); \
2967+
__ rorw(rscratch2, rscratch3, 32 - s); \
2968+
__ addw(r1, rscratch2, r2);
2969+
2970+
// GG: r1 = r2 + rotl32(r1 + (((r2 ^ r3) & r4) ^ r3) + buf[k] + t, s)
#define GG(r1, r2, r3, r4, k, s, t) \
2971+
__ eorw(rscratch2, r2, r3); \
2972+
__ ldrw(rscratch1, Address(buf, k*4)); \
2973+
__ andw(rscratch3, rscratch2, r4); \
2974+
__ movw(rscratch2, t); \
2975+
__ eorw(rscratch3, rscratch3, r3); \
2976+
__ addw(rscratch4, r1, rscratch2); \
2977+
__ addw(rscratch3, rscratch3, rscratch1); \
2978+
__ addw(rscratch3, rscratch3, rscratch4); \
2979+
__ rorw(rscratch2, rscratch3, 32 - s); \
2980+
__ addw(r1, rscratch2, r2);
2981+
2982+
// HH: r1 = r2 + rotl32(r1 + (r2 ^ r3 ^ r4) + buf[k] + t, s)
#define HH(r1, r2, r3, r4, k, s, t) \
2983+
__ eorw(rscratch3, r3, r4); \
2984+
__ movw(rscratch2, t); \
2985+
__ addw(rscratch4, r1, rscratch2); \
2986+
__ ldrw(rscratch1, Address(buf, k*4)); \
2987+
__ eorw(rscratch3, rscratch3, r2); \
2988+
__ addw(rscratch3, rscratch3, rscratch1); \
2989+
__ addw(rscratch3, rscratch3, rscratch4); \
2990+
__ rorw(rscratch2, rscratch3, 32 - s); \
2991+
__ addw(r1, rscratch2, r2);
2992+
2993+
// II: r1 = r2 + rotl32(r1 + ((r2 | ~r4) ^ r3) + buf[k] + t, s)
#define II(r1, r2, r3, r4, k, s, t) \
2994+
__ movw(rscratch3, t); \
2995+
__ ornw(rscratch2, r2, r4); \
2996+
__ addw(rscratch4, r1, rscratch3); \
2997+
__ ldrw(rscratch1, Address(buf, k*4)); \
2998+
__ eorw(rscratch3, rscratch2, r3); \
2999+
__ addw(rscratch3, rscratch3, rscratch1); \
3000+
__ addw(rscratch3, rscratch3, rscratch4); \
3001+
__ rorw(rscratch2, rscratch3, 32 - s); \
3002+
__ addw(r1, rscratch2, r2);
3003+
3004+
// Round 1: 16 FF steps, message words 0..15 in order, rotates 7/12/17/22.
// In every round the destination register cycles a, d, c, b.
3005+
FF(a, b, c, d, 0, 7, 0xd76aa478)
3006+
FF(d, a, b, c, 1, 12, 0xe8c7b756)
3007+
FF(c, d, a, b, 2, 17, 0x242070db)
3008+
FF(b, c, d, a, 3, 22, 0xc1bdceee)
3009+
FF(a, b, c, d, 4, 7, 0xf57c0faf)
3010+
FF(d, a, b, c, 5, 12, 0x4787c62a)
3011+
FF(c, d, a, b, 6, 17, 0xa8304613)
3012+
FF(b, c, d, a, 7, 22, 0xfd469501)
3013+
FF(a, b, c, d, 8, 7, 0x698098d8)
3014+
FF(d, a, b, c, 9, 12, 0x8b44f7af)
3015+
FF(c, d, a, b, 10, 17, 0xffff5bb1)
3016+
FF(b, c, d, a, 11, 22, 0x895cd7be)
3017+
FF(a, b, c, d, 12, 7, 0x6b901122)
3018+
FF(d, a, b, c, 13, 12, 0xfd987193)
3019+
FF(c, d, a, b, 14, 17, 0xa679438e)
3020+
FF(b, c, d, a, 15, 22, 0x49b40821)
3021+
3022+
// Round 2: 16 GG steps, word index starts at 1 and advances by 5 (mod 16),
// rotates 5/9/14/20.
3023+
GG(a, b, c, d, 1, 5, 0xf61e2562)
3024+
GG(d, a, b, c, 6, 9, 0xc040b340)
3025+
GG(c, d, a, b, 11, 14, 0x265e5a51)
3026+
GG(b, c, d, a, 0, 20, 0xe9b6c7aa)
3027+
GG(a, b, c, d, 5, 5, 0xd62f105d)
3028+
GG(d, a, b, c, 10, 9, 0x02441453)
3029+
GG(c, d, a, b, 15, 14, 0xd8a1e681)
3030+
GG(b, c, d, a, 4, 20, 0xe7d3fbc8)
3031+
GG(a, b, c, d, 9, 5, 0x21e1cde6)
3032+
GG(d, a, b, c, 14, 9, 0xc33707d6)
3033+
GG(c, d, a, b, 3, 14, 0xf4d50d87)
3034+
GG(b, c, d, a, 8, 20, 0x455a14ed)
3035+
GG(a, b, c, d, 13, 5, 0xa9e3e905)
3036+
GG(d, a, b, c, 2, 9, 0xfcefa3f8)
3037+
GG(c, d, a, b, 7, 14, 0x676f02d9)
3038+
GG(b, c, d, a, 12, 20, 0x8d2a4c8a)
3039+
3040+
// Round 3: 16 HH steps, word index starts at 5 and advances by 3 (mod 16),
// rotates 4/11/16/23.
3041+
HH(a, b, c, d, 5, 4, 0xfffa3942)
3042+
HH(d, a, b, c, 8, 11, 0x8771f681)
3043+
HH(c, d, a, b, 11, 16, 0x6d9d6122)
3044+
HH(b, c, d, a, 14, 23, 0xfde5380c)
3045+
HH(a, b, c, d, 1, 4, 0xa4beea44)
3046+
HH(d, a, b, c, 4, 11, 0x4bdecfa9)
3047+
HH(c, d, a, b, 7, 16, 0xf6bb4b60)
3048+
HH(b, c, d, a, 10, 23, 0xbebfbc70)
3049+
HH(a, b, c, d, 13, 4, 0x289b7ec6)
3050+
HH(d, a, b, c, 0, 11, 0xeaa127fa)
3051+
HH(c, d, a, b, 3, 16, 0xd4ef3085)
3052+
HH(b, c, d, a, 6, 23, 0x04881d05)
3053+
HH(a, b, c, d, 9, 4, 0xd9d4d039)
3054+
HH(d, a, b, c, 12, 11, 0xe6db99e5)
3055+
HH(c, d, a, b, 15, 16, 0x1fa27cf8)
3056+
HH(b, c, d, a, 2, 23, 0xc4ac5665)
3057+
3058+
// Round 4: 16 II steps, word index starts at 0 and advances by 7 (mod 16),
// rotates 6/10/15/21.
3059+
II(a, b, c, d, 0, 6, 0xf4292244)
3060+
II(d, a, b, c, 7, 10, 0x432aff97)
3061+
II(c, d, a, b, 14, 15, 0xab9423a7)
3062+
II(b, c, d, a, 5, 21, 0xfc93a039)
3063+
II(a, b, c, d, 12, 6, 0x655b59c3)
3064+
II(d, a, b, c, 3, 10, 0x8f0ccc92)
3065+
II(c, d, a, b, 10, 15, 0xffeff47d)
3066+
II(b, c, d, a, 1, 21, 0x85845dd1)
3067+
II(a, b, c, d, 8, 6, 0x6fa87e4f)
3068+
II(d, a, b, c, 15, 10, 0xfe2ce6e0)
3069+
II(c, d, a, b, 6, 15, 0xa3014314)
3070+
II(b, c, d, a, 13, 21, 0x4e0811a1)
3071+
II(a, b, c, d, 4, 6, 0xf7537e82)
3072+
II(d, a, b, c, 11, 10, 0xbd3af235)
3073+
II(c, d, a, b, 2, 15, 0x2ad7d2bb)
3074+
II(b, c, d, a, 9, 21, 0xeb86d391)
3075+
3076+
// The step macros are only meaningful inside this function; drop them again.
#undef FF
3077+
#undef GG
3078+
#undef HH
3079+
#undef II
3080+
3081+
// Fold the round results into the stored state: each state word is re-read
// from memory, the matching working register (a, b, c, d in that order) is
// added to it, and the sum is stored back at the same offset.
3082+
__ ldrw(rscratch1, Address(state, 0));
3083+
__ addw(rscratch1, rscratch1, a);
3084+
__ strw(rscratch1, Address(state, 0));
3085+
3086+
__ ldrw(rscratch2, Address(state, 4));
3087+
__ addw(rscratch2, rscratch2, b);
3088+
__ strw(rscratch2, Address(state, 4));
3089+
3090+
__ ldrw(rscratch3, Address(state, 8));
3091+
__ addw(rscratch3, rscratch3, c);
3092+
__ strw(rscratch3, Address(state, 8));
3093+
3094+
__ ldrw(rscratch4, Address(state, 12));
3095+
__ addw(rscratch4, rscratch4, d);
3096+
__ strw(rscratch4, Address(state, 12));
3097+
3098+
// Multi-block variant only: step buf and ofs forward by one 64-byte block
// and branch back to md5_loop while the LE condition holds for
// cmp(ofs, limit). On loop exit the updated ofs is moved into c_rarg0.
// The single-block variant emits none of this and falls straight to ret.
if (multi_block) {
3099+
__ add(buf, buf, 64);
3100+
__ add(ofs, ofs, 64);
3101+
__ cmp(ofs, limit);
3102+
__ br(Assembler::LE, md5_loop);
3103+
__ mov(c_rarg0, ofs); // return ofs
3104+
}
3105+
3106+
__ ret(lr);
3107+
3108+
return start;
3109+
}
3110+
29233111
// Arguments:
29243112
//
29253113
// Inputs:
@@ -5845,6 +6033,10 @@ class StubGenerator: public StubCodeGenerator {
58456033
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
58466034
}
58476035

6036+
if (UseMD5Intrinsics) {
6037+
StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress");
6038+
StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB");
6039+
}
58486040
if (UseSHA1Intrinsics) {
58496041
StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
58506042
StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");

src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ static bool returns_to_call_stub(address return_pc) {
3636

3737
// Code size constants for this platform. This diff hunk shows both sides of
// the change to code_size2: the removed value (28000, marker "39-") and the
// added value (45000, marker "39+"); code_size1 is unchanged.
enum platform_dependent_constants {
3838
code_size1 = 19000, // simply increase if too small (assembler will crash if too small)
39-
code_size2 = 28000 // simply increase if too small (assembler will crash if too small)
39+
code_size2 = 45000 // simply increase if too small (assembler will crash if too small)
4040
};
4141

4242
class aarch64 {

src/hotspot/cpu/aarch64/vm_version_aarch64.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -358,9 +358,8 @@ void VM_Version::get_processor_features() {
358358
FLAG_SET_DEFAULT(UseFMA, true);
359359
}
360360

361-
if (UseMD5Intrinsics) {
362-
warning("MD5 intrinsics are not available on this CPU");
363-
FLAG_SET_DEFAULT(UseMD5Intrinsics, false);
361+
if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
362+
UseMD5Intrinsics = true;
364363
}
365364

366365
if (auxv & (HWCAP_SHA1 | HWCAP_SHA2)) {

test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,11 @@ public class IntrinsicPredicates {
6060
};
6161

6262
public static final BooleanSupplier MD5_INSTRUCTION_AVAILABLE
63-
= // x86 variants
63+
= new OrPredicate(new CPUSpecificPredicate("aarch64.*", null, null),
64+
// x86 variants
6465
new OrPredicate(new CPUSpecificPredicate("amd64.*", null, null),
6566
new OrPredicate(new CPUSpecificPredicate("i386.*", null, null),
66-
new CPUSpecificPredicate("x86.*", null, null)));
67+
new CPUSpecificPredicate("x86.*", null, null))));
6768

6869
public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE
6970
= new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null),

0 commit comments

Comments
 (0)