@@ -3332,9 +3332,36 @@ class StubGenerator: public StubCodeGenerator {
3332
3332
return start;
3333
3333
}
3334
3334
3335
+ class Cached64Bytes {
3336
+ private:
3337
+ MacroAssembler *_masm;
3338
+ Register _regs[8 ];
3339
+
3340
+ public:
3341
+ Cached64Bytes (MacroAssembler *masm, RegSet rs): _masm(masm) {
3342
+ assert (rs.size () == 8 , " %u registers are used to cache 16 4-byte data" , rs.size ());
3343
+ auto it = rs.begin ();
3344
+ for (auto &r: _regs) {
3345
+ r = *it;
3346
+ ++it;
3347
+ }
3348
+ }
3349
+
3350
+ void gen_loads (Register base) {
3351
+ for (int i = 0 ; i < 8 ; i += 2 ) {
3352
+ __ ldp (_regs[i], _regs[i + 1 ], Address (base, 8 * i));
3353
+ }
3354
+ }
3355
+
3356
+ // Generate code extracting i-th unsigned word (4 bytes) from cached 64 bytes.
3357
+ void extract_u32 (Register dest, int i) {
3358
+ __ ubfx (dest, _regs[i / 2 ], 32 * (i % 2 ), 32 );
3359
+ }
3360
+ };
3361
+
3335
3362
// Utility routines for md5.
3336
3363
// Clobbers r10 and r11 (used as rscratch3/rscratch4), as well as rscratch1 and rscratch2.
3337
- void md5_FF (Register buf , Register r1, Register r2, Register r3, Register r4,
3364
+ void md5_FF (Cached64Bytes& reg_cache , Register r1, Register r2, Register r3, Register r4,
3338
3365
int k, int s, int t) {
3339
3366
Register rscratch3 = r10;
3340
3367
Register rscratch4 = r11;
@@ -3343,22 +3370,22 @@ class StubGenerator: public StubCodeGenerator {
3343
3370
__ movw (rscratch2, t);
3344
3371
__ andw (rscratch3, rscratch3, r2);
3345
3372
__ addw (rscratch4, r1, rscratch2);
3346
- __ ldrw (rscratch1, Address (buf, k* 4 ) );
3373
+ reg_cache. extract_u32 (rscratch1, k );
3347
3374
__ eorw (rscratch3, rscratch3, r4);
3348
3375
__ addw (rscratch4, rscratch4, rscratch1);
3349
3376
__ addw (rscratch3, rscratch3, rscratch4);
3350
3377
__ rorw (rscratch2, rscratch3, 32 - s);
3351
3378
__ addw (r1, rscratch2, r2);
3352
3379
}
3353
3380
3354
- void md5_GG (Register buf , Register r1, Register r2, Register r3, Register r4,
3381
+ void md5_GG (Cached64Bytes& reg_cache , Register r1, Register r2, Register r3, Register r4,
3355
3382
int k, int s, int t) {
3356
3383
Register rscratch3 = r10;
3357
3384
Register rscratch4 = r11;
3358
3385
3359
3386
__ andw (rscratch3, r2, r4);
3360
3387
__ bicw (rscratch4, r3, r4);
3361
- __ ldrw (rscratch1, Address (buf, k* 4 ) );
3388
+ reg_cache. extract_u32 (rscratch1, k );
3362
3389
__ movw (rscratch2, t);
3363
3390
__ orrw (rscratch3, rscratch3, rscratch4);
3364
3391
__ addw (rscratch4, r1, rscratch2);
@@ -3368,31 +3395,31 @@ class StubGenerator: public StubCodeGenerator {
3368
3395
__ addw (r1, rscratch2, r2);
3369
3396
}
3370
3397
3371
// Generates the instructions for one md5_HH step (used for "Round 3" of the
// MD5 block function).
//
// Net effect of the emitted code:
//   r1 = r2 + rotate_left32(r1 + (r2 ^ r3 ^ r4) + word[k] + t, s)
//
// reg_cache: register cache holding the current 64-byte block; word[k] is
//            extracted from it (the removed variant loaded it from buf + k*4).
// r1..r4:    the four working values; only r1 is written.
// k:         index of the 4-byte message word to mix in.
// s:         left-rotate amount (applied as a rotate right by 32 - s).
// t:         32-bit additive constant for this step.
//
// Writes rscratch1, rscratch2, r10 and r11 as temporaries.
- void md5_HH (Register buf , Register r1, Register r2, Register r3, Register r4,
3398
+ void md5_HH (Cached64Bytes& reg_cache , Register r1, Register r2, Register r3, Register r4,
3372
3399
int k, int s, int t) {
3373
3400
Register rscratch3 = r10;
3374
3401
Register rscratch4 = r11;
3375
3402
3376
3403
// rscratch3 = r3 ^ r4 (first half of the three-way XOR)
__ eorw (rscratch3, r3, r4);
3377
3404
// rscratch2 = t
__ movw (rscratch2, t);
3378
3405
// rscratch4 = r1 + t
__ addw (rscratch4, r1, rscratch2);
3379
// rscratch1 = word[k]
- __ ldrw (rscratch1, Address (buf, k* 4 ) );
3406
+ reg_cache. extract_u32 (rscratch1, k );
3380
3407
// rscratch3 = r2 ^ r3 ^ r4
__ eorw (rscratch3, rscratch3, r2);
3381
3408
// rscratch4 = r1 + t + word[k]
__ addw (rscratch4, rscratch4, rscratch1);
3382
3409
// rscratch3 = (r2 ^ r3 ^ r4) + r1 + t + word[k]
__ addw (rscratch3, rscratch3, rscratch4);
3383
3410
// rotate right by (32 - s), which is a 32-bit rotate left by s
__ rorw (rscratch2, rscratch3, 32 - s);
3384
3411
// r1 = rotated sum + r2
__ addw (r1, rscratch2, r2);
3385
3412
}
3386
3413
3387
- void md5_II (Register buf , Register r1, Register r2, Register r3, Register r4,
3414
+ void md5_II (Cached64Bytes& reg_cache , Register r1, Register r2, Register r3, Register r4,
3388
3415
int k, int s, int t) {
3389
3416
Register rscratch3 = r10;
3390
3417
Register rscratch4 = r11;
3391
3418
3392
3419
__ movw (rscratch3, t);
3393
3420
__ ornw (rscratch2, r2, r4);
3394
3421
__ addw (rscratch4, r1, rscratch3);
3395
- __ ldrw (rscratch1, Address (buf, k* 4 ) );
3422
+ reg_cache. extract_u32 (rscratch1, k );
3396
3423
__ eorw (rscratch3, rscratch2, r3);
3397
3424
__ addw (rscratch4, rscratch4, rscratch1);
3398
3425
__ addw (rscratch3, rscratch3, rscratch4);
@@ -3424,103 +3451,104 @@ class StubGenerator: public StubCodeGenerator {
3424
3451
Register rscratch3 = r10;
3425
3452
Register rscratch4 = r11;
3426
3453
3454
+ Register state_regs[2 ] = { r12, r13 };
3455
+ RegSet saved_regs = RegSet::range (r16, r22) - r18_tls;
3456
+ Cached64Bytes reg_cache (_masm, RegSet::of (r14, r15) + saved_regs); // using 8 registers
3457
+
3458
+ __ push (saved_regs, sp);
3459
+
3460
+ __ ldp (state_regs[0 ], state_regs[1 ], Address (state));
3461
+ __ ubfx (a, state_regs[0 ], 0 , 32 );
3462
+ __ ubfx (b, state_regs[0 ], 32 , 32 );
3463
+ __ ubfx (c, state_regs[1 ], 0 , 32 );
3464
+ __ ubfx (d, state_regs[1 ], 32 , 32 );
3465
+
3427
3466
Label md5_loop;
3428
3467
__ BIND (md5_loop);
3429
3468
3430
- // Save hash values for addition after rounds
3431
- __ ldrw (a, Address (state, 0 ));
3432
- __ ldrw (b, Address (state, 4 ));
3433
- __ ldrw (c, Address (state, 8 ));
3434
- __ ldrw (d, Address (state, 12 ));
3469
+ reg_cache.gen_loads (buf);
3435
3470
3436
3471
// Round 1
3437
- md5_FF (buf , a, b, c, d, 0 , 7 , 0xd76aa478 );
3438
- md5_FF (buf , d, a, b, c, 1 , 12 , 0xe8c7b756 );
3439
- md5_FF (buf , c, d, a, b, 2 , 17 , 0x242070db );
3440
- md5_FF (buf , b, c, d, a, 3 , 22 , 0xc1bdceee );
3441
- md5_FF (buf , a, b, c, d, 4 , 7 , 0xf57c0faf );
3442
- md5_FF (buf , d, a, b, c, 5 , 12 , 0x4787c62a );
3443
- md5_FF (buf , c, d, a, b, 6 , 17 , 0xa8304613 );
3444
- md5_FF (buf , b, c, d, a, 7 , 22 , 0xfd469501 );
3445
- md5_FF (buf , a, b, c, d, 8 , 7 , 0x698098d8 );
3446
- md5_FF (buf , d, a, b, c, 9 , 12 , 0x8b44f7af );
3447
- md5_FF (buf , c, d, a, b, 10 , 17 , 0xffff5bb1 );
3448
- md5_FF (buf , b, c, d, a, 11 , 22 , 0x895cd7be );
3449
- md5_FF (buf , a, b, c, d, 12 , 7 , 0x6b901122 );
3450
- md5_FF (buf , d, a, b, c, 13 , 12 , 0xfd987193 );
3451
- md5_FF (buf , c, d, a, b, 14 , 17 , 0xa679438e );
3452
- md5_FF (buf , b, c, d, a, 15 , 22 , 0x49b40821 );
3472
+ md5_FF (reg_cache , a, b, c, d, 0 , 7 , 0xd76aa478 );
3473
+ md5_FF (reg_cache , d, a, b, c, 1 , 12 , 0xe8c7b756 );
3474
+ md5_FF (reg_cache , c, d, a, b, 2 , 17 , 0x242070db );
3475
+ md5_FF (reg_cache , b, c, d, a, 3 , 22 , 0xc1bdceee );
3476
+ md5_FF (reg_cache , a, b, c, d, 4 , 7 , 0xf57c0faf );
3477
+ md5_FF (reg_cache , d, a, b, c, 5 , 12 , 0x4787c62a );
3478
+ md5_FF (reg_cache , c, d, a, b, 6 , 17 , 0xa8304613 );
3479
+ md5_FF (reg_cache , b, c, d, a, 7 , 22 , 0xfd469501 );
3480
+ md5_FF (reg_cache , a, b, c, d, 8 , 7 , 0x698098d8 );
3481
+ md5_FF (reg_cache , d, a, b, c, 9 , 12 , 0x8b44f7af );
3482
+ md5_FF (reg_cache , c, d, a, b, 10 , 17 , 0xffff5bb1 );
3483
+ md5_FF (reg_cache , b, c, d, a, 11 , 22 , 0x895cd7be );
3484
+ md5_FF (reg_cache , a, b, c, d, 12 , 7 , 0x6b901122 );
3485
+ md5_FF (reg_cache , d, a, b, c, 13 , 12 , 0xfd987193 );
3486
+ md5_FF (reg_cache , c, d, a, b, 14 , 17 , 0xa679438e );
3487
+ md5_FF (reg_cache , b, c, d, a, 15 , 22 , 0x49b40821 );
3453
3488
3454
3489
// Round 2
3455
- md5_GG (buf , a, b, c, d, 1 , 5 , 0xf61e2562 );
3456
- md5_GG (buf , d, a, b, c, 6 , 9 , 0xc040b340 );
3457
- md5_GG (buf , c, d, a, b, 11 , 14 , 0x265e5a51 );
3458
- md5_GG (buf , b, c, d, a, 0 , 20 , 0xe9b6c7aa );
3459
- md5_GG (buf , a, b, c, d, 5 , 5 , 0xd62f105d );
3460
- md5_GG (buf , d, a, b, c, 10 , 9 , 0x02441453 );
3461
- md5_GG (buf , c, d, a, b, 15 , 14 , 0xd8a1e681 );
3462
- md5_GG (buf , b, c, d, a, 4 , 20 , 0xe7d3fbc8 );
3463
- md5_GG (buf , a, b, c, d, 9 , 5 , 0x21e1cde6 );
3464
- md5_GG (buf , d, a, b, c, 14 , 9 , 0xc33707d6 );
3465
- md5_GG (buf , c, d, a, b, 3 , 14 , 0xf4d50d87 );
3466
- md5_GG (buf , b, c, d, a, 8 , 20 , 0x455a14ed );
3467
- md5_GG (buf , a, b, c, d, 13 , 5 , 0xa9e3e905 );
3468
- md5_GG (buf , d, a, b, c, 2 , 9 , 0xfcefa3f8 );
3469
- md5_GG (buf , c, d, a, b, 7 , 14 , 0x676f02d9 );
3470
- md5_GG (buf , b, c, d, a, 12 , 20 , 0x8d2a4c8a );
3490
+ md5_GG (reg_cache , a, b, c, d, 1 , 5 , 0xf61e2562 );
3491
+ md5_GG (reg_cache , d, a, b, c, 6 , 9 , 0xc040b340 );
3492
+ md5_GG (reg_cache , c, d, a, b, 11 , 14 , 0x265e5a51 );
3493
+ md5_GG (reg_cache , b, c, d, a, 0 , 20 , 0xe9b6c7aa );
3494
+ md5_GG (reg_cache , a, b, c, d, 5 , 5 , 0xd62f105d );
3495
+ md5_GG (reg_cache , d, a, b, c, 10 , 9 , 0x02441453 );
3496
+ md5_GG (reg_cache , c, d, a, b, 15 , 14 , 0xd8a1e681 );
3497
+ md5_GG (reg_cache , b, c, d, a, 4 , 20 , 0xe7d3fbc8 );
3498
+ md5_GG (reg_cache , a, b, c, d, 9 , 5 , 0x21e1cde6 );
3499
+ md5_GG (reg_cache , d, a, b, c, 14 , 9 , 0xc33707d6 );
3500
+ md5_GG (reg_cache , c, d, a, b, 3 , 14 , 0xf4d50d87 );
3501
+ md5_GG (reg_cache , b, c, d, a, 8 , 20 , 0x455a14ed );
3502
+ md5_GG (reg_cache , a, b, c, d, 13 , 5 , 0xa9e3e905 );
3503
+ md5_GG (reg_cache , d, a, b, c, 2 , 9 , 0xfcefa3f8 );
3504
+ md5_GG (reg_cache , c, d, a, b, 7 , 14 , 0x676f02d9 );
3505
+ md5_GG (reg_cache , b, c, d, a, 12 , 20 , 0x8d2a4c8a );
3471
3506
3472
3507
// Round 3
3473
- md5_HH (buf , a, b, c, d, 5 , 4 , 0xfffa3942 );
3474
- md5_HH (buf , d, a, b, c, 8 , 11 , 0x8771f681 );
3475
- md5_HH (buf , c, d, a, b, 11 , 16 , 0x6d9d6122 );
3476
- md5_HH (buf , b, c, d, a, 14 , 23 , 0xfde5380c );
3477
- md5_HH (buf , a, b, c, d, 1 , 4 , 0xa4beea44 );
3478
- md5_HH (buf , d, a, b, c, 4 , 11 , 0x4bdecfa9 );
3479
- md5_HH (buf , c, d, a, b, 7 , 16 , 0xf6bb4b60 );
3480
- md5_HH (buf , b, c, d, a, 10 , 23 , 0xbebfbc70 );
3481
- md5_HH (buf , a, b, c, d, 13 , 4 , 0x289b7ec6 );
3482
- md5_HH (buf , d, a, b, c, 0 , 11 , 0xeaa127fa );
3483
- md5_HH (buf , c, d, a, b, 3 , 16 , 0xd4ef3085 );
3484
- md5_HH (buf , b, c, d, a, 6 , 23 , 0x04881d05 );
3485
- md5_HH (buf , a, b, c, d, 9 , 4 , 0xd9d4d039 );
3486
- md5_HH (buf , d, a, b, c, 12 , 11 , 0xe6db99e5 );
3487
- md5_HH (buf , c, d, a, b, 15 , 16 , 0x1fa27cf8 );
3488
- md5_HH (buf , b, c, d, a, 2 , 23 , 0xc4ac5665 );
3508
+ md5_HH (reg_cache , a, b, c, d, 5 , 4 , 0xfffa3942 );
3509
+ md5_HH (reg_cache , d, a, b, c, 8 , 11 , 0x8771f681 );
3510
+ md5_HH (reg_cache , c, d, a, b, 11 , 16 , 0x6d9d6122 );
3511
+ md5_HH (reg_cache , b, c, d, a, 14 , 23 , 0xfde5380c );
3512
+ md5_HH (reg_cache , a, b, c, d, 1 , 4 , 0xa4beea44 );
3513
+ md5_HH (reg_cache , d, a, b, c, 4 , 11 , 0x4bdecfa9 );
3514
+ md5_HH (reg_cache , c, d, a, b, 7 , 16 , 0xf6bb4b60 );
3515
+ md5_HH (reg_cache , b, c, d, a, 10 , 23 , 0xbebfbc70 );
3516
+ md5_HH (reg_cache , a, b, c, d, 13 , 4 , 0x289b7ec6 );
3517
+ md5_HH (reg_cache , d, a, b, c, 0 , 11 , 0xeaa127fa );
3518
+ md5_HH (reg_cache , c, d, a, b, 3 , 16 , 0xd4ef3085 );
3519
+ md5_HH (reg_cache , b, c, d, a, 6 , 23 , 0x04881d05 );
3520
+ md5_HH (reg_cache , a, b, c, d, 9 , 4 , 0xd9d4d039 );
3521
+ md5_HH (reg_cache , d, a, b, c, 12 , 11 , 0xe6db99e5 );
3522
+ md5_HH (reg_cache , c, d, a, b, 15 , 16 , 0x1fa27cf8 );
3523
+ md5_HH (reg_cache , b, c, d, a, 2 , 23 , 0xc4ac5665 );
3489
3524
3490
3525
// Round 4
3491
- md5_II (buf, a, b, c, d, 0 , 6 , 0xf4292244 );
3492
- md5_II (buf, d, a, b, c, 7 , 10 , 0x432aff97 );
3493
- md5_II (buf, c, d, a, b, 14 , 15 , 0xab9423a7 );
3494
- md5_II (buf, b, c, d, a, 5 , 21 , 0xfc93a039 );
3495
- md5_II (buf, a, b, c, d, 12 , 6 , 0x655b59c3 );
3496
- md5_II (buf, d, a, b, c, 3 , 10 , 0x8f0ccc92 );
3497
- md5_II (buf, c, d, a, b, 10 , 15 , 0xffeff47d );
3498
- md5_II (buf, b, c, d, a, 1 , 21 , 0x85845dd1 );
3499
- md5_II (buf, a, b, c, d, 8 , 6 , 0x6fa87e4f );
3500
- md5_II (buf, d, a, b, c, 15 , 10 , 0xfe2ce6e0 );
3501
- md5_II (buf, c, d, a, b, 6 , 15 , 0xa3014314 );
3502
- md5_II (buf, b, c, d, a, 13 , 21 , 0x4e0811a1 );
3503
- md5_II (buf, a, b, c, d, 4 , 6 , 0xf7537e82 );
3504
- md5_II (buf, d, a, b, c, 11 , 10 , 0xbd3af235 );
3505
- md5_II (buf, c, d, a, b, 2 , 15 , 0x2ad7d2bb );
3506
- md5_II (buf, b, c, d, a, 9 , 21 , 0xeb86d391 );
3507
-
3508
- // write hash values back in the correct order
3509
- __ ldrw (rscratch1, Address (state, 0 ));
3510
- __ addw (rscratch1, rscratch1, a);
3511
- __ strw (rscratch1, Address (state, 0 ));
3512
-
3513
- __ ldrw (rscratch2, Address (state, 4 ));
3514
- __ addw (rscratch2, rscratch2, b);
3515
- __ strw (rscratch2, Address (state, 4 ));
3516
-
3517
- __ ldrw (rscratch3, Address (state, 8 ));
3518
- __ addw (rscratch3, rscratch3, c);
3519
- __ strw (rscratch3, Address (state, 8 ));
3520
-
3521
- __ ldrw (rscratch4, Address (state, 12 ));
3522
- __ addw (rscratch4, rscratch4, d);
3523
- __ strw (rscratch4, Address (state, 12 ));
3526
+ md5_II (reg_cache, a, b, c, d, 0 , 6 , 0xf4292244 );
3527
+ md5_II (reg_cache, d, a, b, c, 7 , 10 , 0x432aff97 );
3528
+ md5_II (reg_cache, c, d, a, b, 14 , 15 , 0xab9423a7 );
3529
+ md5_II (reg_cache, b, c, d, a, 5 , 21 , 0xfc93a039 );
3530
+ md5_II (reg_cache, a, b, c, d, 12 , 6 , 0x655b59c3 );
3531
+ md5_II (reg_cache, d, a, b, c, 3 , 10 , 0x8f0ccc92 );
3532
+ md5_II (reg_cache, c, d, a, b, 10 , 15 , 0xffeff47d );
3533
+ md5_II (reg_cache, b, c, d, a, 1 , 21 , 0x85845dd1 );
3534
+ md5_II (reg_cache, a, b, c, d, 8 , 6 , 0x6fa87e4f );
3535
+ md5_II (reg_cache, d, a, b, c, 15 , 10 , 0xfe2ce6e0 );
3536
+ md5_II (reg_cache, c, d, a, b, 6 , 15 , 0xa3014314 );
3537
+ md5_II (reg_cache, b, c, d, a, 13 , 21 , 0x4e0811a1 );
3538
+ md5_II (reg_cache, a, b, c, d, 4 , 6 , 0xf7537e82 );
3539
+ md5_II (reg_cache, d, a, b, c, 11 , 10 , 0xbd3af235 );
3540
+ md5_II (reg_cache, c, d, a, b, 2 , 15 , 0x2ad7d2bb );
3541
+ md5_II (reg_cache, b, c, d, a, 9 , 21 , 0xeb86d391 );
3542
+
3543
+ __ addw (a, state_regs[0 ], a);
3544
+ __ ubfx (rscratch2, state_regs[0 ], 32 , 32 );
3545
+ __ addw (b, rscratch2, b);
3546
+ __ addw (c, state_regs[1 ], c);
3547
+ __ ubfx (rscratch4, state_regs[1 ], 32 , 32 );
3548
+ __ addw (d, rscratch4, d);
3549
+
3550
+ __ orr (state_regs[0 ], a, b, Assembler::LSL, 32 );
3551
+ __ orr (state_regs[1 ], c, d, Assembler::LSL, 32 );
3524
3552
3525
3553
if (multi_block) {
3526
3554
__ add (buf, buf, 64 );
@@ -3530,6 +3558,11 @@ class StubGenerator: public StubCodeGenerator {
3530
3558
__ mov (c_rarg0, ofs); // return ofs
3531
3559
}
3532
3560
3561
+ // write hash values back in the correct order
3562
+ __ stp (state_regs[0 ], state_regs[1 ], Address (state));
3563
+
3564
+ __ pop (saved_regs, sp);
3565
+
3533
3566
__ ret (lr);
3534
3567
3535
3568
return start;
0 commit comments