@@ -576,36 +576,58 @@ private static byte[] getJ0(byte[] iv, byte[] subkeyH, int blockSize) {
576
576
return j0 ;
577
577
}
578
578
579
- // Wrapper function around AES-GCM interleaved intrinsic that splits
580
- // large chunks of data into 1MB sized chunks. This is to place
581
- // an upper limit on the number of blocks encrypted in the intrinsic.
579
+ /**
580
+ * Wrapper function around Combined AES-GCM intrinsic method that splits
581
+ * large chunks of data into 1MB sized chunks. This is to place
582
+ * an upper limit on the number of blocks encrypted in the intrinsic.
583
+ *
584
+ * The combined intrinsic is not used when decrypting in-place heap
585
+ * bytebuffers because 'ct' will be the same as 'in' and overwritten by
586
+ * GCTR before GHASH calculates the encrypted tag.
587
+ */
582
588
private static int implGCMCrypt (byte [] in , int inOfs , int inLen , byte [] ct ,
583
589
int ctOfs , byte [] out , int outOfs ,
584
590
GCTR gctr , GHASH ghash ) {
585
591
586
592
int len = 0 ;
587
- if (inLen > SPLIT_LEN ) {
593
+ // Loop if input length is greater than the SPLIT_LEN
594
+ if (inLen > SPLIT_LEN && ct != null ) {
595
+ int partlen ;
588
596
while (inLen >= SPLIT_LEN ) {
589
- int partlen = implGCMCrypt0 (in , inOfs + len , SPLIT_LEN , ct ,
597
+ partlen = implGCMCrypt0 (in , inOfs + len , SPLIT_LEN , ct ,
590
598
ctOfs + len , out , outOfs + len , gctr , ghash );
591
599
len += partlen ;
592
600
inLen -= partlen ;
593
601
}
594
602
}
603
+
604
+ // Finish any remaining data
595
605
if (inLen > 0 ) {
596
- len += implGCMCrypt0 (in , inOfs + len , inLen , ct ,
597
- ctOfs + len , out , outOfs + len , gctr , ghash );
606
+ if (ct == null ) {
607
+ ghash .update (in , inOfs + len , inLen );
608
+ len += gctr .update (in , inOfs + len , inLen , out , outOfs );
609
+ } else {
610
+ len += implGCMCrypt0 (in , inOfs + len , inLen , ct ,
611
+ ctOfs + len , out , outOfs + len , gctr , ghash );
612
+ }
598
613
}
599
614
return len ;
600
615
}
616
+
601
617
/**
602
- * Intrinsic for Vector AES Galois Counter Mode implementation.
603
- * AES and GHASH operations are interleaved in the intrinsic implementation.
604
- * return - number of processed bytes
618
+ * Intrinsic for the combined AES Galois Counter Mode implementation.
619
+ * AES and GHASH operations are combined in the intrinsic implementation.
605
620
*
606
621
* Requires 768 bytes (48 AES blocks) to efficiently use the intrinsic.
607
622
* inLen that is less than 768 size block sizes, before or after this
608
623
* intrinsic is used, will be done by the calling method
624
+ *
625
+ * Note:
626
+ * Only Intel processors with AVX512 that support vaes, vpclmulqdq,
627
+ * avx512dq, and avx512vl trigger this intrinsic.
628
+ * Other processors will always use GHASH and GCTR which may have their own
629
+ * intrinsic support
630
+ *
609
631
* @param in input buffer
610
632
* @param inOfs input offset
611
633
* @param inLen input length
@@ -614,7 +636,7 @@ private static int implGCMCrypt(byte[] in, int inOfs, int inLen, byte[] ct,
614
636
* @param out output buffer
615
637
* @param outOfs output offset
616
638
* @param gctr object for the GCTR operation
617
- * @param ghash object for the ghash operation
639
+ * @param ghash object for the GHASH operation
618
640
* @return number of processed bytes
619
641
*/
620
642
@ IntrinsicCandidate
@@ -670,6 +692,11 @@ abstract class GCMEngine {
670
692
byte [] originalOut = null ;
671
693
int originalOutOfs = 0 ;
672
694
695
+ // True if ops is an in-place array decryption with the offset between
696
+ // input & output the same or the input greater. This is to
697
+ // avoid the AVX512 intrinsic.
698
+ boolean inPlaceArray = false ;
699
+
673
700
GCMEngine (SymmetricCipher blockCipher ) {
674
701
blockSize = blockCipher .getBlockSize ();
675
702
byte [] subkeyH = new byte [blockSize ];
@@ -736,7 +763,8 @@ int implGCMCrypt(ByteBuffer src, ByteBuffer dst) {
736
763
ByteBuffer ct = (encryption ? dst : src );
737
764
len = GaloisCounterMode .implGCMCrypt (src .array (),
738
765
src .arrayOffset () + src .position (), srcLen ,
739
- ct .array (), ct .arrayOffset () + ct .position (),
766
+ inPlaceArray ? null : ct .array (),
767
+ ct .arrayOffset () + ct .position (),
740
768
dst .array (), dst .arrayOffset () + dst .position (),
741
769
gctr , ghash );
742
770
src .position (src .position () + len );
@@ -948,10 +976,13 @@ ByteBuffer overlapDetection(ByteBuffer src, ByteBuffer dst) {
948
976
// from the passed object. That gives up the true offset from
949
977
// the base address. As long as the src side is >= the dst
950
978
// side, we are not in overlap.
979
+ // NOTE: inPlaceArray does not apply here as direct buffers run
980
+ // through a byte[] to get to the combined intrinsic
951
981
if (((DirectBuffer ) src ).address () - srcaddr + src .position () >=
952
982
((DirectBuffer ) dst ).address () - dstaddr + dst .position ()) {
953
983
return dst ;
954
984
}
985
+
955
986
} else if (!src .isDirect () && !dst .isDirect ()) {
956
987
// if src is read only, then we need a copy
957
988
if (!src .isReadOnly ()) {
@@ -964,10 +995,12 @@ ByteBuffer overlapDetection(ByteBuffer src, ByteBuffer dst) {
964
995
// from the underlying byte[] address.
965
996
// If during encryption and the input offset is behind or
966
997
// the same as the output offset, the same buffer can be
967
- // used. But during decryption always create a new
968
- // buffer in case of a bad auth tag.
969
- if (encryption && src .position () + src .arrayOffset () >=
998
+ // used.
999
+ // Set 'inPlaceArray' true for decryption operations to
1000
+ // avoid the AVX512 combined intrinsic
1001
+ if (src .position () + src .arrayOffset () >=
970
1002
dst .position () + dst .arrayOffset ()) {
1003
+ inPlaceArray = (!encryption );
971
1004
return dst ;
972
1005
}
973
1006
}
@@ -989,18 +1022,21 @@ ByteBuffer overlapDetection(ByteBuffer src, ByteBuffer dst) {
989
1022
}
990
1023
991
1024
/**
992
- * This is used for both overlap detection for the data or decryption
1025
+ * This is used for both overlap detection for the data or decryption
993
1026
* during in-place crypto, so to not overwrite the input if the auth tag
994
1027
* is invalid.
995
1028
*
996
1029
* If an intermediate array is needed, the original out array length is
997
1030
* allocated because for code simplicity.
998
1031
*/
999
1032
byte [] overlapDetection (byte [] in , int inOfs , byte [] out , int outOfs ) {
1000
- if (in == out && (!encryption || inOfs < outOfs )) {
1001
- originalOut = out ;
1002
- originalOutOfs = outOfs ;
1003
- return new byte [out .length ];
1033
+ if (in == out ) {
1034
+ if (inOfs < outOfs ) {
1035
+ originalOut = out ;
1036
+ originalOutOfs = outOfs ;
1037
+ return new byte [out .length ];
1038
+ }
1039
+ inPlaceArray = (!encryption );
1004
1040
}
1005
1041
return out ;
1006
1042
}
@@ -1501,8 +1537,11 @@ public int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
1501
1537
}
1502
1538
1503
1539
if (mismatch != 0 ) {
1504
- // Clear output data
1505
- Arrays .fill (out , outOfs , outOfs + len , (byte ) 0 );
1540
+ // If this is an in-place array, don't zero the input
1541
+ if (!inPlaceArray ) {
1542
+ // Clear output data
1543
+ Arrays .fill (out , outOfs , outOfs + len , (byte ) 0 );
1544
+ }
1506
1545
throw new AEADBadTagException ("Tag mismatch" );
1507
1546
}
1508
1547
@@ -1586,16 +1625,21 @@ public int doFinal(ByteBuffer src, ByteBuffer dst)
1586
1625
if (mismatch != 0 ) {
1587
1626
// Clear output data
1588
1627
dst .reset ();
1589
- if (dst .hasArray ()) {
1590
- int ofs = dst .arrayOffset () + dst .position ();
1591
- Arrays .fill (dst .array (), ofs , ofs + len , (byte )0 );
1592
- } else {
1593
- NIO_ACCESS .acquireSession (dst );
1594
- try {
1595
- Unsafe .getUnsafe ().setMemory (((DirectBuffer )dst ).address (),
1628
+ // If this is an in-place array, don't zero the src
1629
+ if (!inPlaceArray ) {
1630
+ if (dst .hasArray ()) {
1631
+ int ofs = dst .arrayOffset () + dst .position ();
1632
+ Arrays .fill (dst .array (), ofs , ofs + len ,
1633
+ (byte ) 0 );
1634
+ } else {
1635
+ NIO_ACCESS .acquireSession (dst );
1636
+ try {
1637
+ Unsafe .getUnsafe ().setMemory (
1638
+ ((DirectBuffer )dst ).address (),
1596
1639
len + dst .position (), (byte ) 0 );
1597
- } finally {
1598
- NIO_ACCESS .releaseSession (dst );
1640
+ } finally {
1641
+ NIO_ACCESS .releaseSession (dst );
1642
+ }
1599
1643
}
1600
1644
}
1601
1645
throw new AEADBadTagException ("Tag mismatch" );
@@ -1807,8 +1851,11 @@ public int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
1807
1851
int outOfs ) {
1808
1852
int len = 0 ;
1809
1853
if (inLen >= PARALLEL_LEN ) {
1810
- len += implGCMCrypt (in , inOfs , inLen , in , inOfs , out , outOfs ,
1811
- gctr , ghash );
1854
+ // Since GCMDecrypt.inPlaceArray cannot be accessed, check that
1855
+ // 'in' and 'out' are the same. All other in-place situations
1856
+ // have been resolved by overlapDetection()
1857
+ len += implGCMCrypt (in , inOfs , inLen , (in == out ? null : in ),
1858
+ inOfs , out , outOfs , gctr , ghash );
1812
1859
}
1813
1860
ghash .doFinal (in , inOfs + len , inLen - len );
1814
1861
return len + gctr .doFinal (in , inOfs + len , inLen - len , out ,
0 commit comments