Skip to content

Commit b4da0ee

Browse files
author
Anthony Scarpino
committed
8296507: GCM using more memory than necessary with in-place operations
Reviewed-by: jnimeh
1 parent cd2182a commit b4da0ee

File tree

1 file changed

+80
-33
lines changed

1 file changed

+80
-33
lines changed

src/java.base/share/classes/com/sun/crypto/provider/GaloisCounterMode.java

+80-33
Original file line numberDiff line numberDiff line change
@@ -576,36 +576,58 @@ private static byte[] getJ0(byte[] iv, byte[] subkeyH, int blockSize) {
576576
return j0;
577577
}
578578

579-
// Wrapper function around AES-GCM interleaved intrinsic that splits
580-
// large chunks of data into 1MB sized chunks. This is to place
581-
// an upper limit on the number of blocks encrypted in the intrinsic.
579+
/**
580+
* Wrapper function around Combined AES-GCM intrinsic method that splits
581+
* large chunks of data into 1MB sized chunks. This is to place
582+
* an upper limit on the number of blocks encrypted in the intrinsic.
583+
*
584+
* The combined intrinsic is not used when decrypting in-place heap
585+
* bytebuffers because 'ct' will be the same as 'in' and overwritten by
586+
* GCTR before GHASH calculates the encrypted tag.
587+
*/
582588
private static int implGCMCrypt(byte[] in, int inOfs, int inLen, byte[] ct,
583589
int ctOfs, byte[] out, int outOfs,
584590
GCTR gctr, GHASH ghash) {
585591

586592
int len = 0;
587-
if (inLen > SPLIT_LEN) {
593+
// Loop if input length is greater than the SPLIT_LEN
594+
if (inLen > SPLIT_LEN && ct != null) {
595+
int partlen;
588596
while (inLen >= SPLIT_LEN) {
589-
int partlen = implGCMCrypt0(in, inOfs + len, SPLIT_LEN, ct,
597+
partlen = implGCMCrypt0(in, inOfs + len, SPLIT_LEN, ct,
590598
ctOfs + len, out, outOfs + len, gctr, ghash);
591599
len += partlen;
592600
inLen -= partlen;
593601
}
594602
}
603+
604+
// Finish any remaining data
595605
if (inLen > 0) {
596-
len += implGCMCrypt0(in, inOfs + len, inLen, ct,
597-
ctOfs + len, out, outOfs + len, gctr, ghash);
606+
if (ct == null) {
607+
ghash.update(in, inOfs + len, inLen);
608+
len += gctr.update(in, inOfs + len, inLen, out, outOfs);
609+
} else {
610+
len += implGCMCrypt0(in, inOfs + len, inLen, ct,
611+
ctOfs + len, out, outOfs + len, gctr, ghash);
612+
}
598613
}
599614
return len;
600615
}
616+
601617
/**
602-
* Intrinsic for Vector AES Galois Counter Mode implementation.
603-
* AES and GHASH operations are interleaved in the intrinsic implementation.
604-
* return - number of processed bytes
618+
* Intrinsic for the combined AES Galois Counter Mode implementation.
619+
* AES and GHASH operations are combined in the intrinsic implementation.
605620
*
606621
* Requires 768 bytes (48 AES blocks) to efficiently use the intrinsic.
607622
* inLen that is less than 768 size block sizes, before or after this
608623
* intrinsic is used, will be done by the calling method
624+
*
625+
* Note:
626+
* Only Intel processors with AVX512 that support vaes, vpclmulqdq,
627+
* avx512dq, and avx512vl trigger this intrinsic.
628+
* Other processors will always use GHASH and GCTR which may have their own
629+
* intrinsic support
630+
*
609631
* @param in input buffer
610632
* @param inOfs input offset
611633
* @param inLen input length
@@ -614,7 +636,7 @@ private static int implGCMCrypt(byte[] in, int inOfs, int inLen, byte[] ct,
614636
* @param out output buffer
615637
* @param outOfs output offset
616638
* @param gctr object for the GCTR operation
617-
* @param ghash object for the ghash operation
639+
* @param ghash object for the GHASH operation
618640
* @return number of processed bytes
619641
*/
620642
@IntrinsicCandidate
@@ -670,6 +692,11 @@ abstract class GCMEngine {
670692
byte[] originalOut = null;
671693
int originalOutOfs = 0;
672694

695+
// True if ops is an in-place array decryption with the offset between
696+
// input & output the same or the input greater. This is to
697+
// avoid the AVX512 intrinsic.
698+
boolean inPlaceArray = false;
699+
673700
GCMEngine(SymmetricCipher blockCipher) {
674701
blockSize = blockCipher.getBlockSize();
675702
byte[] subkeyH = new byte[blockSize];
@@ -736,7 +763,8 @@ int implGCMCrypt(ByteBuffer src, ByteBuffer dst) {
736763
ByteBuffer ct = (encryption ? dst : src);
737764
len = GaloisCounterMode.implGCMCrypt(src.array(),
738765
src.arrayOffset() + src.position(), srcLen,
739-
ct.array(), ct.arrayOffset() + ct.position(),
766+
inPlaceArray ? null : ct.array(),
767+
ct.arrayOffset() + ct.position(),
740768
dst.array(), dst.arrayOffset() + dst.position(),
741769
gctr, ghash);
742770
src.position(src.position() + len);
@@ -948,10 +976,13 @@ ByteBuffer overlapDetection(ByteBuffer src, ByteBuffer dst) {
948976
// from the passed object. That gives up the true offset from
949977
// the base address. As long as the src side is >= the dst
950978
// side, we are not in overlap.
979+
// NOTE: inPlaceArray does not apply here as direct buffers run
980+
// through a byte[] to get to the combined intrinsic
951981
if (((DirectBuffer) src).address() - srcaddr + src.position() >=
952982
((DirectBuffer) dst).address() - dstaddr + dst.position()) {
953983
return dst;
954984
}
985+
955986
} else if (!src.isDirect() && !dst.isDirect()) {
956987
// if src is read only, then we need a copy
957988
if (!src.isReadOnly()) {
@@ -964,10 +995,12 @@ ByteBuffer overlapDetection(ByteBuffer src, ByteBuffer dst) {
964995
// from the underlying byte[] address.
965996
// If during encryption and the input offset is behind or
966997
// the same as the output offset, the same buffer can be
967-
// used. But during decryption always create a new
968-
// buffer in case of a bad auth tag.
969-
if (encryption && src.position() + src.arrayOffset() >=
998+
// used.
999+
// Set 'inPlaceArray' true for decryption operations to
1000+
// avoid the AVX512 combined intrinsic
1001+
if (src.position() + src.arrayOffset() >=
9701002
dst.position() + dst.arrayOffset()) {
1003+
inPlaceArray = (!encryption);
9711004
return dst;
9721005
}
9731006
}
@@ -989,18 +1022,21 @@ ByteBuffer overlapDetection(ByteBuffer src, ByteBuffer dst) {
9891022
}
9901023

9911024
/**
992-
* This is used for both overlap detection for the data or decryption
1025+
* This is used for both overlap detection for the data or decryption
9931026
* during in-place crypto, so to not overwrite the input if the auth tag
9941027
* is invalid.
9951028
*
9961029
* If an intermediate array is needed, the original out array length is
9971030
* allocated because for code simplicity.
9981031
*/
9991032
byte[] overlapDetection(byte[] in, int inOfs, byte[] out, int outOfs) {
1000-
if (in == out && (!encryption || inOfs < outOfs)) {
1001-
originalOut = out;
1002-
originalOutOfs = outOfs;
1003-
return new byte[out.length];
1033+
if (in == out) {
1034+
if (inOfs < outOfs) {
1035+
originalOut = out;
1036+
originalOutOfs = outOfs;
1037+
return new byte[out.length];
1038+
}
1039+
inPlaceArray = (!encryption);
10041040
}
10051041
return out;
10061042
}
@@ -1501,8 +1537,11 @@ public int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
15011537
}
15021538

15031539
if (mismatch != 0) {
1504-
// Clear output data
1505-
Arrays.fill(out, outOfs, outOfs + len, (byte) 0);
1540+
// If this is an in-place array, don't zero the input
1541+
if (!inPlaceArray) {
1542+
// Clear output data
1543+
Arrays.fill(out, outOfs, outOfs + len, (byte) 0);
1544+
}
15061545
throw new AEADBadTagException("Tag mismatch");
15071546
}
15081547

@@ -1586,16 +1625,21 @@ public int doFinal(ByteBuffer src, ByteBuffer dst)
15861625
if (mismatch != 0) {
15871626
// Clear output data
15881627
dst.reset();
1589-
if (dst.hasArray()) {
1590-
int ofs = dst.arrayOffset() + dst.position();
1591-
Arrays.fill(dst.array(), ofs , ofs + len, (byte)0);
1592-
} else {
1593-
NIO_ACCESS.acquireSession(dst);
1594-
try {
1595-
Unsafe.getUnsafe().setMemory(((DirectBuffer)dst).address(),
1628+
// If this is an in-place array, don't zero the src
1629+
if (!inPlaceArray) {
1630+
if (dst.hasArray()) {
1631+
int ofs = dst.arrayOffset() + dst.position();
1632+
Arrays.fill(dst.array(), ofs, ofs + len,
1633+
(byte) 0);
1634+
} else {
1635+
NIO_ACCESS.acquireSession(dst);
1636+
try {
1637+
Unsafe.getUnsafe().setMemory(
1638+
((DirectBuffer)dst).address(),
15961639
len + dst.position(), (byte) 0);
1597-
} finally {
1598-
NIO_ACCESS.releaseSession(dst);
1640+
} finally {
1641+
NIO_ACCESS.releaseSession(dst);
1642+
}
15991643
}
16001644
}
16011645
throw new AEADBadTagException("Tag mismatch");
@@ -1807,8 +1851,11 @@ public int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
18071851
int outOfs) {
18081852
int len = 0;
18091853
if (inLen >= PARALLEL_LEN) {
1810-
len += implGCMCrypt(in, inOfs, inLen, in, inOfs, out, outOfs,
1811-
gctr, ghash);
1854+
// Since GCMDecrypt.inPlaceArray cannot be accessed, check that
1855+
// 'in' and 'out' are the same. All other in-place situations
1856+
// have been resolved by overlapDetection()
1857+
len += implGCMCrypt(in, inOfs, inLen, (in == out ? null : in),
1858+
inOfs, out, outOfs, gctr, ghash);
18121859
}
18131860
ghash.doFinal(in, inOfs + len, inLen - len);
18141861
return len + gctr.doFinal(in, inOfs + len, inLen - len, out,

0 commit comments

Comments
 (0)