Skip to content

Commit

Permalink
Handle raw deflate streams without zlib wrapping (#25)
Browse files Browse the repository at this point in the history
  • Loading branch information
mizosoft committed Sep 25, 2020
1 parent ec48277 commit 8a897a7
Show file tree
Hide file tree
Showing 11 changed files with 176 additions and 80 deletions.
Expand Up @@ -35,6 +35,7 @@
import static com.github.mizosoft.methanol.testutils.TestUtils.lines;
import static com.github.mizosoft.methanol.testutils.TestUtils.load;
import static com.github.mizosoft.methanol.testutils.TestUtils.loadAscii;
import static com.github.mizosoft.methanol.testutils.TestUtils.zlibUnwrap;
import static java.net.http.HttpRequest.BodyPublishers.fromPublisher;
import static java.net.http.HttpResponse.BodyHandlers.discarding;
import static java.net.http.HttpResponse.BodyHandlers.ofString;
Expand Down Expand Up @@ -229,22 +230,31 @@ void tearDownLifecycle() throws IOException {
TestUtils.shutdown(executor, scheduler);
}

private void assertDecodesSmall(String encoding) throws Exception {
private void assertDecodes(String encoding, String expected, byte[] compressed) throws Exception {
server.enqueue(new MockResponse()
.setBody(okBuffer(BASE64_DEC.decode(poemEncodings.get(encoding))))
.setBody(okBuffer(compressed))
.setHeader("Content-Encoding", encoding));
var request = HttpRequest.newBuilder(server.url("/").uri()).build();
var response = client.send(request, decoding(ofString()));
assertEquals(poem, response.body());
assertLinesMatch(lines(expected), lines(response.body()));
}

private void assertDecodesSmall(String encoding) throws Exception {
var compressed = BASE64_DEC.decode(poemEncodings.get(encoding));
assertDecodes(encoding, poem, compressed);
// Test deflate body without zlib wrapping
if (encoding.equals("deflate")) {
assertDecodes(encoding, poem, zlibUnwrap(compressed));
}
}

private void assertDecodesLarge(String encoding) throws Exception {
server.enqueue(new MockResponse()
.setBody(okBuffer(lotsOfTextEncodings.get(encoding)))
.setHeader("Content-Encoding", encoding));
var request = HttpRequest.newBuilder(server.url("/").uri()).build();
var response = client.send(request, decoding(ofString()));
assertLinesMatch(lines(lotsOfText), lines(response.body()));
var compressed = lotsOfTextEncodings.get(encoding);
assertDecodes(encoding, lotsOfText, compressed);
// Test deflate body without zlib wrapping
if (encoding.equals("deflate")) {
assertDecodes(encoding, lotsOfText, zlibUnwrap(compressed));
}
}

@Test
Expand Down
Expand Up @@ -32,6 +32,7 @@
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.concurrent.CountDownLatch;
Expand All @@ -40,6 +41,7 @@
import java.util.concurrent.Flow.Subscription;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import javax.net.ssl.SSLContext;
import okhttp3.tls.HandshakeCertificates;
Expand Down Expand Up @@ -83,9 +85,22 @@ public static byte[] gunzip(byte[] data) {
}
}

public static byte[] zlibUnwrap(byte[] zlibWrapped) {
assert zlibWrapped.length > Short.BYTES + Integer.BYTES;
return Arrays.copyOfRange(zlibWrapped, Short.BYTES, zlibWrapped.length - Integer.BYTES);
}

public static byte[] inflate(byte[] data) {
return inflate0(data, new Inflater());
}

public static byte[] inflateNowrap(byte[] data) {
return inflate0(data, new Inflater(true));
}

private static byte[] inflate0(byte[] data, Inflater inflater) {
try {
return new InflaterInputStream(new ByteArrayInputStream(data)).readAllBytes();
return new InflaterInputStream(new ByteArrayInputStream(data), inflater).readAllBytes();
} catch (IOException e) {
throw new UncheckedIOException(e);
}
Expand Down
Expand Up @@ -24,15 +24,15 @@

import com.github.mizosoft.methanol.BodyDecoder;
import com.github.mizosoft.methanol.decoder.AsyncBodyDecoder;
import com.github.mizosoft.methanol.decoder.AsyncDecoder;
import java.net.http.HttpResponse.BodySubscriber;
import java.util.concurrent.Executor;

/** Convenient base class for deflate and gzip {@code BodyDecoder.Factory} providers. */
abstract class ZLibBodyDecoderFactory implements BodyDecoder.Factory {
abstract class AsyncBodyDecoderFactory implements BodyDecoder.Factory {
AsyncBodyDecoderFactory() {}

ZLibBodyDecoderFactory() {}

abstract ZLibDecoder newDecoder();
abstract AsyncDecoder newDecoder();

@Override
public <T> BodyDecoder<T> create(BodySubscriber<T> downstream) {
Expand Down
Expand Up @@ -22,11 +22,12 @@

package com.github.mizosoft.methanol.internal.decoder;

import com.github.mizosoft.methanol.decoder.AsyncDecoder;
import com.github.mizosoft.methanol.internal.annotations.DefaultProvider;

/** {@code BodyDecoder.Factory} for "deflate". */
@DefaultProvider
public final class DeflateBodyDecoderFactory extends ZLibBodyDecoderFactory {
public final class DeflateBodyDecoderFactory extends AsyncBodyDecoderFactory {

/**
* Creates a new {@code DeflateBodyDecoderFactory}. Meant to be called by {@code ServiceLoader}.
Expand All @@ -35,11 +36,11 @@ public DeflateBodyDecoderFactory() {}

@Override
public String encoding() {
return "deflate";
return DeflateDecoder.ENCODING;
}

@Override
ZLibDecoder newDecoder() {
AsyncDecoder newDecoder() {
return new DeflateDecoder();
}
}
Expand Up @@ -22,19 +22,65 @@

package com.github.mizosoft.methanol.internal.decoder;

import com.github.mizosoft.methanol.decoder.AsyncDecoder;
import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.concurrent.atomic.AtomicReference;
import java.util.zip.Inflater;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;

/** {@code AsyncDecoder} for deflate. */
final class DeflateDecoder extends ZLibDecoder {
final class DeflateDecoder implements AsyncDecoder {
static final String ENCODING = "deflate";

DeflateDecoder() {
super(WrapMode.DEFLATE);
private static final int CM_MASK = 0x0F00; // Mask for CM half byte
private static final int CM_DEFLATE = 0x0800; // Deflate CM (8) shifted to CM half byte position

// A tombstone for inflaterReference indicating the decoder has been closed
private static final Object CLOSED = new Object();

// Ideally, we'd have a final Inflater field created with nowrap set to false,
// as the `deflate` content encoding is defined to be in zlib format (zlib-wrapped).
// However, some broken servers send raw deflate bytes that aren't zlib-wrapped,
// so the first two bytes (zlib header) must be peeked first to know if the Inflater
// is to be created with nowrap set or not (see https://github.com/mizosoft/methanol/issues/25)
private final AtomicReference<@MonotonicNonNull Object> inflaterReference =
new AtomicReference<>();

DeflateDecoder() {}

@Override
public String encoding() {
return ENCODING;
}

@Override
public void decode(ByteSource source, ByteSink sink) throws IOException {
inflateSource(source, sink);
Inflater inflater;
var inflaterPlaceholder = inflaterReference.get();
if (inflaterPlaceholder == null) {
if (source.remaining() < Short.BYTES) {
return; // Expect more bytes next round
}

var header = ByteBuffer.allocate(Short.BYTES);
source.pullBytes(header);
header.flip();
boolean nowrap = !isProbablyZLibHeader(header.getShort());
inflater = new Inflater(nowrap);
if (!inflaterReference.compareAndSet(null, inflater)) {
inflater.end();
return;
}
inflater.setInput(header.rewind()); // The inflater still has to consume the peeked header
} else if (inflaterPlaceholder != CLOSED) {
inflater = (Inflater) inflaterPlaceholder;
} else {
return; // The decoder is closed
}

InflaterUtils.inflateSource(inflater, source, sink);
if (inflater.finished()) {
if (source.hasRemaining()) {
throw new IOException("deflate stream finished prematurely");
Expand All @@ -44,4 +90,17 @@ public void decode(ByteSource source, ByteSink sink) throws IOException {
throw new EOFException("unexpected end of deflate stream");
}
}

@Override
public void close() {
var inflater = inflaterReference.getAndSet(CLOSED);
if (inflater instanceof Inflater) { // Not null or CLOSED
((Inflater) inflater).end();
}
}

private static boolean isProbablyZLibHeader(short header) {
// See section 2.2 of https://tools.ietf.org/html/rfc1950
return (header & CM_MASK) == CM_DEFLATE && header % 31 == 0;
}
}
Expand Up @@ -22,22 +22,23 @@

package com.github.mizosoft.methanol.internal.decoder;

import com.github.mizosoft.methanol.decoder.AsyncDecoder;
import com.github.mizosoft.methanol.internal.annotations.DefaultProvider;

/** {@code BodyDecoder.Factory} for "gzip". */
@DefaultProvider
public final class GzipBodyDecoderFactory extends ZLibBodyDecoderFactory {
public final class GzipBodyDecoderFactory extends AsyncBodyDecoderFactory {

/** Creates a new {@code GzipBodyDecoderFactory}. Meant to be called by {@code ServiceLoader}. */
public GzipBodyDecoderFactory() {}

@Override
public String encoding() {
return "gzip";
return GzipDecoder.ENCODING;
}

@Override
ZLibDecoder newDecoder() {
AsyncDecoder newDecoder() {
return new GzipDecoder();
}
}
Expand Up @@ -24,15 +24,18 @@

import static java.lang.String.format;

import com.github.mizosoft.methanol.decoder.AsyncDecoder;
import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.zip.CRC32;
import java.util.zip.Inflater;
import java.util.zip.ZipException;

/** {@code AsyncDecoder} for gzip. */
final class GzipDecoder extends ZLibDecoder {
final class GzipDecoder implements AsyncDecoder {
static final String ENCODING = "gzip";

private static final int GZIP_MAGIC = 0x8B1F; // ID1 and ID2 as a little-endian ordered short
private static final int CM_DEFLATE = 8;
Expand All @@ -45,6 +48,7 @@ final class GzipDecoder extends ZLibDecoder {
private static final int SHORT_MASK = 0xFFFF;
private static final long INT_MASK = 0xFFFFFFFFL;

private final Inflater inflater = new Inflater(true); // gzip has it's own wrapping method
private final ByteBuffer tempBuffer;
private final CRC32 crc;
private boolean computeCrc; // Whether to compute crc (in case FHCRC is enabled)
Expand All @@ -56,14 +60,18 @@ final class GzipDecoder extends ZLibDecoder {
private int fieldPosition;

GzipDecoder() {
super(WrapMode.GZIP);
tempBuffer =
ByteBuffer.allocate(TEMP_BUFFER_SIZE)
.order(ByteOrder.LITTLE_ENDIAN); // Multi-byte gzip values are little-endian/unsigned
crc = new CRC32();
state = State.BEGIN;
}

@Override
public String encoding() {
return ENCODING;
}

@Override
public void decode(ByteSource source, ByteSink sink) throws IOException {
outerLoop:
Expand Down Expand Up @@ -116,7 +124,7 @@ public void decode(ByteSource source, ByteSink sink) throws IOException {
// fallthrough

case DEFLATED:
inflateSource(source, sink);
InflaterUtils.inflateSourceWithChecksum(inflater, source, sink, crc);
if (!inflater.finished()) {
break outerLoop;
}
Expand Down Expand Up @@ -177,6 +185,11 @@ public void decode(ByteSource source, ByteSink sink) throws IOException {
}
}

@Override
public void close() {
inflater.end();
}

private ByteBuffer fillTempBuffer(ByteSource source, int bytes) {
source.pullBytes(tempBuffer.rewind().limit(bytes));
assert !tempBuffer.hasRemaining();
Expand Down Expand Up @@ -267,17 +280,6 @@ private boolean tryConsumeToZeroByte(ByteSource source) {
return false;
}

// Override to compute crc32 of the inflated bytes
@Override
void inflateBlock(ByteBuffer in, ByteBuffer out) throws IOException {
int prePos = out.position();
super.inflateBlock(in, out);
int postPos = out.position();
int originalLimit = out.limit();
crc.update(out.position(prePos).limit(postPos));
out.limit(originalLimit);
}

private static void checkValue(long expected, long found, String msg) throws IOException {
if (expected != found) {
throw new ZipException(format("%s; expected: %#x, found: %#x", msg, expected, found));
Expand Down Expand Up @@ -344,7 +346,6 @@ private static State fromFlags(GzipDecoder dec) {
}

/** Options in the FLG byte. */
@SuppressWarnings("unused") // Most are not explicitly used
private enum FlagOption {
// Order of declaration is important!

Expand Down

0 comments on commit 8a897a7

Please sign in to comment.