Skip to content

Commit

Permalink
add final logic; will result in rename
Browse files Browse the repository at this point in the history
Signed-off-by: Nicholas Walter Knize <nknize@apache.org>
  • Loading branch information
nknize committed Jul 27, 2023
1 parent 05bbb5c commit 0e159f9
Showing 1 changed file with 113 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,14 @@

package org.opensearch.core.xcontent;

import org.opensearch.core.common.bytes.BytesArray;
import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.core.xcontent.spi.MediaTypeProvider;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -218,6 +222,115 @@ public static MediaType xContentType(CharSequence content) {
return null;
}

/**
 * Sniffs the media type of the content carried by the given stream while leaving the stream's read position untouched.
 * <p>
 * The stream is marked, leading whitespace is skipped, and up to {@code GUESS_HEADER_LENGTH} bytes (starting with the first
 * non-whitespace byte) are handed to {@link #mediaTypeFromBytes(byte[], int, int)}. The stream is always reset to the marked
 * position before this method returns, whether it returns normally or throws after the mark has been set.
 *
 * @param si the stream to inspect; it must support mark/reset
 * @return the result of {@link #mediaTypeFromBytes(byte[], int, int)} for the sniffed bytes, or {@code null} when the stream ends
 *         before any non-whitespace byte is found
 * @throws IllegalArgumentException if the stream does not support mark/reset
 * @throws IOException if reading from, or resetting, the stream fails
 *
 * @deprecated the content type should not be guessed except for the few cases where it is effectively unknown.
 * The REST layer should move to reading the Content-Type header instead. There are other places where auto-detection may be needed.
 * This method is deprecated to prevent usages of it from spreading further without specific reasons.
 */
@Deprecated
public static MediaType xContentType(InputStream si) throws IOException {
    // Without mark/reset there is no way to peek at the content and hand the stream back unchanged.
    if (si.markSupported() == false) {
        throw new IllegalArgumentException("Cannot guess the xcontent type without mark/reset support on " + si.getClass());
    }
    si.mark(Integer.MAX_VALUE);
    try {
        // Skip ahead to the first non-whitespace byte; an exhausted stream gives us nothing to guess from.
        int firstByte = si.read();
        while (firstByte != -1 && Character.isWhitespace((char) firstByte)) {
            firstByte = si.read();
        }
        if (firstByte == -1) {
            return null;
        }

        // Collect the header: the byte just found plus as many following bytes as are available, capped at GUESS_HEADER_LENGTH.
        final byte[] header = new byte[GUESS_HEADER_LENGTH];
        header[0] = (byte) firstByte;
        int filled = 1;
        int count;
        while (filled < GUESS_HEADER_LENGTH && (count = si.read(header, filled, GUESS_HEADER_LENGTH - filled)) != -1) {
            filled += count;
        }
        return mediaTypeFromBytes(header, 0, filled);
    } finally {
        // Rewind to the marked position so the caller sees the stream exactly as it was handed in.
        si.reset();
    }
}

/**
 * Guesses the media type of the content held by the given bytes reference.
 * <p>
 * A {@link BytesArray} is inspected directly through its backing array via {@link #mediaTypeFromBytes(byte[], int, int)}.
 * Any other reference is read through {@code bytes.streamInput()} and delegated to {@link #xContentType(InputStream)};
 * that stream is closed before this method returns.
 *
 * @param bytes the bytes to inspect
 * @return the guessed media type, or {@code null} if the delegate could not determine one
 * @throws UncheckedIOException if opening, reading or closing the stream raises an {@link IOException}
 *
 * @deprecated the content type should not be guessed except for the few cases where it is effectively unknown.
 * The REST layer should move to reading the Content-Type header instead. There are other places where auto-detection may be needed.
 * This method is deprecated to prevent usages of it from spreading further without specific reasons.
 */
@Deprecated
public static MediaType xContentType(BytesReference bytes) {
    if (bytes instanceof BytesArray) {
        // Array-backed reference: sniff the backing array in place, no stream required.
        final BytesArray array = (BytesArray) bytes;
        return mediaTypeFromBytes(array.array(), array.offset(), array.length());
    }
    // try-with-resources guarantees the stream opened here is closed on every exit path.
    try (InputStream inputStream = bytes.streamInput()) {
        assert inputStream.markSupported();
        return xContentType(inputStream);
    } catch (IOException e) {
        assert false : "Should not happen, we're just reading bytes from memory";
        throw new UncheckedIOException(e);
    }
}

/**
 * Guesses the media type of the content found in {@code data[offset .. offset + length)}.
 * <p>
 * Every media type in {@code formatToMediaType} is asked, in that map's iteration order, whether it detects its own format
 * in the given range; the first one that does is returned. If none matches, a final JSON check is made: after an optional
 * UTF-8 byte order mark and any leading whitespace, a {@code '{'} yields the media type registered for
 * {@code "application/json"}.
 *
 * @param data   the array holding the content
 * @param offset index of the first byte to inspect
 * @param length number of bytes to inspect
 * @return the guessed media type, or {@code null} if the array or the range is empty, the range does not lie within
 *         {@code data} (including a negative {@code offset} or {@code length}), or no type could be detected
 *
 * @deprecated the content type should not be guessed except for the few cases where it is effectively unknown.
 * The REST layer should move to reading the Content-Type header instead. There are other places where auto-detection may be needed.
 * This method is deprecated to prevent usages of it from spreading further without specific reasons.
 */
@Deprecated
public static MediaType mediaTypeFromBytes(final byte[] data, int offset, int length) {
    final int totalLength = data.length;
    if (totalLength == 0 || length == 0) {
        return null;
    }
    // Overflow-safe range check: "offset + length" can wrap around for large values, and negative inputs would otherwise
    // slip through to the array accesses below. Out-of-range requests are answered with null, as before.
    if (offset < 0 || length < 0 || length > totalLength - offset) {
        return null;
    }

    // Let each registered media type try to recognise its own format first.
    for (var mediaType : formatToMediaType.values()) {
        if (mediaType.detectedXContent(data, offset, length)) {
            return mediaType;
        }
    }

    // a last chance for JSON
    int jsonStart = 0;
    // JSON may be preceded by UTF-8 BOM
    if (length > 3 && data[offset] == (byte) 0xEF && data[offset + 1] == (byte) 0xBB && data[offset + 2] == (byte) 0xBF) {
        jsonStart = 3;
    }

    // Accept an opening brace preceded only by whitespace; any other byte ends the search.
    for (int i = jsonStart; i < length; i++) {
        final byte b = data[offset + i];
        if (b == '{') {
            return fromMediaType("application/json");
        }
        if (Character.isWhitespace(b) == false) {
            break;
        }
    }

    return null;
}

/**
* parsing media type that follows https://tools.ietf.org/html/rfc7231#section-3.1.1.1
Expand Down

0 comments on commit 0e159f9

Please sign in to comment.