Skip to content

Commit

Permalink
Merge a2e6f8a into c867a0a
Browse files Browse the repository at this point in the history
  • Loading branch information
dr0i committed Aug 24, 2020
2 parents c867a0a + a2e6f8a commit b09cb81
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 42 deletions.
68 changes: 55 additions & 13 deletions core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java
Expand Up @@ -9,14 +9,17 @@
import java.io.StringWriter;
import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.net.URL;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.BOMInputStream;
import org.apache.http.Header;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
Expand Down Expand Up @@ -344,18 +347,7 @@ public static Object fromURL(java.net.URL url, CloseableHttpClient httpClient)
// Accept headers as it's likely to be file: or jar:
in = url.openStream();
} else {
final HttpUriRequest request = new HttpGet(url.toExternalForm());
// We prefer application/ld+json, but fallback to
// application/json
// or whatever is available
request.addHeader("Accept", ACCEPT_HEADER);

response = httpClient.execute(request);
final int status = response.getStatusLine().getStatusCode();
if (status != 200 && status != 203) {
throw new IOException("Can't retrieve " + url + ", status code: " + status);
}
in = response.getEntity().getContent();
in = getJsonLdViaHttpUri(url, httpClient, response);
}
return fromInputStream(in);
} finally {
Expand All @@ -371,6 +363,56 @@ public static Object fromURL(java.net.URL url, CloseableHttpClient httpClient)
}
}

/**
 * Retrieves a document over HTTP(S), following JSON-LD "alternate document
 * location" links until a response without such a link is reached.
 *
 * <p>
 * Each request is sent with the {@code Accept} header set to
 * {@code ACCEPT_HEADER}. Only status codes 200 and 203 are accepted.
 *
 * <p>
 * Responses that are not handed back to the caller (error statuses and
 * responses superseded by an alternate link) are closed here. The number of
 * alternate links followed is bounded so that a server whose alternate link
 * points back at itself cannot cause unbounded requests.
 *
 * <p>
 * NOTE(review): Java passes references by value, so assigning to
 * {@code response} inside this method does not update the caller's
 * variable. The caller therefore cannot close the final response through
 * that variable; presumably closing the returned stream releases the
 * connection - confirm against the Apache HttpClient documentation.
 *
 * @param url
 *            The URL to retrieve.
 * @param httpClient
 *            The client used to execute the requests.
 * @param response
 *            Ignored on entry; kept for signature compatibility with
 *            existing callers.
 * @return The content stream of the final response.
 * @throws IOException
 *             If a request fails, a status other than 200 or 203 is
 *             returned, or too many alternate links are chained.
 * @see <a href=
 *      "https://www.w3.org/TR/json-ld11/#alternate-document-location">
 *      JSON-LD 1.1, alternate document location</a>
 */
private static InputStream getJsonLdViaHttpUri(final URL url, final CloseableHttpClient httpClient,
        CloseableHttpResponse response) throws IOException {
    // Upper bound on chained alternate links; guards against link loops.
    final int maxAlternateHops = 10;
    URL current = url;
    for (int hop = 0; hop <= maxAlternateHops; hop++) {
        final HttpUriRequest request = new HttpGet(current.toExternalForm());
        // We prefer application/ld+json, but fallback to application/json
        // or whatever is available
        request.addHeader("Accept", ACCEPT_HEADER);
        response = httpClient.execute(request);

        // Set once the content stream has been obtained for the caller;
        // in every other exit path the response is closed below.
        boolean handedOff = false;
        try {
            final int status = response.getStatusLine().getStatusCode();
            if (status != 200 && status != 203) {
                throw new IOException("Can't retrieve " + current + ", status code: " + status);
            }
            // follow alternate document location
            // https://www.w3.org/TR/json-ld11/#alternate-document-location
            final URL alternate = alternateLink(current, response);
            if (alternate == null) {
                final InputStream content = response.getEntity().getContent();
                handedOff = true;
                return content;
            }
            current = alternate;
        } finally {
            if (!handedOff) {
                response.close();
            }
        }
    }
    throw new IOException("Can't retrieve " + url + ", more than " + maxAlternateHops
            + " alternate document locations were chained");
}

/**
 * Looks for an alternate JSON-LD document location advertised by the
 * response, i.e. a {@code Link} header whose link has relation
 * {@code alternate} and type {@code application/ld+json}.
 *
 * <p>
 * No alternate location is reported when the response has no entity, when
 * the entity's reported content length is not positive, or when the entity
 * is already served as {@code application/ld+json} (media type parameters
 * such as {@code charset} are ignored for this comparison).
 *
 * <p>
 * The link target is resolved against {@code url}, so absolute URLs,
 * absolute paths and relative paths are all handled.
 *
 * @param url
 *            The URL the response was retrieved from; used as the base for
 *            resolving the link target.
 * @param response
 *            The response whose headers are inspected.
 * @return The resolved alternate location, or {@code null} if there is
 *         none.
 * @throws MalformedURLException
 *             If the link target cannot be resolved to a URL.
 * @throws IOException
 *             Declared for signature compatibility with existing callers.
 */
private static URL alternateLink(URL url, CloseableHttpResponse response)
        throws MalformedURLException, IOException {
    // NOTE(review): a chunked response typically reports a negative length
    // and is therefore never checked for an alternate link; this mirrors
    // the previous behaviour - confirm that it is intended.
    if (response.getEntity() == null || response.getEntity().getContentLength() <= 0) {
        return null;
    }
    final Header contentType = response.getEntity().getContentType();
    if (contentType != null && isJsonLdMediaType(contentType.getValue())) {
        return null;
    }
    for (Header header : response.getAllHeaders()) {
        if (!"link".equalsIgnoreCase(header.getName())) {
            continue;
        }
        final String target = findJsonLdAlternateTarget(header.getValue());
        if (target != null) {
            // Resolve relative references against the requested URL.
            return new URL(url, target);
        }
    }
    return null;
}

/**
 * Scans one {@code Link} header value, which may hold several
 * comma-separated links, and returns the target of the first link that has
 * relation {@code alternate} and type {@code application/ld+json}.
 *
 * @param linkHeaderValue
 *            The raw header value, may be {@code null}.
 * @return The non-empty link target, or {@code null} if no link matches.
 */
private static String findJsonLdAlternateTarget(String linkHeaderValue) {
    if (linkHeaderValue == null) {
        return null;
    }
    int open = linkHeaderValue.indexOf('<');
    while (open >= 0) {
        final int close = linkHeaderValue.indexOf('>', open + 1);
        if (close < 0) {
            return null;
        }
        // The attributes of a link run up to the start of the next link.
        final int nextOpen = linkHeaderValue.indexOf('<', close + 1);
        final String target = linkHeaderValue.substring(open + 1, close).trim();
        final String params = nextOpen < 0 ? linkHeaderValue.substring(close + 1)
                : linkHeaderValue.substring(close + 1, nextOpen);

        boolean relAlternate = false;
        boolean jsonld = false;
        for (String param : params.split(";")) {
            final int equalsSign = param.indexOf('=');
            if (equalsSign < 0) {
                continue;
            }
            final String name = param.substring(0, equalsSign).trim();
            final String value = stripLinkParamValue(param.substring(equalsSign + 1));
            if ("rel".equalsIgnoreCase(name)) {
                // rel may list several space-separated relation types
                for (String relation : value.split("\\s+")) {
                    if ("alternate".equalsIgnoreCase(relation)) {
                        relAlternate = true;
                    }
                }
            } else if ("type".equalsIgnoreCase(name) && isJsonLdMediaType(value)) {
                jsonld = true;
            }
        }
        if (jsonld && relAlternate && !target.isEmpty()) {
            return target;
        }
        open = nextOpen;
    }
    return null;
}

/**
 * Normalises a link attribute value: trims it, drops a trailing comma left
 * over from the link separator and removes surrounding double quotes.
 */
private static String stripLinkParamValue(String raw) {
    String value = raw.trim();
    if (value.endsWith(",")) {
        value = value.substring(0, value.length() - 1).trim();
    }
    if (value.startsWith("\"")) {
        value = value.substring(1);
    }
    if (value.endsWith("\"")) {
        value = value.substring(0, value.length() - 1);
    }
    return value.trim();
}

/**
 * Tells whether a media type string denotes {@code application/ld+json},
 * ignoring case and any parameters after the first {@code ;}.
 */
private static boolean isJsonLdMediaType(String value) {
    if (value == null) {
        return false;
    }
    final int paramStart = value.indexOf(';');
    final String mediaType = paramStart < 0 ? value : value.substring(0, paramStart);
    return "application/ld+json".equalsIgnoreCase(mediaType.trim());
}

/**
* Fallback method directly using the {@link java.net.HttpURLConnection}
* class for cases where servers do not interoperate correctly with Apache
Expand All @@ -384,7 +426,7 @@ public static Object fromURL(java.net.URL url, CloseableHttpClient httpClient)
* @throws IOException
* If there was an IO error during parsing.
*/
public static Object fromURLJavaNet(java.net.URL url) throws JsonParseException, IOException {
public static Object fromURLJavaNet(URL url) throws JsonParseException, IOException {
final HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
urlConn.addRequestProperty("Accept", ACCEPT_HEADER);

Expand Down
Expand Up @@ -12,9 +12,6 @@
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.protocol.RequestAcceptEncoding;
import org.apache.http.client.protocol.ResponseContentEncoding;
import org.apache.http.impl.client.CloseableHttpClient;
Expand All @@ -26,6 +23,7 @@
import org.junit.Test;

import com.github.jsonldjava.utils.JarCacheStorage;
import com.github.jsonldjava.utils.JsonUtils;

public class MinimalSchemaOrgRegressionTest {

Expand Down Expand Up @@ -59,10 +57,13 @@ private void verifyInputStream(InputStream directStream) throws IOException {
output.flush();
}
final String outputString = output.toString();
// System.out.println(outputString);
checkBasicConditions(outputString);
}

private void checkBasicConditions(final String outputString) {
// Test for some basic conditions without including the JSON/JSON-LD
// parsing code here
// assertTrue(outputString, outputString.endsWith("}"));
assertTrue(outputString, outputString.endsWith("}"));
assertFalse("Output string should not be empty: " + outputString.length(),
outputString.isEmpty());
assertTrue("Unexpected length: " + outputString.length(), outputString.length() > 100000);
Expand Down Expand Up @@ -90,30 +91,8 @@ public void testApacheHttpClient() throws Exception {
// use system defaults for proxy etc.
.useSystemProperties().build();

try {
final HttpUriRequest request = new HttpGet(url.toExternalForm());
// We prefer application/ld+json, but fallback to application/json
// or whatever is available
request.addHeader("Accept", ACCEPT_HEADER);

final CloseableHttpResponse response = httpClient.execute(request);
try {
final int status = response.getStatusLine().getStatusCode();
if (status != 200 && status != 203) {
throw new IOException("Can't retrieve " + url + ", status code: " + status);
}
final InputStream content = response.getEntity().getContent();
verifyInputStream(content);
} finally {
if (response != null) {
response.close();
}
}
} finally {
if (httpClient != null) {
httpClient.close();
}
}
Object content = JsonUtils.fromURL(url, httpClient);
checkBasicConditions(content.toString());
}

}

0 comments on commit b09cb81

Please sign in to comment.