Skip to content

Commit

Permalink
Add autoclosing content
Browse files Browse the repository at this point in the history
  • Loading branch information
emmanuel-keller committed Oct 22, 2017
1 parent a951437 commit 9a24805
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/main/java/com/qwazr/crawler/web/WebCrawlThread.java
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ protected void runner()
registerScriptGlobalObject("driver", driver);
script(EventEnum.before_session, null);
if (crawlDefinition.preUrl != null && !crawlDefinition.preUrl.isEmpty())
driver.body(WebRequestDefinition.of(crawlDefinition.preUrl).build());
driver.body(WebRequestDefinition.of(crawlDefinition.preUrl).build()).close();
final Set<URI> crawledURIs = new HashSet<>();
if (crawlDefinition.urls != null && !crawlDefinition.urls.isEmpty())
crawlUrlMap(driver, crawledURIs, crawlDefinition.urls);
Expand Down
20 changes: 17 additions & 3 deletions src/main/java/com/qwazr/crawler/web/driver/QwazrDriver.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.logging.Logger;
import java.util.stream.Collectors;
Expand All @@ -65,6 +66,8 @@ public class QwazrDriver implements DriverInterface {

private final String userAgent;

private final ConcurrentHashMap.KeySetView<Body, Boolean> bodies;

QwazrDriver(final WebCrawlDefinition definition) throws IOException {
final OkHttpClient.Builder builder =
new OkHttpClient.Builder().followRedirects(false).followSslRedirects(false);
Expand All @@ -91,6 +94,7 @@ public class QwazrDriver implements DriverInterface {
builder.cookieJar(new Cookies(definition.cookies));

userAgent = StringUtils.isBlank(definition.userAgent) ? null : definition.userAgent;
bodies = ConcurrentHashMap.newKeySet();
client = builder.build();
}

Expand Down Expand Up @@ -124,18 +128,27 @@ public Head head(WebRequestDefinition request) throws IOException {
/**
 * Creates the {@link Body} implementation matching the HTTP method of the request.
 * A {@code null} method is treated as GET. In the assignment-based flow below, the
 * created body is added to {@code bodies} before it is returned.
 *
 * @param request the request definition; its {@code method} field selects the implementation
 * @return a {@code GetImpl} for GET or a {@code PostImpl} for POST
 * @throws IOException declared by the signature; presumably raised while the request is
 *                     executed by the implementation constructors (TODO confirm)
 * @throws NotImplementedException if the method is neither GET nor POST
 */
public Body body(WebRequestDefinition request) throws IOException {
// Default to GET when the request does not specify a method.
final WebRequestDefinition.HttpMethod method =
request.method == null ? WebRequestDefinition.HttpMethod.GET : request.method;
final Body body;
switch (method) {
case GET:
// NOTE(review): this direct return looks like the pre-change line of the diff; the
// assignment right below appears to supersede it. Confirm against the committed file.
return new GetImpl(request);
body = new GetImpl(request);
break;
case POST:
// NOTE(review): same as above, the direct return is presumably the removed line.
return new PostImpl(request);
body = new PostImpl(request);
break;
default:
throw new NotImplementedException("Method not supported: " + method);
}
// Track the body in the driver's set of bodies.
bodies.add(body);
return body;
}

/**
 * Closes every body currently registered in {@code bodies}.
 * <p>
 * All bodies are attempted even if one of them fails to close, so a single failure
 * does not leave the remaining bodies open. The first {@link IOException} is rethrown
 * once the loop is finished; any later failures are attached to it as suppressed
 * exceptions.
 *
 * @throws IOException if at least one body could not be closed
 */
@Override
public void close() throws IOException {
    // Work on a snapshot so the set can be modified while we iterate
    // (a body's close() may remove it from 'bodies').
    final List<Body> toClose = new ArrayList<>(bodies);
    IOException failure = null;
    for (final Body body : toClose) {
        try {
            body.close();
        } catch (IOException e) {
            // Remember the failure but keep closing the other bodies.
            if (failure == null)
                failure = e;
            else
                failure.addSuppressed(e);
        }
    }
    if (failure != null)
        throw failure;
    // Every body closed cleanly, so each one is expected to have deregistered itself.
    assert bodies.isEmpty();
}

static Long buildContentLength(String header) {
Expand Down Expand Up @@ -264,6 +277,7 @@ public Content getContent() {
/**
 * Closes the content held by this body, if there is one and it is not already closed,
 * then removes this body from {@code bodies}.
 * <p>
 * The removal happens in a {@code finally} block so the body is deregistered even when
 * closing the content fails.
 *
 * @throws IOException if closing the content fails
 */
public void close() throws IOException {
    try {
        if (content != null && !content.isClosed())
            content.close();
    } finally {
        // Always deregister, otherwise a failed close leaves this body tracked by the driver.
        bodies.remove(this);
    }
}

public synchronized Document getHtmlDocument() throws IOException {
Expand Down Expand Up @@ -351,7 +365,7 @@ public void close() throws IOException {

/**
 * Reports whether this content is closed, judged by whether the {@code contentCache}
 * path still exists on disk.
 *
 * @return {@code true} when the cache path no longer exists (or, in the null-safe form,
 *         when {@code contentCache} is {@code null})
 */
@Override
public boolean isClosed() {
// NOTE(review): the two return statements look like the before/after lines of the diff.
// Taken literally the second one is unreachable; confirm that only the null-safe form remains.
return !Files.exists(contentCache);
// Null-safe form: a null contentCache counts as closed.
return contentCache == null || !Files.exists(contentCache);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public class DriverInterfaceTest {
/**
 * Checks that a driver built from a default crawl definition returns a non-null
 * body for a simple request.
 */
@Test
public void test() throws IOException {
final DriverInterface driver = DriverInterface.of(WebCrawlDefinition.of().build());
// NOTE(review): 'request' is declared twice; these look like the before/after lines of the
// diff (the target URL was changed). Confirm that only the second declaration remains.
final WebRequestDefinition request = WebRequestDefinition.of("https://www.qwazr.com").build();
final WebRequestDefinition request = WebRequestDefinition.of("http://www.opensearchserver.com/").build();
Assert.assertNotNull(driver.body(request));
}
}

0 comments on commit 9a24805

Please sign in to comment.