Skip to content

Commit

Permalink
Update htmlunit
Browse files Browse the repository at this point in the history
  • Loading branch information
emmanuel-keller committed May 2, 2017
1 parent dae4aba commit 80b4ff7
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 59 deletions.
44 changes: 40 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@
</parent>

<properties>
<selenium.version>3.3.1</selenium.version>
<htmlunit.version>2.25</htmlunit.version>
<selenium.version>3.4.0</selenium.version>
<htmlunit.version>2.26</htmlunit.version>
<jetty.version>9.4.3.v20170317</jetty.version>
</properties>

<scm>
Expand All @@ -26,6 +27,41 @@
<url>git@github.com:qwazr/crawlers.git</url>
</scm>

<!--
Managed dependency versions: each entry below fixes the version that is used
whenever the listed artifact is resolved in this build.
NOTE(review): presumably added to align transitive versions after the
selenium/htmlunit upgrade - confirm the motivation for each pin.
-->
<dependencyManagement>
<dependencies>
<dependency>
<groupId>xml-apis</groupId>
<artifactId>xml-apis</artifactId>
<version>1.4.01</version>
</dependency>
<!-- Follows the selenium.version property. -->
<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-support</artifactId>
<version>${selenium.version}</version>
</dependency>
<dependency>
<groupId>org.roaringbitmap</groupId>
<artifactId>RoaringBitmap</artifactId>
<version>0.6.43</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.10</version>
</dependency>
<!-- Both Jetty modules follow the jetty.version property so they stay on the same release. -->
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-io</artifactId>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
<version>${jetty.version}</version>
</dependency>
</dependencies>
</dependencyManagement>

<dependencies>
<dependency>
<groupId>com.qwazr</groupId>
Expand Down Expand Up @@ -94,7 +130,7 @@
<dependency>
<groupId>com.codeborne</groupId>
<artifactId>phantomjsdriver</artifactId>
<version>1.4.1</version>
<version>1.4.2</version>
<exclusions>
<exclusion>
<artifactId>selenium-api</artifactId>
Expand Down Expand Up @@ -131,7 +167,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>${shade.version}</version>
<version>3.0.0</version>
<executions>
<execution>
<phase>package</phase>
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/qwazr/crawler/web/WebCrawlerServer.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ private WebCrawlerServer(final ServerConfiguration configuration) throws IOExcep
.registerProtocolListener(builder)
.registerWebService(builder);
final LibraryManager libraryManager =
new LibraryManager(null, configuration.dataDirectory, configuration.getEtcFiles()).registerWebService(
new LibraryManager(configuration.dataDirectory, configuration.getEtcFiles()).registerWebService(
builder);
final ScriptManager scriptManager = new ScriptManager(executorService, clusterManager, libraryManager,
configuration.dataDirectory).registerWebService(builder);
configuration.dataDirectory).registerWebService(builder);
final WebCrawlerManager webCrawlerManager =
new WebCrawlerManager(clusterManager, scriptManager, executorService).registerWebService(builder);
serviceBuilder = new WebCrawlerServiceBuilder(clusterManager, webCrawlerManager);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,12 @@ interface SaveBinaryFile {
void saveBinaryFile(File file) throws IOException;
}

/**
 * Optional driver capability: set an attribute on an element.
 */
interface SetAttribute {

/**
 * Sets the attribute {@code name} to {@code value} on the given element.
 *
 * @param element the element to modify
 * @param name    the attribute name
 * @param value   the attribute value
 */
void setAttribute(WebElement element, String name, String value);
}

/**
 * Optional driver capability: perform a request described by a
 * {@link WebRequestDefinition}.
 */
interface WebRequest {

/**
 * Performs the request described by the given definition.
 *
 * @param request the definition of the request to perform
 */
void request(WebRequestDefinition request);
}

interface All extends ResponseHeader, SafeText, InnerHtml, SaveBinaryFile, SetAttribute, WebRequest {
interface All extends ResponseHeader, SafeText, InnerHtml, SaveBinaryFile, WebRequest {

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -343,10 +343,7 @@ public List<WebElement> findElementsByCssSelector(String cssSelector) {
}

/**
 * Sets the attribute {@code name} to {@code value} on the given element.
 *
 * @param element the element to modify
 * @param name    the attribute name
 * @param value   the attribute value
 */
public void setAttribute(WebElement element, String name, String value) {
// Use the driver's own implementation when it exposes the SetAttribute capability.
if (driver instanceof AdditionalCapabilities.SetAttribute)
((AdditionalCapabilities.SetAttribute) driver).setAttribute(element, name, value);
else
// Otherwise set the attribute through a script run against the element.
this.executeScript("arguments[0].setAttribute(arguments[1], arguments[2])", false, element, name, value);
// NOTE(review): this unconditional call repeats the else-branch script, so the
// script also runs after either branch above. It looks like the before/after
// variants of a change are both present - confirm which single variant is intended.
this.executeScript("arguments[0].setAttribute(arguments[1], arguments[2])", false, element, name, value);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.openqa.selenium.Proxy;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.phantomjs.PhantomJSDriverService;
import org.openqa.selenium.remote.BrowserType;
import org.openqa.selenium.remote.CapabilityType;
import org.openqa.selenium.remote.DesiredCapabilities;

Expand Down Expand Up @@ -102,8 +103,8 @@ public BrowserDriver build() throws ReflectiveOperationException, SecurityExcept
// Setup the language
if (crawlDefinition.browser_language != null) {
capabilities = checkCapabilities(capabilities);
capabilities
.setCapability(AdditionalCapabilities.QWAZR_BROWSER_LANGUAGE, crawlDefinition.browser_language);
capabilities.setCapability(AdditionalCapabilities.QWAZR_BROWSER_LANGUAGE,
crawlDefinition.browser_language);
if (browserType == BrowserDriverEnum.phantomjs)
capabilities.setCapability(
PhantomJSDriverService.PHANTOMJS_PAGE_CUSTOMHEADERS_PREFIX + "Accept-Language",
Expand All @@ -115,16 +116,16 @@ public BrowserDriver build() throws ReflectiveOperationException, SecurityExcept
capabilities = checkCapabilities(capabilities);
if (browserType == BrowserDriverEnum.phantomjs)
capabilities.setCapability(PhantomJSDriverService.PHANTOMJS_PAGE_SETTINGS_PREFIX + "loadImages",
crawlDefinition.download_images);
crawlDefinition.download_images);
}

// Web security
if (crawlDefinition.web_security != null) {
capabilities = checkCapabilities(capabilities);
if (browserType == BrowserDriverEnum.phantomjs)
capabilities
.setCapability(PhantomJSDriverService.PHANTOMJS_PAGE_SETTINGS_PREFIX + "webSecurityEnabled",
crawlDefinition.web_security);
capabilities.setCapability(
PhantomJSDriverService.PHANTOMJS_PAGE_SETTINGS_PREFIX + "webSecurityEnabled",
crawlDefinition.web_security);
}

// Choose a browser name
Expand All @@ -143,9 +144,9 @@ public BrowserDriver build() throws ReflectiveOperationException, SecurityExcept
capabilities = checkCapabilities(capabilities);
capabilities.setJavascriptEnabled(crawlDefinition.javascript_enabled);
if (browserType == BrowserDriverEnum.phantomjs)
capabilities
.setCapability(PhantomJSDriverService.PHANTOMJS_PAGE_SETTINGS_PREFIX + "javascriptEnabled",
crawlDefinition.javascript_enabled);
capabilities.setCapability(
PhantomJSDriverService.PHANTOMJS_PAGE_SETTINGS_PREFIX + "javascriptEnabled",
crawlDefinition.javascript_enabled);

}

Expand All @@ -154,15 +155,22 @@ public BrowserDriver build() throws ReflectiveOperationException, SecurityExcept

if (browserType == BrowserDriverEnum.phantomjs) {
capabilities = checkCapabilities(capabilities);
capabilities.setCapability(PhantomJSDriverService.PHANTOMJS_CLI_ARGS,
new String[] { "--webdriver-loglevel=none", "--ignore-ssl-errors=true", "--ssl-protocol=any" });
capabilities.setCapability(PhantomJSDriverService.PHANTOMJS_CLI_ARGS, new String[] {
"--webdriver-loglevel=none",
"--ignore-ssl-errors=true",
"--ssl-protocol=any" });
}

if (browserType == BrowserDriverEnum.html_unit) {
capabilities = checkCapabilities(capabilities);
capabilities.setBrowserName(BrowserType.HTMLUNIT);
}

final WebDriver driver = browserType.getNewInstance(capabilities);
try {
final BrowserDriver browserDriver = new BrowserDriver(browserType, driver, proxyDef);
browserDriver.setTimeouts(crawlDefinition.implicitly_wait, crawlDefinition.page_load_timeout,
crawlDefinition.script_timeout);
crawlDefinition.script_timeout);

if (crawlDefinition.cookies != null)
for (Map.Entry<String, String> cookie : crawlDefinition.cookies.entrySet())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,13 @@

public class HtmlUnitBrowserDriver extends HtmlUnitDriverWebClient
implements AdditionalCapabilities.ResponseHeader, AdditionalCapabilities.SafeText,
AdditionalCapabilities.InnerHtml, AdditionalCapabilities.SaveBinaryFile, AdditionalCapabilities.SetAttribute,
AdditionalCapabilities.WebRequest {
AdditionalCapabilities.InnerHtml, AdditionalCapabilities.SaveBinaryFile, AdditionalCapabilities.WebRequest {

protected static final Logger logger = LoggerFactory.getLogger(HtmlUnitBrowserDriver.class);

/**
 * Creates a driver without explicit capabilities; only the implicit
 * no-argument superclass constructor runs.
 */
public HtmlUnitBrowserDriver() {
}

/**
 * Creates a driver, handing the given capabilities to the superclass constructor.
 *
 * @param capabilities the capabilities forwarded to the superclass
 */
public HtmlUnitBrowserDriver(Capabilities capabilities) {
super(capabilities);
}
Expand Down Expand Up @@ -156,8 +155,4 @@ public String getTextSafe(WebElement webElement) {
}
}

/**
 * Sets the attribute {@code name} to {@code value} directly on the DOM element
 * backing the given web element.
 *
 * The element is cast to {@code HtmlUnitDriverWebElement}; an element of any
 * other type makes the cast fail with a {@link ClassCastException}.
 *
 * @param element the element to modify
 * @param name    the attribute name
 * @param value   the attribute value
 */
@Override
public void setAttribute(WebElement element, String name, String value) {
    final HtmlUnitDriverWebElement htmlUnitElement = (HtmlUnitDriverWebElement) element;
    htmlUnitElement.getDomElement().setAttribute(name, value);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,12 @@
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebRequest;
import com.gargoylesoftware.htmlunit.WebWindow;
import com.gargoylesoftware.htmlunit.html.DomElement;
import com.gargoylesoftware.htmlunit.util.NameValuePair;
import com.qwazr.crawler.web.WebRequestDefinition;
import org.openqa.selenium.Capabilities;
import org.openqa.selenium.TimeoutException;
import org.openqa.selenium.WebDriverException;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;
import org.openqa.selenium.htmlunit.HtmlUnitWebElement;

import java.net.ConnectException;
import java.net.MalformedURLException;
Expand Down Expand Up @@ -79,25 +76,6 @@ public WebClient getWebClient() {
return super.getWebClient();
}

/**
 * Converts a DOM element into a {@link WebElement} by delegating to
 * {@link #newHtmlUnitWebElement(DomElement)}.
 *
 * @param element the DOM element to convert
 * @return the web element produced by {@code newHtmlUnitWebElement}
 */
WebElement convertNode(DomElement element) {
    final WebElement converted = newHtmlUnitWebElement(element);
    return converted;
}

/**
 * Wraps a DOM element in a new {@code HtmlUnitDriverWebElement} whose parent
 * is this driver.
 *
 * @param element the DOM element to wrap
 * @return the newly created wrapper
 */
protected WebElement newHtmlUnitWebElement(DomElement element) {
    final HtmlUnitDriverWebElement wrapper = new HtmlUnitDriverWebElement(this, element);
    return wrapper;
}

/**
 * {@link HtmlUnitWebElement} subclass whose only addition is
 * {@link #getDomElement()}, a package-private accessor for the wrapped DOM element.
 */
public class HtmlUnitDriverWebElement extends HtmlUnitWebElement {

/**
 * Creates the element by forwarding both arguments to the superclass constructor.
 *
 * @param parent  the driver passed to the superclass
 * @param element the DOM element passed to the superclass
 */
public HtmlUnitDriverWebElement(HtmlUnitDriver parent, DomElement element) {
super(parent, element);
}

/**
 * Returns the result of the superclass {@code getElement()}.
 * NOTE(review): presumably exists because {@code getElement()} is not
 * otherwise accessible to callers in this package - confirm.
 */
DomElement getDomElement() {
return super.getElement();
}
}

public void request(final WebRequestDefinition webRequestDef) {
final WebRequest request;
try {
Expand All @@ -111,7 +89,7 @@ public void request(final WebRequestDefinition webRequestDef) {
window.setEnclosedPage(page);
} catch (UnknownHostException e) {
window.setEnclosedPage(new UnexpectedPage(new StringWebResponse("Unknown host", request.getUrl()),
getCurrentWindow().getTopWindow()));
getCurrentWindow().getTopWindow()));
} catch (ConnectException e) {
// This might be expected
} catch (SocketTimeoutException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public void test400CrawlEvent() throws InterruptedException {
final WebCrawlDefinition webCrawl = getNewWebCrawl();
webCrawl.scripts = new HashMap<>();
webCrawl.scripts.put(WebCrawlDefinition.EventEnum.before_crawl,
new WebCrawlDefinition.Script(BeforeCrawl.class.getName()));
new WebCrawlDefinition.Script(BeforeCrawl.class.getName()));
remote.runSession(sessionName, webCrawl);
crawlWait(sessionName, 3);
Assert.assertEquals(4, BeforeCrawl.count.get());
Expand Down

0 comments on commit 80b4ff7

Please sign in to comment.