Skip to content
This repository has been archived by the owner on Mar 9, 2021. It is now read-only.

Commit

Permalink
Merge pull request #44 from kinow/issue-43
Browse files Browse the repository at this point in the history
Fix issue #10 allow users to set a proxy
  • Loading branch information
karussell committed Feb 4, 2015
2 parents 665d54a + 6f6fadb commit a5fe61e
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 2 deletions.
19 changes: 17 additions & 2 deletions src/main/java/de/jetwick/snacktory/HtmlFetcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.zip.GZIPInputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -79,6 +80,7 @@ public static void main(String[] args) throws Exception {
// Configuration and collaborator state of the fetcher (partial view of the class fields).

// Value sent as the "Accept" request header when a connection is created.
private String accept = "application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
// NOTE(review): presumably the default charset name used when decoding fetched content - confirm against the usages.
private String charset = "UTF-8";
private SCache cache;
// Proxy for outgoing connections. null means none was configured; getProxy() then substitutes Proxy.NO_PROXY.
private Proxy proxy = null;
private AtomicInteger cacheCounter = new AtomicInteger(0);
// NOTE(review): -1 looks like a "no limit" sentinel - confirm where maxTextLength is read.
private int maxTextLength = -1;
private ArticleTextExtractor extractor = new ArticleTextExtractor();
Expand Down Expand Up @@ -200,6 +202,18 @@ public String getCharset() {
return charset;
}

/**
 * Sets the proxy that is handed to {@code URL.openConnection(Proxy)} when this fetcher
 * creates a connection.
 *
 * @param proxy the proxy to use; may be {@code null}, in which case {@link #getProxy()}
 *              returns {@link Proxy#NO_PROXY}
 */
public void setProxy(Proxy proxy) {
this.proxy = proxy;
}

/**
 * Returns the proxy used for outgoing connections.
 *
 * @return the proxy passed to {@link #setProxy(Proxy)}, or {@link Proxy#NO_PROXY} when the
 *         stored proxy is {@code null}; never {@code null}
 */
public Proxy getProxy() {
    // Fall back to a direct connection so callers never have to deal with null.
    if (proxy == null) {
        return Proxy.NO_PROXY;
    }
    return proxy;
}

/**
 * Tells whether a proxy has been configured through {@link #setProxy(Proxy)}.
 *
 * <p>The check is made against the stored field rather than {@link #getProxy()}: the getter
 * substitutes {@link Proxy#NO_PROXY} for {@code null} and therefore never returns
 * {@code null}, which would make this method always answer {@code true}.
 *
 * @return {@code true} if a non-null proxy is currently stored (an explicitly assigned
 *         {@link Proxy#NO_PROXY} counts as set); {@code false} otherwise
 */
public boolean isProxySet() {
    return proxy != null;
}

public JResult fetchAndExtract(String url, int timeout, boolean resolve) throws Exception {
String originalUrl = url;
url = SHelper.removeHashbang(url);
Expand Down Expand Up @@ -363,7 +377,7 @@ public String getResolvedUrl(String urlAsString, int timeout) {
return urlAsString;

} catch (Exception ex) {
logger.warn("getResolvedUrl:" + urlAsString + " Error:" + ex.getMessage());
logger.warn("getResolvedUrl:" + urlAsString + " Error:" + ex.getMessage(), ex);
return "";
} finally {
if (logger.isDebugEnabled())
Expand Down Expand Up @@ -395,7 +409,8 @@ protected HttpURLConnection createUrlConnection(String urlAsStr, int timeout,
boolean includeSomeGooseOptions) throws MalformedURLException, IOException {
URL url = new URL(urlAsStr);
//using proxy may increase latency
HttpURLConnection hConn = (HttpURLConnection) url.openConnection(Proxy.NO_PROXY);
Proxy proxy = getProxy();
HttpURLConnection hConn = (HttpURLConnection) url.openConnection(proxy);
hConn.setRequestProperty("User-Agent", userAgent);
hConn.setRequestProperty("Accept", accept);

Expand Down
49 changes: 49 additions & 0 deletions src/test/java/de/jetwick/snacktory/HtmlFetcherProxyTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright 2015 Peter Karich
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.snacktory;

import static org.junit.Assert.assertEquals;

import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.Proxy.Type;

import org.junit.Test;

/**
 * Tests for the proxy configuration accessors of {@link HtmlFetcher}.
 */
public class HtmlFetcherProxyTest {

    /**
     * A proxy handed to {@code setProxy} must be the one reported by {@code getProxy}.
     */
    @Test
    public void testSocksProxy() {
        HtmlFetcher fetcher = new HtmlFetcher();
        // Use the enum constant directly instead of a reflective lookup by name.
        Proxy proxy = new Proxy(Type.SOCKS, new InetSocketAddress("127.0.0.1", 3128));
        fetcher.setProxy(proxy);

        assertEquals("Invalid SOCKS proxy type name", "SOCKS", fetcher.getProxy().type().name());
        // The type name alone would also match any other SOCKS proxy; compare the proxy itself.
        assertEquals("HtmlFetcher did not return the configured proxy", proxy, fetcher.getProxy());
    }

    /**
     * Without any configuration the fetcher must report {@link Proxy#NO_PROXY}.
     */
    @Test
    public void testNoProxy() {
        HtmlFetcher fetcher = new HtmlFetcher();
        assertEquals("HtmlFetcher proxy server was not a NO_PROXY proxy", Proxy.NO_PROXY, fetcher.getProxy());
    }

    /**
     * Setting {@code null} after a real proxy must fall back to {@link Proxy#NO_PROXY}.
     */
    @Test
    public void testNullProxyFallsBackToNoProxy() {
        HtmlFetcher fetcher = new HtmlFetcher();
        fetcher.setProxy(new Proxy(Type.SOCKS, new InetSocketAddress("127.0.0.1", 3128)));
        fetcher.setProxy(null);

        assertEquals("HtmlFetcher proxy server was not a NO_PROXY proxy", Proxy.NO_PROXY, fetcher.getProxy());
    }

}

0 comments on commit a5fe61e

Please sign in to comment.