Skip to content

Commit

Permalink
feat: configurable source reload delay from env var
Browse files Browse the repository at this point in the history
  • Loading branch information
Dainius Jocas committed May 29, 2019
1 parent 162410e commit 5435ac1
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,32 @@

import com.digitalpebble.stormcrawler.bolt.FeedParserBolt;
import com.digitalpebble.stormcrawler.bolt.SiteMapParserBolt;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.concurrent.TimeUnit;

public class CrawlerConstants {
private static final Logger LOG = LoggerFactory.getLogger(CrawlerConstants.class);

/**
 * Resolves the source reload delay, in seconds, from the
 * {@code DEFAULT_SOURCE_RELOAD_DELAY} environment variable.
 *
 * <p>The default of 300 seconds is returned when the variable is unset, is not a
 * valid {@code long} (after trimming surrounding whitespace), or is negative.
 * A value of {@code 0} is accepted as-is.
 *
 * @return the reload delay in seconds; never negative
 */
private static long getReloadDelayInSeconds() {
    final long defaultDelay = 300;
    String envVar = System.getenv("DEFAULT_SOURCE_RELOAD_DELAY");
    if (envVar == null) {
        return defaultDelay;
    }

    long reloadDelay;
    try {
        // Long.parseLong rejects leading/trailing whitespace, so trim before parsing.
        reloadDelay = Long.parseLong(envVar.trim());
    } catch (NumberFormatException e) {
        LOG.warn("Environment variable 'DEFAULT_SOURCE_RELOAD_DELAY' is not a number '{}'", envVar);
        return defaultDelay;
    }

    // A negative delay would make "lastReload + delay < now" permanently true,
    // i.e. reload on every check; treat it as a misconfiguration instead.
    if (reloadDelay < 0) {
        LOG.warn("Environment variable 'DEFAULT_SOURCE_RELOAD_DELAY' must not be negative '{}', using default of {} seconds",
                envVar, defaultDelay);
        return defaultDelay;
    }
    return reloadDelay;
}

// Delay constants: every value below is converted to milliseconds via TimeUnit.
public static final long MIN_FETCH_DELAY = TimeUnit.MINUTES.toMillis(1);
public static final long DEFAULT_URL_FETCH_DELAY = TimeUnit.MINUTES.toMillis(10);
public static final long DEFAULT_FEED_FETCH_DELAY = TimeUnit.MINUTES.toMillis(10);
public static final long DEFAULT_SITEMAP_FETCH_DELAY = TimeUnit.MINUTES.toMillis(30);
// NOTE(review): DEFAULT_SOURCE_RELOAD_DELAY is declared twice below. These look like the
// before/after lines of a diff (fixed 5 minutes vs. a seconds value read from the environment
// by getReloadDelayInSeconds()); only one declaration can remain in a compilable class.
public static final long DEFAULT_SOURCE_RELOAD_DELAY = TimeUnit.MINUTES.toMillis(5);
// Computed once, when this static initializer runs; the helper returns seconds, converted here to milliseconds.
public static final long DEFAULT_SOURCE_RELOAD_DELAY = TimeUnit.SECONDS.toMillis(getReloadDelayInSeconds());

// Metadata key names, re-exported from the SiteMapParserBolt and FeedParserBolt classes.
public static final String META_IS_SITEMAP = SiteMapParserBolt.isSitemapKey;
public static final String META_IS_FEED = FeedParserBolt.isFeedKey;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

public class HttpSourceConfiguration {

private static final Logger LOG = LoggerFactory.getLogger(UrlGeneratorSpout.class);
private static final Logger LOG = LoggerFactory.getLogger(HttpSourceConfiguration.class);

private final List<HttpSource> sources;
private final List<String> sourceUrls;
Expand Down Expand Up @@ -83,7 +83,8 @@ public static HttpSourceConfiguration reload(HttpSourceConfiguration current, Li
}

/**
 * Reports whether the reload timeout has expired.
 *
 * <p>Logs the remaining time at INFO level, then compares
 * {@code lastReloadMillis + CrawlerConstants.DEFAULT_SOURCE_RELOAD_DELAY} against
 * {@code currentTimeMillis()}.
 *
 * @return {@code true} when the current time is strictly past
 *         {@code lastReloadMillis + DEFAULT_SOURCE_RELOAD_DELAY}
 */
public static boolean needsReload() {
// NOTE(review): the same log statement appears twice (single-line and wrapped form);
// this looks like the removed/added pair of a diff — confirm only one is intended.
LOG.info("Checking reloading timeout. Remaining milliseconds: {}", lastReloadMillis + CrawlerConstants.DEFAULT_SOURCE_RELOAD_DELAY - currentTimeMillis());
LOG.info("Checking reloading timeout. Remaining milliseconds: {}",
lastReloadMillis + CrawlerConstants.DEFAULT_SOURCE_RELOAD_DELAY - currentTimeMillis());
// NOTE(review): the addition is plain long arithmetic; a very large configured delay
// could overflow and make this comparison true immediately — verify the accepted range.
return lastReloadMillis + CrawlerConstants.DEFAULT_SOURCE_RELOAD_DELAY < currentTimeMillis();
}

Expand Down
2 changes: 2 additions & 0 deletions docker-compose.run.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,5 @@ services:
ports: ["8081:8081"]
crawler:
image: registry.gitlab.com/tokenmill/crawling-framework/crawler:latest
environment:
# Source reload delay in seconds; CrawlerConstants falls back to 300 when this is unset or not a number.
DEFAULT_SOURCE_RELOAD_DELAY: 10

0 comments on commit 5435ac1

Please sign in to comment.