Skip to content

Commit

Permalink
Made metadata cache directory configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
csrster committed May 21, 2021
1 parent 9205ce7 commit 70bf4d5
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,13 @@ public class CommonSettings {
* The default number of jobs to show in the harvest status section, on one result page.
*/
public static String HARVEST_STATUS_DFT_PAGE_SIZE = "settings.common.webinterface.harvestStatus.defaultPageSize";


/**
* <b>settings.common.webinterface.metadata_cache_dir</b>: <br>
* The directory in which the web interface stores cached crawl logs and CDX indexes, in order to
* minimise the number of Hadoop jobs needed.
*/
public static String METADATA_CACHE = "settings.common.webinterface.metadata_cache_dir";

/**
* <b>settings.common.topLevelDomains.tld</b>: <br>
* Extra valid top level domain, like .co.uk, .dk, .org., not part of current embedded public_suffix_list.dat file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ public static List<CDXRecord> getMetadataCDXRecordsForJob(long jobid) {
}

private static File getCDXCacheFile(long jobid) {
String cacheDir = "metadata_cache";
String cacheDir = Settings.get(CommonSettings.METADATA_CACHE);
String cdxcache = "cdxcache";
File cdxdir = new File(new File(cacheDir), cdxcache);
cdxdir.mkdirs();
Expand Down Expand Up @@ -365,7 +365,7 @@ public static File getCrawlLoglinesMatchingRegexp(long jobid, String regexp) {
}

private static File getCrawlLogCache(long jobid) {
String cacheDir = "metadata_cache";
String cacheDir = Settings.get(CommonSettings.METADATA_CACHE);
String crawllog_cache = "crawllog_cache";
File crawllog_dir = new File(new File(cacheDir), crawllog_cache);
crawllog_dir.mkdirs();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ National Library.
<webinterface>
<maxCrawlLogInBrowser>1000</maxCrawlLogInBrowser>
<runningjobsFilteringMethod>database</runningjobsFilteringMethod>
<metadata_cache_dir>metadata_cache</metadata_cache_dir>
</webinterface>
</harvester>
</settings>

0 comments on commit 70bf4d5

Please sign in to comment.