/
CrawlLogDataCache.java
44 lines (38 loc) · 1.53 KB
/
CrawlLogDataCache.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/*
* #%L
* Netarchivesuite - harvester
* %%
* Copyright (C) 2005 - 2018 The Royal Danish Library,
* the National Library of France and the Austrian National Library.
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 2.1 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Lesser Public License for more details.
*
* You should have received a copy of the GNU General Lesser Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/lgpl-2.1.html>.
* #L%
*/
package dk.netarkivet.harvester.indexserver;
import java.util.regex.Pattern;
import dk.netarkivet.harvester.harvesting.metadata.MetadataFile;
/**
 * Low-level cache backing the Lucene indexing of crawl logs. The crawl logs belonging to individual jobs are
 * obtained as files.
 */
public class CrawlLogDataCache extends RawMetadataCache {

    /**
     * Constructs a CrawlLogDataCache. Given a job ID, the cache fetches crawl.log files out of the job's metadata
     * files ({@code <ID>-metadata-[0-9]+.arc}) and caches them.
     * <p>
     * The superclass constructor receives the literal {@code "crawllog"}, a pattern compiled from
     * {@link MetadataFile#CRAWL_LOG_PATTERN}, and a pattern compiled from {@code "text/plain"}.
     */
    public CrawlLogDataCache() {
        super("crawllog", crawlLogPattern(), plainTextPattern());
    }

    /**
     * Compiles {@link MetadataFile#CRAWL_LOG_PATTERN}.
     *
     * @return a freshly compiled pattern, used as the second superclass constructor argument.
     */
    private static Pattern crawlLogPattern() {
        return Pattern.compile(MetadataFile.CRAWL_LOG_PATTERN);
    }

    /**
     * Compiles the regular expression {@code text/plain}.
     *
     * @return a freshly compiled pattern, used as the third superclass constructor argument.
     */
    private static Pattern plainTextPattern() {
        return Pattern.compile("text/plain");
    }
}