
Finish the migration of metadata prefix naming for h1 and h3

scheylord committed Jul 13, 2016
1 parent c2b3e63 commit 16fa8bb6573ddfa6ff34e923b74330e33b8b43db
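
The change in substance: every metadata-file lookup previously anchored the job id at the start of the filename pattern (jobid + metadatafile_suffix), but metadata filenames can now carry a naming prefix ahead of the job id, so each pattern is widened to ".*" + jobid + ".*" + suffix. A minimal sketch of the effect, assuming an illustrative stand-in for Constants.METADATA_FILE_PATTERN_SUFFIX (the real value may differ):

import java.util.regex.Pattern;

public class MetadataPatternSketch {
    // Assumption: illustrative stand-in for Constants.METADATA_FILE_PATTERN_SUFFIX.
    static final String SUFFIX = "-metadata-[0-9]+\\.(w)?arc(\\.gz)?";

    public static void main(String[] args) {
        long jobId = 42;
        String oldPattern = jobId + SUFFIX;               // before this commit
        String newPattern = ".*" + jobId + ".*" + SUFFIX; // after this commit

        String plain = "42-metadata-1.warc";
        String prefixed = "myprefix-42-metadata-1.warc";  // hypothetical prefixed name

        System.out.println(Pattern.matches(oldPattern, plain));    // true
        System.out.println(Pattern.matches(oldPattern, prefixed)); // false: the prefix breaks the anchored match
        System.out.println(Pattern.matches(newPattern, prefixed)); // true: ".*" absorbs the prefix
        System.out.println(Pattern.matches(newPattern, plain));    // true: unprefixed names still match
    }
}

Note that the widened pattern also accepts any filename that merely contains the job id digits (the pattern for job 4 matches a file of job 42, for instance); the looser match is evidently the accepted trade-off for prefix tolerance.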
@@ -142,7 +142,7 @@ private void doGetMetadata(Request request, Response response) {
try {
Long id = Long.parseLong(idString);
FileBatchJob job = new GetFileBatchJob();
-job.processOnlyFilesMatching(id + Constants.METADATA_FILE_PATTERN_SUFFIX);
+job.processOnlyFilesMatching(".*" + id + ".*" + Constants.METADATA_FILE_PATTERN_SUFFIX);
BatchStatus b = client.batch(job, Settings.get(CommonSettings.USE_REPLICA_ID));
if (b.getNoOfFilesProcessed() > b.getFilesFailed().size() && b.hasResultFile()) {
b.appendResults(response.getOutputStream());
@@ -78,7 +78,7 @@ private Reporting() {
ArgumentNotValid.checkPositive(jobid, "jobid");
FileBatchJob fileListJob = new FileListJob();
List<String> acceptedPatterns = new ArrayList<String>();
-acceptedPatterns.add(jobid + metadatafile_suffix);
+acceptedPatterns.add(".*" + jobid + ".*" + metadatafile_suffix);
acceptedPatterns.add(harvestprefix + archivefile_suffix);
fileListJob.processOnlyFilesMatching(acceptedPatterns);

@@ -117,7 +117,7 @@ public ArchiveBatchFilter getFilter() {
return ArchiveBatchFilter.EXCLUDE_NON_WARCINFO_RECORDS;
}
};
-cdxJob.processOnlyFilesMatching(jobid + metadatafile_suffix);
+cdxJob.processOnlyFilesMatching(".*" + jobid + ".*" + metadatafile_suffix);

File f;
try {
@@ -159,7 +159,7 @@ public static File getCrawlLogForDomainInJob(String domain, int jobid) {
ArgumentNotValid.checkPositive(jobid, "jobid");
ArgumentNotValid.checkNotNullOrEmpty(domain, "String domain");
FileBatchJob urlsForDomainBatchJob = new HarvestedUrlsForDomainBatchJob(domain);
-urlsForDomainBatchJob.processOnlyFilesMatching(jobid + metadatafile_suffix);
+urlsForDomainBatchJob.processOnlyFilesMatching(".*" + jobid + ".*" + metadatafile_suffix);
return getResultFile(urlsForDomainBatchJob);
}

@@ -200,7 +200,7 @@ public static File getCrawlLoglinesMatchingRegexp(int jobid, String regexp) {
ArgumentNotValid.checkPositive(jobid, "jobid");
ArgumentNotValid.checkNotNullOrEmpty(regexp, "String regexp");
FileBatchJob crawlLogBatchJob = new CrawlLogLinesMatchingRegexp(regexp);
-crawlLogBatchJob.processOnlyFilesMatching(jobid + metadatafile_suffix);
+crawlLogBatchJob.processOnlyFilesMatching(".*" + jobid + ".*" + metadatafile_suffix);
return getResultFile(crawlLogBatchJob);
}

@@ -179,7 +179,7 @@ private File getIndexFile(Set<Long> jobIDs) {
private void retrieveIndex(Set<Long> jobIDs, OutputStream out) {
List<String> metadataFiles = new ArrayList<String>();
for (Long jobID : jobIDs) {
-metadataFiles.add(jobID + Constants.METADATA_FILE_PATTERN_SUFFIX);
+metadataFiles.add(".*" + jobID + ".*" + Constants.METADATA_FILE_PATTERN_SUFFIX);
}
ARCBatchJob job = new CDXCacheBatchJob();
job.processOnlyFilesMatching(metadataFiles);
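
The index path builds one such pattern per job id and hands the whole list to processOnlyFilesMatching, which processes a file if it matches any entry. A brief hedged illustration, reusing the assumed suffix stand-in from above with hypothetical job ids:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

public class IndexPatternSketch {
    // Assumption: illustrative stand-in for Constants.METADATA_FILE_PATTERN_SUFFIX.
    static final String SUFFIX = "-metadata-[0-9]+\\.(w)?arc(\\.gz)?";

    public static void main(String[] args) {
        List<String> metadataFiles = new ArrayList<String>();
        for (Long jobID : Arrays.asList(2L, 3L)) { // hypothetical job ids
            metadataFiles.add(".*" + jobID + ".*" + SUFFIX);
        }
        // A prefixed metadata file of job 2 matches the first entry of the list:
        String name = "1-2-metadata-1.warc";
        boolean accepted = metadataFiles.stream().anyMatch(p -> Pattern.matches(p, name));
        System.out.println(accepted); // true
    }
}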
@@ -130,7 +130,7 @@ public void testDocumentHarvestOrdinaryCase() throws IOException {
HarvestDocumentation.documentHarvest(inf);

// Verify that the new file exists.
-MetadataFileWriter.getMetadataArchiveFileName(Heritrix1ControllerTestInfo.ARC_JOB_ID);
+MetadataFileWriter.getMetadataArchiveFileName(Heritrix1ControllerTestInfo.ARC_JOB_ID, Heritrix1ControllerTestInfo.ARC_HARVEST_ID);

List<File> fs = inf.getMetadataArcFiles();
assertEquals("Should have created exactly one file ", 1, fs.size());
@@ -209,15 +209,15 @@ public void testDocumentHarvestExceptionalCases() {
List<File> metadataFiles = OkIngestables.getMetadataArcFiles();
File metadataDir = new File(WORKING_DIR, "metadata");
File target1 = new File(metadataDir, MetadataFileWriter.getMetadataArchiveFileName(Long
-.toString(Heritrix1ControllerTestInfo.JOB_ID)));
+.toString(Heritrix1ControllerTestInfo.JOB_ID), Heritrix1ControllerTestInfo.HARVEST_ID));
assertEquals("Should generate exactly one metadata file", 1, metadataFiles.size());
assertTrue("Should generate file " + target1 + " but found only " + metadataFiles.toString(),
metadataFiles.contains(target1));
}

@Test (expected = ArgumentNotValid.class)
public void testGetMetadataARCNullFileName() {
-MetadataFileWriter.getMetadataArchiveFileName(null);
+MetadataFileWriter.getMetadataArchiveFileName(null, null);
}

/**
@@ -227,7 +227,8 @@ public void testGetMetadataARCNullFileName() {
@Test
public void testGetMetadataARCFileName() {
String job = "7";
-String fn = MetadataFileWriter.getMetadataArchiveFileName(job);
+Long harvestId = 43L;
+String fn = MetadataFileWriter.getMetadataArchiveFileName(job, harvestId);
assertTrue("File name should end on '-1.arc' - was " + fn, fn.endsWith("-1.arc"));
assertTrue("File name should contain jobID - was " + fn, fn.contains(job));
assertTrue("File name should contain the string 'metadata' - was " + fn, fn.contains("metadata"));
@@ -434,7 +435,7 @@ public void testDocumentHarvestBug722() throws Exception {
FileUtils.remove(new File(arcsDir, "42-117-20051212141241-00001-sb-test-har-001.statsbiblioteket.dk.arc"));

String metadataDirPath = new File(WORKING_DIR, IngestableFiles.METADATA_SUB_DIR).getAbsolutePath();
-String filename = MetadataFileWriter.getMetadataArchiveFileName("" + Heritrix1ControllerTestInfo.JOB_ID);
+String filename = MetadataFileWriter.getMetadataArchiveFileName("" + Heritrix1ControllerTestInfo.JOB_ID, Heritrix1ControllerTestInfo.HARVEST_ID);

lr.assertLogContains("Should have issued warning about existing metadata-arcfile", "The metadata-file '"
+ metadataDirPath + "/" + filename + "' already exists, so we don't make another one!");
