Skip to content

Commit

Permalink
Fix NAS-2648 - support for crawllogs with old duplicate annotations (also NARK-1338)
Browse files Browse the repository at this point in the history
  • Loading branch information
svcarlsen authored and Knud Åge Hansen committed Oct 20, 2017
1 parent 72d378b commit eb12cd4
Show file tree
Hide file tree
Showing 41 changed files with 49 additions and 44 deletions.
2 changes: 1 addition & 1 deletion archive/archive-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>archive</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>archive-core</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion archive/archive-test/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>archive</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>archive-test</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion archive/bitpreservation-gui/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>archive</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>bitpreservation-gui</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion archive/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>netarchivesuite</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>archive</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion build-tools/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>netarchivesuite</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>build-tools</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion common/common-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>common</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>common-core</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion common/common-test/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>common</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>common-test</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion common/netarchivesuite-test-utils/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>common</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>netarchivesuite</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>common</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion deploy/deploy-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>deploy</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>deploy-core</artifactId>
Expand Down
4 changes: 2 additions & 2 deletions deploy/deploy-test/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>deploy</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>deploy-test</artifactId>
Expand All @@ -31,7 +31,7 @@
<dependency>
<groupId>org.netarchivesuite</groupId>
<artifactId>common-test</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
<scope>test</scope>
</dependency>

Expand Down
2 changes: 1 addition & 1 deletion deploy/distribution/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>deploy</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>distribution</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion deploy/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>netarchivesuite</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>deploy</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion harvester/harvest-scheduler/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>harvester</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>harvest-scheduler</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion harvester/harvestchannel-gui/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>harvester</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>harvestchannel-gui</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion harvester/harvestdefinition-gui/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>harvester</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>harvestdefinition-gui</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion harvester/harvester-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>harvester</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>harvester-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ private void migrateDuplicates(Long id, String replicaUsed, String specifiedPatt
log.info("Doing migration for {}", id);
try {
final List<String> migrationLines = org.apache.commons.io.FileUtils.readLines(migration);
log.info("{} migration records found.", migrationLines.size());
log.info("{} migration records found for job {}", migrationLines.size(), id);
for (String line : migrationLines) {
String[] splitLine = StringUtils.split(line);
lookup.put(new Pair<String, Long>(splitLine[0], Long.parseLong(splitLine[1])),
Expand All @@ -223,13 +223,17 @@ private void migrateDuplicates(Long id, String replicaUsed, String specifiedPatt
}
originalBatchJob.copyResults(crawllog);
try {
int matches = 0;
int errors = 0;
for (String line : org.apache.commons.io.FileUtils.readLines(crawllog)) {
Matcher m = duplicatePattern.matcher(line);
if (m.matches()) {
matches++;
Long newOffset = lookup.get(new Pair<String, Long>(m.group(1), Long.parseLong(m.group(2))));
if (newOffset == null) {
log.warn("Could not migrate duplicate in " + line);
FileUtils.appendToFile(cacheFileName, line);
errors++;
} else {
String newLine = line.substring(0, m.start(2)) + newOffset + line.substring(m.end(2));
newLine = newLine.replace(m.group(1), m.group(1) + ".gz");
Expand All @@ -239,6 +243,7 @@ private void migrateDuplicates(Long id, String replicaUsed, String specifiedPatt
FileUtils.appendToFile(cacheFileName, line);
}
}
log.info("Found and migrated {} duplicate lines for job {} with {} errors", matches, id, errors);
} catch (IOException e) {
throw new IOFailure("Could not read " + crawllog.getAbsolutePath());
} finally {
Expand Down
2 changes: 1 addition & 1 deletion harvester/harvester-test/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>harvester</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>harvester-test</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion harvester/heritrix1/heritrix1-controller/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>heritrix1</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>heritrix1-controller</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion harvester/heritrix1/heritrix1-extensions/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>heritrix1</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>heritrix1-extensions</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion harvester/heritrix1/heritrix1-frontier/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>heritrix1</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>heritrix1-frontier</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion harvester/heritrix1/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>harvester</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion harvester/heritrix3/heritrix3-bundler/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>heritrix3</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>heritrix3-bundler</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion harvester/heritrix3/heritrix3-controller/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>heritrix3</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>heritrix3-controller</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion harvester/heritrix3/heritrix3-extensions/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>heritrix3</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>heritrix3-extensions</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion harvester/heritrix3/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>harvester</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion harvester/history-gui/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>harvester</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>history-gui</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion harvester/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>netarchivesuite</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>harvester</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion harvester/qa-gui/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>netarchivesuite</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
<relativePath>../../pom.xml</relativePath>
</parent>

Expand Down
2 changes: 1 addition & 1 deletion integration-test/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>netarchivesuite</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>integration-test</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion integration-test/system-test/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>integration-test</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>system-test</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion monitor/monitor-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>monitor</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>monitor-core</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion monitor/monitor-test/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>monitor</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>monitor-test</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion monitor/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>netarchivesuite</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>monitor</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion monitor/status-gui/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>monitor</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>status-gui</artifactId>
Expand Down
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

<groupId>org.netarchivesuite</groupId>
<artifactId>netarchivesuite</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
<packaging>pom</packaging>

<name>NetarchiveSuite</name>
Expand Down Expand Up @@ -852,7 +852,7 @@
<dependency>
<groupId>org.netarchivesuite</groupId>
<artifactId>build-tools</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</dependency>
</dependencies>
</plugin>
Expand Down
2 changes: 1 addition & 1 deletion wayback/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>netarchivesuite</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>wayback</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion wayback/wayback-indexer/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.netarchivesuite</groupId>
<artifactId>wayback</artifactId>
<version>5.4-SNAPSHOT</version>
<version>5.3.2-RC1</version>
</parent>

<artifactId>wayback-indexer</artifactId>
Expand Down
Loading

0 comments on commit eb12cd4

Please sign in to comment.