Skip to content

Commit

Permalink
Fixed regex checking of dedup enabled.
Browse files Browse the repository at this point in the history
  • Loading branch information
csrster committed Nov 25, 2016
1 parent 7b942ec commit 9a3c503
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 88 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.io.Serializable;
import java.nio.charset.Charset;
import java.util.List;
import java.util.regex.Pattern;

import javax.servlet.jsp.JspWriter;

Expand Down Expand Up @@ -79,8 +80,9 @@ public class H3HeritrixTemplate extends HeritrixTemplate implements Serializable
// Placeholder tokens of the form %{NAME}; the code that substitutes them is not visible in this hunk.
public static final String MAX_TIME_SECONDS_PLACEHOLDER = "%{MAX_TIME_SECONDS_PLACEHOLDER}";
public static final String CRAWLERTRAPS_PLACEHOLDER = "%{CRAWLERTRAPS_PLACEHOLDER}";

// NOTE: this is a diff hunk rendered without +/- markers. Going by the hunk header
// (8 old lines, 9 new lines), the next two String constants are the removed (pre-commit)
// declarations and the Pattern constants after them are their replacements.
// Pre-commit: plain regex strings; the reference pattern required a trailing "/>".
public static final String DEDUPLICATION_BEAN_REFERENCE_PATTERN = ".*ref.*bean.*DeDuplicator.*/>";
public static final String DEDUPLICATION_BEAN_PATTERN = ".*bean.*id.*DeDuplicator.*";
// Post-commit: precompiled with Pattern.DOTALL so that '.' also matches line terminators;
// the trailing "/>" has been dropped from the reference pattern.
public static final Pattern DEDUPLICATION_BEAN_REFERENCE_PATTERN = Pattern.compile(".*ref.*bean.*DeDuplicator.*", Pattern.DOTALL);

// Same expression as the removed String constant of this name, now compiled with DOTALL.
public static final Pattern DEDUPLICATION_BEAN_PATTERN = Pattern.compile(".*bean.*id.*DeDuplicator.*", Pattern.DOTALL);
// Placeholder whose presence in the template is one of the conditions checked by IsDeduplicationEnabled().
public static final String DEDUPLICATION_INDEX_LOCATION_PLACEHOLDER
= "%{DEDUPLICATION_INDEX_LOCATION_PLACEHOLDER}";

Expand Down Expand Up @@ -207,8 +209,8 @@ public boolean isValid() {
// - a DeDuplicator reference bean is present in the template
public boolean IsDeduplicationEnabled() {
// NOTE: this span is a diff hunk rendered without +/- markers, so the old and the new
// form of the two regex checks appear back to back; it is not a single compilable body.
// In either version the method returns true only if the index-location placeholder is
// present AND both DeDuplicator patterns match the template.
return (template.contains(DEDUPLICATION_INDEX_LOCATION_PLACEHOLDER)
// Old form (the two lines removed by this commit, judging by the hunk header counts):
// String.matches() against the String-typed pattern constants.
&& template.matches(DEDUPLICATION_BEAN_PATTERN)
&& template.matches(DEDUPLICATION_BEAN_REFERENCE_PATTERN));
// New form (the two lines added by this commit): the constants are precompiled Patterns
// built with Pattern.DOTALL; Matcher.matches() still requires the whole template to match.
// Presumably DOTALL is the actual fix, since without it '.' does not cross line
// terminators and a multi-line template could not match - TODO confirm against the templates.
&& DEDUPLICATION_BEAN_PATTERN.matcher(template).matches()
&& DEDUPLICATION_BEAN_REFERENCE_PATTERN.matcher(template).matches());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,40 @@
import static org.junit.Assert.*;

import java.io.File;
import java.io.IOException;
import java.net.URL;

import org.junit.Test;

import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IllegalState;
import dk.netarkivet.common.utils.FileUtils;

public class H3HeritrixTemplateTester {

// Fixture values shared by the tests in this class.
// Archive file prefix; its use is outside the visible part of this file.
String basicArchiveFilePrefix = "netarkivet-1-1";
// Template file that testIsDeduplicationEnabled expects to be reported as dedup-enabled.
String correctTemplateName = "crawler-beans_with_placeholders.cxml";
// Template file that testIsDeduplicationEnabledNot expects to be reported as NOT dedup-enabled.
String incorrectTemplateName = "crawler-beans_no_placeholders.cxml";
// Presumably a template lacking the H3 signature (going by the file name); usage not visible here.
String incorrectSignature = "crawler-beans_without_h3_signature.cxml";


/**
 * Positive case: the template stored in the "heritrix3" test resource directory under
 * {@code correctTemplateName} must be reported as deduplication-enabled.
 *
 * @throws IOException declared for the call that reads the template file
 */
@Test
public void testIsDeduplicationEnabled() throws IOException {
    // Resolve the resource directory through the class loader, then pick the template file in it.
    File resourceDir = new File(this.getClass().getClassLoader().getResource("heritrix3").getFile());
    File templateFile = new File(resourceDir, correctTemplateName);

    H3HeritrixTemplate h3Template = new H3HeritrixTemplate(100L, FileUtils.readFile(templateFile));
    boolean dedupEnabled = h3Template.IsDeduplicationEnabled();

    assertTrue("Should be dedup enabled.", dedupEnabled);
}

/**
 * Negative case: the template stored in the "heritrix3" test resource directory under
 * {@code incorrectTemplateName} must NOT be reported as deduplication-enabled.
 *
 * @throws IOException declared for the call that reads the template file
 */
@Test
public void testIsDeduplicationEnabledNot() throws IOException {
    // Resolve the resource directory through the class loader, then pick the template file in it.
    File resourceDir = new File(this.getClass().getClassLoader().getResource("heritrix3").getFile());
    File templateFile = new File(resourceDir, incorrectTemplateName);

    H3HeritrixTemplate h3Template = new H3HeritrixTemplate(100L, FileUtils.readFile(templateFile));
    boolean dedupEnabled = h3Template.IsDeduplicationEnabled();

    assertFalse("Should not be dedup enabled", dedupEnabled);
}

@Test
public void testArchiveFilePrefixSetter() {
URL url = this.getClass().getClassLoader().getResource("heritrix3");
Expand Down
Loading

0 comments on commit 9a3c503

Please sign in to comment.