Skip to content
Permalink
Browse files

follow up after review of NAS-2410

  • Loading branch information...
svcarlsen committed Sep 2, 2015
1 parent 56f5674 commit d339820ca04b4c64c2f0a2a223646fd51f979218
@@ -79,7 +79,6 @@
/**
 * Stores the given list of patterns in {@code kp} under the key
 * {@code "rejectRelativeMatchingRegexList"}.
 *
 * NOTE(review): presumably these patterns are consulted when deciding
 * whether a possible relative link should be ignored -- confirm against
 * the code that reads this key.
 *
 * @param patterns the regex patterns to store
 */
public void setRejectRelativeMatchingRegexList(List<Pattern> patterns) {
kp.put("rejectRelativeMatchingRegexList", patterns);
}


// finds whitespace-free strings in Javascript
// (areas between paired ' or " characters, possibly backslash-quoted
@@ -95,7 +94,6 @@ public void setRejectRelativeMatchingRegexList(List<Pattern> patterns) {
// begins and ends with either '/' or a word-char)
// Regex breakdown, left to right:
//   (?:\w|[\.]{0,2}/)   starts with a word-char, or with zero to two dots
//                       followed by '/' (i.e. "/", "./" or "../")
//   [\S&&[^<>]]*        any run of non-whitespace chars except '<' and '>'
//   (?:\.|/)            at least one '.' or '/' somewhere in the string
//   [\S&&[^<>]]*        another run of non-whitespace, non-angle-bracket chars
//   (?:\w|/)            ends with a word-char or '/'
static final String STRING_URI_DETECTOR =
"(?:\\w|[\\.]{0,2}/)[\\S&&[^<>]]*(?:\\.|/)[\\S&&[^<>]]*(?:\\w|/)";


/** Running count of CrawlURIs handled; incremented once per call to innerExtract. */
protected long numberOfCURIsHandled = 0;

@@ -109,12 +107,11 @@ public void setRejectRelativeMatchingRegexList(List<Pattern> patterns) {
};

/**
 * Constructor. Takes no arguments; the body is intentionally empty.
 */
public IcelandicExtractorJS() {
}


protected boolean shouldExtract(CrawlURI uri) {

// special-cases, for when we know our current JS extractor does poorly.
@@ -154,7 +151,6 @@ protected boolean shouldExtract(CrawlURI uri) {
return s.startsWith("script");
}


@Override
protected boolean innerExtract(CrawlURI curi) {
this.numberOfCURIsHandled++;
@@ -205,8 +201,7 @@ public long considerStrings(Extractor ext,
if (!falsePositive) {
falsePositive = shouldIgnorePossibleRelativeLink(string);
}



if (falsePositive) {
foundFalsePositives++;
} else {
@@ -250,19 +245,14 @@ private boolean shouldIgnorePossibleRelativeLink(String str) {
return true;
}
}

return false;
}



/**
 * Builds this extractor's report: the superclass report followed by a
 * single line stating how many false positives were eliminated.
 *
 * @return the superclass report text with the false-positive line appended
 */
@Override
public String report() {
    // super.report() is evaluated first, then the counter is read.
    return super.report()
            + " False positives eliminated: " + foundFalsePositives + "\n";
}


}
}

0 comments on commit d339820

Please sign in to comment.
You can’t perform that action at this time.