NAS-2463: Fix of unit test

netarchivesuite · Dec 19, 2017 · 38e4903 · 38e4903
1 parent b581dd3
commit 38e4903
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 20 deletions.
diff --git a/...ester-core/src/main/java/dk/netarkivet/harvester/webinterface/servlet/NASEnvironment.java b/...ester-core/src/main/java/dk/netarkivet/harvester/webinterface/servlet/NASEnvironment.java
@@ -248,7 +248,9 @@ public Stream<String> getCrawledUrls(long jobId, Heritrix3JobMonitor h3Job) {
 
         try {
             Stream<String> attemptedHarvestedUrlsFromCrawllog = Files.lines(Paths.get(crawlLogPath),
-                    Charset.forName("UTF-8")).filter(line -> urlInLineIsAttemptedHarvested(line));
+                    Charset.forName("UTF-8"))
+                    .filter(line -> urlInLineIsAttemptedHarvested(line))
+                    .map(line -> line.split("\\s+")[3]);
 
             return attemptedHarvestedUrlsFromCrawllog;
         } catch (java.io.IOException e) {
@@ -290,12 +292,12 @@ private String normalizeDomainUrl(String url) {
      * @param domainName The domain
      * @return whether the given job harvests given domain
      */
-    public boolean jobHarvestsDomain(long jobId, String domainName) {
+    public boolean jobHarvestsDomain(long jobId, String domainName, Heritrix3JobMonitor h3Job) {
         // Normalize search URL
         String searchedDomain = normalizeDomainUrl(domainName);
 
         // Return whether or not the crawled URLs contain the searched URL
-        return getCrawledUrls(jobId, null)
+        return getCrawledUrls(jobId, h3Job)
                 .map(url -> normalizeDomainUrl(url))
                 .anyMatch(url -> searchedDomain.equalsIgnoreCase(url));
     }

diff --git a/...test/src/test/java/dk/netarkivet/harvester/webinterface/servlet/NASEnvironmentTester.java b/...test/src/test/java/dk/netarkivet/harvester/webinterface/servlet/NASEnvironmentTester.java
@@ -42,9 +42,9 @@ public void testGetCrawledUrls() throws Exception {
 
         // Create a mock crawllog file
         String mockCrawllogContent
-                = "2005-05-06T11:47:26.550Z     1         53 dns:www.kb.dk P http://www.kb.dk/ text/dns #002 20050506114726441+2 - -\n"
+                = "2005-05-06T11:47:26.550Z     1         53 dns:www.sb.dk P http://www.sb.dk/ text/dns #002 20050506114726441+2 - -\n"
                 + "2005-05-06T11:47:28.464Z   404        278 http://www.netarkivet.dk/robots.txt P http://www.netarkivet.dk/ text/html #028 20050506114728458+5 NYN2HPNQGIPJTPMGAV4QPBUCVJVNMM54 -\n"
-                + "2005-05-06T11:47:34.753Z -9998          - https://rex.kb.dk/F L http://www.kb.dk/ no-type #030 - - 3t\n"
+                + "2005-05-06T11:47:34.753Z -9998          - https://rex.qb.dk/F L http://www.qb.dk/ no-type #030 - - 3t\n"
                 + "2005-05-06T11:47:30.544Z   200      13750 http://www.kb.dk/ - - text/html #001 20050506114730466+32 U4X3Z5EGCNUYTMIXST6BJXGA5SBKTEAJ 3t\n";
 
         File tempFile = File.createTempFile("NASEnvironmentTest-mock-crawllog-", ".tmp");
@@ -68,20 +68,10 @@ public void testGetCrawledUrls() throws Exception {
         Heritrix3JobMonitor h3Job = new Heritrix3JobMonitor();
         h3Job.setCrawlLogFilePath(crawlLogFilePath);
 
-        Stream<String> crawledUrls = environment.getCrawledUrls(1, h3Job);
-        List<String> crawled = crawledUrls.collect(Collectors.toList());
-
-        // Check whether output corresponds to the input crawllog-mock-file
-        if (crawled.size() != 2) {
-            fail("Wrong amount (" + crawled.size() + ") of URLs extracted from crawllog!");
-            return;
-        }
-
-        if (!crawled.get(0).equalsIgnoreCase("http://www.netarkivet.dk/robots.txt")
-                || !crawled.get(1).equalsIgnoreCase("http://www.kb.dk/")) {
-            fail("URL(s) extracted from crawllog do not match!");
-            return;
-        }
+        assertTrue(environment.jobHarvestsDomain(1, "netarkivet.dk", h3Job));
+        assertTrue(environment.jobHarvestsDomain(1, "kb.dk", h3Job));
+        assertFalse(environment.jobHarvestsDomain(1, "rex.qb.dk", h3Job));
+        assertFalse(environment.jobHarvestsDomain(1, "sb.dk", h3Job));
     }
 
 }
diff --git a/harvester/history-gui/src/main/webapp/Harveststatus-running.jsp b/harvester/history-gui/src/main/webapp/Harveststatus-running.jsp
@@ -304,7 +304,7 @@ This page displays a list of running jobs.
             if (searchedDomainName != null && !searchedDomainName.equals("")) {
                 // Something's been searched for, so let's see if this job should be skipped according to the search...
                 if (HistoryServlet.environment != null
-                        && !HistoryServlet.environment.jobHarvestsDomain(jobId, searchedDomainName)) {
+                        && !HistoryServlet.environment.jobHarvestsDomain(jobId, searchedDomainName, null)) {
                     // Current job doesn't harvest searched domain, so don't show it. Continue from the next job.
                     continue;
                 }