From c3940ef711c0cf7a14b9968e253308db76c254e6 Mon Sep 17 00:00:00 2001 From: Steffen Rehberg Date: Wed, 4 Nov 2020 17:53:15 +0100 Subject: [PATCH] SISIS: Fix regex for covers loaded via AJAX to work with relative and root-relative urls. Refactor code to avoid duplicate code. Provide tests with real-world examples for both cases. --- .../geeksfactory/opacclient/apis/SISIS.java | 28 +++++++---- .../opacclient/apis/SISISTest.java | 47 +++++++++++++++++++ 2 files changed, 67 insertions(+), 8 deletions(-) diff --git a/opacclient/libopac/src/main/java/de/geeksfactory/opacclient/apis/SISIS.java b/opacclient/libopac/src/main/java/de/geeksfactory/opacclient/apis/SISIS.java index eb7b5e40b..141b1e103 100644 --- a/opacclient/libopac/src/main/java/de/geeksfactory/opacclient/apis/SISIS.java +++ b/opacclient/libopac/src/main/java/de/geeksfactory/opacclient/apis/SISIS.java @@ -196,8 +196,6 @@ public class SISIS extends OkHttpBaseApi implements OpacApi { protected long logged_in; protected Account logged_in_as; protected static final String ENCODING = "UTF-8"; - protected static final Pattern coverPattern = Pattern.compile( - "\\$\\.ajax\\(\\{\\s*url:\\s*'(?:/webOPACClient/)?(jsp/result/cover.jsp\\?[^']+)'"); protected String getDefaultEncoding() { return ENCODING; @@ -474,9 +472,8 @@ public SearchRequestResult parse_search(String html, int page) // covers loaded with AJAX (seen in Wuppertal) if (tr.children().size() > 3 && tr.child(3).html().contains("jsp/result/cover.jsp")) { - Matcher matcher = coverPattern.matcher(tr.child(3).html()); - if (matcher.find()) { - String url = opac_url + "/" + matcher.group(1); + String url = getAjaxCoverUrl(tr.child(3).html()); + if (url != null ) { futures.add(CompletableFuture.runAsync(() -> { try { String result = httpGet(url, getDefaultEncoding()); @@ -694,6 +691,21 @@ public SearchRequestResult parse_search(String html, int page) return new SearchRequestResult(results, results_total, page); } + String getAjaxCoverUrl(String html) { + final Pattern coverPattern = Pattern.compile( + "\\$\\.ajax\\(\\{\\s*url:\\s*'(.*jsp/result/cover.jsp\\?[^']+)'"); + Matcher matcher = coverPattern.matcher(html); + String url = null; + if (matcher.find()) { + try { + url = new URI(opac_url + "/").resolve(matcher.group(1)).toString(); + } catch (URISyntaxException | IllegalArgumentException ignoreBadUrl) { + // ignore bad url and return null + } + } + return url; + } + @Override public DetailedItem getResultById(String id, String homebranch) throws IOException { @@ -744,9 +756,9 @@ protected DetailedItem loadDetail(String html) throws IOException { ENCODING); String coverJs = null; - Matcher coverMatcher = coverPattern.matcher(html); - if (coverMatcher.find()) { - coverJs = httpGet(opac_url + "/" + coverMatcher.group(1), ENCODING); + String url = getAjaxCoverUrl(html); + if (url != null) { + coverJs = httpGet(url, ENCODING); } DetailedItem result = parseDetail(html, html2, html3, coverJs, data, stringProvider); diff --git a/opacclient/libopac/src/test/java/de/geeksfactory/opacclient/apis/SISISTest.java b/opacclient/libopac/src/test/java/de/geeksfactory/opacclient/apis/SISISTest.java index 1292ea763..109489c9a 100644 --- a/opacclient/libopac/src/test/java/de/geeksfactory/opacclient/apis/SISISTest.java +++ b/opacclient/libopac/src/test/java/de/geeksfactory/opacclient/apis/SISISTest.java @@ -11,6 +11,7 @@ import de.geeksfactory.opacclient.objects.AccountData; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import static org.mockito.Matchers.anyString; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doNothing; @@ -105,4 +106,50 @@ public void testParseCoverJs() { "http://webopac.wuppertal.de/showMVBCover.do?token=2aa75c57-40a7-4c99-b501-d49b39ada7a9", url); } + + @Test + public void testGetAjaxCoverUrlRelPath() { + // as found at Stadt- und Regionalbibliothek Erfurt + String html = " "; + String actual = sisis.getAjaxCoverUrl(html); + assertEquals("https://opac.erfurt.de/webOPACClient/jsp/result/cover.jsp?localImg=&isbns=%5B978-3-8317-3282-1%5D&asins=%5B%5D&size=medium&pos=-1_1", actual); + } + + @Test + public void testGetAjaxCoverUrlAbsPath() { + // as found at Städtischen Bibliotheken Dresden + String html = " "; + String actual = sisis.getAjaxCoverUrl(html); + assertEquals("https://opac.erfurt.de/webOPACClient/jsp/result/cover.jsp?localImg=&isbns=%5B978-3-8317-3282-1%5D&asins=%5B%5D&size=medium&pos=cover_-1_1", actual); + } + + @Test + public void testGetAjaxCoverUrlNoPath() { + // as found at Stadtbibliothek Riesa (cover images come from amazon) + String actual = sisis.getAjaxCoverUrl(""); + assertNull(actual); + } + + @Test + public void testGetAjaxCoverUrlBadPath() { + // made up example of url with unencoded space + String html = " "; + String actual = sisis.getAjaxCoverUrl(html); + assertNull(actual); + } }