Skip to content

Commit

Permalink
SISIS: Fix regex for covers loaded via AJAX
Browse files Browse the repository at this point in the history
to work with relative and root-relative URLs.
Refactor to remove duplicated code.
Provide tests with real-world examples for both cases.
  • Loading branch information
StefRe committed Nov 4, 2020
1 parent 1e582f9 commit c3940ef
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,6 @@ public class SISIS extends OkHttpBaseApi implements OpacApi {
protected long logged_in;
protected Account logged_in_as;
protected static final String ENCODING = "UTF-8";
protected static final Pattern coverPattern = Pattern.compile(
"\\$\\.ajax\\(\\{\\s*url:\\s*'(?:/webOPACClient/)?(jsp/result/cover.jsp\\?[^']+)'");

protected String getDefaultEncoding() {
return ENCODING;
Expand Down Expand Up @@ -474,9 +472,8 @@ public SearchRequestResult parse_search(String html, int page)

// covers loaded with AJAX (seen in Wuppertal)
if (tr.children().size() > 3 && tr.child(3).html().contains("jsp/result/cover.jsp")) {
Matcher matcher = coverPattern.matcher(tr.child(3).html());
if (matcher.find()) {
String url = opac_url + "/" + matcher.group(1);
String url = getAjaxCoverUrl(tr.child(3).html());
if (url != null ) {
futures.add(CompletableFuture.runAsync(() -> {
try {
String result = httpGet(url, getDefaultEncoding());
Expand Down Expand Up @@ -694,6 +691,21 @@ public SearchRequestResult parse_search(String html, int page)
return new SearchRequestResult(results, results_total, page);
}

/**
 * Matches a {@code $.ajax({ url: '...'} call whose quoted URL points at
 * {@code jsp/result/cover.jsp} and captures that URL in group 1.
 * <p>
 * The prefix before {@code jsp/result/cover.jsp} is limited to non-quote characters so the
 * capture can never run past the closing quote into another string literal, and the dot in
 * {@code cover.jsp} is escaped so it only matches a literal dot. Compiled once because the
 * method is called for every search result row.
 */
private static final Pattern AJAX_COVER_PATTERN = Pattern.compile(
        "\\$\\.ajax\\(\\{\\s*url:\\s*'([^']*jsp/result/cover\\.jsp\\?[^']+)'");

/**
 * Extracts the URL of the cover script that the page loads via AJAX and resolves it against
 * {@code opac_url + "/"}, so both relative ({@code jsp/result/cover.jsp?...}) and
 * root-relative ({@code /webOPACClient/jsp/result/cover.jsp?...}) URLs are supported.
 *
 * @param html the HTML (or HTML fragment) to search; may be {@code null}
 * @return the resolved cover script URL, or {@code null} if {@code html} is {@code null},
 *         contains no matching AJAX call, or the URL found cannot be parsed as a URI
 */
String getAjaxCoverUrl(String html) {
    if (html == null) {
        return null;
    }
    Matcher matcher = AJAX_COVER_PATTERN.matcher(html);
    if (!matcher.find()) {
        return null;
    }
    try {
        // URI#resolve handles relative as well as root-relative references.
        return new URI(opac_url + "/").resolve(matcher.group(1)).toString();
    } catch (URISyntaxException | IllegalArgumentException ignored) {
        // A malformed URL (e.g. containing an unencoded space) is treated as "no cover".
        return null;
    }
}

@Override
public DetailedItem getResultById(String id, String homebranch)
throws IOException {
Expand Down Expand Up @@ -744,9 +756,9 @@ protected DetailedItem loadDetail(String html) throws IOException {
ENCODING);

String coverJs = null;
Matcher coverMatcher = coverPattern.matcher(html);
if (coverMatcher.find()) {
coverJs = httpGet(opac_url + "/" + coverMatcher.group(1), ENCODING);
String url = getAjaxCoverUrl(html);
if (url != null) {
coverJs = httpGet(url, ENCODING);
}

DetailedItem result = parseDetail(html, html2, html3, coverJs, data, stringProvider);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import de.geeksfactory.opacclient.objects.AccountData;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doNothing;
Expand Down Expand Up @@ -105,4 +106,50 @@ public void testParseCoverJs() {
"http://webopac.wuppertal.de/showMVBCover.do?token=2aa75c57-40a7-4c99-b501-d49b39ada7a9",
url);
}

@Test
public void testGetAjaxCoverUrlRelPath() {
    // Relative cover URL, as found at Stadt- und Regionalbibliothek Erfurt.
    String snippet = " <!--\n"
            + " $.ajax({\n"
            + " url: 'jsp/result/cover.jsp?localImg=&isbns=%5B978-3-8317-3282-1%5D&asins=%5B%5D&size=medium&pos=-1_1',\n"
            + " dataType: 'script'\n"
            + " });\n"
            + " //-->";
    String expected = "https://opac.erfurt.de/webOPACClient/jsp/result/cover.jsp?localImg=&isbns=%5B978-3-8317-3282-1%5D&asins=%5B%5D&size=medium&pos=-1_1";

    // The relative path must be appended to the OPAC base URL.
    assertEquals(expected, sisis.getAjaxCoverUrl(snippet));
}

@Test
public void testGetAjaxCoverUrlAbsPath() {
    // Root-relative cover URL, as found at Städtische Bibliotheken Dresden.
    String snippet = " <!--\n"
            + " $.ajax({\n"
            + " url: '/webOPACClient/jsp/result/cover.jsp?localImg=&isbns=%5B978-3-8317-3282-1%5D&asins=%5B%5D&size=medium&pos=cover_-1_1',\n"
            + " dataType: 'script'\n"
            + " });\n"
            + " //-->";
    String expected = "https://opac.erfurt.de/webOPACClient/jsp/result/cover.jsp?localImg=&isbns=%5B978-3-8317-3282-1%5D&asins=%5B%5D&size=medium&pos=cover_-1_1";

    // The root-relative path must replace the path of the OPAC base URL, not be appended to it.
    assertEquals(expected, sisis.getAjaxCoverUrl(snippet));
}

@Test
public void testGetAjaxCoverUrlNoPath() {
    // No AJAX cover call at all, as found at Stadtbibliothek Riesa
    // (cover images come from amazon): no URL is expected.
    assertNull(sisis.getAjaxCoverUrl(""));
}

@Test
public void testGetAjaxCoverUrlBadPath() {
    // Made-up example: the URL contains an unencoded space and is therefore not a valid URI.
    String snippet = " <!--\n"
            + " $.ajax({\n"
            + " url: 'jsp/result/cover.jsp?localImg=&isbns= %5B978-3-8317-3282-1%5D&asins=%5B%5D&size=medium&pos=-1_1',\n"
            + " dataType: 'script'\n"
            + " });\n"
            + " //-->";

    // A malformed URL is expected to be ignored instead of raising an exception.
    assertNull(sisis.getAjaxCoverUrl(snippet));
}
}

0 comments on commit c3940ef

Please sign in to comment.