/
MdrFilmPageDeserializer.java
53 lines (40 loc) · 1.66 KB
/
MdrFilmPageDeserializer.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
package de.mediathekview.mserver.crawler.mdr.parser;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.reflect.TypeToken;
import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
import java.lang.reflect.Type;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Collects film URLs from a parsed HTML page.
 *
 * <p>Every element matching {@value #FILM_ENTRY_SELECTOR} that carries a
 * {@value #ATTRIBUTE_DATA_CTRL_PLAYER} attribute is inspected. The attribute value is handed to
 * Gson, which delegates to {@link MdrFilmPlayerJsonDeserializer} to produce an optional string.
 * When a string is present it is appended to the base URL given at construction time and wrapped
 * in a {@link CrawlerUrlDTO}.
 */
public class MdrFilmPageDeserializer {

  /** CSS selector for the elements that may carry the player attribute. */
  private static final String FILM_ENTRY_SELECTOR = "div.sectionDetailPage div.mediaCon";

  /** Name of the attribute whose value is parsed as JSON. */
  private static final String ATTRIBUTE_DATA_CTRL_PLAYER = "data-ctrl-player";

  /** Gson type key under which {@link MdrFilmPlayerJsonDeserializer} is registered. */
  private static final Type TYPE_STRING_OPTIONAL = new TypeToken<Optional<String>>() {
  }.getType();

  private final Gson gson;
  private final String baseUrl;

  /**
   * Creates a deserializer.
   *
   * @param aBaseUrl prefix that is concatenated, unchanged, in front of every extracted string
   */
  public MdrFilmPageDeserializer(final String aBaseUrl) {
    baseUrl = aBaseUrl;
    gson = new GsonBuilder()
        .registerTypeAdapter(TYPE_STRING_OPTIONAL, new MdrFilmPlayerJsonDeserializer())
        .create();
  }

  /**
   * Extracts all film URLs found in the given document.
   *
   * <p>Exceptions raised by Gson while parsing an attribute value are not caught here and
   * propagate to the caller.
   *
   * @param aDocument the parsed page to search
   * @return a new, mutable set of the URLs found; empty if no element yields one
   */
  public Set<CrawlerUrlDTO> deserialize(final Document aDocument) {
    final Set<CrawlerUrlDTO> filmEntries = new HashSet<>();

    final Elements entryElements = aDocument.select(FILM_ENTRY_SELECTOR);
    for (final Element entryElement : entryElements) {
      if (entryElement.hasAttr(ATTRIBUTE_DATA_CTRL_PLAYER)) {
        addFilmUrl(filmEntries, entryElement.attr(ATTRIBUTE_DATA_CTRL_PLAYER));
      }
    }

    return filmEntries;
  }

  /**
   * Parses one attribute value and, if it yields a string, adds the resulting URL to the set.
   *
   * @param aFilmEntries the set to add to
   * @param attr the raw attribute value to parse
   */
  private void addFilmUrl(final Set<CrawlerUrlDTO> aFilmEntries, final String attr) {
    final Optional<String> entryUrl = gson.fromJson(attr, TYPE_STRING_OPTIONAL);
    if (entryUrl == null) {
      // Gson returns null (not an empty Optional) for an empty or JSON-null document,
      // e.g. data-ctrl-player="". Skip the element instead of failing with an NPE.
      return;
    }
    entryUrl.ifPresent(s -> aFilmEntries.add(new CrawlerUrlDTO(baseUrl + s)));
  }
}