diff --git a/CHANGELOG.md b/CHANGELOG.md index fae3c5febd..3b153334c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,11 +17,23 @@ All notable changes to this project will be documented in this file. Take a look * Scroll mode: jumping between two EPUB resources with a horizontal swipe triggers the `Navigator.Listener.onJumpToLocator()` callback. * This can be used to allow the user to go back to their previous location if they swiped across chapters by mistake. -#### Changed +#### Streamer + +* The EPUB content iterator now returns `audio` and `video` elements. + +### Changed + +#### Navigator * `EpubNavigatorFragment.firstVisibleElementLocator()` now returns the first *block* element that is visible on the screen, even if it starts on previous pages. * This is used to make sure the user will not miss any context when restoring a TTS session in the middle of a resource. +### Fixed + +#### Streamer + +* Fix issue with the TTS starting from the beginning of the chapter instead of the current position. + ## [2.3.0] ### Added diff --git a/readium/navigator/Makefile b/Makefile similarity index 52% rename from readium/navigator/Makefile rename to Makefile index bf591dc1ae..91ab04c167 100644 --- a/readium/navigator/Makefile +++ b/Makefile @@ -1,18 +1,13 @@ -SCRIPTS_PATH := src/main/assets/_scripts +SCRIPTS_PATH := readium/navigator/src/main/assets/_scripts help: @echo "Usage: make \n\n\ - install\tDownload NPM dependencies\n\ - scripts\tBundle EPUB scripts with Webpack\n\ - lint-scripts\tCheck quality of EPUB scripts\n\ + scripts\tBundle the Navigator EPUB scripts\n\ " -install: - yarn --cwd "$(SCRIPTS_PATH)" install --frozen-lockfile - +.PHONY: scripts scripts: + yarn --cwd "$(SCRIPTS_PATH)" install --frozen-lockfile yarn --cwd "$(SCRIPTS_PATH)" run format - yarn --cwd "$(SCRIPTS_PATH)" run bundle - -lint-scripts: yarn --cwd "$(SCRIPTS_PATH)" run lint + yarn --cwd "$(SCRIPTS_PATH)" run bundle diff --git a/readium/shared/src/main/java/org/readium/r2/shared/publication/services/content/Content.kt b/readium/shared/src/main/java/org/readium/r2/shared/publication/services/content/Content.kt index 7f2ac53895..baafea867d 100644 --- a/readium/shared/src/main/java/org/readium/r2/shared/publication/services/content/Content.kt +++ b/readium/shared/src/main/java/org/readium/r2/shared/publication/services/content/Content.kt @@ -242,7 +242,6 @@ interface Content { /** * Extracts the full raw text, or returns null if no text content can be found. - * * @param separator Separator to use between individual elements. Defaults to newline. */ suspend fun text(separator: String = "\n"): String? = diff --git a/readium/shared/src/main/java/org/readium/r2/shared/publication/services/content/iterators/HtmlResourceContentIterator.kt b/readium/shared/src/main/java/org/readium/r2/shared/publication/services/content/iterators/HtmlResourceContentIterator.kt index e581b772f1..33007b08ac 100644 --- a/readium/shared/src/main/java/org/readium/r2/shared/publication/services/content/iterators/HtmlResourceContentIterator.kt +++ b/readium/shared/src/main/java/org/readium/r2/shared/publication/services/content/iterators/HtmlResourceContentIterator.kt @@ -27,7 +27,7 @@ import org.readium.r2.shared.util.Language import org.readium.r2.shared.util.mediatype.MediaType import org.readium.r2.shared.util.use -// FIXME: Support custom skipped elements +// FIXME: Support custom skipped elements? /** * Iterates an HTML [resource], starting from the given [locator]. @@ -36,11 +36,14 @@ import org.readium.r2.shared.util.use * [Locator.Locations] object. * * If you want to start from the end of the resource, the [locator] must have a `progression` of 1.0. + * + * Locators will contain a `before` context of up to `beforeMaxLength` characters. */ @ExperimentalReadiumApi class HtmlResourceContentIterator( private val resource: Resource, - private val locator: Locator + private val locator: Locator, + private val beforeMaxLength: Int = 50 ) : Content.Iterator { companion object { @@ -57,47 +60,50 @@ class HtmlResourceContentIterator( /** * [Content.Element] loaded with [hasPrevious] or [hasNext], associated with the move delta. */ - private data class ContentWithDelta( + private data class ElementWithDelta( val element: Content.Element, val delta: Int ) - private var currentElement: ContentWithDelta? = null + private var requestedElement: ElementWithDelta? = null override suspend fun hasPrevious(): Boolean { - currentElement = nextBy(-1) - return currentElement != null + if (requestedElement?.delta == -1) return true + + val index = currentIndex() - 1 + val element = elements().elements.getOrNull(index) ?: return false + currentIndex = index + requestedElement = ElementWithDelta(element, -1) + return true } override fun previous(): Content.Element = - currentElement + requestedElement ?.takeIf { it.delta == -1 }?.element + ?.also { requestedElement = null } ?: throw IllegalStateException("Called previous() without a successful call to hasPrevious() first") override suspend fun hasNext(): Boolean { - currentElement = nextBy(+1) - return currentElement != null + if (requestedElement?.delta == 1) return true + + val index = currentIndex() + val element = elements().elements.getOrNull(index) ?: return false + currentIndex = index + 1 + requestedElement = ElementWithDelta(element, +1) + return true } override fun next(): Content.Element = - currentElement - ?.takeIf { it.delta == +1 }?.element + requestedElement + ?.takeIf { it.delta == 1 }?.element + ?.also { requestedElement = null } ?: throw IllegalStateException("Called next() without a successful call to hasNext() first") - private suspend fun nextBy(delta: Int): ContentWithDelta? { - val elements = elements() - val index = currentIndex?.let { it + delta } - ?: elements.startIndex - - val content = elements.elements.getOrNull(index) - ?: return null - - currentIndex = index - return ContentWithDelta(content, delta) - } - private var currentIndex: Int? = null + private suspend fun currentIndex(): Int = + currentIndex ?: elements().startIndex + private suspend fun elements(): ParsedElements = parsedElements ?: parseElements().also { parsedElements = it } @@ -116,14 +122,15 @@ class HtmlResourceContentIterator( // The JS third-party library used to generate the CSS Selector sometimes adds // :root >, which doesn't work with JSoup. tryOrNull { body.selectFirst(it.removePrefix(":root > ")) } - } + }, + beforeMaxLength = beforeMaxLength ) NodeTraversor.traverse(contentParser, body) return contentParser.result() } /** - * Holds the result of parsing the HTML resource into a list of [ContentElement]. + * Holds the result of parsing the HTML resource into a list of `ContentElement`. * * The [startIndex] will be calculated from the element matched by the base [locator], if * possible. Defaults to 0. @@ -136,6 +143,7 @@ class HtmlResourceContentIterator( private class ContentParser( private val baseLocator: Locator, private val startElement: Element?, + private val beforeMaxLength: Int ) : NodeVisitor { fun result() = ParsedElements( @@ -146,51 +154,48 @@ class HtmlResourceContentIterator( private val elements = mutableListOf() private var startIndex = 0 - private var currentElement: Element? = null private val segmentsAcc = mutableListOf() private var textAcc = StringBuilder() - private var wholeRawTextAcc: String = "" + private var wholeRawTextAcc: String? = null private var elementRawTextAcc: String = "" private var rawTextAcc: String = "" private var currentLanguage: String? = null private var currentCssSelector: String? = null - private var ignoredNode: Node? = null - override fun head(node: Node, depth: Int) { - if (ignoredNode != null) return - - if (node.isHidden) { - ignoredNode = node - return - } + /** LIFO stack of the current element's block ancestors. */ + private val breadcrumbs = mutableListOf() + override fun head(node: Node, depth: Int) { if (node is Element) { - currentElement = node + if (node.isBlock) { + breadcrumbs.add(node) + } val tag = node.normalName() + val elementLocator: Locator by lazy { + baseLocator.copy( + locations = Locator.Locations( + otherLocations = buildMap { + put("cssSelector", node.cssSelector() as Any) + } + ) + ) + } + when { tag == "br" -> { flushText() } + tag == "img" -> { flushText() - val href = node.attr("src") - .takeIf { it.isNotBlank() } - ?.let { Href(it, baseLocator.href).string } - - if (href != null) { + node.srcRelativeToHref(baseLocator.href)?.let { href -> elements.add( - Content.ImageElement( - locator = baseLocator.copy( - locations = Locator.Locations( - otherLocations = buildMap { - put("cssSelector", node.cssSelector() as Any) - } - ) - ), + ImageElement( + locator = elementLocator, embeddedLink = Link(href = href), caption = null, // FIXME: Get the caption from figcaption attributes = buildList { @@ -203,6 +208,34 @@ class HtmlResourceContentIterator( ) } } + + tag == "audio" || tag == "video" -> { + flushText() + + val href = node.srcRelativeToHref(baseLocator.href) + val link: Link? = + if (href != null) { + Link(href = href) + } else { + val sources = node.select("source") + .mapNotNull { source -> + source.srcRelativeToHref(baseLocator.href)?.let { href -> + Link(href = href, type = source.attr("type").takeUnless { it.isBlank() }) + } + } + + sources.firstOrNull()?.copy(alternates = sources.drop(1)) + } + + if (link != null) { + when (tag) { + "audio" -> elements.add(AudioElement(locator = elementLocator, embeddedLink = link, attributes = emptyList())) + "video" -> elements.add(VideoElement(locator = elementLocator, embeddedLink = link, attributes = emptyList())) + else -> {} + } + } + } + node.isBlock -> { segmentsAcc.clear() textAcc.clear() @@ -214,10 +247,6 @@ class HtmlResourceContentIterator( } override fun tail(node: Node, depth: Int) { - if (ignoredNode == node) { - ignoredNode = null - } - if (node is TextNode) { val language = node.language if (currentLanguage != language) { @@ -229,7 +258,9 @@ class HtmlResourceContentIterator( appendNormalisedText(node) } else if (node is Element) { if (node.isBlock) { + assert(breadcrumbs.last() == node) flushText() + breadcrumbs.removeLast() } } } @@ -246,7 +277,7 @@ class HtmlResourceContentIterator( flushSegment() if (segmentsAcc.isEmpty()) return - if (startElement != null && currentElement == startElement) { + if (startElement != null && breadcrumbs.lastOrNull() == startElement) { startIndex = elements.size } elements.add( @@ -259,7 +290,10 @@ class HtmlResourceContentIterator( } } ), - text = Locator.Text(highlight = elementRawTextAcc) + text = Locator.Text( + before = segmentsAcc.firstOrNull()?.locator?.text?.before, + highlight = elementRawTextAcc, + ) ), role = TextElement.Role.Body, segments = segmentsAcc.toList() @@ -296,7 +330,7 @@ class HtmlResourceContentIterator( ), text = Locator.Text( highlight = rawTextAcc, - before = wholeRawTextAcc.takeLast(50) // FIXME: custom length + before = wholeRawTextAcc?.takeLast(beforeMaxLength) ) ), text = text, @@ -309,18 +343,22 @@ class HtmlResourceContentIterator( ) } - wholeRawTextAcc += rawTextAcc - elementRawTextAcc += rawTextAcc + if (rawTextAcc != "") { + wholeRawTextAcc = (wholeRawTextAcc ?: "") + rawTextAcc + elementRawTextAcc += rawTextAcc + } rawTextAcc = "" textAcc.clear() } } } -// FIXME: Setup ignore conditions -private val Node.isHidden: Boolean get() = false - private val Node.language: String? get() = attr("xml:lang").takeUnless { it.isBlank() } ?: attr("lang").takeUnless { it.isBlank() } ?: parent()?.language + +private fun Node.srcRelativeToHref(baseHref: String): String? = + attr("src") + .takeIf { it.isNotBlank() } + ?.let { Href(it, baseHref).string } diff --git a/readium/shared/src/test/java/org/readium/r2/shared/publication/services/content/iterators/HtmlResourceContentIteratorTest.kt b/readium/shared/src/test/java/org/readium/r2/shared/publication/services/content/iterators/HtmlResourceContentIteratorTest.kt new file mode 100644 index 0000000000..e923ad2594 --- /dev/null +++ b/readium/shared/src/test/java/org/readium/r2/shared/publication/services/content/iterators/HtmlResourceContentIteratorTest.kt @@ -0,0 +1,407 @@ +@file:OptIn(ExperimentalReadiumApi::class, ExperimentalCoroutinesApi::class) + +package org.readium.r2.shared.publication.services.content.iterators + +import kotlinx.coroutines.ExperimentalCoroutinesApi +import kotlinx.coroutines.test.runTest +import org.junit.Assert.* +import org.junit.Test +import org.junit.runner.RunWith +import org.readium.r2.shared.ExperimentalReadiumApi +import org.readium.r2.shared.fetcher.StringResource +import org.readium.r2.shared.publication.Link +import org.readium.r2.shared.publication.Locator +import org.readium.r2.shared.publication.services.content.Content +import org.readium.r2.shared.publication.services.content.Content.Attribute +import org.readium.r2.shared.publication.services.content.Content.AttributeKey.Companion.ACCESSIBILITY_LABEL +import org.readium.r2.shared.publication.services.content.Content.AttributeKey.Companion.LANGUAGE +import org.readium.r2.shared.publication.services.content.Content.TextElement +import org.readium.r2.shared.publication.services.content.Content.TextElement.Segment +import org.readium.r2.shared.util.Language +import org.robolectric.RobolectricTestRunner + +@OptIn(ExperimentalReadiumApi::class) +@RunWith(RobolectricTestRunner::class) +class HtmlResourceContentIteratorTest { + + private val link = Link(href = "/dir/res.xhtml", type = "application/xhtml+xml") + private val locator = Locator(href = "/dir/res.xhtml", type = "application/xhtml+xml") + + private val html = """ + + + + + Section IV: FAIRY STORIES—MODERN FANTASTIC TALES + + + +
+
171
+

INTRODUCTORY

+ +

The difficulties of classification are very apparent here, and once more it must be noted that illustrative and practical purposes rather than logical ones are served by the arrangement adopted. The modern fanciful story is here placed next to the real folk story instead of after all the groups of folk products. The Hebrew stories at the beginning belong quite as well, perhaps even better, in Section V, while the stories at the end of Section VI shade off into the more modern types of short tales.

+

The child's natural literature. The world has lost certain secrets as the price of an advancing civilization.

+

Without discussing the limits of the culture-epoch theory of human development as a complete guide in education, it is clear that the young child passes through a period when his mind looks out upon the world in a manner analogous to that of the folk as expressed in their literature.

+
+ + + """ + + private val elements: List = listOf( + TextElement( + locator = locator( + selector = "#pgepubid00498 > div.center", + before = null, + highlight = "171" + ), + role = TextElement.Role.Body, + segments = listOf( + Segment( + locator = locator( + selector = "#pgepubid00498 > div.center", + before = null, + highlight = "171" + ), + text = "171", + attributes = listOf(Attribute(LANGUAGE, Language("en"))) + ) + ) + ), + TextElement( + locator = locator( + selector = "#pgepubid00498 > h3", + before = "171", + highlight = "INTRODUCTORY" + ), + role = TextElement.Role.Body, + segments = listOf( + Segment( + locator = locator( + selector = "#pgepubid00498 > h3", + before = "171", + highlight = "INTRODUCTORY" + ), + text = "INTRODUCTORY", + attributes = listOf(Attribute(LANGUAGE, Language("en"))) + ), + ) + ), + TextElement( + locator = locator( + selector = "#pgepubid00498 > p:nth-child(3)", + before = "171INTRODUCTORY", + highlight = "The difficulties of classification are very apparent here, and once more it must be noted that illustrative and practical purposes rather than logical ones are served by the arrangement adopted. The modern fanciful story is here placed next to the real folk story instead of after all the groups of folk products. The Hebrew stories at the beginning belong quite as well, perhaps even better, in Section V, while the stories at the end of Section VI shade off into the more modern types of short tales." + ), + role = TextElement.Role.Body, + segments = listOf( + Segment( + locator = locator( + selector = "#pgepubid00498 > p:nth-child(3)", + before = "171INTRODUCTORY", + highlight = "The difficulties of classification are very apparent here, and once more it must be noted that illustrative and practical purposes rather than logical ones are served by the arrangement adopted. The modern fanciful story is here placed next to the real folk story instead of after all the groups of folk products. The Hebrew stories at the beginning belong quite as well, perhaps even better, in Section V, while the stories at the end of Section VI shade off into the more modern types of short tales." + ), + text = "The difficulties of classification are very apparent here, and once more it must be noted that illustrative and practical purposes rather than logical ones are served by the arrangement adopted. The modern fanciful story is here placed next to the real folk story instead of after all the groups of folk products. The Hebrew stories at the beginning belong quite as well, perhaps even better, in Section V, while the stories at the end of Section VI shade off into the more modern types of short tales.", + attributes = listOf(Attribute(LANGUAGE, Language("en"))), + ), + ) + ), + TextElement( + locator = locator( + selector = "#pgepubid00498 > p:nth-child(4)", + before = "ade off into the more modern types of short tales.", + highlight = "The child's natural literature. The world has lost certain secrets as the price of an advancing civilization." + ), + role = TextElement.Role.Body, + segments = listOf( + Segment( + locator = locator( + selector = "#pgepubid00498 > p:nth-child(4)", + before = "ade off into the more modern types of short tales.", + highlight = "The child's natural literature. The world has lost certain secrets as the price of an advancing civilization." + ), + text = "The child's natural literature. The world has lost certain secrets as the price of an advancing civilization.", + attributes = listOf(Attribute(LANGUAGE, Language("en"))) + ), + ) + ), + TextElement( + locator = locator( + selector = "#pgepubid00498 > p:nth-child(5)", + before = "secrets as the price of an advancing civilization.", + highlight = "Without discussing the limits of the culture-epoch theory of human development as a complete guide in education, it is clear that the young child passes through a period when his mind looks out upon the world in a manner analogous to that of the folk as expressed in their literature." + ), + role = TextElement.Role.Body, + segments = listOf( + Segment( + locator = locator( + selector = "#pgepubid00498 > p:nth-child(5)", + before = "secrets as the price of an advancing civilization.", + highlight = "Without discussing the limits of the culture-epoch theory of human development as a complete guide in education, it is clear that the young child passes through a period when his mind looks out upon the world in a manner analogous to that of the folk as expressed in their literature." + ), + text = "Without discussing the limits of the culture-epoch theory of human development as a complete guide in education, it is clear that the young child passes through a period when his mind looks out upon the world in a manner analogous to that of the folk as expressed in their literature.", + attributes = listOf(Attribute(LANGUAGE, Language("en"))) + ), + ) + ) + ) + + private fun locator( + selector: String? = null, + before: String? = null, + highlight: String? = null, + after: String? = null + ): Locator = + locator.copy( + locations = Locator.Locations( + otherLocations = buildMap { + selector?.let { put("cssSelector", it) } + } + ), + text = Locator.Text(before = before, highlight = highlight, after = after) + ) + + private fun iterator(html: String, startLocator: Locator = locator): HtmlResourceContentIterator = + HtmlResourceContentIterator(StringResource(link, html), startLocator) + + private suspend fun HtmlResourceContentIterator.elements(): List = + buildList { + while (hasNext()) { + add(next()) + } + } + + @Test + fun `cannot call previous() without first hasPrevious()`() = runTest { + val iter = iterator(html) + iter.hasNext(); iter.next() + + assertThrows(IllegalStateException::class.java) { iter.previous() } + iter.hasPrevious() + iter.previous() + } + + @Test + fun `cannot call next() without first hasNext()`() = runTest { + val iter = iterator(html) + assertThrows(IllegalStateException::class.java) { iter.next() } + iter.hasNext() + iter.next() + } + + @Test + fun `iterate from start to finish`() = runTest { + assertEquals(elements, iterator(html).elements()) + } + + @Test + fun `previous() is null from the beginning`() = runTest { + val iter = iterator(html) + assertFalse(iter.hasPrevious()) + } + + @Test + fun `next() returns the first element from the beginning`() = runTest { + val iter = iterator(html) + assertTrue(iter.hasNext()) + assertEquals(elements[0], iter.next()) + } + + @Test + fun `next() then previous() returns the first element`() = runTest { + val iter = iterator(html) + assertTrue(iter.hasNext()) + assertEquals(elements[0], iter.next()) + assertTrue(iter.hasPrevious()) + assertEquals(elements[0], iter.previous()) + } + + @Test + fun `calling hasPrevious() several times doesn't move the index`() = runTest { + val iter = iterator(html) + iter.hasNext(); iter.next() + assertTrue(iter.hasPrevious()) + assertTrue(iter.hasPrevious()) + assertTrue(iter.hasPrevious()) + assertEquals(elements[0], iter.previous()) + } + + @Test + fun `calling hasNext() several times doesn't move the index`() = runTest { + val iter = iterator(html) + assertTrue(iter.hasNext()) + assertTrue(iter.hasNext()) + assertTrue(iter.hasNext()) + assertEquals(elements[0], iter.next()) + } + + @Test + fun `starting from a CSS selector`() = runTest { + val iter = iterator(html, locator(selector = "#pgepubid00498 > p:nth-child(3)")) + assertEquals(elements.subList(2, elements.size), iter.elements()) + } + + @Test + fun `calling previous() when starting from a CSS selector`() = runTest { + val iter = iterator(html, locator(selector = "#pgepubid00498 > p:nth-child(3)")) + assertTrue(iter.hasPrevious()) + assertEquals(elements[1], iter.previous()) + } + + @Test + fun `starting from a CSS selector to a block element containing an inline element`() = runTest { + val nbspHtml = """ + + + +

Tout au loin sur la chaussée, aussi loin qu’on pouvait voir

+

Lui, notre colonel, savait peut-être pourquoi ces deux gens-là tiraient [...] On buvait de la bière sucrée.

+ + + """ + + val iter = iterator(nbspHtml, locator(selector = ":root > :nth-child(2) > :nth-child(2)")) + assertTrue(iter.hasNext()) + assertEquals( + TextElement( + locator = locator( + selector = "html > body > p:nth-child(2)", + before = "oin sur la chaussée, aussi loin qu’on pouvait voir", + highlight = "Lui, notre colonel, savait peut-être pourquoi ces deux gens-là tiraient [...] On buvait de la bière sucrée." + ), + role = TextElement.Role.Body, + segments = listOf( + Segment( + locator = locator( + selector = "html > body > p:nth-child(2)", + before = "oin sur la chaussée, aussi loin qu’on pouvait voir", + highlight = "Lui, notre colonel, savait peut-être pourquoi ces deux gens-là tiraient [...] On buvait de la bière sucrée.", + ), + text = "Lui, notre colonel, savait peut-être pourquoi ces deux gens-là tiraient [...] On buvait de la bière sucrée.", + attributes = listOf(Attribute(LANGUAGE, Language("fr"))) + ) + ) + ), + iter.next() + ) + } + + @Test + fun `iterating over image elements`() = runTest { + val html = """ + + + + + Accessibility description + + + """ + + assertEquals( + listOf( + Content.ImageElement( + locator = locator( + selector = "html > body > img:nth-child(1)" + ), + embeddedLink = Link(href = "/dir/image.png"), + caption = null, + attributes = emptyList() + ), + Content.ImageElement( + locator = locator( + selector = "html > body > img:nth-child(2)" + ), + embeddedLink = Link(href = "/cover.jpg"), + caption = null, + attributes = listOf(Attribute(ACCESSIBILITY_LABEL, "Accessibility description")) + ) + ), + iterator(html).elements() + ) + } + + @Test + fun `iterating over audio elements`() = runTest { + val html = """ + + + +