From b7f6e55cf040a961203b1030c3d6e1c90ee2e7c3 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 15 Sep 2023 16:54:38 +0300 Subject: [PATCH] Pass explicit output of mobile-html to the zim creator --- src/Downloader.ts | 2 +- src/renderers/wikimedia-mobile.renderer.ts | 180 ++------------------- 2 files changed, 13 insertions(+), 169 deletions(-) diff --git a/src/Downloader.ts b/src/Downloader.ts index 6e414178..ebe02302 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -170,9 +170,9 @@ class Downloader { if (!forceRender) { //* Objects order in array matters! this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([ + { condition: await MediaWiki.hasWikimediaMobileRestApi(), value: MediaWiki.mobileRestApiUrl.href }, { condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, - { condition: await MediaWiki.hasWikimediaMobileRestApi(), value: MediaWiki.mobileRestApiUrl.href }, ]) //* Objects order in array matters! diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 776aebae..9089cab8 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -1,45 +1,37 @@ -import * as domino from 'domino' import * as logger from '../Logger.js' import { Renderer } from './abstract.renderer.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' -import { categoriesTemplate, leadSectionTemplate, sectionTemplate, subCategoriesTemplate, subPagesTemplate, subSectionTemplate } from '../Templates.js' -// Represent 'https://{wikimedia-wiki}/api/rest_v1/page/html/' +// Represent 'https://{wikimedia-wiki}/api/rest_v1/page/mobile-html/' export class WikimediaMobileRenderer extends Renderer { constructor() { super() } - private async retrieveHtml(renderOpts: RenderOpts): Promise { - const { data, articleId, articleDetail, isMainPage } = renderOpts + private getStrippedTitle(renderOpts: RenderOpts): string { + const { data, articleId } = renderOpts - const html = isMainPage ? data : super.injectH1TitleToHtml(data, articleDetail) - const strippedTitle = getStrippedTitleFromHtml(html) - const displayTitle = strippedTitle || articleId.replace('_', ' ') - - return { html, displayTitle } + const strippedTitle = getStrippedTitleFromHtml(data) + return strippedTitle || articleId.replace('_', ' ') } public async render(renderOpts: RenderOpts): Promise { try { const result: RenderOutput = [] - const { data, articleId, webp, _moduleDependencies, isMainPage, dump } = renderOpts + const { data, articleId, webp, _moduleDependencies, dump } = renderOpts const articleDetail = await renderOpts.articleDetailXId.get(articleId) - const { html, displayTitle } = await this.retrieveHtml(renderOpts) - if (html) { - let dataWithHeader = '' - if (!isMainPage) { - dataWithHeader = super.injectH1TitleToHtml(data, articleDetail) - } - // TODO: do mobile page transformations before applying other treatments - const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(dataWithHeader || data, dump, articleId, articleDetail, _moduleDependencies, webp) + const displayTitle = this.getStrippedTitle(renderOpts) + if (data) { + // TODO: Apply mobile page transformations before applying other treatments + const { subtitles, mediaDependencies } = await super.processHtml(data, dump, articleId, articleDetail, _moduleDependencies, webp) + // TODO: styles, scripts and most of content are not visible in Kiwix app, but enabled when use Kiwix server result.push({ articleId, displayTitle, - html: finalHTML, + html: data, mediaDependencies, subtitles, }) @@ -50,152 +42,4 @@ export class WikimediaMobileRenderer extends Renderer { throw new Error(err.message) } } - - // TODO: work in progress - private treatSections(data: any, dump: Dump, articleId: string, displayTitle: string, articleDetail: ArticleDetail): string { - const doc = domino.createDocument(data) - - let html = '' - - // set the first section (open by default) - html += leadSectionTemplate({ - lead_display_title: displayTitle, - lead_section_text: doc.querySelector('section[data-mw-section-id="0"]').innerHTML, - strings: dump.strings, - }) - - // Get only top remain setions except first one - // Calculate toclevel to handle nesting. To do so you need to traverse DOM recursevely - - // set all other section (closed by default) - if (!dump.nodet && json.remaining.sections.length > 0) { - const firstTocLevel = json.remaining.sections[0].toclevel - json.remaining.sections.forEach((oneSection: any, i: number) => { - if (oneSection.toclevel === firstTocLevel) { - html = html.replace(`__SUB_LEVEL_SECTION_${i}__`, '') // remove unused anchor for subsection - html += sectionTemplate({ - section_index: i + 1, - section_id: oneSection.id, // Get from data-mw-section-id attribute - section_anchor: oneSection.anchor, // Anchor of the heading, id attr of the heading in html - section_line: oneSection.line, // this is the textContent() from the title. Check mobileapps for reference (checked) - section_text: oneSection.text, // this is the innerHTML of the section, refer to mobileapps - strings: dump.strings, // TODO: investigate - }) - } else { - html = html.replace( - `__SUB_LEVEL_SECTION_${i}__`, - subSectionTemplate({ - section_index: i + 1, - section_toclevel: oneSection.toclevel + 1, - section_id: oneSection.id, - section_anchor: oneSection.anchor, - section_line: oneSection.line, - section_text: oneSection.text, - strings: dump.strings, - }), - ) - } - }) - } - - // For section index - /** - * Iterate over parent and nested sections separately and set section_index. For parent = 1, and nested 2..n respectively - */ - - // For id - /** - * const sectionNumberString = sectionElement && sectionElement.getAttribute('data-mw-section-id'); - return sectionNumberString ? parseInt(sectionNumberString, 10) : undefined; - */ - - // For line - /** - * node.innerHTML.trim() - */ - - // For section text - /** - if (node.nodeType === NodeType.TEXT_NODE) { - currentSection.text += node.textContent; - } else { - currentSection.text += node.outerHTML; - } - */ - - // For anchor - /** - * node.getAttribute('id'); - */ - - const articleResourceNamespace = 'A' - const categoryResourceNamespace = 'U' - const slashesInUrl = articleId.split('/').length - 1 - const upStr = '../'.repeat(slashesInUrl + 1) - if (articleDetail.subCategories && articleDetail.subCategories.length) { - const subCategories = articleDetail.subCategories.map((category) => { - return { - name: category.title.split(':').slice(1).join(':'), - url: `${upStr}${categoryResourceNamespace}/${category.title}`, - } - }) - - const groups = this.groupAlphabetical(subCategories) - - html += subCategoriesTemplate({ - strings: dump.strings, - groups, - prevArticleUrl: articleDetail.prevArticleId ? `${upStr}${categoryResourceNamespace}/${articleDetail.prevArticleId}` : null, - nextArticleUrl: articleDetail.nextArticleId ? `${upStr}${categoryResourceNamespace}/${articleDetail.nextArticleId}` : null, - }) - } - - if (articleDetail.pages && articleDetail.pages.length) { - const pages = articleDetail.pages.map((page) => { - return { - name: page.title, - url: `${upStr}${articleResourceNamespace}/${page.title}`, - } - }) - - const groups = this.groupAlphabetical(pages) - - html += subPagesTemplate({ - strings: dump.strings, - groups, - }) - } - - if (articleDetail.categories && articleDetail.categories.length) { - const categories = articleDetail.categories.map((category) => { - return { - name: category.title.split(':').slice(1).join(':'), - url: `${upStr}${categoryResourceNamespace}/${category.title}`, - } - }) - html += categoriesTemplate({ - strings: dump.strings, - categories, - }) - } - html = html.replace(`__SUB_LEVEL_SECTION_${json.remaining.sections.length}__`, '') // remove the last subcestion anchor (all other anchor are removed in the forEach) - return html - } - - private groupAlphabetical(items: PageRef[]) { - const groupsAlphabetical = items.reduce((acc: any, item) => { - const groupId = item.name[0].toLocaleUpperCase() - acc[groupId] = (acc[groupId] || []).concat(item) - return acc - }, {}) - - return Object.keys(groupsAlphabetical) - .sort() - .map((letter) => { - return { - title: letter, - items: groupsAlphabetical[letter], - } - }) - } }