From 8b9a66ffc28f88ad8c1b086607d710ea51261a11 Mon Sep 17 00:00:00 2001 From: Charles Teague Date: Wed, 8 Jun 2022 10:19:44 -0400 Subject: [PATCH 1/2] Automatically discover meta description --- src/core/text.ts | 16 +++++++++ .../types/website/util/discover-meta.ts | 13 +++++++ src/project/types/website/website-meta.ts | 34 +++++++++++++++++-- 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/src/core/text.ts b/src/core/text.ts index 26b79cac121..bd07782f72a 100644 --- a/src/core/text.ts +++ b/src/core/text.ts @@ -47,3 +47,19 @@ export function formatLineRange( lines: result, }; } + +export function truncateText(text: string, length: number) { + if (text.length < length) { + return text; + } else { + // Since we'll insert elips, trim an extra space + const clipLength = length - 1; + const clipped = text.substring(0, clipLength); + const lastSpace = clipped.lastIndexOf(" "); + if (lastSpace > 0) { + return clipped.substring(0, lastSpace) + "…"; + } else { + return clipped + "…"; + } + } +} diff --git a/src/project/types/website/util/discover-meta.ts b/src/project/types/website/util/discover-meta.ts index 0449c1d95f7..319546aeb55 100644 --- a/src/project/types/website/util/discover-meta.ts +++ b/src/project/types/website/util/discover-meta.ts @@ -27,6 +27,19 @@ const kMdPreviewClassRegex = RegExp(kPreviewClassPattern); const kMdNamedImageRegex = RegExp(kMdNamedImagePattern); const kMarkdownImg = /!\[[^\]]*\]\((.*?)(?:\".*\")?\)(?:\{(?:[^\|]*)\})?/; +export function findDescription(doc: Document): string | undefined { + const paras = doc.querySelectorAll( + "main.content > p, main.content > section > p", + ); + for (const para of paras) { + const paraEl = para as Element; + if (paraEl.innerText) { + return paraEl.innerText; + } + } + return undefined; +} + export function findPreviewImg( doc: Document, ): string | undefined { diff --git a/src/project/types/website/website-meta.ts b/src/project/types/website/website-meta.ts index b12ad99771e..4d257322a89 100644 --- a/src/project/types/website/website-meta.ts +++ b/src/project/types/website/website-meta.ts @@ -36,7 +36,7 @@ import { createMarkdownPipeline, MarkdownPipeline, } from "./website-pipeline-md.ts"; -import { findPreviewImg } from "./util/discover-meta.ts"; +import { findDescription, findPreviewImg } from "./util/discover-meta.ts"; import { isAbsoluteRef } from "../../../core/http.ts"; import { kHtmlEmptyPostProcessResult, @@ -45,6 +45,7 @@ import { HtmlPostProcessResult } from "../../../command/render/types.ts"; import { imageSize } from "../../../core/image.ts"; import { writeMetaTag } from "../../../format/html/format-html-shared.ts"; import { joinUrl } from "../../../core/url.ts"; +import { truncateText } from "../../../core/text.ts"; const kCard = "card"; @@ -53,6 +54,7 @@ interface SocialMetadataProvider { prefix: string; metadata: Metadata; filter?: (key: string) => string; + resolveValue?: (key: string, value: string) => string; resolveDefaults?: (finalMetadata: Metadata) => void; } @@ -101,6 +103,14 @@ export function metadataHtmlPostProcessor( } return key; }, + resolveValue: (key: string, value: string) => { + // Limit to 300 chars for Open Graph + if ([kDescription].includes(key)) { + return truncateText(value, 300); + } + + return value; + }, }; // The twitter card provider @@ -122,6 +132,14 @@ export function metadataHtmlPostProcessor( return key; }, + resolveValue: (key: string, value: string) => { + // Limit to 200 chars for Twitter + if ([kDescription].includes(key)) { + return truncateText(value, 200); + } + + return value; + }, resolveDefaults: (finalMetadata: Metadata) => { if (finalMetadata[kCardStyle] === undefined) { finalMetadata[kCardStyle] = finalMetadata[kImage] @@ -156,6 +174,11 @@ export function metadataHtmlPostProcessor( metadata[kImage] = findPreviewImg(doc); } + // cook up a description if one is not provided + if (metadata[kDescription] === undefined) { + metadata[kDescription] = findDescription(doc); + } + // Convert image to absolute href and add height and width resolveImageMetadata(source, project, format, metadata); @@ -167,11 +190,18 @@ export function metadataHtmlPostProcessor( // Append the metadata Object.keys(metadata).forEach((key) => { if (metadata[key] !== undefined) { + // Resolve the value const data = metadata[key] as string; + const value = provider.resolveValue + ? provider.resolveValue(key, data) + : data; + + // Filter the key if (provider.filter) { key = provider.filter(key); } - writeMetaTag(`${provider.prefix}:${key}`, data, doc); + + writeMetaTag(`${provider.prefix}:${key}`, value, doc); } }); }); From e1b54178310ecc4b70e0e8dd8e48bacb81f21231 Mon Sep 17 00:00:00 2001 From: Charles Teague Date: Wed, 8 Jun 2022 13:55:56 -0400 Subject: [PATCH 2/2] Improve text truncation for descriptions and listings --- src/core/text.ts | 97 +++++++++++++++++-- .../listing/website-listing-template.ts | 27 ++---- src/project/types/website/website-meta.ts | 4 +- 3 files changed, 98 insertions(+), 30 deletions(-) diff --git a/src/core/text.ts b/src/core/text.ts index bd07782f72a..be0b8a7e6e8 100644 --- a/src/core/text.ts +++ b/src/core/text.ts @@ -48,18 +48,97 @@ export function formatLineRange( }; } -export function truncateText(text: string, length: number) { +const kLastPunctuationRegex = /([\S\s]*)[\.\?\!]/; +function trimSentence(text: string) { + const match = text.match(kLastPunctuationRegex); + if (match) { + return { + text: match[0], + trimmed: true, + }; + } else { + return { + text, + trimmed: false, + }; + } +} + +function trimLength(text: string, length: number) { if (text.length < length) { - return text; + return { + text, + trimmed: false, + }; + } else { + return { + text: text.substring(0, length), + trimmed: true, + }; + } +} + +function trimSpace(text: string) { + const lastSpace = text.lastIndexOf(" "); + if (lastSpace > 0) { + return { + text: text.substring(0, lastSpace), + trimmed: true, + }; + } else { + return { + text, + trimmed: false, + }; + } +} + +export function truncateText( + text: string, + length: number, + breakAt: "space" | "punctuation", +) { + const trimEnd = (text: string) => { + if ([",", "/", ":"].includes(text.charAt(text.length - 1))) { + return text.substring(0, text.length - 1); + } else { + return text; + } + }; + + const trimAtSpace = (text: string) => { + console.log(text); + const spaceResult = trimSpace( + text.substring(0, text.length - 1), + ); + console.log(spaceResult.text); + return trimEnd(spaceResult.text) + "…"; + }; + + const trimPunc = (text: string) => { + const puncResult = trimSentence(text); + if (puncResult.trimmed) { + return puncResult.text; + } else { + return trimAtSpace(puncResult.text); + } + }; + + const lengthResult = trimLength(text, length); + + if (lengthResult.trimmed) { + // This was shortened + if (breakAt === "punctuation") { + return trimPunc(lengthResult.text); + } else { + return trimAtSpace(lengthResult.text); + } } else { - // Since we'll insert elips, trim an extra space - const clipLength = length - 1; - const clipped = text.substring(0, clipLength); - const lastSpace = clipped.lastIndexOf(" "); - if (lastSpace > 0) { - return clipped.substring(0, lastSpace) + "…"; + // This wasn't shortened + if (breakAt === "punctuation") { + return trimPunc(lengthResult.text); } else { - return clipped + "…"; + return trimEnd(lengthResult.text); } } } diff --git a/src/project/types/website/listing/website-listing-template.ts b/src/project/types/website/listing/website-listing-template.ts index eab50982465..7d19da1a9a5 100644 --- a/src/project/types/website/listing/website-listing-template.ts +++ b/src/project/types/website/listing/website-listing-template.ts @@ -41,6 +41,7 @@ import { import { resourcePath } from "../../../../core/resources.ts"; import { localizedString } from "../../../../config/localization.ts"; import { formatDate, parsePandocDate } from "../../../../core/date.ts"; +import { truncateText } from "../../../../core/text.ts"; export const kDateFormat = "date-format"; @@ -111,7 +112,11 @@ export function templateMarkdownHandler( const maxDescLength = listing[kMaxDescLength] as number || -1; if (maxDescLength > 0) { - record.description = truncateText(item.description, maxDescLength); + record.description = truncateText( + item.description, + maxDescLength, + "space", + ); } } @@ -198,14 +203,14 @@ export function templateMarkdownHandler( if (content) { content.appendChild(listingEl); } else { - // Custom page layout doesn't have a main.content, so + // Custom page layout doesn't have a main.content, so // just use the quarto-content div directly const customContent = doc.querySelector("#quarto-content"); if (customContent) { customContent.appendChild(listingEl); } else { // Couldn't find anywhere to put the listing el, just - // stick at the bottom of the body + // stick at the bottom of the body doc.body.appendChild(listingEl); } } @@ -513,19 +518,3 @@ export function templateJsScript( `; return jsScript; } - -function truncateText(text: string, length: number) { - if (text.length < length) { - return text; - } else { - // Since we'll insert elips, trim an extra space - const clipLength = length - 1; - const clipped = text.substring(0, clipLength); - const lastSpace = clipped.lastIndexOf(" "); - if (lastSpace > 0) { - return clipped.substring(0, lastSpace) + "…"; - } else { - return clipped + "…"; - } - } -} diff --git a/src/project/types/website/website-meta.ts b/src/project/types/website/website-meta.ts index 4d257322a89..fb2f8598e26 100644 --- a/src/project/types/website/website-meta.ts +++ b/src/project/types/website/website-meta.ts @@ -106,7 +106,7 @@ export function metadataHtmlPostProcessor( resolveValue: (key: string, value: string) => { // Limit to 300 chars for Open Graph if ([kDescription].includes(key)) { - return truncateText(value, 300); + return truncateText(value, 200, "punctuation"); } return value; @@ -135,7 +135,7 @@ export function metadataHtmlPostProcessor( resolveValue: (key: string, value: string) => { // Limit to 200 chars for Twitter if ([kDescription].includes(key)) { - return truncateText(value, 200); + return truncateText(value, 200, "punctuation"); } return value;