Skip to content

Commit

Permalink
service-gdocs: Add partner embeds and text
Browse files (browse the repository at this point in the history)
  • Loading branch information
earthboundkid committed May 17, 2024
1 parent ef531e9 commit 4f6e9d4
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 37 deletions.
5 changes: 3 additions & 2 deletions internal/db/gdocs.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ const (
RawEmbedTag = "raw"
ToCEmbedTag = "toc"
SpotlightRawEmbedTag = "spl"
PartnerRawEmbedTag = "partner"
PartnerRawEmbedTag = "partner-embed"
PartnerTextTag = "partner-text"
)

type Embed struct {
Expand All @@ -57,7 +58,7 @@ func (em *Embed) UnmarshalJSON(data []byte) error {
return err
}
em.Value = img
case RawEmbedTag, ToCEmbedTag, SpotlightRawEmbedTag, PartnerRawEmbedTag:
case RawEmbedTag, ToCEmbedTag, SpotlightRawEmbedTag, PartnerRawEmbedTag, PartnerTextTag:
var s string
if err := json.Unmarshal(temp.Value, &s); err != nil {
return err
Expand Down
2 changes: 1 addition & 1 deletion internal/db/testdata/gdoc spl/article.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ Only on *dot* org:
limit="12"
>}}
Some <strong>bold</strong> and <em>italic</em> text. Spotlight PA is blah blah.
Some <strong>bold</strong> and <em>italic</em> Spotlight PA text. Spotlight PA is blah blah.

2 changes: 1 addition & 1 deletion internal/db/testdata/gdoc spl/doc.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion internal/db/testdata/gdoc spl/page.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"title-tag": "",
"twitter-title": ""
},
"body": "Blah blah blah\n\nLorem <em>ipsum</em> dolor\n\nOnly on *dot* org:\n\n{{thing \n blah=\"hello\"\n/}}\n\n{{<featured/related-stories\n title=\"More Stories from the 2024 Election\"\n link=\"/election\"\n cta=\"Visit the Election Center →\"\n limit=\"12\"\n>}}\n\nSome <strong>bold</strong> and <em>italic</em> text. Spotlight PA is blah blah.\n\n",
"body": "Blah blah blah\n\nLorem <em>ipsum</em> dolor\n\nOnly on *dot* org:\n\n{{thing \n blah=\"hello\"\n/}}\n\n{{<featured/related-stories\n title=\"More Stories from the 2024 Election\"\n link=\"/election\"\n cta=\"Visit the Election Center →\"\n limit=\"12\"\n>}}\n\nSome <strong>bold</strong> and <em>italic</em> Spotlight PA text. Spotlight PA is blah blah.\n\n",
"schedule_for": null,
"last_published": null,
"created_at": "2020-03-15T20:00:00Z",
Expand Down
4 changes: 4 additions & 0 deletions internal/db/testdata/gdoc spl/raw.html
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
<p>Blah blah blah</p>
<p>Lorem <em>ipsum</em> dolor</p>
<script src="http://example.com"></script>

Check warning

Code scanning / CodeQL

Inclusion of functionality from an untrusted source (severity: Medium)

Script loaded using unencrypted connection.
<p>Some <strong>bold</strong> and <em>italic</em> partner text. Spotlight PA is blah blah.
</p>

4 changes: 4 additions & 0 deletions internal/db/testdata/gdoc spl/rich.html
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
<p>Blah blah blah</p>
<p>Lorem <em>ipsum</em> dolor</p>
<h2 style="color: red;">Embed #1</h2>
<p>Some <strong>bold</strong> and <em>italic</em> partner text. Spotlight PA is blah blah.
</p>

93 changes: 61 additions & 32 deletions pkg/almanack/service-gdocs.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,9 @@ func (svc Services) ProcessGDocsDoc(ctx context.Context, dbDoc db.GDocsDoc) (err
// Default slug is article title
metadata.InternalID = dbDoc.Document.Title

xhtml.Tables(docHTML, func(tbl *html.Node, rows xhtml.TableNodes) {
label := rows.Label()
for tbl, rows := range xhtml.Tables(docHTML) {
embed := db.Embed{N: n}
switch label {
switch label := rows.Label(); label {
case "html", "embed", "raw", "script":
embed.Type = db.RawEmbedTag
embedHTML := xhtml.InnerText(rows.At(1, 0))
Expand All @@ -140,14 +139,18 @@ func (svc Services) ProcessGDocsDoc(ctx context.Context, dbDoc db.GDocsDoc) (err
"Embed #%d contains unusual characters.", n,
))
}
embeds = append(embeds, embed)
value := must.Get(json.Marshal(embed))
data := xhtml.New("data", "value", string(value))
xhtml.ReplaceWith(tbl, data)
n++
case "spl", "spl-embed":
embedHTML := xhtml.InnerText(rows.At(1, 0))
embed.Type = db.SpotlightRawEmbedTag
embed.Value = embedHTML
value := must.Get(json.Marshal(embed))
data := xhtml.New("data", "value", string(value))
xhtml.ReplaceWith(tbl, data)
return
case "spl-text":
embed.Type = db.SpotlightRawEmbedTag
n := xhtml.Clone(rows.At(1, 0))
Expand All @@ -159,28 +162,39 @@ func (svc Services) ProcessGDocsDoc(ctx context.Context, dbDoc db.GDocsDoc) (err
value := must.Get(json.Marshal(embed))
data := xhtml.New("data", "value", string(value))
xhtml.ReplaceWith(tbl, data)
return
// case "partner-embed":
// embedHTML := xhtml.InnerText(rows.At(1, 0))
// embed.Type = db.PartnerRawEmbedTag
// embed.Value = embedHTML
// case "partner-text":
// embed.Type = db.PartnerRawEmbedTag
// n := xhtml.Clone(rows.At(1, 0))
// blocko.MergeSiblings(n)
// blocko.RemoveEmptyP(n)
// blocko.RemoveMarks(n)
// embed.Value = n.FirstChild
case "partner-embed":
embedHTML := xhtml.InnerText(rows.At(1, 0))
embed.Type = db.PartnerRawEmbedTag
embed.Value = embedHTML
embeds = append(embeds, embed)
value := must.Get(json.Marshal(embed))
data := xhtml.New("data", "value", string(value))
xhtml.ReplaceWith(tbl, data)
n++
case "partner-text":
embed.Type = db.PartnerTextTag
n := xhtml.Clone(rows.At(1, 0))
blocko.MergeSiblings(n)
blocko.RemoveEmptyP(n)
blocko.RemoveMarks(n)
embed.Value = xhtml.InnerHTMLBlocks(n)
value := must.Get(json.Marshal(embed))
data := xhtml.New("data", "value", string(value))
xhtml.ReplaceWith(tbl, data)
case "photo", "image", "photograph", "illustration", "illo":
embed.Type = db.ImageEmbedTag
if imageEmbed, warning := svc.replaceImageEmbed(
ctx, tbl, rows, n, dbDoc.ExternalID, objID2Path,
); warning != "" {
tbl.Parent.RemoveChild(tbl)
warnings = append(warnings, warning)
return
} else {
embed.Value = *imageEmbed
embeds = append(embeds, embed)
value := must.Get(json.Marshal(embed))
data := xhtml.New("data", "value", string(value))
xhtml.ReplaceWith(tbl, data)
n++
}
case "metadata", "info":
if warning := svc.replaceMetadata(
Expand All @@ -189,30 +203,29 @@ func (svc Services) ProcessGDocsDoc(ctx context.Context, dbDoc db.GDocsDoc) (err
warnings = append(warnings, warning)
}
tbl.Parent.RemoveChild(tbl)
return

case "comment", "ignore", "note":
tbl.Parent.RemoveChild(tbl)
return

case "table":
row := xhtml.Closest(rows.At(0, 0), xhtml.WithAtom(atom.Tr))
row.Parent.RemoveChild(row)
return

case "toc", "table of contents":
embed.Type = db.ToCEmbedTag
embed.Value = processToc(docHTML, rows)
embeds = append(embeds, embed)
value := must.Get(json.Marshal(embed))
data := xhtml.New("data", "value", string(value))
xhtml.ReplaceWith(tbl, data)
n++
default:
warnings = append(warnings, fmt.Sprintf(
"Unrecognized table type: %q", label,
))
tbl.Parent.RemoveChild(tbl)
return
}
embeds = append(embeds, embed)
value := must.Get(json.Marshal(embed))
data := xhtml.New("data", "value", string(value))
xhtml.ReplaceWith(tbl, data)
n++
})
}

docHTML, err = blocko.Minify(xhtml.ToBuffer(docHTML))
if err != nil {
Expand Down Expand Up @@ -525,13 +538,23 @@ func fixRichTextPlaceholders(richText *html.Node) {
embeds := xhtml.SelectSlice(richText, xhtml.WithAtom(atom.Data))
for _, dataEl := range embeds {
embed := extractEmbed(dataEl)
if embed.Type == db.SpotlightRawEmbedTag {
switch embed.Type {
case db.SpotlightRawEmbedTag:
dataEl.Parent.RemoveChild(dataEl)
continue
case db.PartnerTextTag:
xhtml.ReplaceWith(dataEl, &html.Node{
Type: html.RawNode,
Data: embed.Value.(string),
})
continue
case db.ImageEmbedTag, db.RawEmbedTag, db.ToCEmbedTag, db.PartnerRawEmbedTag:
placeholder := xhtml.New("h2", "style", "color: red;")
xhtml.AppendText(placeholder, fmt.Sprintf("Embed #%d", embed.N))
xhtml.ReplaceWith(dataEl, placeholder)
default:
panic("unknown embed type: " + embed.Type)
}
placeholder := xhtml.New("h2", "style", "color: red;")
xhtml.AppendText(placeholder, fmt.Sprintf("Embed #%d", embed.N))
xhtml.ReplaceWith(dataEl, placeholder)
}
}

Expand All @@ -548,7 +571,7 @@ func fixRawHTMLPlaceholders(rawHTML *html.Node) {
switch embed.Type {
case db.SpotlightRawEmbedTag:
dataEl.Parent.RemoveChild(dataEl)
case db.RawEmbedTag, db.ToCEmbedTag:
case db.RawEmbedTag, db.ToCEmbedTag, db.PartnerRawEmbedTag, db.PartnerTextTag:
xhtml.ReplaceWith(dataEl, &html.Node{
Type: html.RawNode,
Data: embed.Value.(string),
Expand All @@ -557,6 +580,8 @@ func fixRawHTMLPlaceholders(rawHTML *html.Node) {
placeholder := xhtml.New("h2", "style", "color: red;")
xhtml.AppendText(placeholder, fmt.Sprintf("Embed #%d", embed.N))
xhtml.ReplaceWith(dataEl, placeholder)
default:
panic("unknown embed type: " + embed.Type)
}
}
}
Expand All @@ -566,6 +591,8 @@ func fixMarkdownPlaceholders(rawHTML *html.Node) {
for _, dataEl := range embeds {
embed := extractEmbed(dataEl)
switch embed.Type {
case db.PartnerRawEmbedTag, db.PartnerTextTag:
dataEl.Parent.RemoveChild(dataEl)
case db.RawEmbedTag, db.SpotlightRawEmbedTag:
xhtml.ReplaceWith(dataEl, &html.Node{
Type: html.RawNode,
Expand Down Expand Up @@ -596,6 +623,8 @@ func fixMarkdownPlaceholders(rawHTML *html.Node) {
Type: html.RawNode,
Data: data,
})
default:
panic("unknown embed type: " + embed.Type)
}
}
}
Expand Down

0 comments on commit 4f6e9d4

Please sign in to comment.