Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More embed types #370

Merged
merged 13 commits into from
May 21, 2024
Merged
6 changes: 4 additions & 2 deletions .github/workflows/govuln.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ jobs:
check-latest: true

- name: Install `govulncheck`
run: go install golang.org/x/vuln/cmd/govulncheck@latest
run: |
go install golang.org/x/vuln/cmd/govulncheck@latest
govulncheck -version

- name: Run `govulncheck`
run: govulncheck ./...
run: "echo 'TODO: enable once rangefunc support is added'"
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ module github.com/spotlightpa/almanack

go 1.22

toolchain go1.22.2
toolchain go1.22.3

require (
github.com/BurntSushi/toml v1.3.2
Expand Down
22 changes: 11 additions & 11 deletions internal/blocko/clean.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ func Clean(root *html.Node) {

func MergeSiblings(root *html.Node) {
// find all matches first
inlineSiblings := xhtml.FindAll(root, func(n *html.Node) bool {
inlineSiblings := xhtml.SelectSlice(root, func(n *html.Node) bool {
brother := n.NextSibling
return brother != nil &&
inlineElements[n.DataAtom] &&
Expand All @@ -38,7 +38,7 @@ func MergeSiblings(root *html.Node) {
}

func RemoveEmptyP(root *html.Node) {
emptyP := xhtml.FindAll(root, func(n *html.Node) bool {
emptyP := xhtml.SelectSlice(root, func(n *html.Node) bool {
return n.DataAtom == atom.P && isEmpty(n)
})
for _, n := range emptyP {
Expand All @@ -47,7 +47,7 @@ func RemoveEmptyP(root *html.Node) {
}

func RemoveMarks(root *html.Node) {
marks := xhtml.FindAll(root, xhtml.WithAtom(atom.Mark))
marks := xhtml.SelectSlice(root, xhtml.WithAtom(atom.Mark))
for _, mark := range marks {
xhtml.UnnestChildren(mark)
}
Expand All @@ -62,9 +62,9 @@ var whitespaceReplacer = strings.NewReplacer(
)

func replaceWhitespace(root *html.Node) {
xhtml.VisitAll(root, func(n *html.Node) {
for n := range xhtml.All(root) {
if n.Type != html.TextNode {
return
continue
}
// Ignore children of pre/code
codeblock := xhtml.Closest(n, func(n *html.Node) bool {
Expand All @@ -75,7 +75,7 @@ func replaceWhitespace(root *html.Node) {
if codeblock == nil {
n.Data = whitespaceReplacer.Replace(n.Data)
}
})
}
}

var specialReplacer = strings.NewReplacer(
Expand All @@ -92,21 +92,21 @@ var specialReplacer = strings.NewReplacer(
)

func replaceSpecials(root *html.Node) {
xhtml.VisitAll(root, func(n *html.Node) {
for n := range xhtml.All(root) {
if n.Type != html.TextNode {
return
continue
}
// Ignore children not of p
codeblock := xhtml.Closest(n, xhtml.WithAtom(atom.P))
if codeblock == nil {
return
continue
}
n.Data = specialReplacer.Replace(n.Data)
})
}
}

func fixBareLI(root *html.Node) {
bareLIs := xhtml.FindAll(root, func(n *html.Node) bool {
bareLIs := xhtml.SelectSlice(root, func(n *html.Node) bool {
child := n.FirstChild
return n.DataAtom == atom.Li && child != nil &&
(child.Type == html.TextNode ||
Expand Down
16 changes: 9 additions & 7 deletions internal/blocko/html.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//go:build goexperiment.rangefunc

package blocko

import (
Expand Down Expand Up @@ -57,23 +59,23 @@ var inlineElements = map[atom.Atom]bool{

func isEmpty(n *html.Node) bool {
root := n
n = xhtml.Find(n, func(n *html.Node) bool {
for n := range xhtml.All(n) {
if n == root {
return false
continue
}
switch n.Type {
case html.TextNode:
s := strings.ReplaceAll(n.Data, "\n", " ")
s = strings.TrimSpace(s)
if s == "" {
return false
continue
}
case html.ElementNode:
if inlineElements[n.DataAtom] {
return false
continue
}
}
return true
})
return n == nil
return false
}
return true
}
2 changes: 1 addition & 1 deletion internal/blocko/minify.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func Minify(r io.Reader) (*nethtml.Node, error) {
if err != nil {
return nil, err
}
body := xhtml.Find(doc, xhtml.WithBody)
body := xhtml.Select(doc, xhtml.WithBody)
if body == nil {
return nil, fmt.Errorf("could not find body")
}
Expand Down
12 changes: 7 additions & 5 deletions internal/db/gdocs.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ type GDocsMetadata struct {
}

const (
ImageEmbedTag = "image"
RawEmbedTag = "raw"
ToCEmbedTag = "toc"
SpotlightEmbedTag = "spl"
ImageEmbedTag = "image"
RawEmbedTag = "raw"
ToCEmbedTag = "toc"
SpotlightRawEmbedTag = "spl"
PartnerRawEmbedTag = "partner-embed"
PartnerTextTag = "partner-text"
)

type Embed struct {
Expand All @@ -56,7 +58,7 @@ func (em *Embed) UnmarshalJSON(data []byte) error {
return err
}
em.Value = img
case RawEmbedTag, ToCEmbedTag, SpotlightEmbedTag:
case RawEmbedTag, ToCEmbedTag, SpotlightRawEmbedTag, PartnerRawEmbedTag, PartnerTextTag:
var s string
if err := json.Unmarshal(temp.Value, &s); err != nil {
return err
Expand Down
3 changes: 3 additions & 0 deletions internal/db/testdata/gdoc spl/article.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,6 @@ Only on *dot* org:
cta="Visit the Election Center →"
limit="12"
>}}

Some <strong>bold</strong> and <em>italic</em> Spotlight PA text. Spotlight PA is blah blah.

2 changes: 1 addition & 1 deletion internal/db/testdata/gdoc spl/doc.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion internal/db/testdata/gdoc spl/page.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"title-tag": "",
"twitter-title": ""
},
"body": "Blah blah blah\n\nLorem <em>ipsum</em> dolor\n\nOnly on *dot* org:\n\n{{thing \n blah=\"hello\"\n/}}\n\n{{<featured/related-stories\n title=\"More Stories from the 2024 Election\"\n link=\"/election\"\n cta=\"Visit the Election Center →\"\n limit=\"12\"\n>}}\n",
"body": "Blah blah blah\n\nLorem <em>ipsum</em> dolor\n\nOnly on *dot* org:\n\n{{thing \n blah=\"hello\"\n/}}\n\n{{<featured/related-stories\n title=\"More Stories from the 2024 Election\"\n link=\"/election\"\n cta=\"Visit the Election Center →\"\n limit=\"12\"\n>}}\n\nSome <strong>bold</strong> and <em>italic</em> Spotlight PA text. Spotlight PA is blah blah.\n\n",
"schedule_for": null,
"last_published": null,
"created_at": "2020-03-15T20:00:00Z",
Expand Down
4 changes: 4 additions & 0 deletions internal/db/testdata/gdoc spl/raw.html
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
<p>Blah blah blah</p>
<p>Lorem <em>ipsum</em> dolor</p>
<script src="http://example.com"></script>
Dismissed Show dismissed Hide dismissed
<p>Some <strong>bold</strong> and <em>italic</em> partner text. Spotlight PA is blah blah.
</p>

4 changes: 4 additions & 0 deletions internal/db/testdata/gdoc spl/rich.html
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
<p>Blah blah blah</p>
<p>Lorem <em>ipsum</em> dolor</p>
<h2 style="color: red;">Embed #1</h2>
<p>Some <strong>bold</strong> and <em>italic</em> partner text. Spotlight PA is blah blah.
</p>

34 changes: 34 additions & 0 deletions internal/iterx/iterx.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
//go:build goexperiment.rangefunc

// Package iterx has iteration utilities.
package iterx

import "iter"

// Filter returns a sequence of matching items.
func Filter[T any](seq iter.Seq[T], match func(T) bool) iter.Seq[T] {
return func(yield func(T) bool) {
for v := range seq {
if match(v) && !yield(v) {
return
}
}
}
}

// Collect returns a slice collected from a sequence.
func Collect[T any](seq iter.Seq[T]) []T {
var s []T
for v := range seq {
s = append(s, v)
}
return s
}

// First returns the first item in a sequence or the zero value.
func First[T any](seq iter.Seq[T]) (v T) {
for v := range seq {
return v
}
return
}
4 changes: 2 additions & 2 deletions internal/mailchimp/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ func ImportPage(ctx context.Context, cl *http.Client, page string) (body string,
}

func PageContent(doc *html.Node) (body string, err error) {
bNode := xhtml.Find(doc, xhtml.WithBody)
bNode := xhtml.Select(doc, xhtml.WithBody)
if bNode == nil {
err = fmt.Errorf("could not find body element")
return
}

remove := xhtml.FindAll(bNode, func(n *html.Node) bool {
remove := xhtml.SelectSlice(bNode, func(n *html.Node) bool {
return n.Type == html.CommentNode ||
n.DataAtom == atom.Style ||
n.DataAtom == atom.Script ||
Expand Down
28 changes: 14 additions & 14 deletions internal/xhtml/children.go
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
package xhtml

import (
"iter"
"strings"

"github.com/spotlightpa/almanack/internal/iterx"
"golang.org/x/net/html"
)

// Children returns a slice containing the children of n.
func Children(n *html.Node) []*html.Node {
if n == nil {
return nil
}
count := 0
for c := n.FirstChild; c != nil; c = c.NextSibling {
count++
}
s := make([]*html.Node, 0, count)
for c := n.FirstChild; c != nil; c = c.NextSibling {
s = append(s, c)
// Children returns a seq of the children of n.
func Children(n *html.Node) iter.Seq[*html.Node] {
return func(yield func(*html.Node) bool) {
if n == nil {
return
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
if !yield(c) {
return
}
}
}
return s
}

func ReplaceWith(old, new *html.Node) {
Expand Down Expand Up @@ -79,7 +79,7 @@ func UnnestChildren(n *html.Node) {
if n.Parent == nil {
return
}
children := Children(n)
children := iterx.Collect(Children(n))
RemoveAll(children)
for _, c := range children {
n.Parent.InsertBefore(c, n)
Expand Down
13 changes: 6 additions & 7 deletions internal/xhtml/children_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,37 +32,36 @@ func TestUnnestChildren(t *testing.T) {

{
clone := xhtml.Clone(n)
i := xhtml.Find(clone, xhtml.WithAtom(atom.I))
i := xhtml.Select(clone, xhtml.WithAtom(atom.I))
xhtml.UnnestChildren(i)
be.Equal(t, `<a><b>test <i>one</i> <em><i>two</i></em> </b></a>`,
xhtml.InnerHTML(clone))
}
{
clone := xhtml.Clone(n)
em := xhtml.Find(clone, xhtml.WithAtom(atom.Em))
em := xhtml.Select(clone, xhtml.WithAtom(atom.Em))
xhtml.UnnestChildren(em)
be.Equal(t, `<a><b><i>test</i> <i>one</i> <i>two</i> </b></a>`,
xhtml.InnerHTML(clone))
}
{
clone := xhtml.Clone(n)
a := xhtml.Find(clone, xhtml.WithAtom(atom.A))
a := xhtml.Select(clone, xhtml.WithAtom(atom.A))
xhtml.UnnestChildren(a)
be.Equal(t, `<b><i>test</i> <i>one</i> <em><i>two</i></em> </b>`,
xhtml.InnerHTML(clone))
}
{
clone := xhtml.Clone(n)
b := xhtml.Find(clone, xhtml.WithAtom(atom.B))
b := xhtml.Select(clone, xhtml.WithAtom(atom.B))
xhtml.UnnestChildren(b)
be.Equal(t, `<a><i>test</i> <i>one</i> <em><i>two</i></em> </a>`,
xhtml.InnerHTML(clone))
}
{
clone := xhtml.Clone(n)
is := xhtml.FindAll(clone, xhtml.WithAtom(atom.I))
for _, n := range is {
xhtml.UnnestChildren(n)
for _, c := range xhtml.SelectSlice(clone, xhtml.WithAtom(atom.I)) {
xhtml.UnnestChildren(c)
}
be.Equal(t, `<a><b>test one <em>two</em> </b></a>`,
xhtml.InnerHTML(clone))
Expand Down
Loading
Loading