Skip to content

Commit

Permalink
Rewrote addSpansToNode to make it less hacky (fixes #10)
Browse files Browse the repository at this point in the history
  • Loading branch information
pgaskin committed Dec 16, 2017
1 parent 3249342 commit 950cd0e
Showing 1 changed file with 34 additions and 18 deletions.
52 changes: 34 additions & 18 deletions kepub/content.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package kepub

import (
"bytes"
"fmt"
"os"
"path/filepath"
Expand Down Expand Up @@ -63,18 +62,39 @@ func addDivs(doc *goquery.Document) error {
return nil
}

// createSpan builds a detached <span> element carrying the Kobo reading
// location markers: class "koboSpan" and an id of the form
// "kobo.<paragraph>.<segment>". The supplied text becomes the span's only
// child, as a text node. The returned node has no parent; the caller is
// responsible for inserting it into a document tree.
func createSpan(paragraph, segment int, text string) *html.Node {
	// Kobo identifies each span by its paragraph and segment counters.
	id := fmt.Sprintf("kobo.%v.%v", paragraph, segment)

	content := &html.Node{Type: html.TextNode, Data: text}

	wrapper := &html.Node{
		Type: html.ElementNode,
		Data: "span",
		Attr: []html.Attribute{
			{Key: "class", Val: "koboSpan"},
			{Key: "id", Val: id},
		},
	}
	wrapper.AppendChild(content)

	return wrapper
}

// addSpansToNode is a recursive helper function for addSpans.
func addSpansToNode(node *html.Node, paragraph *int, segment *int) {
sentencere := regexp.MustCompile(`((?m).*?[\.\!\?\:]['"”’“…]?\s*)`)
sentencere := regexp.MustCompile(`((?ms).*?[\.\!\?\:]['"”’“…]?\s*)`)

// Part 2 of hacky way of setting innerhtml of a textnode by double escaping everything, and deescaping once afterwards
newAttr := []html.Attribute{}
for _, a := range node.Attr {
a.Key = html.EscapeString(a.Key)
a.Val = html.EscapeString(a.Val)
newAttr = append(newAttr, a)
nextNodes := []*html.Node{}
for c := node.FirstChild; c != nil; c = c.NextSibling {
nextNodes = append(nextNodes, c)
}
node.Attr = newAttr

if node.Type == html.TextNode {
if node.Parent.Data == "pre" {
Expand All @@ -99,20 +119,18 @@ func addSpansToNode(node *html.Node, paragraph *int, segment *int) {
sentences = append(sentences, node.Data[lasti[1]:len(node.Data)])
}

var newhtml bytes.Buffer

for _, sentence := range sentences {
if strings.TrimSpace(sentence) != "" {
newhtml.WriteString(fmt.Sprintf(`<span class="koboSpan" id="kobo.%v.%v">%s</span>`, *paragraph, *segment, html.EscapeString(sentence)))
node.Parent.InsertBefore(createSpan(*paragraph, *segment, sentence), node)
*segment++
}
}

// Part 1 of hacky way of setting innerhtml of a textnode by double escaping everything, and deescaping once afterwards
node.Data = newhtml.String()
node.Parent.RemoveChild(node)

return
}

if node.Type != html.ElementNode {
return
}
Expand All @@ -123,7 +141,8 @@ func addSpansToNode(node *html.Node, paragraph *int, segment *int) {
*segment = 0
*paragraph++
}
for c := node.FirstChild; c != nil; c = c.NextSibling {

for _, c := range nextNodes {
addSpansToNode(c, paragraph, segment)
}
}
Expand Down Expand Up @@ -218,9 +237,6 @@ func process(content *string) error {
return err
}

// Part 3 of hacky way of setting innerhtml of a textnode by double escaping everything, and deescaping once afterwards. Must be done before further html processing
h = html.UnescapeString(h)

if err := openSelfClosingPs(&h); err != nil {
return err
}
Expand Down

0 comments on commit 950cd0e

Please sign in to comment.