Skip to content

Commit

Permalink
[chore] Text formatting overhaul (#1406)
Browse files Browse the repository at this point in the history
* Implement goldmark debug print for hashtags and mentions

* Minify HTML in FromPlain

* Convert plaintext status parser to goldmark

* Move mention/tag/emoji finding logic into formatter

* Combine mention and hashtag boundary characters

* Normalize unicode when rendering hashtags
  • Loading branch information
autumnull committed Feb 3, 2023
1 parent 271da01 commit 49beb17
Show file tree
Hide file tree
Showing 26 changed files with 821 additions and 1,309 deletions.
6 changes: 3 additions & 3 deletions internal/api/client/statuses/statuscreate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ func (suite *StatusCreateTestSuite) TestPostAnotherNewStatus() {
err = json.Unmarshal(b, statusReply)
suite.NoError(err)

suite.Equal("<p><a href=\"http://localhost:8080/tags/test\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>test</span></a> alright, should be able to post <a href=\"http://localhost:8080/tags/links\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>links</span></a> with fragments in them now, let&#39;s see........<br/><br/><a href=\"https://docs.gotosocial.org/en/latest/user_guide/posts/#links\" rel=\"noopener nofollow noreferrer\" target=\"_blank\">docs.gotosocial.org/en/latest/user_guide/posts/#links</a><br/><br/><a href=\"http://localhost:8080/tags/gotosocial\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>gotosocial</span></a><br/><br/>(tobi remember to pull the docker image challenge)</p>", statusReply.Content)
suite.Equal("<p><a href=\"http://localhost:8080/tags/test\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>test</span></a> alright, should be able to post <a href=\"http://localhost:8080/tags/links\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>links</span></a> with fragments in them now, let's see........<br><br><a href=\"https://docs.gotosocial.org/en/latest/user_guide/posts/#links\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://docs.gotosocial.org/en/latest/user_guide/posts/#links</a><br><br><a href=\"http://localhost:8080/tags/gotosocial\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>gotosocial</span></a><br><br>(tobi remember to pull the docker image challenge)</p>", statusReply.Content)
}

func (suite *StatusCreateTestSuite) TestPostNewStatusWithEmoji() {
Expand Down Expand Up @@ -252,7 +252,7 @@ func (suite *StatusCreateTestSuite) TestPostNewStatusWithEmoji() {
suite.NoError(err)

suite.Equal("", statusReply.SpoilerText)
suite.Equal("<p>here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow: <br/> here&#39;s an emoji that isn&#39;t in the db: :test_emoji:</p>", statusReply.Content)
suite.Equal("<p>here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow:<br>here's an emoji that isn't in the db: :test_emoji:</p>", statusReply.Content)

suite.Len(statusReply.Emojis, 1)
apiEmoji := statusReply.Emojis[0]
Expand Down Expand Up @@ -371,7 +371,7 @@ func (suite *StatusCreateTestSuite) TestAttachNewMediaSuccess() {
suite.NoError(err)

suite.Equal("", statusResponse.SpoilerText)
suite.Equal("<p>here&#39;s an image attachment</p>", statusResponse.Content)
suite.Equal("<p>here's an image attachment</p>", statusResponse.Content)
suite.False(statusResponse.Sensitive)
suite.Equal(apimodel.VisibilityPublic, statusResponse.Visibility)

Expand Down
63 changes: 30 additions & 33 deletions internal/db/bundb/bundb.go
Original file line number Diff line number Diff line change
Expand Up @@ -473,43 +473,40 @@ func sqlitePragmas(ctx context.Context, conn *DBConn) error {
CONVERSION FUNCTIONS
*/

func (dbService *DBService) TagStringsToTags(ctx context.Context, tags []string, originAccountID string) ([]*gtsmodel.Tag, error) {
func (dbService *DBService) TagStringToTag(ctx context.Context, t string, originAccountID string) (*gtsmodel.Tag, error) {
protocol := config.GetProtocol()
host := config.GetHost()
now := time.Now()

newTags := []*gtsmodel.Tag{}
for _, t := range tags {
tag := &gtsmodel.Tag{}
// we can use selectorinsert here to create the new tag if it doesn't exist already
// inserted will be true if this is a new tag we just created
if err := dbService.conn.NewSelect().Model(tag).Where("LOWER(?) = LOWER(?)", bun.Ident("name"), t).Scan(ctx); err != nil {
if err == sql.ErrNoRows {
// tag doesn't exist yet so populate it
newID, err := id.NewRandomULID()
if err != nil {
return nil, err
}
tag.ID = newID
tag.URL = fmt.Sprintf("%s://%s/tags/%s", protocol, host, t)
tag.Name = t
tag.FirstSeenFromAccountID = originAccountID
tag.CreatedAt = time.Now()
tag.UpdatedAt = time.Now()
useable := true
tag.Useable = &useable
listable := true
tag.Listable = &listable
} else {
return nil, fmt.Errorf("error getting tag with name %s: %s", t, err)
}
}
tag := &gtsmodel.Tag{}
// look up an existing tag with this name (case-insensitive); a missing row is not an error here,
// it just leaves tag.ID empty so that the new tag gets populated below
if err := dbService.conn.NewSelect().Model(tag).Where("LOWER(?) = LOWER(?)", bun.Ident("name"), t).Scan(ctx); err != nil && err != sql.ErrNoRows {
return nil, fmt.Errorf("error getting tag with name %s: %s", t, err)
}

// bail already if the tag isn't useable
if !*tag.Useable {
continue
if tag.ID == "" {
// tag doesn't exist yet so populate it
newID, err := id.NewRandomULID()
if err != nil {
return nil, err
}
tag.LastStatusAt = time.Now()
newTags = append(newTags, tag)
tag.ID = newID
tag.URL = protocol + "://" + host + "/tags/" + t
tag.Name = t
tag.FirstSeenFromAccountID = originAccountID
tag.CreatedAt = now
tag.UpdatedAt = now
useable := true
tag.Useable = &useable
listable := true
tag.Listable = &listable
}

// bail already if the tag isn't useable
if !*tag.Useable {
return nil, fmt.Errorf("tag %s is not useable", t)
}
return newTags, nil
tag.LastStatusAt = now
return tag, nil
}
8 changes: 4 additions & 4 deletions internal/db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,12 @@ type DB interface {
USEFUL CONVERSION FUNCTIONS
*/

// TagStringsToTags takes a slice of deduplicated, lowercase tags in the form "somehashtag", which have been
// TagStringToTag takes a lowercase tag in the form "somehashtag", which has been
// used in a status. It takes the id of the account that wrote the status, and then
// returns a slice of *apimodel.Tag corresponding to the given tags. If the tag already exists in database, that tag
// returns a *gtsmodel.Tag corresponding to the given tag. If the tag already exists in the database, that tag
// will be returned. Otherwise a pointer to a new tag struct will be created and returned.
//
// Note: this func doesn't/shouldn't do any manipulation of the tags in the DB, it's just for checking
// Note: this func doesn't/shouldn't do any manipulation of tags in the DB, it's just for checking
// if they exist in the db already, and conveniently returning them, or creating new tag structs.
TagStringsToTags(ctx context.Context, tags []string, originAccountID string) ([]*gtsmodel.Tag, error)
TagStringToTag(ctx context.Context, tag string, originAccountID string) (*gtsmodel.Tag, error)
}
81 changes: 18 additions & 63 deletions internal/processing/account/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,12 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/ap"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/messages"
"github.com/superseriousbusiness/gotosocial/internal/text"
"github.com/superseriousbusiness/gotosocial/internal/util"
"github.com/superseriousbusiness/gotosocial/internal/validate"
)

Expand All @@ -47,14 +45,20 @@ func (p *processor) Update(ctx context.Context, account *gtsmodel.Account, form
account.Bot = form.Bot
}

var updateEmojis bool
account.Emojis = []*gtsmodel.Emoji{}
account.EmojiIDs = []string{}

if form.DisplayName != nil {
if err := validate.DisplayName(*form.DisplayName); err != nil {
return nil, gtserror.NewErrorBadRequest(err)
}
account.DisplayName = text.SanitizePlaintext(*form.DisplayName)
updateEmojis = true

formatResult := p.formatter.FromPlainEmojiOnly(ctx, p.parseMention, account.ID, "", account.DisplayName)
for _, emoji := range formatResult.Emojis {
account.Emojis = append(account.Emojis, emoji)
account.EmojiIDs = append(account.EmojiIDs, emoji.ID)
}
}

if form.Note != nil {
Expand All @@ -66,36 +70,19 @@ func (p *processor) Update(ctx context.Context, account *gtsmodel.Account, form
account.NoteRaw = *form.Note

// Process note to generate a valid HTML representation
note, err := p.processNote(ctx, *form.Note, account)
if err != nil {
return nil, gtserror.NewErrorBadRequest(err)
var f text.FormatFunc
if account.StatusFormat == "markdown" {
f = p.formatter.FromMarkdown
} else {
f = p.formatter.FromPlain
}
formatted := f(ctx, p.parseMention, account.ID, "", *form.Note)

// Set updated HTML-ified note
account.Note = note
updateEmojis = true
}

if updateEmojis {
// account emojis -- treat the sanitized display name and raw
// note like one long text for the purposes of deriving emojis
accountEmojiShortcodes := util.DeriveEmojisFromText(account.DisplayName + "\n\n" + account.NoteRaw)
account.Emojis = make([]*gtsmodel.Emoji, 0, len(accountEmojiShortcodes))
account.EmojiIDs = make([]string, 0, len(accountEmojiShortcodes))

for _, shortcode := range accountEmojiShortcodes {
emoji, err := p.db.GetEmojiByShortcodeDomain(ctx, shortcode, "")
if err != nil {
if err != db.ErrNoEntries {
log.Errorf("error getting local emoji with shortcode %s: %s", shortcode, err)
}
continue
}

if *emoji.VisibleInPicker && !*emoji.Disabled {
account.Emojis = append(account.Emojis, emoji)
account.EmojiIDs = append(account.EmojiIDs, emoji.ID)
}
account.Note = formatted.HTML
for _, emoji := range formatted.Emojis {
account.Emojis = append(account.Emojis, emoji)
account.EmojiIDs = append(account.EmojiIDs, emoji.ID)
}
}

Expand Down Expand Up @@ -240,35 +227,3 @@ func (p *processor) UpdateHeader(ctx context.Context, header *multipart.FileHead

return processingMedia.LoadAttachment(ctx)
}

func (p *processor) processNote(ctx context.Context, note string, account *gtsmodel.Account) (string, error) {
if note == "" {
return "", nil
}

tagStrings := util.DeriveHashtagsFromText(note)
tags, err := p.db.TagStringsToTags(ctx, tagStrings, account.ID)
if err != nil {
return "", err
}

mentionStrings := util.DeriveMentionNamesFromText(note)
mentions := []*gtsmodel.Mention{}
for _, mentionString := range mentionStrings {
mention, err := p.parseMention(ctx, mentionString, account.ID, "")
if err != nil {
continue
}
mentions = append(mentions, mention)
}

// TODO: support emojis in account notes
// emojiStrings := util.DeriveEmojisFromText(note)
// emojis, err := p.db.EmojiStringsToEmojis(ctx, emojiStrings)

if account.StatusFormat == "markdown" {
return p.formatter.FromMarkdown(ctx, note, mentions, tags, nil), nil
}

return p.formatter.FromPlain(ctx, note, mentions, tags), nil
}
4 changes: 2 additions & 2 deletions internal/processing/account/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ func (suite *AccountUpdateTestSuite) TestAccountUpdateWithMention() {
var (
locked = true
displayName = "new display name"
note = "#hello here i am!\n\ngo check out @1happyturtle, they have a cool account!\n"
noteExpected = "<p><a href=\"http://localhost:8080/tags/hello\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hello</span></a> here i am!<br/><br/>go check out <span class=\"h-card\"><a href=\"http://localhost:8080/@1happyturtle\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>1happyturtle</span></a></span>, they have a cool account!</p>"
note = "#hello here i am!\n\ngo check out @1happyturtle, they have a cool account!"
noteExpected = "<p><a href=\"http://localhost:8080/tags/hello\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hello</span></a> here i am!<br><br>go check out <span class=\"h-card\"><a href=\"http://localhost:8080/@1happyturtle\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>1happyturtle</span></a></span>, they have a cool account!</p>"
)

form := &apimodel.UpdateCredentialsRequest{
Expand Down
12 changes: 0 additions & 12 deletions internal/processing/status/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,18 +76,6 @@ func (p *processor) Create(ctx context.Context, account *gtsmodel.Account, appli
return nil, gtserror.NewErrorInternalError(err)
}

if err := p.ProcessMentions(ctx, form, account.ID, newStatus); err != nil {
return nil, gtserror.NewErrorInternalError(err)
}

if err := p.ProcessTags(ctx, form, account.ID, newStatus); err != nil {
return nil, gtserror.NewErrorInternalError(err)
}

if err := p.ProcessEmojis(ctx, form, account.ID, newStatus); err != nil {
return nil, gtserror.NewErrorInternalError(err)
}

if err := p.ProcessContent(ctx, form, account.ID, newStatus); err != nil {
return nil, gtserror.NewErrorInternalError(err)
}
Expand Down
3 changes: 0 additions & 3 deletions internal/processing/status/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,6 @@ type Processor interface {
ProcessReplyToID(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, thisAccountID string, status *gtsmodel.Status) gtserror.WithCode
ProcessMediaIDs(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, thisAccountID string, status *gtsmodel.Status) gtserror.WithCode
ProcessLanguage(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountDefaultLanguage string, status *gtsmodel.Status) error
ProcessMentions(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error
ProcessTags(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error
ProcessEmojis(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error
ProcessContent(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error
}

Expand Down
Loading

0 comments on commit 49beb17

Please sign in to comment.