Skip to content

Commit

Permalink
rework media processing a little bit (#191)
Browse files Browse the repository at this point in the history
* rework media processing a little bit

* review changes
  • Loading branch information
tsmethurst committed Sep 4, 2021
1 parent ff05046 commit 2b14b20
Show file tree
Hide file tree
Showing 13 changed files with 484 additions and 164 deletions.
5 changes: 1 addition & 4 deletions internal/ap/extract.go
Original file line number Diff line number Diff line change
Expand Up @@ -384,10 +384,7 @@ func ExtractAttachment(i Attachmentable) (*gtsmodel.MediaAttachment, error) {
attachment.RemoteURL = attachmentURL.String()

mediaType := i.GetActivityStreamsMediaType()
if mediaType == nil {
return nil, errors.New("no media type")
}
if mediaType.Get() == "" {
if mediaType == nil || mediaType.Get() == "" {
return nil, errors.New("no media type")
}
attachment.File.ContentType = mediaType.Get()
Expand Down
122 changes: 122 additions & 0 deletions internal/ap/extractattachments_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
GoToSocial
Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package ap_test

import (
"testing"

"github.com/go-fed/activity/streams"
"github.com/go-fed/activity/streams/vocab"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/ap"
"github.com/superseriousbusiness/gotosocial/testrig"
)

// document1 returns a test fixture: an ActivityStreams Document describing a
// remote jpeg image, with its media type, url, name (description) and
// blurhash properties all populated.
func document1() vocab.ActivityStreamsDocument {
	doc := streams.NewActivityStreamsDocument()

	// media type of the remote file
	mediaTypeProp := streams.NewActivityStreamsMediaTypeProperty()
	mediaTypeProp.Set("image/jpeg")
	doc.SetActivityStreamsMediaType(mediaTypeProp)

	// where the remote file can be fetched from
	urlProp := streams.NewActivityStreamsUrlProperty()
	urlProp.AppendIRI(testrig.URLMustParse("https://s3-us-west-2.amazonaws.com/plushcity/media_attachments/files/106/867/380/219/163/828/original/88e8758c5f011439.jpg"))
	doc.SetActivityStreamsUrl(urlProp)

	// the name property carries the image description
	nameProp := streams.NewActivityStreamsNameProperty()
	nameProp.AppendXMLSchemaString("It's a cute plushie.")
	doc.SetActivityStreamsName(nameProp)

	// blurhash as supplied by the remote side
	blurhashProp := streams.NewTootBlurhashProperty()
	blurhashProp.Set("UxQ0EkRP_4tRxtRjWBt7%hozM_ayV@oLf6WB")
	doc.SetTootBlurhash(blurhashProp)

	return doc
}

// attachment1 returns a test fixture: an attachment property containing
// exactly one entry, the document built by document1.
func attachment1() vocab.ActivityStreamsAttachmentProperty {
	prop := streams.NewActivityStreamsAttachmentProperty()
	prop.AppendActivityStreamsDocument(document1())
	return prop
}

// ExtractTestSuite groups the tests for the attachment extraction functions
// of the ap package. It embeds testify's suite.Suite so that test methods
// can use the suite's assertion helpers.
type ExtractTestSuite struct {
suite.Suite
}

// TestExtractAttachments checks that a note carrying one well-formed document
// attachment yields exactly one extracted attachment whose content type,
// remote URL and description match the source document.
func (suite *ExtractTestSuite) TestExtractAttachments() {
	note := streams.NewActivityStreamsNote()
	note.SetActivityStreamsAttachment(attachment1())

	attachments, err := ap.ExtractAttachments(note)
	// These are preconditions for indexing the slice below, so they must stop
	// the test on failure: a plain (non-fatal) Len assertion would let the
	// test continue and panic with an index-out-of-range on attachments[0].
	suite.Require().NoError(err)
	suite.Require().Len(attachments, 1)

	extracted := attachments[0]
	suite.Equal("image/jpeg", extracted.File.ContentType)
	suite.Equal("https://s3-us-west-2.amazonaws.com/plushcity/media_attachments/files/106/867/380/219/163/828/original/88e8758c5f011439.jpg", extracted.RemoteURL)
	suite.Equal("It's a cute plushie.", extracted.Description)
	suite.Empty(extracted.Blurhash) // atm we discard blurhashes and generate them ourselves during processing
}

// TestExtractNoAttachments checks that extracting from a note with no
// attachment property set produces no error and no attachments.
func (suite *ExtractTestSuite) TestExtractNoAttachments() {
	emptyNote := streams.NewActivityStreamsNote()

	extracted, extractErr := ap.ExtractAttachments(emptyNote)

	suite.NoError(extractErr)
	suite.Empty(extracted)
}

// TestExtractAttachmentsMissingContentType checks the plural extraction
// function against a note whose only attachment has an empty media type
// property: the test expects no error and an empty result.
func (suite *ExtractTestSuite) TestExtractAttachmentsMissingContentType() {
	// start from the valid fixture, then blank out its media type
	doc := document1()
	doc.SetActivityStreamsMediaType(streams.NewActivityStreamsMediaTypeProperty())

	attachmentProp := streams.NewActivityStreamsAttachmentProperty()
	attachmentProp.AppendActivityStreamsDocument(doc)

	note := streams.NewActivityStreamsNote()
	note.SetActivityStreamsAttachment(attachmentProp)

	extracted, extractErr := ap.ExtractAttachments(note)

	suite.NoError(extractErr)
	suite.Empty(extracted)
}

// TestExtractAttachmentMissingContentType checks the singular extraction
// function against a document with an empty media type property: the test
// expects the "no media type" error and a nil attachment.
func (suite *ExtractTestSuite) TestExtractAttachmentMissingContentType() {
	// start from the valid fixture, then blank out its media type
	doc := document1()
	doc.SetActivityStreamsMediaType(streams.NewActivityStreamsMediaTypeProperty())

	extracted, extractErr := ap.ExtractAttachment(doc)

	suite.EqualError(extractErr, "no media type")
	suite.Nil(extracted)
}

// TestExtractAttachmentMissingURL checks the singular extraction function
// against a document with an empty url property: the test expects the
// "could not extract url" error and a nil attachment.
func (suite *ExtractTestSuite) TestExtractAttachmentMissingURL() {
	// start from the valid fixture, then blank out its url
	doc := document1()
	doc.SetActivityStreamsUrl(streams.NewActivityStreamsUrlProperty())

	extracted, extractErr := ap.ExtractAttachment(doc)

	suite.EqualError(extractErr, "could not extract url")
	suite.Nil(extracted)
}

// TestExtractTestSuite is the go test entry point that runs every test
// method defined on ExtractTestSuite.
func TestExtractTestSuite(t *testing.T) {
	suite.Run(t, new(ExtractTestSuite))
}
34 changes: 26 additions & 8 deletions internal/federation/dereferencing/attachment.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,23 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)

func (d *deref) GetRemoteAttachment(ctx context.Context, requestingUsername string, remoteAttachmentURI *url.URL, ownerAccountID string, statusID string, expectedContentType string) (*gtsmodel.MediaAttachment, error) {
func (d *deref) GetRemoteAttachment(ctx context.Context, requestingUsername string, minAttachment *gtsmodel.MediaAttachment) (*gtsmodel.MediaAttachment, error) {
if minAttachment.RemoteURL == "" {
return nil, fmt.Errorf("GetRemoteAttachment: minAttachment remote URL was empty")
}
remoteAttachmentURL := minAttachment.RemoteURL

l := d.log.WithFields(logrus.Fields{
"username": requestingUsername,
"remoteAttachmentURI": remoteAttachmentURI,
"remoteAttachmentURL": remoteAttachmentURL,
})

// return early if we already have the attachment somewhere
maybeAttachment := &gtsmodel.MediaAttachment{}
where := []db.Where{
{
Key: "remote_url",
Value: remoteAttachmentURI.String(),
Value: remoteAttachmentURL,
},
}

Expand All @@ -48,12 +54,11 @@ func (d *deref) GetRemoteAttachment(ctx context.Context, requestingUsername stri
return maybeAttachment, nil
}

a, err := d.RefreshAttachment(ctx, requestingUsername, remoteAttachmentURI, ownerAccountID, expectedContentType)
a, err := d.RefreshAttachment(ctx, requestingUsername, minAttachment)
if err != nil {
return nil, fmt.Errorf("GetRemoteAttachment: error refreshing attachment: %s", err)
}

a.StatusID = statusID
if err := d.db.Put(ctx, a); err != nil {
if err != db.ErrAlreadyExists {
return nil, fmt.Errorf("GetRemoteAttachment: error inserting attachment: %s", err)
Expand All @@ -63,19 +68,32 @@ func (d *deref) GetRemoteAttachment(ctx context.Context, requestingUsername stri
return a, nil
}

func (d *deref) RefreshAttachment(ctx context.Context, requestingUsername string, remoteAttachmentURI *url.URL, ownerAccountID string, expectedContentType string) (*gtsmodel.MediaAttachment, error) {
func (d *deref) RefreshAttachment(ctx context.Context, requestingUsername string, minAttachment *gtsmodel.MediaAttachment) (*gtsmodel.MediaAttachment, error) {
// it just doesn't exist or we have to refresh
if minAttachment.AccountID == "" {
return nil, fmt.Errorf("RefreshAttachment: minAttachment account ID was empty")
}

if minAttachment.File.ContentType == "" {
return nil, fmt.Errorf("RefreshAttachment: minAttachment.file.contentType was empty")
}

t, err := d.transportController.NewTransportForUsername(ctx, requestingUsername)
if err != nil {
return nil, fmt.Errorf("RefreshAttachment: error creating transport: %s", err)
}

attachmentBytes, err := t.DereferenceMedia(ctx, remoteAttachmentURI, expectedContentType)
derefURI, err := url.Parse(minAttachment.RemoteURL)
if err != nil {
return nil, err
}

attachmentBytes, err := t.DereferenceMedia(ctx, derefURI, minAttachment.File.ContentType)
if err != nil {
return nil, fmt.Errorf("RefreshAttachment: error dereferencing media: %s", err)
}

a, err := d.mediaHandler.ProcessAttachment(ctx, attachmentBytes, ownerAccountID, remoteAttachmentURI.String())
a, err := d.mediaHandler.ProcessAttachment(ctx, attachmentBytes, minAttachment)
if err != nil {
return nil, fmt.Errorf("RefreshAttachment: error processing attachment: %s", err)
}
Expand Down
106 changes: 106 additions & 0 deletions internal/federation/dereferencing/attachment_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/*
GoToSocial
Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package dereferencing_test

import (
"context"
"testing"

"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)

// AttachmentTestSuite groups the tests for dereferencing remote media
// attachments. It embeds DereferencerStandardTestSuite, which is declared
// elsewhere in this package and supplies the fields the tests use
// (e.g. suite.dereferencer, suite.db, suite.testAccounts).
type AttachmentTestSuite struct {
DereferencerStandardTestSuite
}

// TestDereferenceAttachmentOK passes a minimal attachment (remote URL, owner
// account, status, content type and description) to GetRemoteAttachment and
// checks that the returned attachment is fully populated. It then fetches the
// attachment from the database by ID and checks that the stored copy carries
// the same values.
func (suite *AttachmentTestSuite) TestDereferenceAttachmentOK() {
	fetchingAccount := suite.testAccounts["local_account_1"]

	attachmentOwner := "01FENS9F666SEQ6TYQWEEY78GM"
	attachmentStatus := "01FENS9NTTVNEX1YZV7GB63MT8"
	attachmentContentType := "image/jpeg"
	attachmentURL := "https://s3-us-west-2.amazonaws.com/plushcity/media_attachments/files/106/867/380/219/163/828/original/88e8758c5f011439.jpg"
	attachmentDescription := "It's a cute plushie."

	minAttachment := &gtsmodel.MediaAttachment{
		RemoteURL: attachmentURL,
		AccountID: attachmentOwner,
		StatusID:  attachmentStatus,
		File: gtsmodel.File{
			ContentType: attachmentContentType,
		},
		Description: attachmentDescription,
	}

	attachment, err := suite.dereferencer.GetRemoteAttachment(context.Background(), fetchingAccount.Username, minAttachment)
	// Fatal assertions: everything below dereferences attachment, so carrying
	// on after a failure here would end in a nil pointer panic instead of a
	// readable test failure.
	suite.Require().NoError(err)
	suite.Require().NotNil(attachment)

	suite.Equal(attachmentOwner, attachment.AccountID)
	suite.Equal(attachmentStatus, attachment.StatusID)
	suite.Equal(attachmentURL, attachment.RemoteURL)
	suite.NotEmpty(attachment.URL)
	suite.NotEmpty(attachment.Blurhash)
	suite.NotEmpty(attachment.ID)
	suite.NotEmpty(attachment.CreatedAt)
	suite.NotEmpty(attachment.UpdatedAt)
	suite.Equal(1.336546184738956, attachment.FileMeta.Original.Aspect)
	suite.Equal(2071680, attachment.FileMeta.Original.Size)
	suite.Equal(1245, attachment.FileMeta.Original.Height)
	suite.Equal(1664, attachment.FileMeta.Original.Width)
	suite.Equal("LwQ9yKRP_4t8t7RjWBt7%hozM_ay", attachment.Blurhash)
	suite.Equal(gtsmodel.ProcessingStatusProcessed, attachment.Processing)
	suite.NotEmpty(attachment.File.Path)
	suite.Equal(attachmentContentType, attachment.File.ContentType)
	suite.Equal(attachmentDescription, attachment.Description)

	suite.NotEmpty(attachment.Thumbnail.Path)
	suite.NotEmpty(attachment.Type)

	// attachment should also now be in the database
	dbAttachment, err := suite.db.GetAttachmentByID(context.Background(), attachment.ID)
	// Same reasoning as above: dbAttachment is dereferenced by every
	// assertion that follows.
	suite.Require().NoError(err)
	suite.Require().NotNil(dbAttachment)

	suite.Equal(attachmentOwner, dbAttachment.AccountID)
	suite.Equal(attachmentStatus, dbAttachment.StatusID)
	suite.Equal(attachmentURL, dbAttachment.RemoteURL)
	suite.NotEmpty(dbAttachment.URL)
	suite.NotEmpty(dbAttachment.Blurhash)
	suite.NotEmpty(dbAttachment.ID)
	suite.NotEmpty(dbAttachment.CreatedAt)
	suite.NotEmpty(dbAttachment.UpdatedAt)
	suite.Equal(1.336546184738956, dbAttachment.FileMeta.Original.Aspect)
	suite.Equal(2071680, dbAttachment.FileMeta.Original.Size)
	suite.Equal(1245, dbAttachment.FileMeta.Original.Height)
	suite.Equal(1664, dbAttachment.FileMeta.Original.Width)
	suite.Equal("LwQ9yKRP_4t8t7RjWBt7%hozM_ay", dbAttachment.Blurhash)
	suite.Equal(gtsmodel.ProcessingStatusProcessed, dbAttachment.Processing)
	suite.NotEmpty(dbAttachment.File.Path)
	suite.Equal(attachmentContentType, dbAttachment.File.ContentType)
	suite.Equal(attachmentDescription, dbAttachment.Description)

	suite.NotEmpty(dbAttachment.Thumbnail.Path)
	suite.NotEmpty(dbAttachment.Type)
}

func TestAttachmentTestSuite(t *testing.T) {
suite.Run(t, new(AttachmentTestSuite))
}
30 changes: 28 additions & 2 deletions internal/federation/dereferencing/dereferencer.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,34 @@ type Dereferencer interface {

GetRemoteInstance(ctx context.Context, username string, remoteInstanceURI *url.URL) (*gtsmodel.Instance, error)

GetRemoteAttachment(ctx context.Context, username string, remoteAttachmentURI *url.URL, ownerAccountID string, statusID string, expectedContentType string) (*gtsmodel.MediaAttachment, error)
RefreshAttachment(ctx context.Context, requestingUsername string, remoteAttachmentURI *url.URL, ownerAccountID string, expectedContentType string) (*gtsmodel.MediaAttachment, error)
// GetRemoteAttachment takes a minimal attachment struct and converts it into a fully fleshed out attachment, stored in the database and instance storage.
//
// The parameter minAttachment must have at least the following fields defined:
// * minAttachment.RemoteURL
// * minAttachment.AccountID
// * minAttachment.File.ContentType
//
// The returned attachment will have an ID generated for it, so no need to generate one beforehand.
// A blurhash will also be generated for the attachment.
//
// Most other fields will be preserved on the passed attachment, including:
// * minAttachment.StatusID
// * minAttachment.CreatedAt
// * minAttachment.UpdatedAt
// * minAttachment.FileMeta
// * minAttachment.AccountID
// * minAttachment.Description
// * minAttachment.ScheduledStatusID
// * minAttachment.Thumbnail.RemoteURL
// * minAttachment.Avatar
// * minAttachment.Header
//
// GetRemoteAttachment will return early if an attachment with the same value as minAttachment.RemoteURL
// is found in the database -- then that attachment will be returned and nothing else will be changed or stored.
GetRemoteAttachment(ctx context.Context, requestingUsername string, minAttachment *gtsmodel.MediaAttachment) (*gtsmodel.MediaAttachment, error)
// RefreshAttachment is like GetRemoteAttachment, but the attachment will always be dereferenced again,
// whether or not it was already stored in the database.
RefreshAttachment(ctx context.Context, requestingUsername string, minAttachment *gtsmodel.MediaAttachment) (*gtsmodel.MediaAttachment, error)

DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Status, requestingUsername string) error
DereferenceThread(ctx context.Context, username string, statusIRI *url.URL) error
Expand Down

0 comments on commit 2b14b20

Please sign in to comment.