Skip to content

Commit

Permalink
[feature] Unused avatar and header cleanup (#574)
Browse files Browse the repository at this point in the history
* rename + tidy up remote pruning

* fix media attachment account join
see https://bun.uptrace.dev/guide/golang-orm.html#table-relationships

* update logging to new function name

* add get avatars and headers to bun

* add pruneallmeta function

* don't set uncached since we're deleting anyway

* fix totalPruned being in wrong place

* test pruning meta

* go fmt ./...

* rename mediaprune

* add meta pruning to routine mediaprune

* tidy up cleanup job scheduling

* rename adminmediaremoteprune

* update mediacleanup to use renamed prune func

* update swagger docs a little bit

* reuse cancel + context
  • Loading branch information
tsmethurst committed May 15, 2022
1 parent 6e947ff commit b143877
Show file tree
Hide file tree
Showing 17 changed files with 365 additions and 80 deletions.
2 changes: 1 addition & 1 deletion cmd/gotosocial/action/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ var Start action.GTSAction = func(ctx context.Context) error {
}

// perform initial media prune in case value of MediaRemoteCacheDays changed
if err := processor.AdminMediaRemotePrune(ctx, viper.GetInt(config.Keys.MediaRemoteCacheDays)); err != nil {
if err := processor.AdminMediaPrune(ctx, viper.GetInt(config.Keys.MediaRemoteCacheDays)); err != nil {
return fmt.Errorf("error during initial media prune: %s", err)
}

Expand Down
1 change: 1 addition & 0 deletions docs/api/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2580,6 +2580,7 @@ paths:
- application/json
- application/xml
- application/x-www-form-urlencoded
description: Also cleans up unused headers + avatars from the media cache.
operationId: mediaCleanup
parameters:
- description: |-
Expand Down
3 changes: 2 additions & 1 deletion internal/api/client/admin/mediacleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
// MediaCleanupPOSTHandler swagger:operation POST /api/v1/admin/media_cleanup mediaCleanup
//
// Clean up remote media older than the specified number of days.
// Also cleans up unused headers + avatars from the media cache.
//
// ---
// tags:
Expand Down Expand Up @@ -100,7 +101,7 @@ func (m *Module) MediaCleanupPOSTHandler(c *gin.Context) {
remoteCacheDays = 0
}

if errWithCode := m.processor.AdminMediaRemotePrune(c.Request.Context(), remoteCacheDays); errWithCode != nil {
if errWithCode := m.processor.AdminMediaPrune(c.Request.Context(), remoteCacheDays); errWithCode != nil {
l.Debugf("error starting prune of remote media: %s", errWithCode.Error())
c.JSON(errWithCode.Code(), gin.H{"error": errWithCode.Safe()})
return
Expand Down
26 changes: 26 additions & 0 deletions internal/db/bundb/media.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,29 @@ func (m *mediaDB) GetRemoteOlderThan(ctx context.Context, olderThan time.Time, l
}
return attachments, nil
}

// GetAvatarsAndHeaders selects media attachments that are flagged as an avatar
// or as a header, ordered by ID descending.
//
// A non-empty maxID restricts the results to attachments with an ID lower than
// maxID; a non-zero limit caps the number of rows selected. Any error from the
// query is passed through m.conn.ProcessError before being returned.
func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, db.Error) {
	found := []*gtsmodel.MediaAttachment{}

	// (avatar = true OR header = true), kept in its own group so that it
	// combines cleanly with the optional ID clause added below.
	avatarOrHeader := func(sub *bun.SelectQuery) *bun.SelectQuery {
		return sub.
			WhereOr("media_attachment.avatar = true").
			WhereOr("media_attachment.header = true")
	}

	query := m.newMediaQ(&found).
		WhereGroup(" AND ", avatarOrHeader).
		Order("media_attachment.id DESC")

	// page downwards from maxID when one was given
	if maxID != "" {
		query = query.Where("media_attachment.id < ?", maxID)
	}

	// a limit of 0 means 'no limit'
	if limit != 0 {
		query = query.Limit(limit)
	}

	err := query.Scan(ctx)
	if err != nil {
		return nil, m.conn.ProcessError(err)
	}

	return found, nil
}
8 changes: 8 additions & 0 deletions internal/db/bundb/media_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ func (suite *MediaTestSuite) TestGetOlder() {
suite.Len(attachments, 2)
}

// TestGetAvisAndHeaders asks the database for up to 20 avatars + headers with
// no maxID set, and expects the call to succeed and return exactly two attachments.
func (suite *MediaTestSuite) TestGetAvisAndHeaders() {
	const limit = 20

	found, err := suite.db.GetAvatarsAndHeaders(context.Background(), "", limit)

	suite.NoError(err)
	suite.Len(found, 2)
}

func TestMediaTestSuite(t *testing.T) {
suite.Run(t, new(MediaTestSuite))
}
3 changes: 3 additions & 0 deletions internal/db/media.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,7 @@ type Media interface {
// The selected media attachments will be those with both a URL and a RemoteURL filled in.
// In other words, media attachments that originated remotely, and that we currently have cached locally.
GetRemoteOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, Error)
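// GetAvatarsAndHeaders fetches up to limit avatars and headers with an ID lower than maxID, highest ID first.
// An empty maxID disables the ID filter, and a limit of 0 disables the limit. The returned avatars and
// headers may or may not be in use; the caller should check this if it's important.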
GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error)
}
2 changes: 1 addition & 1 deletion internal/gtsmodel/mediaattachment.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ type MediaAttachment struct {
Type FileType `validate:"oneof=Image Gif Audio Video Unknown" bun:",nullzero,notnull"` // Type of file (image/gif/audio/video)
FileMeta FileMeta `validate:"required" bun:",embed:filemeta_,nullzero,notnull"` // Metadata about the file
AccountID string `validate:"required,ulid" bun:"type:CHAR(26),nullzero,notnull"` // To which account does this attachment belong
Account *Account `validate:"-" bun:"rel:has-one"` // Account corresponding to accountID
Account *Account `validate:"-" bun:"rel:belongs-to,join:account_id=id"` // Account corresponding to accountID
Description string `validate:"-" bun:""` // Description of the attachment (for screenreaders)
ScheduledStatusID string `validate:"omitempty,ulid" bun:"type:CHAR(26),nullzero"` // To which scheduled status does this attachment belong
Blurhash string `validate:"required_if=Type Image,required_if=Type Gif,required_if=Type Video" bun:",nullzero"` // What is the generated blurhash of this attachment
Expand Down
122 changes: 71 additions & 51 deletions internal/media/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/db"
)

// selectPruneLimit is the number of media entries to select from the db per query when pruning.
const selectPruneLimit = 20

// Manager provides an interface for managing media: parsing, storing, and retrieving media objects like photos, videos, and gifs.
type Manager interface {
// ProcessMedia begins the process of decoding and storing the given data as an attachment.
Expand Down Expand Up @@ -66,10 +69,19 @@ type Manager interface {
ProcessEmoji(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, shortcode string, id string, uri string, ai *AdditionalEmojiInfo) (*ProcessingEmoji, error)
// RecacheMedia refetches, reprocesses, and recaches an existing attachment that has been uncached via pruneRemote.
RecacheMedia(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, attachmentID string) (*ProcessingMedia, error)
// PruneRemote prunes all remote media cached on this instance that's older than the given amount of days.

// PruneAllRemote prunes all remote media attachments cached on this instance which are older than the given amount of days.
// 'Pruning' in this context means removing the locally stored data of the attachment (both thumbnail and full size),
// and setting 'cached' to false on the associated attachment.
PruneRemote(ctx context.Context, olderThanDays int) (int, error)
//
// The returned int is the amount of media that was pruned by this function.
PruneAllRemote(ctx context.Context, olderThanDays int) (int, error)
// PruneAllMeta prunes unused meta media -- currently, this means unused avatars + headers, but can also be extended
// to include things like attachments that were uploaded on this server but left unused, etc.
//
// The returned int is the amount of media that was pruned by this function.
PruneAllMeta(ctx context.Context) (int, error)

// Stop stops the underlying worker pool of the manager. It should be called
// when closing GoToSocial in order to cleanly finish any in-progress jobs.
// It will block until workers are finished processing.
Expand Down Expand Up @@ -128,53 +140,8 @@ func NewManager(database db.DB, storage *kv.KVStore) (Manager, error) {
return nil, err
}

// start remote cache cleanup cronjob if configured
cacheCleanupDays := viper.GetInt(config.Keys.MediaRemoteCacheDays)
if cacheCleanupDays != 0 {
// we need a way of cancelling running jobs if the media manager is told to stop
pruneCtx, pruneCancel := context.WithCancel(context.Background())

// create a new cron instance and add a function to it
c := cron.New(cron.WithLogger(&logrusWrapper{}))

pruneFunc := func() {
begin := time.Now()
pruned, err := m.PruneRemote(pruneCtx, cacheCleanupDays)
if err != nil {
logrus.Errorf("media manager: error pruning remote cache: %s", err)
return
}
logrus.Infof("media manager: pruned %d remote cache entries in %s", pruned, time.Since(begin))
}

// run every night
entryID, err := c.AddFunc("@midnight", pruneFunc)
if err != nil {
pruneCancel()
return nil, fmt.Errorf("error starting media manager remote cache cleanup job: %s", err)
}

// since we're running a cron job, we should define how the manager should stop them
m.stopCronJobs = func() error {
// try to stop any jobs gracefully by waiting til they're finished
cronCtx := c.Stop()

select {
case <-cronCtx.Done():
logrus.Infof("media manager: cron finished jobs and stopped gracefully")
case <-time.After(1 * time.Minute):
logrus.Infof("media manager: cron didn't stop after 60 seconds, will force close")
break
}

// whether the job is finished neatly or we had to wait a minute, cancel the context on the prune job
pruneCancel()
return nil
}

// now start all the cron stuff we've lined up
c.Start()
logrus.Infof("media manager: next scheduled remote cache cleanup is %q", c.Entry(entryID).Next)
if err := scheduleCleanupJobs(m); err != nil {
return nil, err
}

return m, nil
Expand Down Expand Up @@ -213,9 +180,7 @@ func (m *manager) Stop() error {
emojiErr := m.emojiWorker.Stop()

var cronErr error

if m.stopCronJobs != nil {
// only set if cache prune age > 0
cronErr = m.stopCronJobs()
}

Expand All @@ -224,5 +189,60 @@ func (m *manager) Stop() error {
} else if emojiErr != nil {
return emojiErr
}

return cronErr
}

// scheduleCleanupJobs registers the nightly media cleanup jobs on a new cron
// instance, stores a stop function in m.stopCronJobs, and starts the cron.
//
// A meta prune (PruneAllMeta) is always scheduled at "@midnight". A remote
// cache prune (PruneAllRemote) is scheduled as well, but only when
// config.Keys.MediaRemoteCacheDays is greater than 0. Both jobs share one
// cancellable context, which is cancelled by m.stopCronJobs.
//
// An error is returned if either job cannot be added to the cron; in that case
// the shared context is cancelled and the cron is never started. The returned
// error wraps the underlying cause so callers can inspect it with errors.Is/As.
func scheduleCleanupJobs(m *manager) error {
	// create a new cron instance for scheduling cleanup jobs
	c := cron.New(cron.WithLogger(&logrusWrapper{}))

	// one context for every job, so that stopping the manager cancels them all
	pruneCtx, pruneCancel := context.WithCancel(context.Background())

	if _, err := c.AddFunc("@midnight", func() {
		begin := time.Now()
		pruned, err := m.PruneAllMeta(pruneCtx)
		if err != nil {
			logrus.Errorf("media manager: error pruning meta: %s", err)
			return
		}
		logrus.Infof("media manager: pruned %d meta entries in %s", pruned, time.Since(begin))
	}); err != nil {
		pruneCancel()
		return fmt.Errorf("error starting media manager meta cleanup job: %w", err)
	}

	// start remote cache cleanup cronjob if configured
	if mediaRemoteCacheDays := viper.GetInt(config.Keys.MediaRemoteCacheDays); mediaRemoteCacheDays > 0 {
		if _, err := c.AddFunc("@midnight", func() {
			begin := time.Now()
			pruned, err := m.PruneAllRemote(pruneCtx, mediaRemoteCacheDays)
			if err != nil {
				logrus.Errorf("media manager: error pruning remote cache: %s", err)
				return
			}
			logrus.Infof("media manager: pruned %d remote cache entries in %s", pruned, time.Since(begin))
		}); err != nil {
			pruneCancel()
			return fmt.Errorf("error starting media manager remote cache cleanup job: %w", err)
		}
	}

	// try to stop any jobs gracefully by waiting til they're finished
	m.stopCronJobs = func() error {
		cronCtx := c.Stop()

		// bound the wait so a stuck job can't block shutdown forever; an
		// explicit timer (rather than time.After) is released as soon as
		// we return instead of lingering until it fires
		timeout := time.NewTimer(1 * time.Minute)
		defer timeout.Stop()

		select {
		case <-cronCtx.Done():
			logrus.Infof("media manager: cron finished jobs and stopped gracefully")
		case <-timeout.C:
			logrus.Infof("media manager: cron didn't stop after 60 seconds, will force close jobs")
		}

		// whether the jobs finished neatly or we gave up waiting, cancel their context
		pruneCancel()
		return nil
	}

	// now start all the cron stuff we've lined up
	c.Start()
	return nil
}
2 changes: 2 additions & 0 deletions internal/media/media_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ type MediaStandardTestSuite struct {
storage *kv.KVStore
manager media.Manager
testAttachments map[string]*gtsmodel.MediaAttachment
testAccounts map[string]*gtsmodel.Account
}

func (suite *MediaStandardTestSuite) SetupSuite() {
Expand All @@ -48,6 +49,7 @@ func (suite *MediaStandardTestSuite) SetupTest() {
testrig.StandardStorageSetup(suite.storage, "../../testrig/media")
testrig.StandardDBSetup(suite.db, nil)
suite.testAttachments = testrig.NewTestAttachments()
suite.testAccounts = testrig.NewTestAccounts()
suite.manager = testrig.NewTestMediaManager(suite.db, suite.storage)
}

Expand Down
87 changes: 87 additions & 0 deletions internal/media/prunemeta.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package media

import (
"context"

"codeberg.org/gruf/go-store/storage"
"github.com/sirupsen/logrus"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)

// PruneAllMeta pages through the avatars and headers stored in the database,
// selectPruneLimit entries at a time, and prunes every one that is no longer
// in use.
//
// An attachment is considered unused when any of the following holds:
//   - it has no owning account attached
//   - it is a header, but not the owning account's current header
//   - it is an avatar, but not the owning account's current avatar
//
// The returned int is the number of attachments pruned so far, and is also
// returned alongside a non-nil error. db.ErrNoEntries from the database is
// treated as the end of the data rather than as a failure.
func (m *manager) PruneAllMeta(ctx context.Context) (int, error) {
	var (
		totalPruned int
		maxID       string
		page        []*gtsmodel.MediaAttachment
		err         error
	)

	for {
		// fetch the next page of attachments below the current maxID
		page, err = m.db.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit)
		if err != nil {
			if err != db.ErrNoEntries {
				// a real error, bail with what we've counted so far
				return totalPruned, err
			}
			break
		}

		count := len(page)
		if count == 0 {
			// nothing left to look at
			break
		}

		logrus.Tracef("PruneAllMeta: got %d attachments with maxID < %s", count, maxID)

		// the last attachment on this page becomes the upper bound for the next one
		maxID = page[count-1].ID

		for _, candidate := range page {
			switch owner := candidate.Account; {
			case owner == nil:
				// orphaned: no owning account
			case candidate.Header && candidate.ID != owner.HeaderMediaAttachmentID:
				// stale header
			case candidate.Avatar && candidate.ID != owner.AvatarMediaAttachmentID:
				// stale avatar
			default:
				// still in use, leave it alone
				continue
			}

			if pruneErr := m.pruneOneAvatarOrHeader(ctx, candidate); pruneErr != nil {
				return totalPruned, pruneErr
			}
			totalPruned++
		}
	}

	logrus.Infof("PruneAllMeta: finished pruning avatars + headers: pruned %d entries", totalPruned)
	return totalPruned, nil
}

// pruneOneAvatarOrHeader removes the stored files of the given attachment (the
// full size file first, then the thumbnail) and then deletes the attachment's
// database entry by ID.
//
// Empty paths are skipped, and storage.ErrNotFound from storage is ignored.
// Any other storage error is returned immediately, in which case the database
// entry is left untouched.
func (m *manager) pruneOneAvatarOrHeader(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
	storedPaths := [...]string{
		attachment.File.Path,      // full size attachment
		attachment.Thumbnail.Path, // thumbnail
	}

	for _, storedPath := range storedPaths {
		if storedPath == "" {
			continue
		}

		logrus.Tracef("pruneOneAvatarOrHeader: deleting %s", storedPath)
		err := m.storage.Delete(storedPath)
		if err != nil && err != storage.ErrNotFound {
			return err
		}
	}

	// files are gone (or were never there), so drop the entry completely
	return m.db.DeleteByID(ctx, attachment.ID, &gtsmodel.MediaAttachment{})
}
Loading

0 comments on commit b143877

Please sign in to comment.