Skip to content

Commit

Permalink
[feature] Cleanup unattached local media (#680)
Browse files Browse the repository at this point in the history
* add localUnattached db function

* add parseOlderThan util function

* add pruneunusedlocalattachments to media manager

* add unusedlocal pruning to schedule + admin call

* set number of days to keep as a const

* fix test
  • Loading branch information
tsmethurst committed Jun 30, 2022
1 parent 07620ac commit 9e7d022
Show file tree
Hide file tree
Showing 9 changed files with 253 additions and 10 deletions.
26 changes: 26 additions & 0 deletions internal/db/bundb/media.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,29 @@ func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit

return attachments, nil
}

// GetLocalUnattachedOlderThan selects cached media attachments that have no
// remote URL, were created before olderThan, are flagged as neither avatar nor
// header, and have no status ID.
//
// If maxID is non-empty, only attachments with id < maxID are selected. If limit
// is non-zero, at most limit attachments are returned. Results are ordered by id
// descending so that the ID of the last returned attachment can be passed back in
// as maxID to fetch the next page without skipping any rows.
func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, maxID string, limit int) ([]*gtsmodel.MediaAttachment, db.Error) {
	attachments := []*gtsmodel.MediaAttachment{}

	q := m.newMediaQ(&attachments).
		Where("media_attachment.cached = true").
		Where("media_attachment.avatar = false").
		Where("media_attachment.header = false").
		Where("media_attachment.created_at < ?", olderThan).
		Where("media_attachment.remote_url IS NULL").
		Where("media_attachment.status_id IS NULL").
		// a deterministic order is required for maxID paging to be meaningful
		Order("media_attachment.id DESC")

	if maxID != "" {
		q = q.Where("media_attachment.id < ?", maxID)
	}

	if limit != 0 {
		q = q.Limit(limit)
	}

	if err := q.Scan(ctx); err != nil {
		return nil, m.conn.ProcessError(err)
	}

	return attachments, nil
}
9 changes: 9 additions & 0 deletions internal/db/bundb/media_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"time"

"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/testrig"
)

type MediaTestSuite struct {
Expand Down Expand Up @@ -51,6 +52,14 @@ func (suite *MediaTestSuite) TestGetAvisAndHeaders() {
suite.Len(attachments, 2)
}

// TestGetLocalUnattachedOlderThan queries with a cutoff of 2090-06-04T13:12:00Z,
// no maxID and a limit of 10, and expects exactly one attachment back.
func (suite *MediaTestSuite) TestGetLocalUnattachedOlderThan() {
	cutoff := testrig.TimeMustParse("2090-06-04T13:12:00Z")

	found, err := suite.db.GetLocalUnattachedOlderThan(context.Background(), cutoff, "", 10)
	suite.NoError(err)
	suite.Len(found, 1)
}

// TestMediaTestSuite hands a fresh MediaTestSuite to suite.Run so it is
// executed by go test.
func TestMediaTestSuite(t *testing.T) {
	suite.Run(t, &MediaTestSuite{})
}
5 changes: 5 additions & 0 deletions internal/db/media.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,9 @@ type Media interface {
// GetAvatarsAndHeaders fetches limit n avatars and headers with an id < maxID. These headers
// and avis may be in use or not; the caller should check this if it's important.
GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error)
// GetLocalUnattachedOlderThan fetches limit n local media attachments, older than the given time, which
// aren't headers or avatars, and aren't attached to a status. In other words, attachments which were uploaded
// but never used for whatever reason, or attachments that were attached to a status which was subsequently
// deleted.
GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error)
}
26 changes: 24 additions & 2 deletions internal/media/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ import (
// selectPruneLimit is the number of media entries to select at a time from the db when pruning.
const selectPruneLimit = 20

// UnusedLocalAttachmentCacheDays is the number of days to keep local media in storage if it
// is not attached to a status, or was never attached to a status.
const UnusedLocalAttachmentCacheDays = 3

// Manager provides an interface for managing media: parsing, storing, and retrieving media objects like photos, videos, and gifs.
type Manager interface {
// ProcessMedia begins the process of decoding and storing the given data as an attachment.
Expand Down Expand Up @@ -75,11 +79,16 @@ type Manager interface {
//
// The returned int is the amount of media that was pruned by this function.
PruneAllRemote(ctx context.Context, olderThanDays int) (int, error)
// PruneAllMeta prunes unused meta media -- currently, this means unused avatars + headers, but can also be extended
// to include things like attachments that were uploaded on this server but left unused, etc.
// PruneAllMeta prunes unused/out of date headers and avatars cached on this instance.
//
// The returned int is the amount of media that was pruned by this function.
PruneAllMeta(ctx context.Context) (int, error)
// PruneUnusedLocalAttachments prunes unused media attachments that were uploaded by
// a user on this instance, but never actually attached to a status, or attached but
// later detached.
//
// The returned int is the amount of media that was pruned by this function.
PruneUnusedLocalAttachments(ctx context.Context) (int, error)

// Stop stops the underlying worker pool of the manager. It should be called
// when closing GoToSocial in order to cleanly finish any in-progress jobs.
Expand Down Expand Up @@ -210,6 +219,19 @@ func scheduleCleanupJobs(m *manager) error {
return fmt.Errorf("error starting media manager meta cleanup job: %s", err)
}

if _, err := c.AddFunc("@midnight", func() {
begin := time.Now()
pruned, err := m.PruneUnusedLocalAttachments(pruneCtx)
if err != nil {
logrus.Errorf("media manager: error pruning unused local attachments: %s", err)
return
}
logrus.Infof("media manager: pruned %d unused local attachments in %s", pruned, time.Since(begin))
}); err != nil {
pruneCancel()
return fmt.Errorf("error starting media manager unused local attachments cleanup job: %s", err)
}

// start remote cache cleanup cronjob if configured
if mediaRemoteCacheDays := config.GetMediaRemoteCacheDays(); mediaRemoteCacheDays > 0 {
if _, err := c.AddFunc("@midnight", func() {
Expand Down
10 changes: 2 additions & 8 deletions internal/media/pruneremote.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ package media
import (
"context"
"fmt"
"time"

"codeberg.org/gruf/go-store/storage"
"github.com/sirupsen/logrus"
Expand All @@ -32,15 +31,10 @@ import (
func (m *manager) PruneAllRemote(ctx context.Context, olderThanDays int) (int, error) {
var totalPruned int

// convert days into a duration string
olderThanHoursString := fmt.Sprintf("%dh", olderThanDays*24)
// parse the duration string into a duration
olderThanHours, err := time.ParseDuration(olderThanHoursString)
olderThan, err := parseOlderThan(olderThanDays)
if err != nil {
return totalPruned, fmt.Errorf("PruneAllRemote: %d", err)
return totalPruned, fmt.Errorf("PruneAllRemote: error parsing olderThanDays %d: %s", olderThanDays, err)
}
// 'subtract' that from the time now to give our threshold
olderThan := time.Now().Add(-olderThanHours)
logrus.Infof("PruneAllRemote: pruning media older than %s", olderThan)

// select 20 attachments at a time and prune them
Expand Down
86 changes: 86 additions & 0 deletions internal/media/pruneunusedlocal.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package media

import (
"context"
"fmt"

"codeberg.org/gruf/go-store/storage"
"github.com/sirupsen/logrus"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)

// PruneUnusedLocalAttachments prunes the attachments returned by
// GetLocalUnattachedOlderThan for a cutoff of UnusedLocalAttachmentCacheDays
// days ago, fetching them selectPruneLimit at a time.
//
// The returned int is the number of attachments pruned before returning. The
// first pruning error aborts the run; db.ErrNoEntries from the select is not
// treated as an error.
func (m *manager) PruneUnusedLocalAttachments(ctx context.Context) (int, error) {
	olderThan, err := parseOlderThan(UnusedLocalAttachmentCacheDays)
	if err != nil {
		return 0, fmt.Errorf("PruneUnusedLocalAttachments: error parsing olderThanDays %d: %s", UnusedLocalAttachmentCacheDays, err)
	}
	logrus.Infof("PruneUnusedLocalAttachments: pruning unused local attachments older than %s", olderThan)

	var (
		pruned int
		maxID  string
		batch  []*gtsmodel.MediaAttachment
	)

	for {
		// fetch the next page of candidates below the current maxID
		batch, err = m.db.GetLocalUnattachedOlderThan(ctx, olderThan, maxID, selectPruneLimit)
		if err != nil || len(batch) == 0 {
			break
		}

		// the ID of the last attachment in this page bounds the next page
		count := len(batch)
		maxID = batch[count-1].ID
		logrus.Tracef("PruneUnusedLocalAttachments: got %d unused local attachments older than %s with maxID < %s", count, olderThan, maxID)

		for _, candidate := range batch {
			if pruneErr := m.pruneOneLocal(ctx, candidate); pruneErr != nil {
				return pruned, pruneErr
			}
			pruned++
		}
	}

	// leaving the loop on anything other than "no entries" is a real failure
	if err != nil && err != db.ErrNoEntries {
		return pruned, err
	}

	logrus.Infof("PruneUnusedLocalAttachments: finished pruning: pruned %d entries", pruned)
	return pruned, nil
}

// pruneOneLocal deletes the given attachment's full size file and thumbnail
// from storage, then deletes the attachment itself from the database.
//
// Empty paths are skipped, and storage.ErrNotFound on delete is ignored.
func (m *manager) pruneOneLocal(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
	// remove deletes one stored object; a missing object is not an error
	remove := func(path string) error {
		if path == "" {
			return nil
		}
		logrus.Tracef("pruneOneLocal: deleting %s", path)
		if err := m.storage.Delete(path); err != nil && err != storage.ErrNotFound {
			return err
		}
		return nil
	}

	// full size attachment first
	if err := remove(attachment.File.Path); err != nil {
		return err
	}

	// then the thumbnail
	if err := remove(attachment.Thumbnail.Path); err != nil {
		return err
	}

	// finally drop the database entry
	return m.db.DeleteByID(ctx, attachment.ID, attachment)
}
75 changes: 75 additions & 0 deletions internal/media/pruneunusedlocal_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package media_test

import (
"context"
"testing"

"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/db"
)

// PruneUnusedLocalTestSuite groups the tests for PruneUnusedLocalAttachments.
// It embeds MediaStandardTestSuite, whose fields (manager, db, storage,
// testAttachments) the test methods use.
type PruneUnusedLocalTestSuite struct {
MediaStandardTestSuite
}

// TestPruneUnusedLocal prunes once and expects exactly one attachment to be
// removed, after which the unattached fixture can no longer be fetched by ID.
func (suite *PruneUnusedLocalTestSuite) TestPruneUnusedLocal() {
	ctx := context.Background()

	fixture := suite.testAttachments["local_account_1_unattached_1"]
	suite.True(fixture.Cached)

	pruned, err := suite.manager.PruneUnusedLocalAttachments(ctx)
	suite.NoError(err)
	suite.Equal(1, pruned)

	// the db entry should be gone now
	_, err = suite.db.GetAttachmentByID(ctx, fixture.ID)
	suite.ErrorIs(err, db.ErrNoEntries)
}

// TestPruneRemoteTwice runs PruneUnusedLocalAttachments twice in a row: the
// first run should prune one attachment and the second should prune nothing.
//
// NOTE: despite the "Remote" in its name, this test exercises the unused
// local attachment prune.
func (suite *PruneUnusedLocalTestSuite) TestPruneRemoteTwice() {
	ctx := context.Background()

	first, err := suite.manager.PruneUnusedLocalAttachments(ctx)
	suite.NoError(err)
	suite.Equal(1, first)

	// nothing should be left for a second pass to prune
	second, err := suite.manager.PruneUnusedLocalAttachments(ctx)
	suite.NoError(err)
	suite.Equal(0, second)
}

// TestPruneOneNonExistent removes the unattached fixture's file from storage
// up front, then checks that pruning still succeeds and counts one pruned
// attachment even though the file is already gone.
func (suite *PruneUnusedLocalTestSuite) TestPruneOneNonExistent() {
	ctx := context.Background()
	fixture := suite.testAttachments["local_account_1_unattached_1"]

	// look the attachment up and delete its stored file out from under the db
	stored, err := suite.db.GetAttachmentByID(ctx, fixture.ID)
	suite.NoError(err)
	suite.True(stored.Cached)
	suite.NoError(suite.storage.Delete(stored.File.Path))

	// pruning an item that has a db entry but no file should still succeed
	pruned, err := suite.manager.PruneUnusedLocalAttachments(ctx)
	suite.NoError(err)
	suite.Equal(1, pruned)
}

// TestPruneUnusedLocalTestSuite hands a fresh PruneUnusedLocalTestSuite to
// suite.Run so it is executed by go test.
func TestPruneUnusedLocalTestSuite(t *testing.T) {
	suite.Run(t, new(PruneUnusedLocalTestSuite))
}
17 changes: 17 additions & 0 deletions internal/media/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ package media
import (
"errors"
"fmt"
"time"

"github.com/h2non/filetype"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -128,3 +129,19 @@ func (l *logrusWrapper) Info(msg string, keysAndValues ...interface{}) {
// Error passes the given error, message and key/value pairs to logrus.Error,
// prefixed with "media manager cron logger: ".
func (l *logrusWrapper) Error(err error, msg string, keysAndValues ...interface{}) {
// NOTE(review): logrus.Error appears to join its arguments fmt.Sprint-style, so err and msg
// may be emitted without a separator between them — confirm, and consider Errorf if so.
logrus.Error("media manager cron logger: ", err, msg, keysAndValues)
}

// parseOlderThan returns the point in time that lies olderThanDays days
// (counted as 24-hour periods) before now.
//
// An error is returned if olderThanDays is so large in magnitude that the
// equivalent time.Duration would overflow.
func parseOlderThan(olderThanDays int) (time.Time, error) {
	const day = 24 * time.Hour

	// largest number of whole days representable as a time.Duration,
	// which is a count of int64 nanoseconds
	const maxDays = int64((1<<63 - 1) / day)

	// guard explicitly rather than letting the multiplication below wrap around
	if d := int64(olderThanDays); d > maxDays || d < -maxDays {
		return time.Time{}, fmt.Errorf("olderThanDays %d overflows time.Duration", olderThanDays)
	}

	// 'subtract' the duration from the time now to give our threshold
	return time.Now().Add(-time.Duration(olderThanDays) * day), nil
}
9 changes: 9 additions & 0 deletions internal/processing/admin/mediaprune.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,15 @@ func (p *processor) MediaPrune(ctx context.Context, mediaRemoteCacheDays int) gt
}
}()

go func() {
pruned, err := p.mediaManager.PruneUnusedLocalAttachments(ctx)
if err != nil {
logrus.Errorf("MediaPrune: error pruning unused local cache: %s", err)
} else {
logrus.Infof("MediaPrune: pruned %d unused local cache entries", pruned)
}
}()

go func() {
pruned, err := p.mediaManager.PruneAllMeta(ctx)
if err != nil {
Expand Down

0 comments on commit 9e7d022

Please sign in to comment.