Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feature] Cleanup unattached local media #680

Merged
merged 6 commits into from
Jun 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions internal/db/bundb/media.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,29 @@ func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit

return attachments, nil
}

// GetLocalUnattachedOlderThan selects media attachments that are cached, are
// neither avatars nor headers, were created before olderThan, and have neither
// a remote URL nor a status ID set.
//
// If maxID is not empty, only attachments with an id < maxID are selected.
// If limit is not 0, at most limit attachments are selected.
//
// Results are ordered by id, highest first, so that a caller paging through
// the set can pass the id of the last returned attachment as the next maxID
// without skipping any rows.
func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, maxID string, limit int) ([]*gtsmodel.MediaAttachment, db.Error) {
	attachments := []*gtsmodel.MediaAttachment{}

	q := m.newMediaQ(&attachments).
		Where("media_attachment.cached = true").
		Where("media_attachment.avatar = false").
		Where("media_attachment.header = false").
		Where("media_attachment.created_at < ?", olderThan).
		Where("media_attachment.remote_url IS NULL").
		Where("media_attachment.status_id IS NULL").
		// Paging relies on the last row of each page carrying the lowest id,
		// which is only guaranteed with an explicit descending order.
		// NOTE(review): assumes newMediaQ does not already apply an ordering — confirm.
		Order("media_attachment.id DESC")

	if maxID != "" {
		q = q.Where("media_attachment.id < ?", maxID)
	}

	if limit != 0 {
		q = q.Limit(limit)
	}

	if err := q.Scan(ctx); err != nil {
		return nil, m.conn.ProcessError(err)
	}

	return attachments, nil
}
9 changes: 9 additions & 0 deletions internal/db/bundb/media_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"time"

"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/testrig"
)

type MediaTestSuite struct {
Expand Down Expand Up @@ -51,6 +52,14 @@ func (suite *MediaTestSuite) TestGetAvisAndHeaders() {
suite.Len(attachments, 2)
}

// TestGetLocalUnattachedOlderThan checks that exactly one attachment is
// returned when querying with a threshold in the year 2090, no maxID, and a
// limit of 10.
func (suite *MediaTestSuite) TestGetLocalUnattachedOlderThan() {
	cutoff := testrig.TimeMustParse("2090-06-04T13:12:00Z")

	found, err := suite.db.GetLocalUnattachedOlderThan(context.Background(), cutoff, "", 10)
	suite.NoError(err)
	suite.Len(found, 1)
}

func TestMediaTestSuite(t *testing.T) {
suite.Run(t, new(MediaTestSuite))
}
5 changes: 5 additions & 0 deletions internal/db/media.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,9 @@ type Media interface {
// GetAvatarsAndHeaders fetches limit n avatars and headers with an id < maxID. These headers
// and avis may be in use or not; the caller should check this if it's important.
GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error)
// GetLocalUnattachedOlderThan fetches limit n local media attachments with an id < maxID, older than the
// given time, which aren't headers or avatars, and aren't attached to a status. In other words, attachments
// which were uploaded but never used for whatever reason, or attachments that were attached to a status
// which was subsequently deleted.
GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error)
}
26 changes: 24 additions & 2 deletions internal/media/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ import (
const (
	// selectPruneLimit is the number of media entries selected from the db
	// in a single query when pruning.
	selectPruneLimit = 20

	// UnusedLocalAttachmentCacheDays is the number of days local media is kept
	// in storage while it is not attached to a status (either because it was
	// never attached, or because it no longer is).
	UnusedLocalAttachmentCacheDays = 3
)

// Manager provides an interface for managing media: parsing, storing, and retrieving media objects like photos, videos, and gifs.
type Manager interface {
// ProcessMedia begins the process of decoding and storing the given data as an attachment.
Expand Down Expand Up @@ -75,11 +79,16 @@ type Manager interface {
//
// The returned int is the amount of media that was pruned by this function.
PruneAllRemote(ctx context.Context, olderThanDays int) (int, error)
// PruneAllMeta prunes unused meta media -- currently, this means unused avatars + headers, but can also be extended
// to include things like attachments that were uploaded on this server but left unused, etc.
// PruneAllMeta prunes unused/out of date headers and avatars cached on this instance.
//
// The returned int is the amount of media that was pruned by this function.
PruneAllMeta(ctx context.Context) (int, error)
// PruneUnusedLocalAttachments prunes unused media attachments that were uploaded by
// a user on this instance, but never actually attached to a status, or attached but
// later detached.
//
// The returned int is the amount of media that was pruned by this function.
PruneUnusedLocalAttachments(ctx context.Context) (int, error)

// Stop stops the underlying worker pool of the manager. It should be called
// when closing GoToSocial in order to cleanly finish any in-progress jobs.
Expand Down Expand Up @@ -210,6 +219,19 @@ func scheduleCleanupJobs(m *manager) error {
return fmt.Errorf("error starting media manager meta cleanup job: %s", err)
}

if _, err := c.AddFunc("@midnight", func() {
begin := time.Now()
pruned, err := m.PruneUnusedLocalAttachments(pruneCtx)
if err != nil {
logrus.Errorf("media manager: error pruning unused local attachments: %s", err)
return
}
logrus.Infof("media manager: pruned %d unused local attachments in %s", pruned, time.Since(begin))
}); err != nil {
pruneCancel()
return fmt.Errorf("error starting media manager unused local attachments cleanup job: %s", err)
}

// start remote cache cleanup cronjob if configured
if mediaRemoteCacheDays := config.GetMediaRemoteCacheDays(); mediaRemoteCacheDays > 0 {
if _, err := c.AddFunc("@midnight", func() {
Expand Down
10 changes: 2 additions & 8 deletions internal/media/pruneremote.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ package media
import (
"context"
"fmt"
"time"

"codeberg.org/gruf/go-store/storage"
"github.com/sirupsen/logrus"
Expand All @@ -32,15 +31,10 @@ import (
func (m *manager) PruneAllRemote(ctx context.Context, olderThanDays int) (int, error) {
var totalPruned int

// convert days into a duration string
olderThanHoursString := fmt.Sprintf("%dh", olderThanDays*24)
// parse the duration string into a duration
olderThanHours, err := time.ParseDuration(olderThanHoursString)
olderThan, err := parseOlderThan(olderThanDays)
if err != nil {
return totalPruned, fmt.Errorf("PruneAllRemote: %d", err)
return totalPruned, fmt.Errorf("PruneAllRemote: error parsing olderThanDays %d: %s", olderThanDays, err)
}
// 'subtract' that from the time now to give our threshold
olderThan := time.Now().Add(-olderThanHours)
logrus.Infof("PruneAllRemote: pruning media older than %s", olderThan)

// select 20 attachments at a time and prune them
Expand Down
86 changes: 86 additions & 0 deletions internal/media/pruneunusedlocal.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package media

import (
"context"
"fmt"

"codeberg.org/gruf/go-store/storage"
"github.com/sirupsen/logrus"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)

// PruneUnusedLocalAttachments removes local attachments that are older than
// UnusedLocalAttachmentCacheDays days and are returned by the database as
// unattached. Attachments are fetched in pages of selectPruneLimit and each
// one is handed to pruneOneLocal.
//
// It returns the number of attachments pruned so far together with the first
// error encountered; db.ErrNoEntries from the database is treated as the
// normal end of the run rather than as a failure.
func (m *manager) PruneUnusedLocalAttachments(ctx context.Context) (int, error) {
	var (
		pruned int
		cursor string
		batch  []*gtsmodel.MediaAttachment
		err    error
	)

	threshold, err := parseOlderThan(UnusedLocalAttachmentCacheDays)
	if err != nil {
		return pruned, fmt.Errorf("PruneUnusedLocalAttachments: error parsing olderThanDays %d: %s", UnusedLocalAttachmentCacheDays, err)
	}
	logrus.Infof("PruneUnusedLocalAttachments: pruning unused local attachments older than %s", threshold)

	// work through the candidates one page at a time
	for {
		batch, err = m.db.GetLocalUnattachedOlderThan(ctx, threshold, cursor, selectPruneLimit)
		if err != nil || len(batch) == 0 {
			// either the db returned an error (inspected below) or nothing is left
			break
		}

		// the last attachment of this page becomes the upper id bound of the next page
		count := len(batch)
		cursor = batch[count-1].ID
		logrus.Tracef("PruneUnusedLocalAttachments: got %d unused local attachments older than %s with maxID < %s", count, threshold, cursor)

		for i := range batch {
			if pruneErr := m.pruneOneLocal(ctx, batch[i]); pruneErr != nil {
				return pruned, pruneErr
			}
			pruned++
		}
	}

	// "no entries" just means we ran out of things to prune
	if err != nil && err != db.ErrNoEntries {
		return pruned, err
	}

	logrus.Infof("PruneUnusedLocalAttachments: finished pruning: pruned %d entries", pruned)
	return pruned, nil
}

// pruneOneLocal deletes the full size file and the thumbnail of the given
// attachment from storage (skipping empty paths and tolerating files that
// are already gone), then deletes the attachment's database entry.
//
// The first storage error other than storage.ErrNotFound is returned
// immediately, in which case the database entry is left in place.
func (m *manager) pruneOneLocal(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
	// full size attachment first, thumbnail second
	paths := [...]string{attachment.File.Path, attachment.Thumbnail.Path}

	for _, p := range paths {
		if p == "" {
			continue
		}

		logrus.Tracef("pruneOneLocal: deleting %s", p)
		err := m.storage.Delete(p)
		if err == nil || err == storage.ErrNotFound {
			continue
		}
		return err
	}

	// files are gone, now drop the db entry itself
	return m.db.DeleteByID(ctx, attachment.ID, attachment)
}
75 changes: 75 additions & 0 deletions internal/media/pruneunusedlocal_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package media_test

import (
"context"
"testing"

"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/db"
)

// PruneUnusedLocalTestSuite groups the tests for pruning unused local
// attachments. It embeds MediaStandardTestSuite and declares no fields
// of its own.
type PruneUnusedLocalTestSuite struct {
	MediaStandardTestSuite
}

// TestPruneUnusedLocal prunes once and expects exactly one attachment to be
// removed, after which the unattached test attachment can no longer be
// fetched from the database.
func (suite *PruneUnusedLocalTestSuite) TestPruneUnusedLocal() {
	ctx := context.Background()

	unattached := suite.testAttachments["local_account_1_unattached_1"]
	suite.True(unattached.Cached)

	prunedCount, err := suite.manager.PruneUnusedLocalAttachments(ctx)
	suite.NoError(err)
	suite.Equal(1, prunedCount)

	// the db entry should be gone now
	_, err = suite.db.GetAttachmentByID(ctx, unattached.ID)
	suite.ErrorIs(err, db.ErrNoEntries)
}

// TestPruneRemoteTwice prunes unused local attachments two times in a row:
// the first run is expected to prune one attachment, the second run none.
func (suite *PruneUnusedLocalTestSuite) TestPruneRemoteTwice() {
	ctx := context.Background()

	firstRun, err := suite.manager.PruneUnusedLocalAttachments(ctx)
	suite.NoError(err)
	suite.Equal(1, firstRun)

	// the first run already removed everything eligible, so nothing is left to prune
	secondRun, err := suite.manager.PruneUnusedLocalAttachments(ctx)
	suite.NoError(err)
	suite.Equal(0, secondRun)
}

// TestPruneOneNonExistent deletes the stored file of the unattached test
// attachment before pruning, and expects the prune to still succeed and
// count that attachment as pruned.
func (suite *PruneUnusedLocalTestSuite) TestPruneOneNonExistent() {
	ctx := context.Background()
	fixture := suite.testAttachments["local_account_1_unattached_1"]

	// remove the stored file while leaving the db entry in place
	stored, err := suite.db.GetAttachmentByID(ctx, fixture.ID)
	suite.NoError(err)
	suite.True(stored.Cached)
	suite.NoError(suite.storage.Delete(stored.File.Path))

	// pruning an entry whose file is already missing should still work
	prunedCount, err := suite.manager.PruneUnusedLocalAttachments(ctx)
	suite.NoError(err)
	suite.Equal(1, prunedCount)
}

func TestPruneUnusedLocalTestSuite(t *testing.T) {
suite.Run(t, &PruneUnusedLocalTestSuite{})
}
17 changes: 17 additions & 0 deletions internal/media/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ package media
import (
"errors"
"fmt"
"time"

"github.com/h2non/filetype"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -128,3 +129,19 @@ func (l *logrusWrapper) Info(msg string, keysAndValues ...interface{}) {
// Error passes err, msg and keysAndValues to logrus.Error, prefixed with
// "media manager cron logger: ". keysAndValues is handed over as a single
// slice argument rather than being expanded.
func (l *logrusWrapper) Error(err error, msg string, keysAndValues ...interface{}) {
	logrus.Error("media manager cron logger: ", err, msg, keysAndValues)
}

// parseOlderThan returns the point in time that lies olderThanDays days
// before now, where a day is counted as a fixed 24 hour span. A negative
// olderThanDays yields a time in the future.
//
// An error (and the zero time.Time) is returned when olderThanDays is too
// large in magnitude to be represented as a time.Duration.
func parseOlderThan(olderThanDays int) (time.Time, error) {
	const day = 24 * time.Hour

	// the largest number of whole days that fits into a time.Duration
	const maxDays = int((1<<63 - 1) / day)

	// guard explicitly: Duration arithmetic wraps around silently on overflow
	if olderThanDays > maxDays || olderThanDays < -maxDays {
		return time.Time{}, fmt.Errorf("olderThanDays %d is out of range for a duration", olderThanDays)
	}

	// 'subtract' the span from the time now to give our threshold
	return time.Now().Add(-time.Duration(olderThanDays) * day), nil
}
9 changes: 9 additions & 0 deletions internal/processing/admin/mediaprune.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,15 @@ func (p *processor) MediaPrune(ctx context.Context, mediaRemoteCacheDays int) gt
}
}()

go func() {
pruned, err := p.mediaManager.PruneUnusedLocalAttachments(ctx)
if err != nil {
logrus.Errorf("MediaPrune: error pruning unused local cache: %s", err)
} else {
logrus.Infof("MediaPrune: pruned %d unused local cache entries", pruned)
}
}()

go func() {
pruned, err := p.mediaManager.PruneAllMeta(ctx)
if err != nil {
Expand Down