Skip to content

Commit

Permalink
Telemetry: delete old BuildData models (#9403)
Browse files Browse the repository at this point in the history
Define a task that deletes `BuildData` objects older than
`RTD_TELEMETRY_DATA_RETENTION_DAYS`, which is set to 180 days for now. This task
is configured to run every day at 2AM.

Related #9328
  • Loading branch information
humitos committed Jul 6, 2022
1 parent dc001ae commit d10e61d
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 0 deletions.
8 changes: 8 additions & 0 deletions readthedocs/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ def SESSION_COOKIE_SAMESITE(self):
RTD_ANALYTICS_DEFAULT_RETENTION_DAYS = 30 * 3
RTD_AUDITLOGS_DEFAULT_RETENTION_DAYS = 30 * 3

# Number of days to keep BuildData models in the database before deleting them
RTD_TELEMETRY_DATA_RETENTION_DAYS = 30 * 6 # 180 days / 6 months

# Database and API hitting settings
DONT_HIT_API = False
DONT_HIT_DB = True
Expand Down Expand Up @@ -419,6 +422,11 @@ def TEMPLATES(self):
'schedule': crontab(minute=0, hour=1),
'options': {'queue': 'web'},
},
'every-day-delete-old-buildata-models': {
'task': 'readthedocs.telemetry.tasks.delete_old_build_data',
'schedule': crontab(minute=0, hour=2),
'options': {'queue': 'web'},
},
'every-day-resync-sso-organization-users': {
'task': 'readthedocs.oauth.tasks.sync_remote_repositories_organizations',
'schedule': crontab(minute=0, hour=4),
Expand Down
18 changes: 18 additions & 0 deletions readthedocs/telemetry/tasks.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""Tasks related to telemetry."""

from django.conf import settings
from django.utils import timezone

from readthedocs.builds.models import Build
from readthedocs.telemetry.models import BuildData
from readthedocs.worker import app
Expand All @@ -16,3 +19,18 @@ def save_build_data(build_id, data):
build = Build.objects.filter(id=build_id).first()
if build:
BuildData.objects.collect(build, data)


@app.task(queue="web")
def delete_old_build_data():
    """
    Delete BuildData models older than ``RTD_TELEMETRY_DATA_RETENTION_DAYS``.

    This is intended to run from a periodic task daily.

    NOTE: the logic of this task could be improved to keep for longer the data
    we care more about (e.g. active projects) and remove the data we don't
    (e.g. builds from spam projects).

    :returns: whatever ``QuerySet.delete()`` returns for the matched rows.
    """
    # ``timedelta`` is not part of the documented ``django.utils.timezone``
    # API (it is only reachable there because that module imports it), so
    # take it from the standard library instead of ``timezone.timedelta``.
    from datetime import timedelta

    retention_days = settings.RTD_TELEMETRY_DATA_RETENTION_DAYS
    # The cutoff is a calendar date: rows whose ``created`` value is strictly
    # before that date are removed.
    cutoff_date = timezone.now().date() - timedelta(days=retention_days)
    return BuildData.objects.filter(created__lt=cutoff_date).delete()

0 comments on commit d10e61d

Please sign in to comment.