diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index af629ae2..a2b1225b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -4,6 +4,7 @@ env: IMAGE_NAME: tubesync on: + workflow_dispatch: push: branches: - main diff --git a/tubesync/common/templates/pagination.html b/tubesync/common/templates/pagination.html index 0e378a86..e48b24d8 100644 --- a/tubesync/common/templates/pagination.html +++ b/tubesync/common/templates/pagination.html @@ -3,7 +3,7 @@
diff --git a/tubesync/sync/migrations/0020_auto_20231024_1825.py b/tubesync/sync/migrations/0020_auto_20231024_1825.py new file mode 100644 index 00000000..295339a8 --- /dev/null +++ b/tubesync/sync/migrations/0020_auto_20231024_1825.py @@ -0,0 +1,29 @@ +# Generated by Django 3.2.22 on 2023-10-24 17:25 + +import django.core.validators +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('sync', '0019_add_delete_removed_media'), + ] + + operations = [ + migrations.AddField( + model_name='source', + name='filter_text', + field=models.CharField(blank=True, default='', help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter string'), + ), + migrations.AlterField( + model_name='source', + name='auto_subtitles', + field=models.BooleanField(default=False, help_text='Accept auto-generated subtitles', verbose_name='accept auto-generated subs'), + ), + migrations.AlterField( + model_name='source', + name='sub_langs', + field=models.CharField(default='en', help_text='List of subtitles langs to download, comma-separated. Example: en,fr or all,-fr,-live_chat', max_length=30, validators=[django.core.validators.RegexValidator(message='Subtitle langs must be a comma-separated list of langs. 
example: en,fr or all,-fr,-live_chat', regex='^(\\-?[\\_\\.a-zA-Z]+,)*(\\-?[\\_\\.a-zA-Z]+){1}$')], verbose_name='subs langs'), + ), + ] diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index bb8c7233..729e21a4 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1,6 +1,7 @@ import os import uuid import json +import re from xml.etree import ElementTree from collections import OrderedDict from datetime import datetime, timedelta @@ -287,6 +288,13 @@ class IndexSchedule(models.IntegerChoices): help_text=_('If "delete old media" is ticked, the number of days after which ' 'to automatically delete media') ) + filter_text = models.CharField( + _('filter string'), + max_length=100, + default='', + blank=True, + help_text=_('Regex compatible filter string for video titles') + ) delete_removed_media = models.BooleanField( _('delete removed media'), default=False, @@ -538,6 +546,9 @@ def get_example_media_format(self): except Exception as e: return '' + def is_regex_match(self, media_item_title): + return bool(re.search(self.filter_text,media_item_title)) + def index_media(self): ''' Index the media source returning a list of media metadata as dicts. 
diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index f27b452d..d1f3d03f 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -104,36 +104,57 @@ def media_post_save(sender, instance, created, **kwargs): # already been downloaded if not instance.downloaded: max_cap_age = instance.source.download_cap_date - published = instance.published - if not published: - if not instance.skip: - log.warn(f'Media: {instance.source} / {instance} has no published date ' - f'set, marking to be skipped') - instance.skip = True - cap_changed = True - else: + filter_text = instance.source.filter_text + published = instance.published + + if instance.skip: + #currently marked to be skipped, check if skip conditions still apply + if not published: log.debug(f'Media: {instance.source} / {instance} has no published date ' - f'set but is already marked to be skipped') + f'set but is already marked to be skipped') + else: + if max_cap_age and filter_text: + if (published > max_cap_age) and (instance.source.is_regex_match(instance.title)): + # Media was published after the cap date and matches the filter text, but is set to be skipped + log.info(f'Media: {instance.source} / {instance} has a valid publishing date and matches the filter text, marking to be unskipped') + instance.skip = False + cap_changed = True + else: + # Published date is known here, so the skip is due to the download cap and/or the filter text + log.info(f'Media: {instance.source} / {instance} is too old for the download cap date or ' + f'does not match the filter text, already marked to be skipped') + elif max_cap_age: + if published > max_cap_age: + # Media was published after the cap date but is set to be skipped + log.info(f'Media: {instance.source} / {instance} has a valid ' + f'publishing date, marking to be unskipped') + instance.skip = False + cap_changed = True + elif filter_text: + if instance.source.is_regex_match(instance.title): + # Media matches the filter text but is set to be skipped + log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped') + 
instance.skip = False + cap_changed = True else: - if max_cap_age: - if published > max_cap_age and instance.skip: - # Media was published after the cap date but is set to be skipped - log.info(f'Media: {instance.source} / {instance} has a valid ' - f'publishing date, marking to be unskipped') - instance.skip = False - cap_changed = True - elif published <= max_cap_age and not instance.skip: - log.info(f'Media: {instance.source} / {instance} is too old for ' - f'the download cap date, marking to be skipped') - instance.skip = True - cap_changed = True + if not published: + log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped') + instance.skip = True + cap_changed = True else: - if instance.skip: - # Media marked to be skipped but source download cap removed - log.info(f'Media: {instance.source} / {instance} has a valid ' - f'publishing date, marking to be unskipped') - instance.skip = False - cap_changed = True + if max_cap_age: + if published <= max_cap_age: + log.info(f'Media: {instance.source} / {instance} is too old for ' + f'the download cap date, marking to be skipped') + instance.skip = True + cap_changed = True + if filter_text: + if not instance.source.is_regex_match(instance.title): + #media doesn't match the filter text but is not marked to be skipped + log.info(f'Media: {instance.source} / {instance} does not match the filter text') + instance.skip = True + cap_changed = True + # Recalculate the "can_download" flag, this may # need to change if the source specifications have been changed if instance.metadata: diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 2f946211..7e795309 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -254,6 +254,11 @@ def download_media_metadata(media_id): log.warn(f'Media: {source} / {media} is older than cap age ' f'{max_cap_age}, skipping') media.skip = True + # If the source has a search filter, check the video title matches the filter + if 
source.filter_text and not source.is_regex_match(media.title): + # Filter text not found in the media title. Accepts regex string, blank search filter results in this returning false + log.warn(f'Media: {source} / {media} does not match {source.filter_text}, skipping') + media.skip = True # If the source has a cut-off check the upload date is within the allowed delta if source.delete_old_media and source.days_to_keep > 0: if not isinstance(media.published, datetime): diff --git a/tubesync/sync/templates/sync/media.html b/tubesync/sync/templates/sync/media.html index 420b15ba..d2d4e639 100644 --- a/tubesync/sync/templates/sync/media.html +++ b/tubesync/sync/templates/sync/media.html @@ -64,5 +64,5 @@

Media

{% endfor %} -{% include 'pagination.html' with pagination=sources.paginator filter=source.pk show_skipped=show_skipped %} +{% include 'pagination.html' with pagination=sources.paginator filter=source.pk show_skipped=show_skipped only_skipped=only_skipped%} {% endblock %} diff --git a/tubesync/sync/templates/sync/source.html b/tubesync/sync/templates/sync/source.html index 22122e2c..c5812b28 100644 --- a/tubesync/sync/templates/sync/source.html +++ b/tubesync/sync/templates/sync/source.html @@ -43,6 +43,10 @@

Source {{ source.name }}

Directory Directory
{{ source.directory }} + + Filter text + Filter text
{{ source.filter_text }} + Media format Media format
{{ source.media_format }} diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py index f303c885..1ca2643a 100644 --- a/tubesync/sync/tests.py +++ b/tubesync/sync/tests.py @@ -175,6 +175,7 @@ def test_source(self): 'directory': 'testdirectory', 'media_format': settings.MEDIA_FORMATSTR_DEFAULT, 'download_cap': 0, + 'filter_text':'.*', 'index_schedule': 3600, 'delete_old_media': False, 'days_to_keep': 14, @@ -217,6 +218,7 @@ def test_source(self): 'directory': 'testdirectory', 'media_format': settings.MEDIA_FORMATSTR_DEFAULT, 'download_cap': 0, + 'filter_text':'.*', 'index_schedule': Source.IndexSchedule.EVERY_HOUR, 'delete_old_media': False, 'days_to_keep': 14, @@ -247,6 +249,7 @@ def test_source(self): 'directory': 'testdirectory', 'media_format': settings.MEDIA_FORMATSTR_DEFAULT, 'download_cap': 0, + 'filter_text':'.*', 'index_schedule': Source.IndexSchedule.EVERY_2_HOURS, # changed 'delete_old_media': False, 'days_to_keep': 14, @@ -1468,6 +1471,29 @@ def test_metadata_20230629(self): self.media.get_best_video_format() self.media.get_best_audio_format() + def test_is_regex_match(self): + + self.media.metadata = all_test_metadata['boring'] + expected_matches = { + ('.*'): (True), + ('no fancy stuff'): (True), + ('No fancy stuff'): (False), + ('(?i)No fancy stuff'): (True), #set case insensitive flag + ('no'): (True), + ('Foo'): (False), + ('^(?!.*fancy).*$'): (False), + ('^(?!.*funny).*$'): (True), + ('(?=.*f.*)(?=.{0,2}|.{4,})'): (True), + ('f{4,}'): (False), + ('^[^A-Z]*$'): (True), + ('^[^a-z]*$'): (False), + ('^[^\\s]*$'): (False) + } + + for params, expected in expected_matches.items(): + self.source.filter_text = params + expected_match_result = expected + self.assertEqual(self.source.is_regex_match(self.media.title), expected_match_result) class TasksTestCase(TestCase): def setUp(self): diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index e187cd85..0b808eb9 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -294,7 +294,7 
@@ def get_success_url(self): class EditSourceMixin: model = Source - fields = ('source_type', 'key', 'name', 'directory', 'media_format', + fields = ('source_type', 'key', 'name', 'directory', 'filter_text', 'media_format', 'index_schedule', 'download_media', 'download_cap', 'delete_old_media', 'delete_removed_media', 'days_to_keep', 'source_resolution', 'source_vcodec', 'source_acodec', 'prefer_60fps', 'prefer_hdr', 'fallback', 'copy_thumbnails',