Skip to content

Commit

Permalink
Merge pull request #425 from locke4/main
Browse files Browse the repository at this point in the history
Add support for regex video title filtering
  • Loading branch information
meeb committed Nov 20, 2023
2 parents 7f4e858 + d1cb7ef commit 33b4711
Show file tree
Hide file tree
Showing 10 changed files with 127 additions and 30 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ env:
IMAGE_NAME: tubesync

on:
workflow_dispatch:
push:
branches:
- main
Expand Down
2 changes: 1 addition & 1 deletion tubesync/common/templates/pagination.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<div class="col s12">
<div class="pagination">
{% for i in paginator.page_range %}
<a class="pagenum{% if i == page_obj.number %} currentpage{% endif %}" href="?{% if filter %}filter={{ filter }}&{% endif %}page={{ i }}{% if show_skipped %}&show_skipped=yes{% endif %}">{{ i }}</a>
<a class="pagenum{% if i == page_obj.number %} currentpage{% endif %}" href="?{% if filter %}filter={{ filter }}&{% endif %}page={{ i }}{% if show_skipped %}&show_skipped=yes{% endif %}{% if only_skipped %}&only_skipped=yes{% endif %}">{{ i }}</a>
{% endfor %}
</div>
</div>
Expand Down
29 changes: 29 additions & 0 deletions tubesync/sync/migrations/0020_auto_20231024_1825.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Generated by Django 3.2.22 on 2023-10-24 17:25

import django.core.validators
from django.db import migrations, models


class Migration(migrations.Migration):
    # Schema migration for the "sync" app: adds Source.filter_text and
    # alters the Source.auto_subtitles and Source.sub_langs field definitions.

    # Must be applied after the migration that added "delete removed media"
    dependencies = [
        ('sync', '0019_add_delete_removed_media'),
    ]

    operations = [
        # New optional field, blank by default, holding the title filter
        migrations.AddField(
            model_name='source',
            name='filter_text',
            field=models.CharField(
                blank=True,
                default='',
                help_text='Regex compatible filter string for video titles',
                max_length=100,
                verbose_name='filter string',
            ),
        ),
        migrations.AlterField(
            model_name='source',
            name='auto_subtitles',
            field=models.BooleanField(
                default=False,
                help_text='Accept auto-generated subtitles',
                verbose_name='accept auto-generated subs',
            ),
        ),
        # The validator only accepts a comma-separated list of language
        # codes, each optionally prefixed with "-"
        migrations.AlterField(
            model_name='source',
            name='sub_langs',
            field=models.CharField(
                default='en',
                help_text='List of subtitles langs to download, comma-separated. Example: en,fr or all,-fr,-live_chat',
                max_length=30,
                validators=[
                    django.core.validators.RegexValidator(
                        message='Subtitle langs must be a comma-separated list of langs. example: en,fr or all,-fr,-live_chat',
                        regex='^(\\-?[\\_\\.a-zA-Z]+,)*(\\-?[\\_\\.a-zA-Z]+){1}$',
                    ),
                ],
                verbose_name='subs langs',
            ),
        ),
    ]
11 changes: 11 additions & 0 deletions tubesync/sync/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import uuid
import json
import re
from xml.etree import ElementTree
from collections import OrderedDict
from datetime import datetime, timedelta
Expand Down Expand Up @@ -287,6 +288,13 @@ class IndexSchedule(models.IntegerChoices):
help_text=_('If "delete old media" is ticked, the number of days after which '
'to automatically delete media')
)
filter_text = models.CharField(
_('filter string'),
max_length=100,
default='',
blank=True,
help_text=_('Regex compatible filter string for video titles')
)
delete_removed_media = models.BooleanField(
_('delete removed media'),
default=False,
Expand Down Expand Up @@ -538,6 +546,9 @@ def get_example_media_format(self):
except Exception as e:
return ''

def is_regex_match(self, media_item_title):
    '''
        Returns True if media_item_title matches this source's filter_text.

        filter_text is treated as a regular expression and is searched for
        anywhere in the title (re.search semantics, case sensitive unless
        the pattern sets its own flags such as "(?i)").

        A blank filter_text means no filtering is requested, so every title
        matches. A missing (None) title is treated as an empty string. A
        filter_text that is not a valid regular expression matches nothing,
        rather than raising re.error into the caller.
    '''
    if not self.filter_text:
        # No filter set, nothing to exclude
        return True
    try:
        return bool(re.search(self.filter_text, media_item_title or ''))
    except re.error:
        # filter_text is free-form user input and is not validated when it
        # is saved, a broken pattern must not crash every caller
        return False

def index_media(self):
'''
Index the media source returning a list of media metadata as dicts.
Expand Down
75 changes: 48 additions & 27 deletions tubesync/sync/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,36 +104,57 @@ def media_post_save(sender, instance, created, **kwargs):
# already been downloaded
if not instance.downloaded:
max_cap_age = instance.source.download_cap_date
published = instance.published
if not published:
if not instance.skip:
log.warn(f'Media: {instance.source} / {instance} has no published date '
f'set, marking to be skipped')
instance.skip = True
cap_changed = True
else:
filter_text = instance.source.filter_text
published = instance.published

if instance.skip:
#currently marked to be skipped, check if skip conditions still apply
if not published:
log.debug(f'Media: {instance.source} / {instance} has no published date '
f'set but is already marked to be skipped')
f'set but is already marked to be skipped')
else:
if max_cap_age and filter_text:
if (published > max_cap_age) and (source.is_regex_match(instance.title)):
# Media was published after the cap date and matches the filter text, but is set to be skipped
print('Has a valid publishing date and matches filter, marking unskipped')
instance.skip = False
cap_changed = True
else:
print('does not have a valid publishing date or filter string, already marked skipped')
log.info(f'Media: {instance.source} / {instance} has no published date '
f'set but is already marked to be skipped')
elif max_cap_age:
if published > max_cap_age:
# Media was published after the cap date but is set to be skipped
log.info(f'Media: {instance.source} / {instance} has a valid '
f'publishing date, marking to be unskipped')
instance.skip = False
cap_changed = True
elif filter_text:
if source.is_regex_match(instance.title):
# Media matches the filter text but is set to be skipped
log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
instance.skip = False
cap_changed = True
else:
if max_cap_age:
if published > max_cap_age and instance.skip:
# Media was published after the cap date but is set to be skipped
log.info(f'Media: {instance.source} / {instance} has a valid '
f'publishing date, marking to be unskipped')
instance.skip = False
cap_changed = True
elif published <= max_cap_age and not instance.skip:
log.info(f'Media: {instance.source} / {instance} is too old for '
f'the download cap date, marking to be skipped')
instance.skip = True
cap_changed = True
if not published:
log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped')
instance.skip = True
cap_changed = True
else:
if instance.skip:
# Media marked to be skipped but source download cap removed
log.info(f'Media: {instance.source} / {instance} has a valid '
f'publishing date, marking to be unskipped')
instance.skip = False
cap_changed = True
if max_cap_age:
if published <= max_cap_age:
log.info(f'Media: {instance.source} / {instance} is too old for '
f'the download cap date, marking to be skipped')
instance.skip = True
cap_changed = True
if filter_text:
if not re.search(filter_text,instance.title):
#media doesn't match the filter text but is not marked to be skipped
log.info(f'Media: {instance.source} / {instance} does not match the filter text')
instance.skip = True
cap_changed = True

# Recalculate the "can_download" flag, this may
# need to change if the source specifications have been changed
if instance.metadata:
Expand Down
5 changes: 5 additions & 0 deletions tubesync/sync/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,11 @@ def download_media_metadata(media_id):
log.warn(f'Media: {source} / {media} is older than cap age '
f'{max_cap_age}, skipping')
media.skip = True
# If the source has a search filter, check the video title matches the filter
if source.filter_text and not source.is_regex_match(media.title):
# The media title does not match the source's regex filter, so mark it to be skipped. A blank filter is falsy and never reaches this branch, so nothing is skipped when no filter is set
log.warn(f'Media: {source} / {media} does not match {source.filter_text}, skipping')
media.skip = True
# If the source has a cut-off check the upload date is within the allowed delta
if source.delete_old_media and source.days_to_keep > 0:
if not isinstance(media.published, datetime):
Expand Down
2 changes: 1 addition & 1 deletion tubesync/sync/templates/sync/media.html
Original file line number Diff line number Diff line change
Expand Up @@ -64,5 +64,5 @@ <h1 class="truncate">Media</h1>
</div>
{% endfor %}
</div>
{% include 'pagination.html' with pagination=sources.paginator filter=source.pk show_skipped=show_skipped %}
{% include 'pagination.html' with pagination=sources.paginator filter=source.pk show_skipped=show_skipped only_skipped=only_skipped%}
{% endblock %}
4 changes: 4 additions & 0 deletions tubesync/sync/templates/sync/source.html
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ <h1 class="truncate">Source <strong>{{ source.name }}</strong></h1>
<td class="hide-on-small-only">Directory</td>
<td><span class="hide-on-med-and-up">Directory<br></span><strong>{{ source.directory }}</strong></td>
</tr>
<tr title="Filter text">
<td class="hide-on-small-only">Filter text</td>
<td><span class="hide-on-med-and-up">Filter text<br></span><strong>{{ source.filter_text }}</strong></td>
</tr>
<tr title="Media file name format to use for saving files">
<td class="hide-on-small-only">Media format</td>
<td><span class="hide-on-med-and-up">Media format<br></span><strong>{{ source.media_format }}</strong></td>
Expand Down
26 changes: 26 additions & 0 deletions tubesync/sync/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ def test_source(self):
'directory': 'testdirectory',
'media_format': settings.MEDIA_FORMATSTR_DEFAULT,
'download_cap': 0,
'filter_text':'.*',
'index_schedule': 3600,
'delete_old_media': False,
'days_to_keep': 14,
Expand Down Expand Up @@ -217,6 +218,7 @@ def test_source(self):
'directory': 'testdirectory',
'media_format': settings.MEDIA_FORMATSTR_DEFAULT,
'download_cap': 0,
'filter_text':'.*',
'index_schedule': Source.IndexSchedule.EVERY_HOUR,
'delete_old_media': False,
'days_to_keep': 14,
Expand Down Expand Up @@ -247,6 +249,7 @@ def test_source(self):
'directory': 'testdirectory',
'media_format': settings.MEDIA_FORMATSTR_DEFAULT,
'download_cap': 0,
'filter_text':'.*',
'index_schedule': Source.IndexSchedule.EVERY_2_HOURS, # changed
'delete_old_media': False,
'days_to_keep': 14,
Expand Down Expand Up @@ -1468,6 +1471,29 @@ def test_metadata_20230629(self):
self.media.get_best_video_format()
self.media.get_best_audio_format()

def test_is_regex_match(self):
    '''
        Checks Source.is_regex_match() against a table of filter patterns,
        each with the result expected when applied to the title of the
        "boring" test media.
    '''
    self.media.metadata = all_test_metadata['boring']
    # Maps a filter_text pattern to whether it is expected to match
    expected_matches = {
        r'.*': True,
        r'no fancy stuff': True,
        r'No fancy stuff': False,
        r'(?i)No fancy stuff': True,  # set case insensitive flag
        r'no': True,
        r'Foo': False,
        r'^(?!.*fancy).*$': False,
        r'^(?!.*funny).*$': True,
        r'(?=.*f.*)(?=.{0,2}|.{4,})': True,
        r'f{4,}': False,
        r'^[^A-Z]*$': True,
        r'^[^a-z]*$': False,
        r'^[^\s]*$': False,
    }
    for filter_text, expected in expected_matches.items():
        # subTest keeps checking the remaining patterns after a failure
        # and names the failing pattern in the test report
        with self.subTest(filter_text=filter_text):
            self.source.filter_text = filter_text
            self.assertEqual(
                self.source.is_regex_match(self.media.title),
                expected,
            )

class TasksTestCase(TestCase):
def setUp(self):
Expand Down
2 changes: 1 addition & 1 deletion tubesync/sync/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def get_success_url(self):

class EditSourceMixin:
model = Source
fields = ('source_type', 'key', 'name', 'directory', 'media_format',
fields = ('source_type', 'key', 'name', 'directory', 'filter_text', 'media_format',
'index_schedule', 'download_media', 'download_cap', 'delete_old_media',
'delete_removed_media', 'days_to_keep', 'source_resolution', 'source_vcodec',
'source_acodec', 'prefer_60fps', 'prefer_hdr', 'fallback', 'copy_thumbnails',
Expand Down

0 comments on commit 33b4711

Please sign in to comment.