Skip to content

Commit

Permalink
rework skip logic check, prevent race condition between metadata downloading and upload date being checked, resolves #440, #183, related to #438
Browse files Browse the repository at this point in the history
  • Loading branch information
meeb committed Nov 30, 2023
1 parent 512b70a commit e54a762
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 50 deletions.
4 changes: 3 additions & 1 deletion tubesync/sync/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,9 @@ def get_example_media_format(self):
return ''

def is_regex_match(self, media_item_title):
    '''
        Checks a media item title against this source's title filter.

        Returns True when no filter text is set (an empty or None
        filter_text imposes no restriction) or when the filter_text
        regular expression is found anywhere in media_item_title.
        Returns False when a filter is set and does not match.
    '''
    # The empty-filter guard must run before the regex search, otherwise
    # it is unreachable and an unset filter is handed to re.search().
    if not self.filter_text:
        return True
    return bool(re.search(self.filter_text, media_item_title))

def index_media(self):
'''
Expand Down
92 changes: 45 additions & 47 deletions tubesync/sync/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,65 +96,63 @@ def media_post_save(sender, instance, created, **kwargs):
# If the media is skipped manually, bail.
if instance.manual_skip:
return

# Triggered after media is saved
cap_changed = False
can_download_changed = False
# Reset the skip flag if the download cap has changed if the media has not
# already been downloaded
if not instance.downloaded:
if not instance.downloaded and instance.metadata:
max_cap_age = instance.source.download_cap_date
filter_text = instance.source.filter_text
published = instance.published

if instance.skip:
#currently marked to be skipped, check if skip conditions still apply
if not published:
filter_text = instance.source.filter_text.strip()
published = instance.published
if not published:
if not instance.skip:
log.warn(f'Media: {instance.source} / {instance} has no published date '
f'set, marking to be skipped')
instance.skip = True
cap_changed = True
else:
log.debug(f'Media: {instance.source} / {instance} has no published date '
f'set but is already marked to be skipped')
else:
if max_cap_age and filter_text:
if (published > max_cap_age) and (instance.source.is_regex_match(instance.title)):
# Media was published after the cap date and matches the filter text, but is set to be skipped
print('Has a valid publishing date and matches filter, marking unskipped')
instance.skip = False
cap_changed = True
f'set but is already marked to be skipped')
else:
if max_cap_age:
if published > max_cap_age and instance.skip:
if filter_text:
if instance.source.is_regex_match(instance.title):
log.info(f'Media: {instance.source} / {instance} has a valid '
f'publishing date and title filter, marking to be unskipped')
instance.skip = False
cap_changed = True
else:
log.debug(f'Media: {instance.source} / {instance} has a valid publishing date '
f'but failed the title filter match, already marked skipped')
else:
print('does not have a valid publishing date or filter string, already marked skipped')
log.info(f'Media: {instance.source} / {instance} has no published date '
f'set but is already marked to be skipped')
elif max_cap_age:
if published > max_cap_age:
# Media was published after the cap date but is set to be skipped
log.info(f'Media: {instance.source} / {instance} has a valid '
f'publishing date, marking to be unskipped')
f'publishing date, marking to be unskipped')
instance.skip = False
cap_changed = True
elif filter_text:
if instance.source.is_regex_match(instance.title):
# Media matches the filter text but is set to be skipped
log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
instance.skip = False
cap_changed = True
else:
if not published:
log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped')
instance.skip = True
cap_changed = True
elif published <= max_cap_age and not instance.skip:
log.info(f'Media: {instance.source} / {instance} is too old for '
f'the download cap date, marking to be skipped')
instance.skip = True
cap_changed = True
else:
if max_cap_age:
if published <= max_cap_age:
log.info(f'Media: {instance.source} / {instance} is too old for '
f'the download cap date, marking to be skipped')
instance.skip = True
cap_changed = True
if filter_text:
if not instance.source.is_regex_match(instance.title):
#media doesn't match the filter text but is not marked to be skipped
log.info(f'Media: {instance.source} / {instance} does not match the filter text')
instance.skip = True
cap_changed = True

if instance.skip:
# Media marked to be skipped but source download cap removed
if filter_text:
if instance.source.is_regex_match(instance.title):
log.info(f'Media: {instance.source} / {instance} has a valid '
f'publishing date and title filter, marking to be unskipped')
instance.skip = False
cap_changed = True
else:
log.info(f'Media: {instance.source} / {instance} has a valid publishing date '
f'but failed the title filter match, already marked skipped')
else:
log.debug(f'Media: {instance.source} / {instance} has a valid publishing date and '
f'is already marked as not to be skipped')

cap_changed = False
# Recalculate the "can_download" flag, this may
# need to change if the source specifications have been changed
if instance.metadata:
Expand Down
2 changes: 0 additions & 2 deletions tubesync/sync/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,11 +231,9 @@ def download_media_metadata(media_id):
log.error(f'Task download_media_metadata(pk={media_id}) called but no '
f'media exists with ID: {media_id}')
return

if media.manual_skip:
log.info(f'Task for ID: {media_id} skipped, due to task being manually skipped.')
return

source = media.source
metadata = media.index_metadata()
media.metadata = json.dumps(metadata, default=json_serial)
Expand Down

0 comments on commit e54a762

Please sign in to comment.