Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implements command to change a job location. #739

Merged
merged 7 commits into from
Jan 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ The format mostly follows [Keep a Changelog](http://keepachangelog.com/en/1.0.0/
- Document how to ignore whitespace changes (PR#707, by Paulo Magalhaes)
- `shell` reporter: Call a script or program when changes are detected (fixes #650)
- New `separate` configuration option for reporters to split reports into one-per-job (contributed by Ryne Everett)
- `--change-location` option allowing job location to be changed without losing job history (#739, by trevorshannon)

### Changed

Expand Down
13 changes: 13 additions & 0 deletions docs/source/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,19 @@ the URLs, like this:
- grep: "Thing B"


Updating a URL and keeping past history
---------------------------------------

Job history is stored based on the value of the ``url`` parameter, so updating
a job's URL in the configuration file ``urls.yaml`` will create a new job with
no history. Retain history by using ``--change-location``::

urlwatch --change-location http://example.org#old http://example.org#new

The command also works with Browser and Shell jobs, changing ``navigate`` and
``command`` respectively.


Running a subset of jobs
------------------------

Expand Down
3 changes: 3 additions & 0 deletions docs/source/manpage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ job list management:
--delete JOB
delete job by location or index

--change-location JOB NEW_LOCATION
change the location of an existing job by location or index

--test-filter JOB
test filter output of job by location or index

Expand Down
29 changes: 28 additions & 1 deletion lib/urlwatch/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,31 @@ def modify_urls(self):
print('Adding %r' % (job,))
self.urlwatcher.jobs.append(job)

if self.urlwatch_config.change_location is not None:
new_loc = self.urlwatch_config.change_location[1]
# Ensure the user isn't overwriting an existing job with the change.
if new_loc in (j.get_location() for j in self.urlwatcher.jobs):
print(f'The new location "{new_loc}" already exists for a job. '
'Delete the existing job or choose a different value.')
save = False
else:
job = self._find_job(self.urlwatch_config.change_location[0])
if job is not None:
# Update the job's location (which will also update the
# guid) and move any history in the cache over to the job's
# updated guid.
print(f'Moving location of {job!r} to "{new_loc}"')
old_guid = job.get_guid()
old_loc = job.get_location()
job.set_base_location(new_loc)
num_moved = self.urlwatcher.cache_storage.move(
old_guid, job.get_guid())
if num_moved:
print(f'Moved {num_moved} snapshots of "{old_loc}" to "{new_loc}"')
else:
print(f'Not found: {self.urlwatch_config.change_location[0]}')
save = False

if save:
self.urlwatcher.urls_storage.save(self.urlwatcher.jobs)

Expand All @@ -235,7 +260,9 @@ def handle_actions(self):
sys.exit(self.dump_history(self.urlwatch_config.dump_history))
if self.urlwatch_config.list:
sys.exit(self.list_urls())
if self.urlwatch_config.add is not None or self.urlwatch_config.delete is not None:
if (self.urlwatch_config.add is not None
or self.urlwatch_config.delete is not None
or self.urlwatch_config.change_location is not None):
sys.exit(self.modify_urls())

def check_edit_config(self):
Expand Down
1 change: 1 addition & 0 deletions lib/urlwatch/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def parse_args(self, cmdline_args):
group.add_argument('--list', action='store_true', help='list jobs')
group.add_argument('--add', metavar='JOB', help='add job (key1=value1,key2=value2,...)')
group.add_argument('--delete', metavar='JOB', help='delete job by location or index')
group.add_argument('--change-location', metavar=('JOB', 'NEW_LOCATION'), nargs=2, help='change the location of an existing job by location or index')
group.add_argument('--test-filter', metavar='JOB', help='test filter output of job by location or index')
group.add_argument('--test-diff-filter', metavar='JOB',
help='test diff filter output of job by location or index (needs at least 2 snapshots)')
Expand Down
12 changes: 12 additions & 0 deletions lib/urlwatch/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ def job_documentation(cls):
def get_location(self):
    """Placeholder for the job's location; this base version always raises.

    The concrete job classes visible in this file override it.
    """
    raise NotImplementedError

def set_base_location(self, location):
    """Point this job at a new base location.

    Concrete job classes override this to store ``location`` in the
    attribute that identifies them (``url``, ``command`` or ``navigate``).
    The signature matches those overrides so that calling the base
    implementation fails with ``NotImplementedError`` rather than with a
    ``TypeError`` about an unexpected argument.

    :param location: the new location value for the job
    :raises NotImplementedError: always, in this base implementation
    """
    raise NotImplementedError()

def pretty_name(self):
    """Placeholder that always raises; subclasses are expected to override it."""
    raise NotImplementedError

Expand Down Expand Up @@ -212,6 +215,9 @@ class ShellJob(Job):
def get_location(self):
    """Return ``user_visible_url`` when it is truthy, otherwise the job's ``command``."""
    shown = self.user_visible_url
    return shown if shown else self.command

def set_base_location(self, location):
    """Replace the job's ``command`` with ``location``.

    ``user_visible_url`` is left untouched.
    """
    self.command = location

def retrieve(self, job_state):
if not self.stderr or self.stderr == 'ignore':
# Report stderr output for non-zero exit code,
Expand Down Expand Up @@ -259,6 +265,9 @@ class UrlJob(Job):
def get_location(self):
    """Return ``user_visible_url`` when it is truthy, otherwise the job's ``url``."""
    shown = self.user_visible_url
    return shown if shown else self.url

def set_base_location(self, location):
    """Replace the job's ``url`` with ``location``.

    ``user_visible_url`` is left untouched.
    """
    self.url = location

def retrieve(self, job_state):
headers = {
'User-agent': urlwatch.__user_agent__,
Expand Down Expand Up @@ -402,6 +411,9 @@ class BrowserJob(Job):
def get_location(self):
    """Return ``user_visible_url`` when it is truthy, otherwise the job's ``navigate`` value."""
    shown = self.user_visible_url
    return shown if shown else self.navigate

def set_base_location(self, location):
    """Replace the job's ``navigate`` target with ``location``.

    ``user_visible_url`` is left untouched.
    """
    self.navigate = location

def main_thread_enter(self):
from .browser import BrowserContext
self.ctx = BrowserContext()
Expand Down
33 changes: 33 additions & 0 deletions lib/urlwatch/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,10 @@ def delete(self, guid):
def clean(self, guid, retain_limit=1):
...

@abstractmethod
def move(self, guid, new_guid):
    """Move the history stored under ``guid`` to ``new_guid``.

    The implementations visible in this file return the number of
    snapshots that were moved.
    """

def backup(self):
for guid in self.get_guids():
data, timestamp, tries, etag = self.load(None, guid)
Expand Down Expand Up @@ -530,6 +534,12 @@ def clean(self, guid, retain_limit=1):
# We only store the latest version, no need to clean
return 0

def move(self, guid, new_guid):
    """Rename the cache file of ``guid`` so that it belongs to ``new_guid``.

    :param guid: identifier whose cache file should be moved
    :param new_guid: identifier that takes over the cache file
    :returns: 1 when a file was renamed, 0 when there was nothing to do
        (identical guids, or no cache file exists for ``guid``)
    """
    if guid == new_guid:
        return 0
    try:
        os.rename(self._get_filename(guid), self._get_filename(new_guid))
    except FileNotFoundError:
        # A job without a stored snapshot has no cache file yet; treat
        # that as "nothing to move" instead of aborting the whole command.
        return 0
    return 1


class CacheEntry(minidb.Model):
guid = str
Expand Down Expand Up @@ -609,6 +619,19 @@ def clean(self, guid, retain_limit=1):

return 0

def move(self, guid, new_guid):
    """Re-label every cache entry stored for ``guid`` with ``new_guid``.

    :param guid: identifier whose entries should be moved
    :param new_guid: identifier that takes over the entries
    :returns: number of entries that were re-labelled (0 for identical guids)
    """
    if guid == new_guid:
        return 0

    moved = 0
    # Entries that already carry 'new_guid' are not overwritten, so the
    # two job histories end up merged.
    matching = CacheEntry.load(self.db, CacheEntry.c.guid == guid)
    for moved, record in enumerate(matching, start=1):
        record.guid = new_guid
        record.save()
    self.db.commit()
    return moved


class CacheRedisStorage(CacheStorage):
def __init__(self, filename):
Expand Down Expand Up @@ -678,3 +701,13 @@ def clean(self, guid, retain_limit=1):
return i - self.db.llen(key)

return 0

def move(self, guid, new_guid):
    """Move the stored history list of ``guid`` to the key of ``new_guid``.

    :param guid: identifier whose history should be moved
    :param new_guid: identifier that takes over the history
    :returns: length of the list stored under the new key after the
        rename, or 0 when nothing was moved (identical guids, or no data
        stored for ``guid``)
    """
    if guid == new_guid:
        return 0
    key = self._make_key(guid)
    # Redis RENAME reports an error for a missing source key, which is the
    # normal state for a job that has no stored history yet.
    if not self.db.exists(key):
        return 0
    new_key = self._make_key(new_guid)
    # Note: if a list already exists under 'new_key', the data stored
    # there will be overwritten.
    self.db.rename(key, new_key)
    return self.db.llen(new_key)