Skip to content

Commit

Permalink
Add --change-location command-line switch (#739)
Browse files Browse the repository at this point in the history
  • Loading branch information
trevorshannon committed Jan 23, 2023
1 parent c53876b commit 08ddce5
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ The format mostly follows [Keep a Changelog](http://keepachangelog.com/en/1.0.0/
- Document how to ignore whitespace changes (PR#707, by Paulo Magalhaes)
- `shell` reporter: Call a script or program when changes are detected (fixes #650)
- New `separate` configuration option for reporters to split reports into one-per-job (contributed by Ryne Everett)
- `--change-location` option allowing job location to be changed without losing job history (#739, by trevorshannon)

### Changed

Expand Down
13 changes: 13 additions & 0 deletions docs/source/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,19 @@ the URLs, like this:
- grep: "Thing B"
Updating a URL and keeping past history
---------------------------------------

Job history is stored based on the value of the ``url`` parameter, so updating
a job's URL in the configuration file ``urls.yaml`` will create a new job with
no history. Retain history by using ``--change-location``::

urlwatch --change-location http://example.org#old http://example.org#new

The command also works with Browser and Shell jobs, changing ``navigate`` and
``command`` respectively.


Running a subset of jobs
------------------------

Expand Down
3 changes: 3 additions & 0 deletions docs/source/manpage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ job list management:
--delete JOB
delete job by location or index

--change-location JOB NEW_LOCATION
change the location of an existing job by location or index

--test-filter JOB
test filter output of job by location or index

Expand Down
29 changes: 28 additions & 1 deletion lib/urlwatch/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,31 @@ def modify_urls(self):
print('Adding %r' % (job,))
self.urlwatcher.jobs.append(job)

if self.urlwatch_config.change_location is not None:
new_loc = self.urlwatch_config.change_location[1]
# Ensure the user isn't overwriting an existing job with the change.
if new_loc in (j.get_location() for j in self.urlwatcher.jobs):
print(f'The new location "{new_loc}" already exists for a job. '
'Delete the existing job or choose a different value.')
save = False
else:
job = self._find_job(self.urlwatch_config.change_location[0])
if job is not None:
# Update the job's location (which will also update the
# guid) and move any history in the cache over to the job's
# updated guid.
print(f'Moving location of {job!r} to "{new_loc}"')
old_guid = job.get_guid()
old_loc = job.get_location()
job.set_base_location(new_loc)
num_moved = self.urlwatcher.cache_storage.move(
old_guid, job.get_guid())
if num_moved:
print(f'Moved {num_moved} snapshots of "{old_loc}" to "{new_loc}"')
else:
print(f'Not found: {self.urlwatch_config.change_location[0]}')
save = False

if save:
self.urlwatcher.urls_storage.save(self.urlwatcher.jobs)

Expand All @@ -235,7 +260,9 @@ def handle_actions(self):
sys.exit(self.dump_history(self.urlwatch_config.dump_history))
if self.urlwatch_config.list:
sys.exit(self.list_urls())
if self.urlwatch_config.add is not None or self.urlwatch_config.delete is not None:
if (self.urlwatch_config.add is not None
or self.urlwatch_config.delete is not None
or self.urlwatch_config.change_location is not None):
sys.exit(self.modify_urls())

def check_edit_config(self):
Expand Down
1 change: 1 addition & 0 deletions lib/urlwatch/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def parse_args(self, cmdline_args):
group.add_argument('--list', action='store_true', help='list jobs')
group.add_argument('--add', metavar='JOB', help='add job (key1=value1,key2=value2,...)')
group.add_argument('--delete', metavar='JOB', help='delete job by location or index')
group.add_argument('--change-location', metavar=('JOB', 'NEW_LOCATION'), nargs=2, help='change the location of an existing job by location or index')
group.add_argument('--test-filter', metavar='JOB', help='test filter output of job by location or index')
group.add_argument('--test-diff-filter', metavar='JOB',
help='test diff filter output of job by location or index (needs at least 2 snapshots)')
Expand Down
12 changes: 12 additions & 0 deletions lib/urlwatch/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ def job_documentation(cls):
def get_location(self):
    """Return the location that identifies this job.

    This base implementation always raises ``NotImplementedError``;
    the job classes visible in this file (shell, URL and browser jobs)
    each provide their own version.
    """
    raise NotImplementedError()

def set_base_location(self, location):
    """Replace the underlying location of this job with ``location``.

    The signature matches the overrides in the concrete job classes and
    the ``job.set_base_location(new_loc)`` call made when handling
    ``--change-location``. Without the ``location`` parameter, calling
    this on a job class that does not override it would fail with a
    ``TypeError`` instead of the intended ``NotImplementedError``.

    :param location: the new location value for the job
    :raises NotImplementedError: always; subclasses must override
    """
    raise NotImplementedError()

def pretty_name(self):
    """Return a display name for this job (presumably human-readable -- confirm).

    This base implementation always raises ``NotImplementedError`` and
    is meant to be overridden.
    """
    raise NotImplementedError()

Expand Down Expand Up @@ -212,6 +215,9 @@ class ShellJob(Job):
def get_location(self):
    """Return the location string of this shell job.

    A truthy ``user_visible_url`` takes precedence; otherwise the job's
    ``command`` is returned.
    """
    visible = self.user_visible_url
    if visible:
        return visible
    return self.command

def set_base_location(self, location):
    """Set this shell job's ``command`` to ``location``.

    Only ``command`` is changed; ``get_location()`` still returns
    ``user_visible_url`` whenever that attribute is truthy.
    """
    self.command = location

def retrieve(self, job_state):
if not self.stderr or self.stderr == 'ignore':
# Report stderr output for non-zero exit code,
Expand Down Expand Up @@ -259,6 +265,9 @@ class UrlJob(Job):
def get_location(self):
    """Return the location string of this URL job.

    A truthy ``user_visible_url`` takes precedence; otherwise the job's
    ``url`` is returned.
    """
    visible = self.user_visible_url
    if visible:
        return visible
    return self.url

def set_base_location(self, location):
    """Set this URL job's ``url`` to ``location``.

    Only ``url`` is changed; ``get_location()`` still returns
    ``user_visible_url`` whenever that attribute is truthy.
    """
    self.url = location

def retrieve(self, job_state):
headers = {
'User-agent': urlwatch.__user_agent__,
Expand Down Expand Up @@ -402,6 +411,9 @@ class BrowserJob(Job):
def get_location(self):
    """Return the location string of this browser job.

    A truthy ``user_visible_url`` takes precedence; otherwise the job's
    ``navigate`` value is returned.
    """
    visible = self.user_visible_url
    if visible:
        return visible
    return self.navigate

def set_base_location(self, location):
    """Set this browser job's ``navigate`` target to ``location``.

    Only ``navigate`` is changed; ``get_location()`` still returns
    ``user_visible_url`` whenever that attribute is truthy.
    """
    self.navigate = location

def main_thread_enter(self):
from .browser import BrowserContext
self.ctx = BrowserContext()
Expand Down
33 changes: 33 additions & 0 deletions lib/urlwatch/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,10 @@ def delete(self, guid):
def clean(self, guid, retain_limit=1):
...

@abstractmethod
def move(self, guid, new_guid):
    """Move the stored history of ``guid`` over to ``new_guid``.

    The concrete storage backends in this file return the number of
    snapshots that were moved, and 0 when both GUIDs are equal.
    """
    ...

def backup(self):
for guid in self.get_guids():
data, timestamp, tries, etag = self.load(None, guid)
Expand Down Expand Up @@ -530,6 +534,12 @@ def clean(self, guid, retain_limit=1):
# We only store the latest version, no need to clean
return 0

def move(self, guid, new_guid):
    """Rename the cache file of ``guid`` so it belongs to ``new_guid``.

    This backend keeps a single file per GUID, so at most one snapshot
    is ever moved.

    :param guid: GUID the snapshot is currently stored under
    :param new_guid: GUID the snapshot should be stored under
    :returns: 1 if a cache file was renamed; 0 if both GUIDs are equal
        or no cache file exists for ``guid``
    """
    if guid == new_guid:
        return 0
    try:
        os.rename(self._get_filename(guid), self._get_filename(new_guid))
    except FileNotFoundError:
        # A job that has never been run has no cache file yet. Changing
        # its location must still succeed; there is just nothing to move.
        return 0
    return 1


class CacheEntry(minidb.Model):
guid = str
Expand Down Expand Up @@ -609,6 +619,19 @@ def clean(self, guid, retain_limit=1):

return 0

def move(self, guid, new_guid):
    """Re-label every cache entry stored under ``guid`` as ``new_guid``.

    Returns the number of entries that were re-labelled; 0 when both
    GUIDs are equal, in which case the database is not touched.
    """
    if guid == new_guid:
        return 0

    moved = 0
    # Entries that already exist under 'new_guid' are left untouched, so
    # the two job histories end up merged rather than overwritten.
    matching = CacheEntry.load(self.db, CacheEntry.c.guid == guid)
    for moved, record in enumerate(matching, start=1):
        record.guid = new_guid
        record.save()
    self.db.commit()

    return moved


class CacheRedisStorage(CacheStorage):
def __init__(self, filename):
Expand Down Expand Up @@ -678,3 +701,13 @@ def clean(self, guid, retain_limit=1):
return i - self.db.llen(key)

return 0

def move(self, guid, new_guid):
    """Rename the Redis list holding the history of ``guid`` to ``new_guid``.

    :param guid: GUID the history is currently stored under
    :param new_guid: GUID the history should be stored under
    :returns: the length of the list now stored under the new key, or 0
        if both GUIDs are equal or no history exists for ``guid``
    """
    if guid == new_guid:
        return 0
    key = self._make_key(guid)
    new_key = self._make_key(new_guid)
    if not self.db.exists(key):
        # RENAME reports an error when the source key does not exist.
        # A job without stored history simply has nothing to move.
        return 0
    # Note if a list with 'new_key' already exists, the data stored there
    # will be overwritten.
    self.db.rename(key, new_key)
    return self.db.llen(new_key)

0 comments on commit 08ddce5

Please sign in to comment.