From d680dab70fd90bbfbaef2b0b5bba8ca3bd71f33c Mon Sep 17 00:00:00 2001 From: Geoff Franks Date: Thu, 4 Feb 2016 10:42:48 -0500 Subject: [PATCH 1/3] Failed tasks/archives no longer report as successful Due to confusing WorkerUpdate struct usage, failed tasks were being `FAILED`, then subsequently `STOPPED`, to set the time of job completion. `STOPPED` also sets status to `done`. `FAILED` already sets the completion time to now. The `FAILED` op has been updated to be more explicit about setting completion time, and moved to the end of the worker runloop, so you either have success, or failure. Additionally, archives were not being invalidated, as the logic for that would only be triggered if the creation of an archive in the database failed. This has been addressed to happen only if the archive database record was successfully created. Fixes #79 --- supervisor/supervisor.go | 3 ++- supervisor/worker.go | 13 ++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/supervisor/supervisor.go b/supervisor/supervisor.go index c70235746..0e5fb5f45 100644 --- a/supervisor/supervisor.go +++ b/supervisor/supervisor.go @@ -268,7 +268,7 @@ func (s *Supervisor) Run() error { case FAILED: log.Warnf(" %s: task failed!", u.Task) - if err := s.Database.FailTask(u.Task, time.Now()); err != nil { + if err := s.Database.FailTask(u.Task, u.StoppedAt); err != nil { log.Errorf(" %s: !! failed to update database - %s", u.Task, err) } @@ -282,6 +282,7 @@ func (s *Supervisor) Run() error { log.Infof(" %s: restore key is %s", u.Task, u.Output) if id, err := s.Database.CreateTaskArchive(u.Task, u.Output, time.Now()); err != nil { log.Errorf(" %s: !! failed to update database - %s", u.Task, err) + } else { if !u.TaskSuccess { s.Database.InvalidateArchive(id) } diff --git a/supervisor/worker.go b/supervisor/worker.go index a08320a5e..7b2cd4a27 100644 --- a/supervisor/worker.go +++ b/supervisor/worker.go @@ -117,7 +117,6 @@ func worker(id uint, privateKeyFile string, work chan Task, updates chan WorkerU if err != nil { updates <- WorkerUpdate{Task: t.UUID, Op: OUTPUT, Output: fmt.Sprintf("TASK FAILED!! shield worker %d failed to execute the command against the remote agent %s (%s)\n", id, remote, err)} - updates <- WorkerUpdate{Task: t.UUID, Op: FAILED} jobFailed = true } @@ -156,10 +155,14 @@ func worker(id uint, privateKeyFile string, work chan Task, updates chan WorkerU } // signal to the supervisor that we finished - updates <- WorkerUpdate{ - Task: t.UUID, - Op: STOPPED, - StoppedAt: time.Now(), + if jobFailed { + updates <- WorkerUpdate{Task: t.UUID, Op: FAILED, StoppedAt: time.Now()} + } else { + updates <- WorkerUpdate{ + Task: t.UUID, + Op: STOPPED, + StoppedAt: time.Now(), + } } } } From 8960b2df61a222912c7e36a535894a08d5bd27ec Mon Sep 17 00:00:00 2001 From: Geoff Franks Date: Thu, 4 Feb 2016 10:43:10 -0500 Subject: [PATCH 2/3] Prevent archive records without restore keys If a restore key was not parsed/detected by the worker, the task is now failed, and no archive record is created in the database. Additionally, the database is now updated to refuse to allow archives without restore keys. Fixes #55 --- db/tasks.go | 4 ++++ db/tasks_test.go | 21 ++++++++++++++++++++- supervisor/worker.go | 16 +++++++++++----- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/db/tasks.go b/db/tasks.go index 8b7f5d012..ae8ead63a 100644 --- a/db/tasks.go +++ b/db/tasks.go @@ -191,6 +191,10 @@ func (db *DB) UpdateTaskLog(id uuid.UUID, more string) error { } func (db *DB) CreateTaskArchive(id uuid.UUID, key string, effective time.Time) (uuid.UUID, error) { + // fail on empty store_key, as '' seems to satisfy the NOT NULL constraint in postgres + if key == "" { + return nil, fmt.Errorf("cannot create an archive without a store_key") + } // determine how long we need to keep this specific archive for r, err := db.Query( `SELECT r.expiry diff --git a/db/tasks_test.go b/db/tasks_test.go index 371352448..b2e22e859 100644 --- a/db/tasks_test.go +++ b/db/tasks_test.go @@ -28,6 +28,12 @@ var _ = Describe("Task Management", func() { Ω(n).Should(BeNumerically(">", 0)) } + shouldNotExist := func(q string, params ...interface{}) { + n, err := db.Count(q, params...) + Expect(err).ShouldNot(HaveOccurred()) + Expect(n).Should(BeNumerically("==", 0)) + } + BeforeEach(func() { var err error db, err = Database( @@ -184,7 +190,7 @@ var _ = Describe("Task Management", func() { Ω(db.CompleteTask(id, time.Now())).Should(Succeed()) archive_id, err := db.CreateTaskArchive(id, "SOME-KEY", time.Now()) Expect(err).ShouldNot(HaveOccurred()) - Expect(id).ShouldNot(BeNil()) + Expect(archive_id).ShouldNot(BeNil()) shouldExist(`SELECT * FROM tasks`) shouldExist(`SELECT * FROM tasks WHERE archive_uuid IS NOT NULL`) @@ -197,4 +203,17 @@ var _ = Describe("Task Management", func() { shouldExist(`SELECT * FROM archives WHERE taken_at IS NOT NULL`) shouldExist(`SELECT * FROM archives WHERE expires_at IS NOT NULL`) }) + It("Fails to associate archives with a task, when no restore key is present", func() { + id, err := db.CreateBackupTask("bob", JOB_UUID) + Expect(err).ShouldNot(HaveOccurred()) + Expect(id).ShouldNot(BeNil()) + + Expect(db.StartTask(id, time.Now())).Should(Succeed()) + Expect(db.CompleteTask(id, time.Now())).Should(Succeed()) + archive_id, err := db.CreateTaskArchive(id, "", time.Now()) + Expect(err).Should(HaveOccurred()) + Expect(archive_id).Should(BeNil()) + + shouldNotExist(`SELECT * from archives where store_key = ''`) + }) }) diff --git a/supervisor/worker.go b/supervisor/worker.go index 7b2cd4a27..b429d43c5 100644 --- a/supervisor/worker.go +++ b/supervisor/worker.go @@ -137,11 +137,17 @@ func worker(id uint, privateKeyFile string, work chan Task, updates chan WorkerU Output: fmt.Sprintf("WORKER FAILED!! shield worker %d failed to parse JSON response from remote agent %s (%s)\n", id, remote, err)} } else { - updates <- WorkerUpdate{ - Task: t.UUID, - Op: RESTORE_KEY, - TaskSuccess: !jobFailed, - Output: v.Key, + if v.Key != "" { + updates <- WorkerUpdate{ + Task: t.UUID, + Op: RESTORE_KEY, + TaskSuccess: !jobFailed, + Output: v.Key, + } + } else { + updates <- WorkerUpdate{Task: t.UUID, Op: OUTPUT, + Output: fmt.Sprintf("TASK FAILED!! No restore key detected in worker %d. Cowardly refusing to create an archive record", id)} + jobFailed = true } } } From e69c4d11e0068668d93e6e10f626d2b866625257 Mon Sep 17 00:00:00 2001 From: Geoff Franks Date: Thu, 4 Feb 2016 12:09:10 -0500 Subject: [PATCH 3/3] Force task failures on failed json parsing of store_key --- supervisor/worker.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/supervisor/worker.go b/supervisor/worker.go index b429d43c5..a867bfe72 100644 --- a/supervisor/worker.go +++ b/supervisor/worker.go @@ -133,6 +133,7 @@ func worker(id uint, privateKeyFile string, work chan Task, updates chan WorkerU err := dec.Decode(&v) if err != nil { + jobFailed = true updates <- WorkerUpdate{Task: t.UUID, Op: OUTPUT, Output: fmt.Sprintf("WORKER FAILED!! shield worker %d failed to parse JSON response from remote agent %s (%s)\n", id, remote, err)} @@ -145,9 +146,9 @@ func worker(id uint, privateKeyFile string, work chan Task, updates chan WorkerU Output: v.Key, } } else { + jobFailed = true updates <- WorkerUpdate{Task: t.UUID, Op: OUTPUT, Output: fmt.Sprintf("TASK FAILED!! No restore key detected in worker %d. Cowardly refusing to create an archive record", id)} - jobFailed = true } } }