Skip to content

Commit

Permalink
satellite/satellitedb: don't remove offline nodes from containment
Browse files Browse the repository at this point in the history
When audits are being recorded, we automatically add some SQL to remove
the node from the pending audits table in case it exists. They are
removed from pending audits even if the node was offline for the audit.
This is not the correct behavior.

Add statement to record audit results in reverify tests to ensure no
more false positives.

Change-Id: I186ae68bc5e7962ef6c5defbebc1d95e63596a17
  • Loading branch information
cam-a authored and Cameron Ayer committed May 3, 2021
1 parent 0084164 commit bb343d9
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 3 deletions.
24 changes: 24 additions & 0 deletions satellite/audit/reverify_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,9 @@ func TestReverifyOffline(t *testing.T) {
require.Len(t, report.Offlines, 1)
require.Equal(t, report.Offlines[0], pieces[0].StorageNode)

_, err = audits.Reporter.RecordAudits(ctx, report)
require.NoError(t, err)

// make sure that pending audit is not removed
containment := satellite.DB.Containment()
_, err = containment.Get(ctx, pending.NodeID)
Expand Down Expand Up @@ -437,6 +440,9 @@ func TestReverifyOfflineDialTimeout(t *testing.T) {
require.Len(t, report.Offlines, 1)
require.Equal(t, report.Offlines[0], pending.NodeID)

_, err = audits.Reporter.RecordAudits(ctx, report)
require.NoError(t, err)

// make sure that pending audit is not removed
containment := satellite.DB.Containment()
_, err = containment.Get(ctx, pending.NodeID)
Expand Down Expand Up @@ -530,6 +536,9 @@ func TestReverifyDeletedSegment(t *testing.T) {
assert.Empty(t, report.Successes)
assert.Empty(t, report.PendingAudits)

_, err = audits.Reporter.RecordAudits(ctx, report)
require.NoError(t, err)

// expect that the node was removed from containment since the segment it was contained for has been deleted
_, err = containment.Get(ctx, nodeID)
require.True(t, audit.ErrContainedNotFound.Has(err))
Expand Down Expand Up @@ -626,6 +635,9 @@ func TestReverifyModifiedSegment(t *testing.T) {
assert.Empty(t, report.Successes)
assert.Empty(t, report.PendingAudits)

_, err = audits.Reporter.RecordAudits(ctx, report)
require.NoError(t, err)

// expect that the node was removed from containment since the segment it was contained for has been changed
_, err = containment.Get(ctx, nodeID)
require.True(t, audit.ErrContainedNotFound.Has(err))
Expand Down Expand Up @@ -714,6 +726,9 @@ func TestReverifyReplacedSegment(t *testing.T) {
assert.Empty(t, report.Successes)
assert.Empty(t, report.PendingAudits)

_, err = audits.Reporter.RecordAudits(ctx, report)
require.NoError(t, err)

// expect that the node was removed from containment since the segment it was contained for has been changed
_, err = containment.Get(ctx, nodeID)
require.True(t, audit.ErrContainedNotFound.Has(err))
Expand Down Expand Up @@ -877,6 +892,9 @@ func TestReverifyExpired1(t *testing.T) {
report, err := audits.Verifier.Reverify(ctx, queueSegment)
require.NoError(t, err)

_, err = audits.Reporter.RecordAudits(ctx, report)
require.NoError(t, err)

assert.Len(t, report.Successes, 0)
assert.Len(t, report.Fails, 0)
assert.Len(t, report.Offlines, 0)
Expand Down Expand Up @@ -998,6 +1016,9 @@ func TestReverifyExpired2(t *testing.T) {
require.Len(t, report.PendingAudits, 0)
require.Len(t, report.Fails, 0)

_, err = audits.Reporter.RecordAudits(ctx, report)
require.NoError(t, err)

// Reverify should remove the node from containment mode
_, err = containment.Get(ctx, pending.NodeID)
require.True(t, audit.ErrContainedNotFound.Has(err))
Expand Down Expand Up @@ -1092,6 +1113,9 @@ func TestReverifySlowDownload(t *testing.T) {
require.Len(t, report.Unknown, 0)
require.Equal(t, report.PendingAudits[0].NodeID, slowNode)

_, err = audits.Reporter.RecordAudits(ctx, report)
require.NoError(t, err)

_, err = containment.Get(ctx, slowNode)
require.NoError(t, err)
})
Expand Down
10 changes: 7 additions & 3 deletions satellite/satellitedb/overlaycache.go
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ func (cache *overlaycache) BatchUpdateStats(ctx context.Context, updateRequests

updateNodeStats := cache.populateUpdateNodeStats(dbNode, updateReq, auditHistoryResponse, now)

sql := buildUpdateStatement(updateNodeStats)
sql := buildUpdateStatement(updateNodeStats, isUp)

allSQL += sql
}
Expand Down Expand Up @@ -1137,7 +1137,7 @@ func updateReputation(isSuccess bool, alpha, beta, lambda, w float64, totalCount
return newAlpha, newBeta, totalCount + 1
}

func buildUpdateStatement(update updateNodeStats) string {
func buildUpdateStatement(update updateNodeStats, isUp bool) string {
if update.NodeID.IsZero() {
return ""
}
Expand Down Expand Up @@ -1264,7 +1264,11 @@ func buildUpdateStatement(update updateNodeStats) string {
hexNodeID := hex.EncodeToString(update.NodeID.Bytes())

sql += fmt.Sprintf(" WHERE nodes.id = decode('%v', 'hex');\n", hexNodeID)
sql += fmt.Sprintf("DELETE FROM pending_audits WHERE pending_audits.node_id = decode('%v', 'hex');\n", hexNodeID)

// only remove from containment if node is online
if isUp {
sql += fmt.Sprintf("DELETE FROM pending_audits WHERE pending_audits.node_id = decode('%v', 'hex');\n", hexNodeID)
}

return sql
}
Expand Down

0 comments on commit bb343d9

Please sign in to comment.