Skip to content

Commit

Permalink
backupccl: stop including historical databases in cluster backup Descs
Browse files Browse the repository at this point in the history
A previous commit attempted to fix a bug where cluster backup would not
include tables in dropped databases between incremental backups. That
fix aimed to find dropped databases and add them to the set of
descriptors. However, this causes issues when a database is recreated
with the same name.

Rather than adding the dropped DBs to the Descriptors field on the
backup manifest, this commit updates how DescriptorChanges are populated
for cluster backups with revision history. Now, the initial scan of
descriptors as of the start time will look for all descriptors in the
cluster rather than just those that were resolved as of the end time of
the backup.

Release note (bug fix): Fix a bug where cluster revision-history backups
may have included dropped descriptors in the "current" snapshot of
descriptors on the cluster.

Release justification: bug fix. Fix a bug where cluster revision-history
backups may have included dropped descriptors in the "current" snapshot
of descriptors on the cluster.
  • Loading branch information
pbardea committed Aug 16, 2021
1 parent bab3128 commit 51ea2a5
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 68 deletions.
14 changes: 1 addition & 13 deletions pkg/ccl/backupccl/backup_planning.go
Expand Up @@ -903,19 +903,7 @@ func backupPlanHook(
}
case tree.AllDescriptors:
// Cluster backups include all of the descriptors in the cluster.
var allDescs []catalog.Descriptor
var err error
switch mvccFilter {
case MVCCFilter_All:
// Usually, revision_history backups include all previous versions of the
// descriptors that exist as of end time since they have not been GC'ed yet.
// However, since database descriptors are deleted as soon as the database
// is deleted, cluster backups need to explicitly go looking for these
// dropped descriptors up front.
allDescs, err = loadAllDescsInInterval(ctx, p.ExecCfg().Codec, p.ExecCfg().DB, startTime, endTime)
case MVCCFilter_Latest:
allDescs, err = backupresolver.LoadAllDescs(ctx, p.ExecCfg().Codec, p.ExecCfg().DB, endTime)
}
allDescs, err := backupresolver.LoadAllDescs(ctx, p.ExecCfg().Codec, p.ExecCfg().DB, endTime)
if err != nil {
return err
}
Expand Down
26 changes: 25 additions & 1 deletion pkg/ccl/backupccl/full_cluster_backup_restore_test.go
Expand Up @@ -737,6 +737,30 @@ func TestClusterRestoreFailCleanup(t *testing.T) {
})
}

// TestDropDatabaseRevisionHistory is a regression test for a bug where
// dropped descriptors would appear in the set of `Descriptors` in a
// revision-history cluster backup manifest.
func TestDropDatabaseRevisionHistory(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)

	const numAccounts = 1
	_, _, srcDB, dir, cleanup := BackupRestoreTestSetup(t, singleNode, numAccounts, InitManualReplication)
	defer cleanup()

	// Take an initial revision-history backup, then drop and recreate a
	// database under the same name before the incremental backup runs.
	srcDB.Exec(t, `BACKUP TO $1 WITH revision_history`, LocalFoo)
	srcDB.Exec(t, `
CREATE DATABASE same_name_db;
DROP DATABASE same_name_db;
CREATE DATABASE same_name_db;
`)
	srcDB.Exec(t, `BACKUP TO $1 WITH revision_history`, LocalFoo)

	// Restoring into an empty cluster should materialize exactly one live
	// copy of the database, so creating it again must fail.
	_, _, destDB, cleanupDest := backupRestoreTestSetupEmpty(t, singleNode, dir, InitManualReplication, base.TestClusterArgs{})
	defer cleanupDest()
	destDB.Exec(t, `RESTORE FROM $1`, LocalFoo)
	destDB.ExpectErr(t, `database "same_name_db" already exists`, `CREATE DATABASE same_name_db`)
}

// TestClusterRevisionHistory tests that cluster backups can be taken with
// revision_history and correctly restore into various points in time.
func TestClusterRevisionHistory(t *testing.T) {
Expand All @@ -748,7 +772,7 @@ func TestClusterRevisionHistory(t *testing.T) {
check func(t *testing.T, runner *sqlutils.SQLRunner)
}

testCases := make([]testCase, 0, 6)
testCases := make([]testCase, 0)
ts := make([]string, 6)

var tc testCase
Expand Down
68 changes: 14 additions & 54 deletions pkg/ccl/backupccl/targets.go
Expand Up @@ -68,6 +68,19 @@ func getRelevantDescChanges(
// point in the interval.
interestingIDs := make(map[descpb.ID]struct{}, len(descs))

isInterestingID := func(id descpb.ID) bool {
// We're interested in changes to all descriptors if we're targeting all
// descriptors except for the system database itself.
if descriptorCoverage == tree.AllDescriptors && id != keys.SystemDatabaseID {
return true
}
// A change to an ID that we're interested in is obviously interesting.
if _, ok := interestingIDs[id]; ok {
return true
}
return false
}

// The descriptors that currently (endTime) match the target spec (desc) are
// obviously interesting to our backup.
for _, i := range descs {
Expand Down Expand Up @@ -104,7 +117,7 @@ func getRelevantDescChanges(
interestingIDs[desc.GetID()] = struct{}{}
}
}
if _, ok := interestingIDs[i.GetID()]; ok {
if isInterestingID(i.GetID()) {
desc := i
// We inject a fake "revision" that captures the starting state for
// matched descriptor, to allow restoring to times before its first rev
Expand All @@ -118,19 +131,6 @@ func getRelevantDescChanges(
}
}

isInterestingID := func(id descpb.ID) bool {
// We're interested in changes to all descriptors if we're targeting all
// descriptors except for the system database itself.
if descriptorCoverage == tree.AllDescriptors && id != keys.SystemDatabaseID {
return true
}
// A change to an ID that we're interested in is obviously interesting.
if _, ok := interestingIDs[id]; ok {
return true
}
return false
}

for _, change := range allChanges {
// A change to an ID that we are interested in is obviously interesting --
// a change is also interesting if it is to a table that has a parent that
Expand Down Expand Up @@ -210,46 +210,6 @@ func getAllDescChanges(
return res, nil
}

// loadAllDescsInInterval returns every descriptor live as of endTime plus any
// descriptor revision recorded between startTime and endTime, deduplicated by
// descriptor ID. When an ID appears in both sets, the endTime version wins
// because the current descriptors are collected first.
func loadAllDescsInInterval(
	ctx context.Context, codec keys.SQLCodec, db *kv.DB, startTime, endTime hlc.Timestamp,
) ([]catalog.Descriptor, error) {
	collected := make(map[descpb.ID]struct{})
	result := make([]catalog.Descriptor, 0)
	// addIfNew appends desc to the result unless a descriptor with the same ID
	// has already been collected.
	addIfNew := func(desc catalog.Descriptor) {
		id := desc.GetID()
		if _, dup := collected[id]; dup {
			return
		}
		collected[id] = struct{}{}
		result = append(result, desc)
	}

	current, err := backupresolver.LoadAllDescs(ctx, codec, db, endTime)
	if err != nil {
		return nil, err
	}
	for _, desc := range current {
		addIfNew(desc)
	}

	revs, err := getAllDescChanges(ctx, codec, db, startTime, endTime, nil /* priorIDs */)
	if err != nil {
		return nil, err
	}
	for _, rev := range revs {
		// rev.Desc is nil when the revision records a deletion; there is
		// nothing to materialize for those.
		if rev.Desc == nil {
			continue
		}
		addIfNew(catalogkv.NewBuilder(rev.Desc).BuildImmutable())
	}

	return result, nil
}

// validateMultiRegionBackup validates that for all tables included in the
// backup, their parent database is also being backed up. For multi-region
// tables, we require that the parent database is included to ensure that the
Expand Down

0 comments on commit 51ea2a5

Please sign in to comment.