Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
ERS should remove currently-serving shard primary from consideration
Browse files Browse the repository at this point in the history
This was in the old implementation, and was overlooked in the port to an
encapsulated struct. I've added tests as penance.

Signed-off-by: Andrew Mason <amason@slack-corp.com>
  • Loading branch information
ajm188 committed Feb 19, 2021
1 parent 4e64644 commit c5bbcc7
Show file tree
Hide file tree
Showing 2 changed files with 305 additions and 0 deletions.
42 changes: 42 additions & 0 deletions go/vt/vtctl/reparentutil/emergency_reparenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"vitess.io/vitess/go/vt/logutil"
"vitess.io/vitess/go/vt/topo"
"vitess.io/vitess/go/vt/topo/topoproto"
"vitess.io/vitess/go/vt/topotools"
"vitess.io/vitess/go/vt/topotools/events"
"vitess.io/vitess/go/vt/vterrors"
"vitess.io/vitess/go/vt/vttablet/tmclient"
Expand Down Expand Up @@ -277,6 +278,47 @@ func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *eve
return vterrors.Wrapf(err, "failed to get tablet map for %v/%v: %v", keyspace, shard, err)
}

if opts.NewPrimaryAlias != nil {
primaryElectAliasStr := topoproto.TabletAliasString(opts.NewPrimaryAlias)
primaryElectTabletInfo, ok := tabletMap[primaryElectAliasStr]
if !ok {
return vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "primary-elect tablet %v is not in the shard", primaryElectAliasStr)
}

ev.NewMaster = *primaryElectTabletInfo.Tablet

if topoproto.TabletAliasEqual(shardInfo.MasterAlias, opts.NewPrimaryAlias) {
return vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "primary-elect tablet %v is already the shard primary", primaryElectAliasStr)
}
}

if shardInfo.HasMaster() {
deleteOldPrimary := true
shardPrimaryAliasStr := topoproto.TabletAliasString(shardInfo.MasterAlias)
shardPrimaryTabletInfo, ok := tabletMap[shardPrimaryAliasStr]
if ok {
delete(tabletMap, shardPrimaryAliasStr)
} else {
shardPrimaryTabletInfo, err = erp.ts.GetTablet(ctx, shardInfo.MasterAlias)
if err != nil {
erp.logger.Warningf("cannot read old primary tablet %v, won't touch it: %v", shardPrimaryAliasStr, err)
deleteOldPrimary = false
}
}

if deleteOldPrimary {
ev.OldMaster = *shardPrimaryTabletInfo.Tablet
erp.logger.Infof("deleting old primary", shardPrimaryAliasStr)

ctx, cancel := context.WithTimeout(ctx, opts.WaitReplicasTimeout)
defer cancel()

if err := topotools.DeleteTablet(ctx, erp.ts, shardPrimaryTabletInfo.Tablet); err != nil {
erp.logger.Warningf("failed to delete old primary tablet %v: %v", shardPrimaryAliasStr, err)
}
}
}

statusMap, primaryStatusMap, err := StopReplicationAndBuildStatusMaps(ctx, erp.tmc, ev, tabletMap, opts.WaitReplicasTimeout, opts.IgnoreReplicas, erp.logger)
if err != nil {
return vterrors.Wrapf(err, "failed to stop replication and build status maps: %v", err)
Expand Down
263 changes: 263 additions & 0 deletions go/vt/vtctl/reparentutil/emergency_reparenter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
vtctldatapb "vitess.io/vitess/go/vt/proto/vtctldata"
"vitess.io/vitess/go/vt/proto/vttime"
)

func TestNewEmergencyReparenter(t *testing.T) {
Expand Down Expand Up @@ -338,6 +339,268 @@ func TestEmergencyReparenter_reparentShardLocked(t *testing.T) {
},
shouldErr: false,
},
{
name: "requested primary-elect is already shard primary",
ts: memorytopo.NewServer("zone1"),
shards: []*vtctldatapb.Shard{
{
Keyspace: "testkeyspace",
Name: "-",
Shard: &topodatapb.Shard{
IsMasterServing: true,
MasterAlias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 101,
},
MasterTermStartTime: &vttime.Time{
Seconds: 100,
},
},
},
},
tablets: []*topodatapb.Tablet{
{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 100,
},
Keyspace: "testkeyspace",
Shard: "-",
},
{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 101,
},
Keyspace: "testkeyspace",
Shard: "-",
Type: topodatapb.TabletType_MASTER,
MasterTermStartTime: &vttime.Time{
Seconds: 100,
},
},
{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 102,
},
Keyspace: "testkeyspace",
Shard: "-",
},
},
keyspace: "testkeyspace",
shard: "-",
opts: EmergencyReparentOptions{
NewPrimaryAlias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 101,
},
},
shouldErr: true,
},
{
name: "shard primary is not in tablet map and ignored",
ts: memorytopo.NewServer("zone1"),
tmc: &emergencyReparenterTestTMClient{
PopulateReparentJournalResults: map[string]error{
"zone1-0000000102": nil,
},
PromoteReplicaResults: map[string]struct {
Result string
Error error
}{
"zone1-0000000102": {
Result: "ok",
Error: nil,
},
},
SetMasterResults: map[string]error{
"zone1-0000000100": nil,
"zone1-0000000101": nil,
},
StopReplicationAndGetStatusResults: map[string]struct {
Status *replicationdatapb.Status
StopStatus *replicationdatapb.StopReplicationStatus
Error error
}{
"zone1-0000000100": {
StopStatus: &replicationdatapb.StopReplicationStatus{
Before: &replicationdatapb.Status{},
After: &replicationdatapb.Status{
MasterUuid: "3E11FA47-71CA-11E1-9E33-C80AA9429562",
RelayLogPosition: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-21",
},
},
},
"zone1-0000000101": {
StopStatus: &replicationdatapb.StopReplicationStatus{
Before: &replicationdatapb.Status{},
After: &replicationdatapb.Status{
MasterUuid: "3E11FA47-71CA-11E1-9E33-C80AA9429562",
RelayLogPosition: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-21",
},
},
},
"zone1-0000000102": {
StopStatus: &replicationdatapb.StopReplicationStatus{
Before: &replicationdatapb.Status{},
After: &replicationdatapb.Status{
MasterUuid: "3E11FA47-71CA-11E1-9E33-C80AA9429562",
RelayLogPosition: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-26",
},
},
},
},
WaitForPositionResults: map[string]map[string]error{
"zone1-0000000100": {
"MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-21": nil,
},
"zone1-0000000101": {
"MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-21": nil,
},
"zone1-0000000102": {
"MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-26": nil,
},
},
},
shards: []*vtctldatapb.Shard{
{
Keyspace: "testkeyspace",
Name: "-",
Shard: &topodatapb.Shard{
IsMasterServing: true,
MasterAlias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 404,
},
MasterTermStartTime: &vttime.Time{
Seconds: 100,
},
},
},
},
tablets: []*topodatapb.Tablet{
{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 100,
},
Keyspace: "testkeyspace",
Shard: "-",
},
{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 101,
},
Keyspace: "testkeyspace",
Shard: "-",
},
{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 102,
},
Keyspace: "testkeyspace",
Shard: "-",
},
},
keyspace: "testkeyspace",
shard: "-",
opts: EmergencyReparentOptions{},
shouldErr: false,
},
{
// Here, all our (2) tablets are tied, but the current primary is
// ignored, and we promote zone1-101.
name: "shard primary is deleted from tablet map",
ts: memorytopo.NewServer("zone1"),
tmc: &emergencyReparenterTestTMClient{
PopulateReparentJournalResults: map[string]error{
"zone1-0000000101": nil,
},
PromoteReplicaResults: map[string]struct {
Result string
Error error
}{
"zone1-0000000101": {
Result: "ok",
Error: nil,
},
},
SetMasterResults: map[string]error{
"zone1-0000000100": nil,
},
StopReplicationAndGetStatusResults: map[string]struct {
Status *replicationdatapb.Status
StopStatus *replicationdatapb.StopReplicationStatus
Error error
}{
"zone1-0000000100": {
StopStatus: &replicationdatapb.StopReplicationStatus{
Before: &replicationdatapb.Status{},
After: &replicationdatapb.Status{
MasterUuid: "3E11FA47-71CA-11E1-9E33-C80AA9429562",
RelayLogPosition: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-21",
},
},
},
"zone1-0000000101": {
StopStatus: &replicationdatapb.StopReplicationStatus{
Before: &replicationdatapb.Status{},
After: &replicationdatapb.Status{
MasterUuid: "3E11FA47-71CA-11E1-9E33-C80AA9429562",
RelayLogPosition: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-21",
},
},
},
},
WaitForPositionResults: map[string]map[string]error{
"zone1-0000000100": {
"MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-21": nil,
},
"zone1-0000000101": {
"MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-21": nil,
},
},
},
shards: []*vtctldatapb.Shard{
{
Keyspace: "testkeyspace",
Name: "-",
Shard: &topodatapb.Shard{
IsMasterServing: true,
MasterAlias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 100,
},
},
},
},
tablets: []*topodatapb.Tablet{
{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 100,
},
Keyspace: "testkeyspace",
Shard: "-",
Type: topodatapb.TabletType_MASTER,
},
{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 101,
},
Keyspace: "testkeyspace",
Shard: "-",
},
},
keyspace: "testkeyspace",
shard: "-",
opts: EmergencyReparentOptions{},
shouldErr: false,
},
{
name: "shard not found",
ts: memorytopo.NewServer("zone1"),
Expand Down

0 comments on commit c5bbcc7

Please sign in to comment.