Skip to content

Commit

Permalink
Fix panic in VTOrc (vitessio#10519)
Browse files Browse the repository at this point in the history
* test: reproduce the panic as a unit test

Signed-off-by: Manan Gupta <manan@planetscale.com>

* feat: check ev is not nil before using its fields

Signed-off-by: Manan Gupta <manan@planetscale.com>

* feat: increase timeout of LockShard and wait replicas in VTOrc default config

Signed-off-by: Manan Gupta <manan@planetscale.com>
  • Loading branch information
GuptaManan100 committed Jun 22, 2022
1 parent 3198c87 commit 538ec19
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 5 deletions.
4 changes: 2 additions & 2 deletions go/vt/orchestrator/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,8 +388,8 @@ func newConfiguration() *Configuration {
WebMessage: "",
MaxConcurrentReplicaOperations: 5,
InstanceDBExecContextTimeoutSeconds: 30,
LockShardTimeoutSeconds: 1,
WaitReplicasTimeoutSeconds: 1,
LockShardTimeoutSeconds: 30,
WaitReplicasTimeoutSeconds: 30,
}
}

Expand Down
6 changes: 3 additions & 3 deletions go/vt/orchestrator/logic/topology_recovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -704,7 +704,7 @@ func recoverDeadPrimary(ctx context.Context, analysisEntry inst.ReplicationAnaly

// We should refresh the tablet information again to update our information.
RefreshTablets(true /* forceRefresh */)
if ev.NewPrimary != nil {
if ev != nil && ev.NewPrimary != nil {
promotedReplica, _, _ = inst.ReadInstance(&inst.InstanceKey{
Hostname: ev.NewPrimary.MysqlHostname,
Port: int(ev.NewPrimary.MysqlPort),
Expand Down Expand Up @@ -1681,7 +1681,7 @@ func GracefulPrimaryTakeover(clusterName string, designatedKey *inst.InstanceKey
// For example, if we do not refresh the tablets forcefully and the new primary is found in the cache then its source key is not updated and this spawns off
// PrimaryHasPrimary analysis which runs ERS
RefreshTablets(true /* forceRefresh */)
if ev.NewPrimary != nil {
if ev != nil && ev.NewPrimary != nil {
promotedReplica, _, _ = inst.ReadInstance(&inst.InstanceKey{
Hostname: ev.NewPrimary.MysqlHostname,
Port: int(ev.NewPrimary.MysqlPort),
Expand Down Expand Up @@ -1775,7 +1775,7 @@ func electNewPrimary(ctx context.Context, analysisEntry inst.ReplicationAnalysis
// For example, if we do not refresh the tablets forcefully and the new primary is found in the cache then its source key is not updated and this spawns off
// PrimaryHasPrimary analysis which runs ERS
RefreshTablets(true /* forceRefresh */)
if ev.NewPrimary != nil {
if ev != nil && ev.NewPrimary != nil {
promotedReplica, _, _ = inst.ReadInstance(&inst.InstanceKey{
Hostname: ev.NewPrimary.MysqlHostname,
Port: int(ev.NewPrimary.MysqlPort),
Expand Down
43 changes: 43 additions & 0 deletions go/vt/orchestrator/logic/topology_recovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,20 @@ limitations under the License.
package logic

import (
"context"
"testing"
"time"

"github.com/patrickmn/go-cache"
"github.com/stretchr/testify/require"

"vitess.io/vitess/go/vt/orchestrator/db"
"vitess.io/vitess/go/vt/orchestrator/inst"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
"vitess.io/vitess/go/vt/topo/memorytopo"

// import the gRPC client implementation for tablet manager
_ "vitess.io/vitess/go/vt/vttablet/grpctmclient"
)

func TestAnalysisEntriesHaveSameRecovery(t *testing.T) {
Expand Down Expand Up @@ -85,3 +92,39 @@ func TestAnalysisEntriesHaveSameRecovery(t *testing.T) {
})
}
}

func TestElectNewPrimaryPanic(t *testing.T) {
orcDb, err := db.OpenOrchestrator()
require.NoError(t, err)
oldTs := ts
defer func() {
ts = oldTs
_, err = orcDb.Exec("delete from vitess_tablet")
require.NoError(t, err)
}()

tablet := &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 100,
},
Hostname: "localhost",
MysqlHostname: "localhost",
MysqlPort: 1200,
Keyspace: "ks",
Shard: "-",
Type: topodatapb.TabletType_REPLICA,
}
err = inst.SaveTablet(tablet)
require.NoError(t, err)
analysisEntry := inst.ReplicationAnalysis{
AnalyzedInstanceKey: inst.InstanceKey{
Hostname: tablet.MysqlHostname,
Port: int(tablet.MysqlPort),
},
}
ts = memorytopo.NewServer("zone1")
recoveryAttempted, _, err := electNewPrimary(context.Background(), analysisEntry, nil, false, false)
require.True(t, recoveryAttempted)
require.Error(t, err)
}

0 comments on commit 538ec19

Please sign in to comment.