Skip to content

Commit

Permalink
slack-vitess-r15.0.5: fix races in Unit Test (Race) CI, fix "old"…
Browse files Browse the repository at this point in the history
… reparent CIs (#356)
  • Loading branch information
timvaillancourt committed May 20, 2024
1 parent 3e35d19 commit a2a622a
Show file tree
Hide file tree
Showing 15 changed files with 58 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -191,4 +191,8 @@ jobs:
mkdir -p /tmp/vtdataroot
source build.env
# skip TestCrossCellDurability test on v14 (as previous). It doesn't setup semi-sync the way this test (from v16) expects
export SKIPTESTCROSSCELLDURABILITY=1
eatmydata -- go run test.go -skip-build -keep-data=false -docker=false -print-log -follow -tag upgrade_downgrade_reparent
Original file line number Diff line number Diff line change
Expand Up @@ -188,4 +188,8 @@ jobs:
mkdir -p /tmp/vtdataroot
source build.env
# skip TestCrossCellDurability test on v14 (as previous). It doesn't setup semi-sync the way this test (from v16) expects
export SKIPTESTCROSSCELLDURABILITY=1
eatmydata -- go run test.go -skip-build -keep-data=false -docker=false -print-log -follow -tag upgrade_downgrade_reparent
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ require (

require (
github.com/bndr/gotabulate v1.1.2
github.com/hashicorp/go-cleanhttp v0.5.1
github.com/hashicorp/go-version v1.6.0
github.com/planetscale/log v0.0.0-20221118170849-fb599bc35c50
github.com/slok/noglog v0.2.0
Expand Down Expand Up @@ -147,7 +148,6 @@ require (
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/googleapis/gax-go/v2 v2.0.5 // indirect
github.com/hashicorp/go-cleanhttp v0.5.1 // indirect
github.com/hashicorp/go-hclog v0.12.0 // indirect
github.com/hashicorp/go-rootcerts v1.0.2 // indirect
github.com/hashicorp/golang-lru v0.5.1 // indirect
Expand Down
2 changes: 1 addition & 1 deletion go/flags/endtoend/vtbackup.txt
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ Usage of vtbackup:
--topo_consul_lock_delay duration LockDelay for consul session. (default 15s)
--topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth")
--topo_consul_lock_session_ttl string TTL for consul session.
--topo_consul_max_conns_per_host int Maximum number of consul connections per host.
--topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250)
--topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100)
--topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s)
--topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30)
Expand Down
2 changes: 1 addition & 1 deletion go/flags/endtoend/vtctld.txt
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ Usage of vtctld:
--topo_consul_lock_delay duration LockDelay for consul session. (default 15s)
--topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth")
--topo_consul_lock_session_ttl string TTL for consul session.
--topo_consul_max_conns_per_host int Maximum number of consul connections per host.
--topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250)
--topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100)
--topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s)
--topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30)
Expand Down
2 changes: 1 addition & 1 deletion go/flags/endtoend/vtgate.txt
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ Usage of vtgate:
--topo_consul_lock_delay duration LockDelay for consul session. (default 15s)
--topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth")
--topo_consul_lock_session_ttl string TTL for consul session.
--topo_consul_max_conns_per_host int Maximum number of consul connections per host.
--topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250)
--topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100)
--topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s)
--topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30)
Expand Down
2 changes: 1 addition & 1 deletion go/flags/endtoend/vtgr.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ Usage of vtgr:
--topo_consul_lock_delay duration LockDelay for consul session. (default 15s)
--topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth")
--topo_consul_lock_session_ttl string TTL for consul session.
--topo_consul_max_conns_per_host int Maximum number of consul connections per host.
--topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250)
--topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100)
--topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s)
--topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30)
Expand Down
2 changes: 1 addition & 1 deletion go/flags/endtoend/vtorc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ Usage of vtorc:
--topo_consul_lock_delay duration LockDelay for consul session. (default 15s)
--topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth")
--topo_consul_lock_session_ttl string TTL for consul session.
--topo_consul_max_conns_per_host int Maximum number of consul connections per host.
--topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250)
--topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100)
--topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s)
--topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30)
Expand Down
2 changes: 1 addition & 1 deletion go/flags/endtoend/vttablet.txt
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ Usage of vttablet:
--topo_consul_lock_delay duration LockDelay for consul session. (default 15s)
--topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth")
--topo_consul_lock_session_ttl string TTL for consul session.
--topo_consul_max_conns_per_host int Maximum number of consul connections per host.
--topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250)
--topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100)
--topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s)
--topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30)
Expand Down
2 changes: 1 addition & 1 deletion go/flags/endtoend/vttestserver.txt
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ Usage of vttestserver:
--topo_consul_lock_delay duration LockDelay for consul session. (default 15s)
--topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth")
--topo_consul_lock_session_ttl string TTL for consul session.
--topo_consul_max_conns_per_host int Maximum number of consul connections per host.
--topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250)
--topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100)
--topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s)
--topo_zk_auth_file string auth to use when connecting to the zk topo server, file contents should be <scheme>:<auth>, e.g., digest:user:pass
Expand Down
13 changes: 13 additions & 0 deletions go/internal/flag/flag.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,15 @@ import (
"os"
"reflect"
"strings"
"sync"

flag "github.com/spf13/pflag"

"vitess.io/vitess/go/vt/log"
)

var flagsMu sync.Mutex

// Parse wraps the standard library's flag.Parse to perform some sanity checking
// and issue deprecation warnings in advance of our move to pflag.
//
Expand All @@ -44,6 +47,8 @@ import (
//
// See VEP-4, phase 1 for details: https://github.com/vitessio/enhancements/blob/c766ea905e55409cddeb666d6073cd2ac4c9783e/veps/vep-4.md#phase-1-preparation
func Parse(fs *flag.FlagSet) {
flagsMu.Lock()
defer flagsMu.Unlock()
preventGlogVFlagFromClobberingVersionFlagShorthand(fs)
fs.AddGoFlagSet(goflag.CommandLine)

Expand Down Expand Up @@ -72,6 +77,8 @@ func Parse(fs *flag.FlagSet) {

// IsFlagProvided returns if the given flag has been provided by the user explicitly or not
func IsFlagProvided(name string) bool {
flagsMu.Lock()
defer flagsMu.Unlock()
found := false
flag.Visit(func(f *flag.Flag) {
if f.Name == name {
Expand Down Expand Up @@ -171,6 +178,8 @@ func filterTestFlags() ([]string, []string) {
// handle `go test` flags correctly. We need to separately parse the test flags using goflags. Additionally flags
// like test.Short() require that goflag.Parse() is called first.
func ParseFlagsForTest() {
flagsMu.Lock()
defer flagsMu.Unlock()
// We need to split up the test flags and the regular app pflags.
// Then hand them off the std flags and pflags parsers respectively.
args, testFlags := filterTestFlags()
Expand Down Expand Up @@ -202,6 +211,8 @@ func Parsed() bool {
// standard library `flag` CommandLine. If found in the latter, it is converted
// to a pflag.Flag first. If found in neither, this function returns nil.
func Lookup(name string) *flag.Flag {
flagsMu.Lock()
defer flagsMu.Unlock()
if f := flag.Lookup(name); f != nil {
return f
}
Expand All @@ -217,6 +228,8 @@ func Lookup(name string) *flag.Flag {
// removed. If no double-dash was specified on the command-line, this is
// equivalent to flag.Args() from the standard library flag package.
func Args() (args []string) {
flagsMu.Lock()
defer flagsMu.Unlock()
doubleDashIdx := -1
for i, arg := range flag.Args() {
if arg == "--" {
Expand Down
8 changes: 8 additions & 0 deletions go/test/endtoend/reparent/plannedreparent/reparent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package plannedreparent
import (
"context"
"fmt"
"os"
"strconv"
"testing"
"time"
Expand Down Expand Up @@ -392,6 +393,10 @@ func TestReparentDoesntHangIfPrimaryFails(t *testing.T) {
// 1. When PRS is run with the cross_cell durability policy setup, then the semi-sync settings on all the tablets are as expected
// 2. Bringing up a new vttablet should have its replication and semi-sync setup correctly without any manual intervention
func TestCrossCellDurability(t *testing.T) {
if os.Getenv("SKIPTESTCROSSCELLDURABILITY") == "1" {
t.Log("skipping due to SKIPTESTCROSSCELLDURABILITY=1")
return
}
defer cluster.PanicHandler(t)
clusterInstance := utils.SetupReparentCluster(t, "cross_cell")
defer utils.TeardownCluster(clusterInstance)
Expand Down Expand Up @@ -443,6 +448,9 @@ func TestFullStatus(t *testing.T) {
require.NoError(t, err)
primaryStatus := &replicationdatapb.FullStatus{}
err = protojson.Unmarshal([]byte(primaryStatusString), primaryStatus)
if err != nil {
t.Logf("TestFullStatus got primaryStatusString: %s", string(primaryStatusString))
}
require.NoError(t, err)
assert.NotEmpty(t, primaryStatus.ServerUuid)
assert.NotEmpty(t, primaryStatus.ServerId)
Expand Down
3 changes: 3 additions & 0 deletions go/vt/servenv/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ type Exporter struct {
name, label string
handleFuncs map[string]*handleFunc
sp *statusPage
mu sync.Mutex
}

// NewExporter creates a new Exporter with name as namespace.
Expand Down Expand Up @@ -154,6 +155,8 @@ func (e *Exporter) URLPrefix() string {
// url remapped from /path to /name/path. If name is empty, the request
// is passed through to http.HandleFunc.
func (e *Exporter) HandleFunc(url string, f func(w http.ResponseWriter, r *http.Request)) {
e.mu.Lock()
defer e.mu.Unlock()
if e.name == "" {
http.HandleFunc(url, f)
return
Expand Down
23 changes: 17 additions & 6 deletions go/vt/topo/consultopo/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"time"

"github.com/hashicorp/consul/api"
"github.com/hashicorp/go-cleanhttp"
"github.com/spf13/pflag"

"vitess.io/vitess/go/vt/log"
Expand All @@ -38,25 +39,32 @@ import (

var (
consulAuthClientStaticFile string
consulConfig = api.DefaultConfig()
// serfHealth is the default check from consul
consulLockSessionChecks = "serfHealth"
consulLockSessionTTL string
consulLockDelay = 15 * time.Second
consulLockDelay = 15 * time.Second
consulMaxConnsPerHost int = 250 // do not use client default of 0/unlimited
consulMaxIdleConns int
consulIdleConnTimeout time.Duration
)

func init() {
servenv.RegisterFlagsForTopoBinaries(registerServerFlags)
}

func registerServerFlags(fs *pflag.FlagSet) {
// cleanhttp.DefaultPooledTransport() is used by the consul api client
// as an *http.Transport. We call it here just to get the default
// values the consul api client will inherit from it later.
defaultConsulPooledTransport := cleanhttp.DefaultPooledTransport()

fs.StringVar(&consulAuthClientStaticFile, "consul_auth_static_file", consulAuthClientStaticFile, "JSON File to read the topos/tokens from.")
fs.StringVar(&consulLockSessionChecks, "topo_consul_lock_session_checks", consulLockSessionChecks, "List of checks for consul session.")
fs.StringVar(&consulLockSessionTTL, "topo_consul_lock_session_ttl", consulLockSessionTTL, "TTL for consul session.")
fs.DurationVar(&consulLockDelay, "topo_consul_lock_delay", consulLockDelay, "LockDelay for consul session.")
fs.IntVar(&consulConfig.Transport.MaxConnsPerHost, "topo_consul_max_conns_per_host", consulConfig.Transport.MaxConnsPerHost, "Maximum number of consul connections per host.")
fs.IntVar(&consulConfig.Transport.MaxIdleConns, "topo_consul_max_idle_conns", consulConfig.Transport.MaxIdleConns, "Maximum number of idle consul connections.")
fs.DurationVar(&consulConfig.Transport.IdleConnTimeout, "topo_consul_idle_conn_timeout", consulConfig.Transport.IdleConnTimeout, "Maximum amount of time to pool idle connections.")
fs.IntVar(&consulMaxConnsPerHost, "topo_consul_max_conns_per_host", consulMaxConnsPerHost, "Maximum number of consul connections per host.")
fs.IntVar(&consulMaxIdleConns, "topo_consul_max_idle_conns", defaultConsulPooledTransport.MaxIdleConns, "Maximum number of idle consul connections.")
fs.DurationVar(&consulIdleConnTimeout, "topo_consul_idle_conn_timeout", defaultConsulPooledTransport.IdleConnTimeout, "Maximum amount of time to pool idle connections.")
}

// ClientAuthCred credential to use for consul clusters
Expand Down Expand Up @@ -135,8 +143,11 @@ func NewServer(cell, serverAddr, root string) (*Server, error) {
if err != nil {
return nil, err
}
cfg := consulConfig
cfg := api.DefaultConfig()
cfg.Address = serverAddr
cfg.Transport.MaxConnsPerHost = consulMaxConnsPerHost
cfg.Transport.MaxIdleConns = consulMaxIdleConns
cfg.Transport.IdleConnTimeout = consulIdleConnTimeout
if creds != nil {
if creds[cell] != nil {
cfg.Token = creds[cell].ACLToken
Expand Down
1 change: 1 addition & 0 deletions go/vt/vttest/topoctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ func (ctl *Topoctl) Setup() error {
if err != nil {
return err
}
defer topoServer.Close()

log.Infof("Creating cells if they don't exist in the provided topo server %s %s %s", ctl.TopoImplementation, ctl.TopoGlobalServerAddress, ctl.TopoGlobalRoot)
// Create cells if it doesn't exist to be idempotent. Should work when we share the same topo server across multiple local clusters.
Expand Down

0 comments on commit a2a622a

Please sign in to comment.