New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
server, version: check and wait if cluster is incompatible #2695
Changes from all commits
f70d62e
fa19c27
4272028
7aaebfc
8489d01
629b974
92448ee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,6 +30,7 @@ import ( | |
"github.com/pingcap/ticdc/pkg/config" | ||
cerror "github.com/pingcap/ticdc/pkg/errors" | ||
"github.com/pingcap/ticdc/pkg/httputil" | ||
"github.com/pingcap/ticdc/pkg/retry" | ||
"github.com/pingcap/ticdc/pkg/util" | ||
"github.com/pingcap/ticdc/pkg/version" | ||
tidbkv "github.com/pingcap/tidb/kv" | ||
|
@@ -143,19 +144,6 @@ func (s *Server) Run(ctx context.Context) error { | |
return errors.Trace(err) | ||
} | ||
|
||
// To not block CDC server startup, we need to warn instead of error | ||
// when TiKV is incompatible. | ||
errorTiKVIncompatible := false | ||
for _, pdEndpoint := range s.pdEndpoints { | ||
err = version.CheckClusterVersion(ctx, s.pdClient, pdEndpoint, conf.Security, errorTiKVIncompatible) | ||
if err == nil { | ||
break | ||
} | ||
} | ||
if err != nil { | ||
return err | ||
} | ||
|
||
kv.InitWorkerPool() | ||
kvStore, err := kv.CreateTiStore(strings.Join(s.pdEndpoints, ","), conf.Security) | ||
if err != nil { | ||
|
@@ -177,6 +165,10 @@ func (s *Server) Run(ctx context.Context) error { | |
return err | ||
} | ||
|
||
// Check cluster version and wait if it's incompatible. | ||
// We start status server first to not block tiup cluster upgrading. | ||
checkAndWaitClusterVersion(ctx, s.pdClient, s.pdEndpoints, conf.Security) | ||
|
||
return s.run(ctx) | ||
} | ||
|
||
|
@@ -356,3 +348,22 @@ func findBestDataDir(candidates []string) (result string, ok bool) { | |
|
||
return result, ok | ||
} | ||
|
||
func checkAndWaitClusterVersion( | ||
ctx context.Context, pdClient pd.Client, pdEndpoints []string, security *config.SecurityConfig, | ||
) { | ||
const backoffOneSecond = 1000 | ||
retry.Do(ctx, func() error { | ||
var err error | ||
for _, pdEndpoint := range pdEndpoints { | ||
err = version.CheckClusterVersion(ctx, pdClient, pdEndpoint, security) | ||
if err == nil { | ||
break | ||
} | ||
Comment on lines
+360
to
+362
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The two scenarios here have conflicting logic.
|
||
log.Warn("check version", zap.Error(err)) | ||
} | ||
return err | ||
}, retry.WithInfiniteTries(), | ||
retry.WithBackoffBaseDelay(backoffOneSecond), retry.WithBackoffMaxDelay(backoffOneSecond)) | ||
return | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -161,7 +161,7 @@ func (f factoryImpl) PdClient() (pd.Client, error) { | |
|
||
// TODO: we need to check all pd endpoint and make sure they belong to the same cluster. | ||
// See also: https://github.com/pingcap/ticdc/pull/2341#discussion_r673021305. | ||
err = version.CheckClusterVersion(ctx, pdClient, pdEndpoints[0], credential, true) | ||
err = version.CheckClusterVersion(ctx, pdClient, pdEndpoints[0], credential) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we need to check all PD endpoints to avoid failure when the first PD endpoint is unreachable. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. #2713 implements your suggestion. |
||
if err != nil { | ||
return nil, err | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,34 +26,29 @@ import ( | |
|
||
"github.com/coreos/go-semver/semver" | ||
"github.com/pingcap/kvproto/pkg/metapb" | ||
"github.com/pingcap/log" | ||
cerror "github.com/pingcap/ticdc/pkg/errors" | ||
"github.com/pingcap/ticdc/pkg/httputil" | ||
"github.com/pingcap/ticdc/pkg/security" | ||
pd "github.com/tikv/pd/client" | ||
"go.uber.org/zap" | ||
) | ||
|
||
var ( | ||
// minPDVersion is the version of the minimal compatible PD. | ||
// TODO bump 5.2.0-alpha once PD releases. | ||
minPDVersion *semver.Version = semver.New("5.1.0-alpha") | ||
// MinPDVersion is the version of the minimal compatible PD. | ||
MinPDVersion *semver.Version = semver.New("5.2.0-alpha") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we remove all the version-related global variables? I think we can instead check whether the major/minor versions of TiKV, PD, and TiCDC match. |
||
// maxPDVersion is the version of the maximum compatible PD. | ||
// Compatible versions are in [minPDVersion, maxPDVersion) | ||
// 9999.0.0 disables the check effectively in the master branch. | ||
maxPDVersion *semver.Version = semver.New("9999.0.0") | ||
|
||
// MinTiKVVersion is the version of the minimal compatible TiKV. | ||
// TODO bump 5.2.0-alpha once TiKV releases. | ||
MinTiKVVersion *semver.Version = semver.New("5.1.0-alpha") | ||
MinTiKVVersion *semver.Version = semver.New("5.2.0-alpha") | ||
// maxTiKVVersion is the version of the maximum compatible TiKV. | ||
// Compatible versions are in [MinTiKVVersion, maxTiKVVersion) | ||
// 9999.0.0 disables the check effectively in the master branch. | ||
maxTiKVVersion *semver.Version = semver.New("9999.0.0") | ||
|
||
// minTiCDCVersion is the version of the minimal compatible TiCDC. | ||
// TODO bump 5.2.0-alpha once TiCDC releases. | ||
minTiCDCVersion *semver.Version = semver.New("5.1.0-alpha") | ||
minTiCDCVersion *semver.Version = semver.New("5.2.0-alpha") | ||
// Compatible versions are in [MinTiCDCVersion, MaxTiCDCVersion) | ||
// 9999.0.0 disables the check effectively in the master branch. | ||
maxTiCDCVersion *semver.Version = semver.New("9999.0.0") | ||
|
@@ -76,14 +71,11 @@ func removeVAndHash(v string) string { | |
|
||
// CheckClusterVersion check TiKV and PD version. | ||
func CheckClusterVersion( | ||
ctx context.Context, client pd.Client, pdHTTP string, credential *security.Credential, errorTiKVIncompat bool, | ||
ctx context.Context, client pd.Client, pdHTTP string, credential *security.Credential, | ||
) error { | ||
err := CheckStoreVersion(ctx, client, 0 /* check all TiKV */) | ||
if err != nil { | ||
if errorTiKVIncompat { | ||
return err | ||
} | ||
log.Warn("check TiKV version failed", zap.Error(err)) | ||
return err | ||
} | ||
|
||
httpCli, err := httputil.NewClient(credential) | ||
|
@@ -127,10 +119,10 @@ func CheckClusterVersion( | |
return cerror.WrapError(cerror.ErrNewSemVersion, err) | ||
} | ||
|
||
minOrd := ver.Compare(*minPDVersion) | ||
minOrd := ver.Compare(*MinPDVersion) | ||
if minOrd < 0 { | ||
arg := fmt.Sprintf("PD %s is not supported, the minimal compatible version is %s", | ||
removeVAndHash(pdVer.Version), minPDVersion) | ||
removeVAndHash(pdVer.Version), MinPDVersion) | ||
return cerror.ErrVersionIncompatible.GenWithStackByArgs(arg) | ||
} | ||
maxOrd := ver.Compare(*maxPDVersion) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Currently, the `/status` API is available as soon as the status server starts. However, #2691 is considering changing this behavior, which would mean the `/status` API becomes available only after the capture info is persisted to etcd.