From 95f6ebd7a7630d4a1a92b30f41fc6c272d4276b7 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Thu, 27 Aug 2020 14:44:12 +0800 Subject: [PATCH 1/7] optimize log and make their specification consistent Signed-off-by: Ryan Leung --- client/base_client.go | 14 ++-- client/client.go | 6 +- pkg/apiutil/serverapi/middleware.go | 4 +- pkg/dashboard/adapter/manager.go | 4 +- pkg/errs/errno.go | 99 +++++++++++++++++------------ pkg/etcdutil/etcdutil.go | 10 +-- pkg/grpcutil/grpcutil.go | 10 +-- pkg/metricutil/metricutil.go | 2 +- server/election/leadership.go | 8 +-- server/kv/etcd_kv.go | 18 +++--- server/member/member.go | 21 +++--- 11 files changed, 110 insertions(+), 86 deletions(-) diff --git a/client/base_client.go b/client/base_client.go index 015aec87b1b..39eb88206e7 100644 --- a/client/base_client.go +++ b/client/base_client.go @@ -137,7 +137,7 @@ func (c *baseClient) leaderLoop() { } if err := c.updateLeader(); err != nil { - log.Error("[pd] failed updateLeader", errs.ZapError(errs.ErrUpdateLeader, err)) + log.Error("[pd] failed updateLeader", errs.ZapError(err)) } } } @@ -177,7 +177,7 @@ func (c *baseClient) initClusterID() error { members, err := c.getMembers(timeoutCtx, u) timeoutCancel() if err != nil || members.GetHeader() == nil { - log.Warn("[pd] failed to get cluster id", zap.String("url", u), errs.ZapError(errs.ErrGetClusterID, err)) + log.Warn("[pd] failed to get cluster id", zap.String("url", u), zap.Error(err)) continue } c.clusterID = members.GetHeader().GetClusterId() @@ -191,7 +191,7 @@ func (c *baseClient) updateLeader() error { ctx, cancel := context.WithTimeout(c.ctx, updateLeaderTimeout) members, err := c.getMembers(ctx, u) if err != nil { - log.Warn("[pd] cannot update leader", zap.String("address", u), errs.ZapError(errs.ErrUpdateLeader, err)) + log.Warn("[pd] cannot update leader", zap.String("address", u), zap.Error(err)) } cancel() if err != nil || members.GetLeader() == nil || len(members.GetLeader().GetClientUrls()) == 0 { @@ -205,7 +205,7 @@ func (c *baseClient) updateLeader() error { c.updateURLs(members.GetMembers()) return c.switchLeader(members.GetLeader().GetClientUrls()) } - return errors.Errorf("failed to get leader from %v", c.urls) + return errs.ErrClientGetLeader.FastGenByArgs(c.urls) } func (c *baseClient) getMembers(ctx context.Context, url string) (*pdpb.GetMembersResponse, error) { @@ -216,7 +216,7 @@ func (c *baseClient) getMembers(ctx context.Context, url string) (*pdpb.GetMembe members, err := pdpb.NewPDClient(cc).GetMembers(ctx, &pdpb.GetMembersRequest{}) if err != nil { attachErr := errors.Errorf("error:%s target:%s status:%s", err, cc.Target(), cc.GetState().String()) - return nil, errors.WithStack(attachErr) + return nil, errs.ErrClientGetMember.GenWithStackByArgs(attachErr) } return members, nil } @@ -273,13 +273,13 @@ func (c *baseClient) getOrCreateGRPCConn(addr string) (*grpc.ClientConn, error) KeyPath: c.security.KeyPath, }.ToTLSConfig() if err != nil { - return nil, errors.WithStack(err) + return nil, err } dctx, cancel := context.WithTimeout(c.ctx, dialTimeout) defer cancel() cc, err := grpcutil.GetClientConn(dctx, addr, tlsCfg, c.gRPCDialOptions...) if err != nil { - return nil, errors.WithStack(err) + return nil, err } c.connMu.Lock() defer c.connMu.Unlock() diff --git a/client/client.go b/client/client.go index 27f351dd3b5..ffdbb250a44 100644 --- a/client/client.go +++ b/client/client.go @@ -182,7 +182,7 @@ func (c *client) tsCancelLoop() { case d := <-c.tsDeadlineCh: select { case <-d.timer: - log.Error("tso request is canceled due to timeout", zap.Error(errs.ErrGetTSO.FastGenByArgs())) + log.Error("tso request is canceled due to timeout", errs.ZapError(errs.ErrClientGetTSOTimeout.FastGenByArgs())) d.cancel() case <-d.done: case <-ctx.Done(): @@ -255,7 +255,7 @@ func (c *client) tsLoop() { return default: } - log.Error("[pd] create tso stream error", errs.ZapError(errs.ErrCreateTSOStream, err)) + log.Error("[pd] create tso stream error", errs.ZapError(errs.ErrClientCreateTSOStream, err)) c.ScheduleCheckLeader() cancel() c.revokeTSORequest(errors.WithStack(err)) @@ -302,7 +302,7 @@ func (c *client) tsLoop() { return default: } - log.Error("[pd] getTS error", errs.ZapError(errs.ErrGetTSO, err)) + log.Error("[pd] getTS error", errs.ZapError(errs.ErrClientGetTSO, err)) c.ScheduleCheckLeader() cancel() stream, cancel = nil, nil diff --git a/pkg/apiutil/serverapi/middleware.go b/pkg/apiutil/serverapi/middleware.go index 64dcf5cfa8d..f6114cb1175 100644 --- a/pkg/apiutil/serverapi/middleware.go +++ b/pkg/apiutil/serverapi/middleware.go @@ -146,14 +146,14 @@ func (p *customReverseProxies) ServeHTTP(w http.ResponseWriter, r *http.Request) resp, err := p.client.Do(r) if err != nil { - log.Error("request failed", errs.ZapError(errs.ErrRequestHTTP, err)) + log.Error("request failed", errs.ZapError(errs.ErrSendRequest, err)) continue } b, err := ioutil.ReadAll(resp.Body) resp.Body.Close() if err != nil { - log.Error("read failed", errs.ZapError(errs.ErrReadHTTPBody, err)) + log.Error("read failed", errs.ZapError(errs.ErrIORead, err)) continue } diff --git a/pkg/dashboard/adapter/manager.go b/pkg/dashboard/adapter/manager.go index 8b9761d0aa4..7bb6fcc8202 100644 --- a/pkg/dashboard/adapter/manager.go +++ b/pkg/dashboard/adapter/manager.go @@ -196,7 +196,7 @@ func (m *Manager) startService() { return } if err := m.service.Start(m.ctx); err != nil { - log.Error("Can not start dashboard server", errs.ZapError(errs.ErrStartDashboard, err)) + log.Error("Can not start dashboard server", errs.ZapError(errs.ErrDashboardStart, err)) } else { log.Info("Dashboard server is started") } @@ -207,7 +207,7 @@ func (m *Manager) stopService() { return } if err := m.service.Stop(context.Background()); err != nil { - log.Error("Stop dashboard server error", errs.ZapError(errs.ErrStopDashboard, err)) + log.Error("Stop dashboard server error", errs.ZapError(errs.ErrDashboardStop, err)) } else { log.Info("Dashboard server is stopped") } diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index 76bc8dde156..bd4319d81a8 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -15,6 +15,7 @@ package errs import "github.com/pingcap/errors" +// The internal error which is generated in PD project. // tso errors var ( ErrInvalidTimestamp = errors.Normalize("invalid timestamp", errors.RFCCodeText("PD:tso:ErrInvalidTimestamp")) @@ -22,31 +23,19 @@ var ( ErrIncorrectSystemTime = errors.Normalize("incorrect system time", errors.RFCCodeText("PD:tso:ErrIncorrectSystemTime")) ) -// adapter errors -var ( - ErrStartDashboard = errors.Normalize("start dashboard failed", errors.RFCCodeText("PD:adapter:ErrStartDashboard")) - ErrStopDashboard = errors.Normalize("stop dashboard failed", errors.RFCCodeText("PD:adapter:ErrStopDashboard")) -) - // member errors var ( - ErretcdLeaderNotFound = errors.Normalize("etcd leader not found", errors.RFCCodeText("PD:member:ErretcdLeaderNotFound")) - ErrGetLeader = errors.Normalize("get leader failed", errors.RFCCodeText("PD:member:ErrGetLeader")) - ErrDeleteLeaderKey = errors.Normalize("delete leader key failed", errors.RFCCodeText("PD:member:ErrDeleteLeaderKey")) - ErrLoadLeaderPriority = errors.Normalize("load leader priority failed", errors.RFCCodeText("PD:member:ErrLoadLeaderPriority")) - ErrLoadetcdLeaderPriority = errors.Normalize("load etcd leader priority failed", errors.RFCCodeText("PD:member:ErrLoadetcdLeaderPriority")) - ErrTransferetcdLeader = errors.Normalize("transfer etcd leader failed", errors.RFCCodeText("PD:member:ErrTransferetcdLeader")) - ErrWatcherCancel = errors.Normalize("watcher canceled", errors.RFCCodeText("PD:member:ErrWatcherCancel")) - ErrMarshalLeader = errors.Normalize("marshal leader failed", errors.RFCCodeText("PD:member:ErrMarshalLeader")) + ErretcdLeaderNotFound = errors.Normalize("etcd leader not found", errors.RFCCodeText("PD:member:ErretcdLeaderNotFound")) + ErrMarshalLeader = errors.Normalize("marshal leader failed", errors.RFCCodeText("PD:member:ErrMarshalLeader")) ) // client errors var ( - ErrCloseGRPCConn = errors.Normalize("close gRPC connection failed", errors.RFCCodeText("PD:client:ErrCloseGRPCConn")) - ErrUpdateLeader = errors.Normalize("update leader failed", errors.RFCCodeText("PD:client:ErrUpdateLeader")) - ErrCreateTSOStream = errors.Normalize("create TSO stream failed", errors.RFCCodeText("PD:client:ErrCreateTSOStream")) - ErrGetTSO = errors.Normalize("get TSO failed", errors.RFCCodeText("PD:client:ErrGetTSO")) - ErrGetClusterID = errors.Normalize("get cluster ID failed", errors.RFCCodeText("PD:client:ErrGetClusterID")) + ErrClientCreateTSOStream = errors.Normalize("create TSO stream failed", errors.RFCCodeText("PD:client:ErrClientCreateTSOStream")) + ErrClientGetTSOTimeout = errors.Normalize("get TSO timeout", errors.RFCCodeText("PD:client:ErrClientGetTSOTimeout")) + ErrClientGetTSO = errors.Normalize("get TSO failed", errors.RFCCodeText("PD:client:ErrClientGetTSO")) + ErrClientGetLeader = errors.Normalize("get leader from %v error", errors.RFCCodeText("PD:client:ErrClientGetLeader")) + ErrClientGetMember = errors.Normalize("get member failed", errors.RFCCodeText("PD:client:ErrClientGetMember")) ) // placement errors @@ -57,19 +46,6 @@ var ( ErrBuildRuleList = errors.Normalize("build rule list failed, %s", errors.RFCCodeText("PD:placement:ErrBuildRuleList")) ) -// kv errors -var ( - ErrEtcdKVSave = errors.Normalize("etcd KV save failed", errors.RFCCodeText("PD:kv:ErrEtcdKVSave")) - ErrEtcdKVRemove = errors.Normalize("etcd KV remove failed", errors.RFCCodeText("PD:kv:ErrEtcdKVRemove")) -) - -// apiutil errors -var ( - ErrRequestHTTP = errors.Normalize("send HTTP request failed", errors.RFCCodeText("PD:apiutil:ErrRequestHTTP")) - ErrReadHTTPBody = errors.Normalize("read HTTP body failed", errors.RFCCodeText("PD:apiutil:ErrReadHTTPBody")) - ErrWriteHTTPBody = errors.Normalize("write HTTP body failed", errors.RFCCodeText("PD:apiutil:ErrWriteHTTPBody")) -) - // cluster errors var ( ErrPersistStore = errors.Normalize("failed to persist store", errors.RFCCodeText("PD:cluster:ErrPersistStore")) @@ -79,19 +55,64 @@ var ( ErrDeleteStore = errors.Normalize("failed to delete store", errors.RFCCodeText("PD:cluster:ErrDeleteStore")) ErrPersistClusterVersion = errors.Normalize("persist cluster version meet error", errors.RFCCodeText("PD:cluster:ErrPersistClusterVersion")) ErrGetMembers = errors.Normalize("get members failed", errors.RFCCodeText("PD:cluster:ErrGetMembers")) - // TODO: ErrNewHTTPRequest may not be suitable to put in cluster category - ErrNewHTTPRequest = errors.Normalize("new HTTP request failed", errors.RFCCodeText("PD:cluster:ErrNewHTTPRequest")) ) -// metricutil errors +// grpcutil errors +var ( + ErrSecurityConfig = errors.Normalize("security config error: %s", errors.RFCCodeText("PD:grpcutil:ErrSecurityConfig")) +) + +// The third-party project error. +// url errors +var ( + ErrURLParse = errors.Normalize("parse url error", errors.RFCCodeText("PD:url:ErrURLParse")) +) + +// grpc errors +var ( + ErrGRPCDial = errors.Normalize("dial error", errors.RFCCodeText("PD:grpc:ErrGRPCDial")) + ErrCloseGRPCConn = errors.Normalize("close gRPC connection failed", errors.RFCCodeText("PD:grpc:ErrCloseGRPCConn")) +) + +// etcd errors +var ( + ErrEtcdTxn = errors.Normalize("etcd Txn failed, %s", errors.RFCCodeText("PD:etcd:ErrEtcdKVSave")) + ErrEtcdKVPut = errors.Normalize("etcd KV put failed", errors.RFCCodeText("PD:etcd:ErrEtcdKVPut")) + ErrEtcdKVDelete = errors.Normalize("etcd KV delete failed", errors.RFCCodeText("PD:etcd:ErrEtcdKVDelete")) + ErrEtcdKVGet = errors.Normalize("etcd KV get failed", errors.RFCCodeText("PD:etcd:ErrEtcdKVGet")) + ErrEtcdGetCluster = errors.Normalize("etcd get cluster from remote peer failed", errors.RFCCodeText("PD:etcd:ErrEtcdGetCluster")) + ErrEtcdMoveLeader = errors.Normalize("etcd move leader error", errors.RFCCodeText("PD:etcd:ErrEtcdMoveLeader")) + ErrEtcdTLSConfig = errors.Normalize("etcd TLS config error", errors.RFCCodeText("PD:etcd:ErrEtcdTLSConfig")) + ErrEtcdGetProtoMsgWithModRev = errors.Normalize("etcd get proto message with mod rev error", errors.RFCCodeText("PD:etcd:ErrEtcdGetProtoMsgWithModRev")) + ErrEtcdWatcherCancel = errors.Normalize("watcher canceled", errors.RFCCodeText("PD:etcd:ErrEtcdWatcherCancel")) +) + +// dashboard errors +var ( + ErrDashboardStart = errors.Normalize("start dashboard failed", errors.RFCCodeText("PD:dashboard:ErrDashboardStart")) + ErrDashboardStop = errors.Normalize("stop dashboard failed", errors.RFCCodeText("PD:dashboard:ErrDashboardStop")) +) + +// strconv errors +var ( + ErrStrconvParseInt = errors.Normalize("parse int error", errors.RFCCodeText("PD:strconv:ErrStrconvParseInt")) +) + +// prometheus errors +var ( + ErrPrometheusPushMetrics = errors.Normalize("push metrics to gateway failed", errors.RFCCodeText("PD:prometheus:ErrPrometheusPushMetrics")) +) + +// http errors var ( - ErrPushGateway = errors.Normalize("push metrics to gateway failed", errors.RFCCodeText("PD:metricutil:ErrPushGateway")) + ErrSendRequest = errors.Normalize("send HTTP request failed", errors.RFCCodeText("PD:http:ErrSendRequest")) + ErrWriteHTTPBody = errors.Normalize("write HTTP body failed", errors.RFCCodeText("PD:http:ErrWriteHTTPBody")) + ErrNewHTTPRequest = errors.Normalize("new HTTP request failed", errors.RFCCodeText("PD:http:ErrNewHTTPRequest")) ) -// etcdutil errors +// ioutil error var ( - ErrLoadValue = errors.Normalize("load value from etcd failed", errors.RFCCodeText("PD:etcdutil:ErrLoadValue")) - ErrGetCluster = errors.Normalize("get cluster from remote peer failed", errors.RFCCodeText("PD:etcdutil:ErrGetCluster")) + ErrIORead = errors.Normalize("IO read err", errors.RFCCodeText("PD:ioutil:ErrIORead")) ) // scheduler errors diff --git a/pkg/etcdutil/etcdutil.go b/pkg/etcdutil/etcdutil.go index 94b7c18efcb..8204db7b778 100644 --- a/pkg/etcdutil/etcdutil.go +++ b/pkg/etcdutil/etcdutil.go @@ -62,7 +62,7 @@ func CheckClusterID(localClusterID types.ID, um types.URLsMap, tlsConfig *tls.Co trp.CloseIdleConnections() if gerr != nil { // Do not return error, because other members may be not ready. - log.Error("failed to get cluster from remote", errs.ZapError(errs.ErrGetCluster, gerr)) + log.Error("failed to get cluster from remote", errs.ZapError(errs.ErrEtcdGetCluster, gerr)) continue } @@ -105,14 +105,14 @@ func EtcdKVGet(c *clientv3.Client, key string, opts ...clientv3.OpOption) (*clie start := time.Now() resp, err := clientv3.NewKV(c).Get(ctx, key, opts...) - if err != nil { - log.Error("load from etcd meet error", errs.ZapError(errs.ErrLoadValue, err)) - } if cost := time.Since(start); cost > DefaultSlowRequestTime { log.Warn("kv gets too slow", zap.String("request-key", key), zap.Duration("cost", cost), zap.Error(err)) } - return resp, errors.WithStack(err) + if err != nil { + return resp, errs.ErrEtcdKVGet.GenWithStackByArgs(err) + } + return resp, nil } // GetValue gets value with key from etcd. diff --git a/pkg/grpcutil/grpcutil.go b/pkg/grpcutil/grpcutil.go index 72b96bc0021..0874b6cd1cd 100644 --- a/pkg/grpcutil/grpcutil.go +++ b/pkg/grpcutil/grpcutil.go @@ -18,7 +18,7 @@ import ( "crypto/tls" "net/url" - "github.com/pingcap/errors" + "github.com/tikv/pd/pkg/errs" "go.etcd.io/etcd/pkg/transport" "google.golang.org/grpc" "google.golang.org/grpc/credentials" @@ -55,7 +55,7 @@ func (s SecurityConfig) ToTLSConfig() (*tls.Config, error) { tlsConfig, err := tlsInfo.ClientConfig() if err != nil { - return nil, errors.WithStack(err) + return nil, errs.ErrEtcdTLSConfig.GenWithStackByArgs(err) } return tlsConfig, nil } @@ -68,7 +68,7 @@ func (s SecurityConfig) GetOneAllowedCN() (string, error) { case 0: return "", nil default: - return "", errors.New("Currently only supports one CN") + return "", errs.ErrSecurityConfig.FastGenByArgs("only supports one CN") } } @@ -93,11 +93,11 @@ func GetClientConn(ctx context.Context, addr string, tlsCfg *tls.Config, do ...g } u, err := url.Parse(addr) if err != nil { - return nil, errors.WithStack(err) + return nil, errs.ErrURLParse.GenWithStackByArgs(err) } cc, err := grpc.DialContext(ctx, u.Host, append(do, opt)...) if err != nil { - return nil, errors.WithStack(err) + return nil, errs.ErrGRPCDial.GenWithStackByArgs(err) } return cc, nil } diff --git a/pkg/metricutil/metricutil.go b/pkg/metricutil/metricutil.go index a1e5d8e7846..d5de5c12ca2 100644 --- a/pkg/metricutil/metricutil.go +++ b/pkg/metricutil/metricutil.go @@ -68,7 +68,7 @@ func prometheusPushClient(job, addr string, interval time.Duration) { for { err := pusher.Push() if err != nil { - log.Error("could not push metrics to Prometheus Pushgateway", errs.ZapError(errs.ErrPushGateway, err)) + log.Error("could not push metrics to Prometheus Pushgateway", errs.ZapError(errs.ErrPrometheusPushMetrics, err)) } time.Sleep(interval) diff --git a/server/election/leadership.go b/server/election/leadership.go index c3edac16b56..b470fe5ecb9 100644 --- a/server/election/leadership.go +++ b/server/election/leadership.go @@ -34,7 +34,7 @@ func GetLeader(c *clientv3.Client, leaderPath string) (*pdpb.Member, int64, erro leader := &pdpb.Member{} ok, rev, err := etcdutil.GetProtoMsgWithModRev(c, leaderPath, leader) if err != nil { - return nil, 0, err + return nil, 0, errs.ErrEtcdGetProtoMsgWithModRev.Wrap(err).FastGenWithCause() } if !ok { return nil, 0, nil @@ -138,10 +138,10 @@ func (ls *Leadership) DeleteLeader() error { // delete leader itself and let others start a new election again. resp, err := ls.LeaderTxn().Then(clientv3.OpDelete(ls.leaderKey)).Commit() if err != nil { - return errors.WithStack(err) + return errs.ErrEtcdKVDelete.GenWithStackByArgs(err) } if !resp.Succeeded { - return errors.New("resign leader failed, we are not leader already") + return errs.ErrEtcdTxn.FastGenByArgs("not leader already") } return nil @@ -174,7 +174,7 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) { zap.Int64("revision", revision), zap.String("leaderKey", ls.leaderKey), zap.String("purpose", ls.purpose), - errs.ZapError(errs.ErrWatcherCancel, wresp.Err())) + errs.ZapError(errs.ErrEtcdWatcherCancel, wresp.Err())) return } diff --git a/server/kv/etcd_kv.go b/server/kv/etcd_kv.go index 74caef41539..83e4af7ee67 100644 --- a/server/kv/etcd_kv.go +++ b/server/kv/etcd_kv.go @@ -32,10 +32,6 @@ const ( slowRequestTime = 1 * time.Second ) -var ( - errTxnFailed = errors.New("failed to commit transaction") -) - type etcdKVBase struct { client *clientv3.Client rootPath string @@ -93,11 +89,12 @@ func (kv *etcdKVBase) Save(key, value string) error { txn := NewSlowLogTxn(kv.client) resp, err := txn.Then(clientv3.OpPut(key, value)).Commit() if err != nil { - log.Error("save to etcd meet error", zap.String("key", key), zap.String("value", value), errs.ZapError(errs.ErrEtcdKVSave, err)) - return errors.WithStack(err) + err = errs.ErrEtcdKVPut.GenWithStackByArgs(err) + log.Error("save to etcd meet error", zap.String("key", key), zap.String("value", value), errs.ZapError(err)) + return err } if !resp.Succeeded { - return errors.WithStack(errTxnFailed) + return errs.ErrEtcdTxn.FastGenByArgs() } return nil } @@ -108,11 +105,12 @@ func (kv *etcdKVBase) Remove(key string) error { txn := NewSlowLogTxn(kv.client) resp, err := txn.Then(clientv3.OpDelete(key)).Commit() if err != nil { - log.Error("remove from etcd meet error", zap.String("key", key), errs.ZapError(errs.ErrEtcdKVRemove, err)) - return errors.WithStack(err) + err = errs.ErrEtcdKVDelete.GenWithStackByArgs(err) + log.Error("remove from etcd meet error", zap.String("key", key), errs.ZapError(err)) + return err } if !resp.Succeeded { - return errors.WithStack(errTxnFailed) + return errs.ErrEtcdTxn.FastGenByArgs() } return nil } diff --git a/server/member/member.go b/server/member/member.go index 845a0c94395..fc17f5b8860 100644 --- a/server/member/member.go +++ b/server/member/member.go @@ -157,14 +157,15 @@ func (m *Member) IsStillLeader() bool { // CheckLeader checks returns true if it is needed to check later. func (m *Member) CheckLeader(name string) (*pdpb.Member, int64, bool) { if m.GetEtcdLeader() == 0 { - log.Error("no etcd leader, check pd leader later", zap.Error(errs.ErretcdLeaderNotFound.FastGenByArgs())) + err := errs.ErretcdLeaderNotFound.FastGenByArgs() + log.Error("no etcd leader, check pd leader later", errs.ZapError(err)) time.Sleep(200 * time.Millisecond) return nil, 0, true } leader, rev, err := election.GetLeader(m.client, m.GetLeaderPath()) if err != nil { - log.Error("getting pd leader meets error", errs.ZapError(errs.ErrGetLeader, err)) + log.Error("getting pd leader meets error", errs.ZapError(err)) time.Sleep(200 * time.Millisecond) return nil, 0, true } @@ -174,7 +175,7 @@ func (m *Member) CheckLeader(name string) (*pdpb.Member, int64, bool) { // in previous CampaignLeader. We should delete the leadership and campaign again. log.Warn("the pd leader has not changed, delete and campaign again", zap.Stringer("old-pd-leader", leader)) if err = m.leadership.DeleteLeader(); err != nil { - log.Error("deleting pd leader key meets error", errs.ZapError(errs.ErrDeleteLeaderKey, err)) + log.Error("deleting pd leader key meets error", errs.ZapError(err)) time.Sleep(200 * time.Millisecond) return nil, 0, true } @@ -205,18 +206,18 @@ func (m *Member) CheckPriority(ctx context.Context) { } myPriority, err := m.GetMemberLeaderPriority(m.ID()) if err != nil { - log.Error("failed to load etcd leader priority", errs.ZapError(errs.ErrLoadLeaderPriority, err)) + log.Error("failed to load etcd leader priority", errs.ZapError(err)) return } leaderPriority, err := m.GetMemberLeaderPriority(etcdLeader) if err != nil { - log.Error("failed to load etcd leader priority", errs.ZapError(errs.ErrLoadetcdLeaderPriority, err)) + log.Error("failed to load etcd leader priority", errs.ZapError(err)) return } if myPriority > leaderPriority { err := m.MoveEtcdLeader(ctx, etcdLeader, m.ID()) if err != nil { - log.Error("failed to transfer etcd leader", errs.ZapError(errs.ErrTransferetcdLeader, err)) + log.Error("failed to transfer etcd leader", errs.ZapError(err)) } else { log.Info("transfer etcd leader", zap.Uint64("from", etcdLeader), @@ -229,7 +230,11 @@ func (m *Member) CheckPriority(ctx context.Context) { func (m *Member) MoveEtcdLeader(ctx context.Context, old, new uint64) error { moveCtx, cancel := context.WithTimeout(ctx, moveLeaderTimeout) defer cancel() - return errors.WithStack(m.etcd.Server.MoveLeader(moveCtx, old, new)) + err := m.etcd.Server.MoveLeader(moveCtx, old, new) + if err != nil { + return errs.ErrEtcdMoveLeader.GenWithStackByArgs(err) + } + return nil } // GetEtcdLeader returns the etcd leader ID. @@ -326,7 +331,7 @@ func (m *Member) GetMemberLeaderPriority(id uint64) (int, error) { } priority, err := strconv.ParseInt(string(res.Kvs[0].Value), 10, 32) if err != nil { - return 0, errors.WithStack(err) + return 0, errs.ErrStrconvParseInt.GenWithStackByArgs(err) } return int(priority), nil } From a621e990f866c8d9634baa3c9a781648c56a0f90 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Thu, 27 Aug 2020 19:06:09 +0800 Subject: [PATCH 2/7] change the log which contains a stack Signed-off-by: Ryan Leung --- client/base_client.go | 2 +- pkg/errs/errs_test.go | 17 ++++++++++++++++- pkg/etcdutil/etcdutil.go | 2 +- pkg/grpcutil/grpcutil.go | 6 +++--- server/election/leadership.go | 2 +- server/kv/etcd_kv.go | 4 ++-- server/member/member.go | 4 ++-- 7 files changed, 26 insertions(+), 11 deletions(-) diff --git a/client/base_client.go b/client/base_client.go index 39eb88206e7..45b226dd8d4 100644 --- a/client/base_client.go +++ b/client/base_client.go @@ -216,7 +216,7 @@ func (c *baseClient) getMembers(ctx context.Context, url string) (*pdpb.GetMembe members, err := pdpb.NewPDClient(cc).GetMembers(ctx, &pdpb.GetMembersRequest{}) if err != nil { attachErr := errors.Errorf("error:%s target:%s status:%s", err, cc.Target(), cc.GetState().String()) - return nil, errs.ErrClientGetMember.GenWithStackByArgs(attachErr) + return nil, errs.ErrClientGetMember.Wrap(attachErr).GenWithStackByCause() } return members, nil } diff --git a/pkg/errs/errs_test.go b/pkg/errs/errs_test.go index 9718b42b575..e70a1c4eb9b 100644 --- a/pkg/errs/errs_test.go +++ b/pkg/errs/errs_test.go @@ -16,6 +16,7 @@ package errs import ( "bytes" "fmt" + "strconv" "strings" "testing" @@ -89,7 +90,6 @@ func (s *testErrorSuite) TestError(c *C) { err := errors.New("test error") log.Error("test", ZapError(ErrInvalidTimestamp, err)) rfc = `[error="[PD:tso:ErrInvalidTimestamp] test error"]` - fmt.Println(lg.Message()) c.Assert(strings.Contains(lg.Message(), rfc), IsTrue) } @@ -127,3 +127,18 @@ func (s *testErrorSuite) TestZapError(c *C) { log.Info("test", ZapError(err1)) log.Info("test", ZapError(err1, err)) } + +func (s *testErrorSuite) TestErrorWithStack(c *C) { + conf := &log.Config{Level: "debug", File: log.FileLogConfig{}, DisableTimestamp: true} + lg := newZapTestLogger(conf) + log.ReplaceGlobals(lg.Logger, nil) + + _, err := strconv.ParseUint("-42", 10, 64) + log.Error("test", ZapError(ErrStrconvParseInt.Wrap(err).GenWithStackByCause())) + m1 := lg.Message() + log.Error("test", zap.Error(errors.WithStack(err))) + m2 := lg.Message() + // This test is based on line number and the first log is in line 102, the second is in line 104. + // So they have the same length stack. Move this test to another place need to change the corresponding length. + c.Assert(len(m1[strings.Index(m1, "[stack="):]), Equals, len(m2[strings.Index(m2, "[stack="):])) +} diff --git a/pkg/etcdutil/etcdutil.go b/pkg/etcdutil/etcdutil.go index 8204db7b778..ea11e678cd2 100644 --- a/pkg/etcdutil/etcdutil.go +++ b/pkg/etcdutil/etcdutil.go @@ -110,7 +110,7 @@ func EtcdKVGet(c *clientv3.Client, key string, opts ...clientv3.OpOption) (*clie } if err != nil { - return resp, errs.ErrEtcdKVGet.GenWithStackByArgs(err) + return resp, errs.ErrEtcdKVGet.Wrap(err).GenWithStackByCause() } return resp, nil } diff --git a/pkg/grpcutil/grpcutil.go b/pkg/grpcutil/grpcutil.go index 0874b6cd1cd..1f7455db0e3 100644 --- a/pkg/grpcutil/grpcutil.go +++ b/pkg/grpcutil/grpcutil.go @@ -55,7 +55,7 @@ func (s SecurityConfig) ToTLSConfig() (*tls.Config, error) { tlsConfig, err := tlsInfo.ClientConfig() if err != nil { - return nil, errs.ErrEtcdTLSConfig.GenWithStackByArgs(err) + return nil, errs.ErrEtcdTLSConfig.Wrap(err).GenWithStackByCause() } return tlsConfig, nil } @@ -93,11 +93,11 @@ func GetClientConn(ctx context.Context, addr string, tlsCfg *tls.Config, do ...g } u, err := url.Parse(addr) if err != nil { - return nil, errs.ErrURLParse.GenWithStackByArgs(err) + return nil, errs.ErrURLParse.Wrap(err).GenWithStackByCause() } cc, err := grpc.DialContext(ctx, u.Host, append(do, opt)...) if err != nil { - return nil, errs.ErrGRPCDial.GenWithStackByArgs(err) + return nil, errs.ErrGRPCDial.Wrap(err).GenWithStackByCause() } return cc, nil } diff --git a/server/election/leadership.go b/server/election/leadership.go index b470fe5ecb9..f1211ba9cfc 100644 --- a/server/election/leadership.go +++ b/server/election/leadership.go @@ -138,7 +138,7 @@ func (ls *Leadership) DeleteLeader() error { // delete leader itself and let others start a new election again. resp, err := ls.LeaderTxn().Then(clientv3.OpDelete(ls.leaderKey)).Commit() if err != nil { - return errs.ErrEtcdKVDelete.GenWithStackByArgs(err) + return errs.ErrEtcdKVDelete.Wrap(err).GenWithStackByCause() } if !resp.Succeeded { return errs.ErrEtcdTxn.FastGenByArgs("not leader already") diff --git a/server/kv/etcd_kv.go b/server/kv/etcd_kv.go index 83e4af7ee67..0a681d017c9 100644 --- a/server/kv/etcd_kv.go +++ b/server/kv/etcd_kv.go @@ -89,7 +89,7 @@ func (kv *etcdKVBase) Save(key, value string) error { txn := NewSlowLogTxn(kv.client) resp, err := txn.Then(clientv3.OpPut(key, value)).Commit() if err != nil { - err = errs.ErrEtcdKVPut.GenWithStackByArgs(err) + err = errs.ErrEtcdKVPut.Wrap(err).GenWithStackByCause() log.Error("save to etcd meet error", zap.String("key", key), zap.String("value", value), errs.ZapError(err)) return err } @@ -105,7 +105,7 @@ func (kv *etcdKVBase) Remove(key string) error { txn := NewSlowLogTxn(kv.client) resp, err := txn.Then(clientv3.OpDelete(key)).Commit() if err != nil { - err = errs.ErrEtcdKVDelete.GenWithStackByArgs(err) + err = errs.ErrEtcdKVDelete.Wrap(err).GenWithStackByCause() log.Error("remove from etcd meet error", zap.String("key", key), errs.ZapError(err)) return err } diff --git a/server/member/member.go b/server/member/member.go index fc17f5b8860..8a51fcac2ee 100644 --- a/server/member/member.go +++ b/server/member/member.go @@ -232,7 +232,7 @@ func (m *Member) MoveEtcdLeader(ctx context.Context, old, new uint64) error { defer cancel() err := m.etcd.Server.MoveLeader(moveCtx, old, new) if err != nil { - return errs.ErrEtcdMoveLeader.GenWithStackByArgs(err) + return errs.ErrEtcdMoveLeader.Wrap(err).GenWithStackByCause() } return nil } @@ -331,7 +331,7 @@ func (m *Member) GetMemberLeaderPriority(id uint64) (int, error) { } priority, err := strconv.ParseInt(string(res.Kvs[0].Value), 10, 32) if err != nil { - return 0, errs.ErrStrconvParseInt.GenWithStackByArgs(err) + return 0, errs.ErrStrconvParseInt.Wrap(err).GenWithStackByCause() } return int(priority), nil } From 903f163a68f56e3d040e9296dc0656e4e15c0fa0 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Mon, 31 Aug 2020 15:16:02 +0800 Subject: [PATCH 3/7] update the ZapError Signed-off-by: Ryan Leung --- client/base_client.go | 4 ++-- client/client.go | 2 +- pkg/errs/errno.go | 44 +++++++++++++++------------------------- pkg/errs/errs_test.go | 2 +- pkg/etcdutil/etcdutil.go | 2 +- 5 files changed, 21 insertions(+), 33 deletions(-) diff --git a/client/base_client.go b/client/base_client.go index 45b226dd8d4..64510799ad7 100644 --- a/client/base_client.go +++ b/client/base_client.go @@ -177,7 +177,7 @@ func (c *baseClient) initClusterID() error { members, err := c.getMembers(timeoutCtx, u) timeoutCancel() if err != nil || members.GetHeader() == nil { - log.Warn("[pd] failed to get cluster id", zap.String("url", u), zap.Error(err)) + log.Warn("[pd] failed to get cluster id", zap.String("url", u), errs.ZapError(err)) continue } c.clusterID = members.GetHeader().GetClusterId() @@ -191,7 +191,7 @@ func (c *baseClient) updateLeader() error { ctx, cancel := context.WithTimeout(c.ctx, updateLeaderTimeout) members, err := c.getMembers(ctx, u) if err != nil { - log.Warn("[pd] cannot update leader", zap.String("address", u), zap.Error(err)) + log.Warn("[pd] cannot update leader", zap.String("address", u), errs.ZapError(err)) } cancel() if err != nil || members.GetLeader() == nil || len(members.GetLeader().GetClientUrls()) == 0 { diff --git a/client/client.go b/client/client.go index ffdbb250a44..e1d393cfe78 100644 --- a/client/client.go +++ b/client/client.go @@ -182,7 +182,7 @@ func (c *client) tsCancelLoop() { case d := <-c.tsDeadlineCh: select { case <-d.timer: - log.Error("tso request is canceled due to timeout", errs.ZapError(errs.ErrClientGetTSOTimeout.FastGenByArgs())) + log.Error("tso request is canceled due to timeout", errs.ZapError(errs.ErrClientGetTSOTimeout)) d.cancel() case <-d.done: case <-ctx.Done(): diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index bd4319d81a8..5e07347c5f1 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -38,6 +38,17 @@ var ( ErrClientGetMember = errors.Normalize("get member failed", errors.RFCCodeText("PD:client:ErrClientGetMember")) ) +// scheduler errors +var ( + ErrGetSourceStore = errors.Normalize("failed to get the source store", errors.RFCCodeText("PD:scheduler:ErrGetSourceStore")) + ErrSchedulerExisted = errors.Normalize("scheduler existed", errors.RFCCodeText("PD:scheduler:ErrSchedulerExisted")) + ErrSchedulerNotFound = errors.Normalize("scheduler not found", errors.RFCCodeText("PD:scheduler:ErrSchedulerNotFound")) + ErrScheduleConfigNotExist = errors.Normalize("the config does not exist", errors.RFCCodeText("PD:scheduler:ErrScheduleConfigNotExist")) + ErrSchedulerConfig = errors.Normalize("wrong scheduler config %s", errors.RFCCodeText("PD:scheduler:ErrSchedulerConfig")) + ErrCacheOverflow = errors.Normalize("cache overflow", errors.RFCCodeText("PD:scheduler:ErrCacheOverflow")) + ErrInternalGrowth = errors.Normalize("unknown interval growth type error", errors.RFCCodeText("PD:scheduler:ErrInternalGrowth")) +) + // placement errors var ( ErrRuleContent = errors.Normalize("invalid rule content, %s", errors.RFCCodeText("PD:placement:ErrRuleContent")) @@ -65,7 +76,8 @@ var ( // The third-party project error. // url errors var ( - ErrURLParse = errors.Normalize("parse url error", errors.RFCCodeText("PD:url:ErrURLParse")) + ErrURLParse = errors.Normalize("parse url error", errors.RFCCodeText("PD:url:ErrURLParse")) + ErrQueryUnescape = errors.Normalize("inverse transformation of QueryEscape error", errors.RFCCodeText("PD:url:ErrQueryUnescape")) ) // grpc errors @@ -85,6 +97,7 @@ var ( ErrEtcdTLSConfig = errors.Normalize("etcd TLS config error", errors.RFCCodeText("PD:etcd:ErrEtcdTLSConfig")) ErrEtcdGetProtoMsgWithModRev = errors.Normalize("etcd get proto message with mod rev error", errors.RFCCodeText("PD:etcd:ErrEtcdGetProtoMsgWithModRev")) ErrEtcdWatcherCancel = errors.Normalize("watcher canceled", errors.RFCCodeText("PD:etcd:ErrEtcdWatcherCancel")) + ErrCloseEtcdClient = errors.Normalize("close etcd client failed", errors.RFCCodeText("PD:etcd:ErrCloseEtcdClient")) ) // dashboard errors @@ -95,7 +108,8 @@ var ( // strconv errors var ( - ErrStrconvParseInt = errors.Normalize("parse int error", errors.RFCCodeText("PD:strconv:ErrStrconvParseInt")) + ErrStrconvParseInt = errors.Normalize("parse int error", errors.RFCCodeText("PD:strconv:ErrStrconvParseInt")) + ErrStrconvParseUint = errors.Normalize("parse uint error", errors.RFCCodeText("PD:strconv:ErrStrconvParseUint")) ) // prometheus errors @@ -114,29 +128,3 @@ var ( var ( ErrIORead = errors.Normalize("IO read err", errors.RFCCodeText("PD:ioutil:ErrIORead")) ) - -// scheduler errors -var ( - ErrGetSourceStore = errors.Normalize("failed to get the source store", errors.RFCCodeText("PD:scheduler:ErrGetSourceStore")) - ErrSchedulerExisted = errors.Normalize("scheduler existed", errors.RFCCodeText("PD:scheduler:ErrSchedulerExisted")) - ErrSchedulerNotFound = errors.Normalize("scheduler not found", errors.RFCCodeText("PD:scheduler:ErrSchedulerNotFound")) - ErrScheduleConfigNotExist = errors.Normalize("the config does not exist", errors.RFCCodeText("PD:scheduler:ErrScheduleConfigNotExist")) - ErrSchedulerConfig = errors.Normalize("wrong scheduler config %s", errors.RFCCodeText("PD:scheduler:ErrSchedulerConfig")) - ErrCacheOverflow = errors.Normalize("cache overflow", errors.RFCCodeText("PD:scheduler:ErrCacheOverflow")) - ErrInternalGrowth = errors.Normalize("unknown interval growth type error", errors.RFCCodeText("PD:scheduler:ErrInternalGrowth")) -) - -// etcd errors -var ( - ErrCloseEtcdClient = errors.Normalize("close etcd client failed", errors.RFCCodeText("PD:etcd:ErrCloseEtcdClient")) -) - -// strconv errors -var ( - ErrStrconvParseUint = errors.Normalize("parse uint error", errors.RFCCodeText("PD:strconv:ErrStrconvParseUint")) -) - -// url errors -var ( - ErrQueryUnescape = errors.Normalize("inverse transformation of QueryEscape error", errors.RFCCodeText("PD:url:ErrQueryUnescape")) -) diff --git a/pkg/errs/errs_test.go b/pkg/errs/errs_test.go index e70a1c4eb9b..ad3447942f2 100644 --- a/pkg/errs/errs_test.go +++ b/pkg/errs/errs_test.go @@ -138,7 +138,7 @@ func (s *testErrorSuite) TestErrorWithStack(c *C) { m1 := lg.Message() log.Error("test", zap.Error(errors.WithStack(err))) m2 := lg.Message() - // This test is based on line number and the first log is in line 102, the second is in line 104. + // This test is based on line number and the first log is in line 141, the second is in line 142. // So they have the same length stack. Move this test to another place need to change the corresponding length. c.Assert(len(m1[strings.Index(m1, "[stack="):]), Equals, len(m2[strings.Index(m2, "[stack="):])) } diff --git a/pkg/etcdutil/etcdutil.go b/pkg/etcdutil/etcdutil.go index ea11e678cd2..a15ef99afca 100644 --- a/pkg/etcdutil/etcdutil.go +++ b/pkg/etcdutil/etcdutil.go @@ -106,7 +106,7 @@ func EtcdKVGet(c *clientv3.Client, key string, opts ...clientv3.OpOption) (*clie start := time.Now() resp, err := clientv3.NewKV(c).Get(ctx, key, opts...) if cost := time.Since(start); cost > DefaultSlowRequestTime { - log.Warn("kv gets too slow", zap.String("request-key", key), zap.Duration("cost", cost), zap.Error(err)) + log.Warn("kv gets too slow", zap.String("request-key", key), zap.Duration("cost", cost), errs.ZapError(err)) } if err != nil { From d717e5af65029fac3df5125424d46f392bf3c9a4 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Mon, 31 Aug 2020 16:18:25 +0800 Subject: [PATCH 4/7] update more errors Signed-off-by: Ryan Leung --- pkg/dashboard/adapter/manager.go | 3 +-- pkg/errs/errno.go | 12 +++++++++++- pkg/errs/errs.go | 3 +++ pkg/tempurl/check_env_linux.go | 6 +++--- plugin/scheduler_example/evict_leader.go | 4 ++-- server/cluster/cluster_worker.go | 4 ++-- server/cluster/coordinator.go | 2 +- server/core/basic_cluster.go | 3 ++- server/election/leadership.go | 2 +- server/election/lease.go | 3 ++- server/handler.go | 15 ++++++++------- server/heartbeat_streams.go | 3 ++- server/kv/etcd_kv.go | 10 +++++----- server/region_syncer/history_buffer.go | 3 ++- server/replication/replication_mode.go | 15 ++++++++------- server/schedule/checker/learner_checker.go | 4 ++-- server/schedule/checker/merge_checker.go | 3 ++- server/schedule/checker/replica_checker.go | 3 ++- server/schedule/checker/rule_checker.go | 5 +++-- server/schedule/placement/rule_manager.go | 14 +++++++------- server/schedule/region_scatterer.go | 3 ++- server/server.go | 10 +++++----- server/tso/global_allocator.go | 4 ++-- server/tso/tso.go | 2 +- 24 files changed, 79 insertions(+), 57 deletions(-) diff --git a/pkg/dashboard/adapter/manager.go b/pkg/dashboard/adapter/manager.go index 7bb6fcc8202..bfeca2223e1 100644 --- a/pkg/dashboard/adapter/manager.go +++ b/pkg/dashboard/adapter/manager.go @@ -21,7 +21,6 @@ import ( "github.com/pingcap-incubator/tidb-dashboard/pkg/apiserver" "github.com/pingcap/kvproto/pkg/pdpb" - "go.uber.org/zap" "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" @@ -106,7 +105,7 @@ func (m *Manager) updateInfo() { var err error if m.members, err = cluster.GetMembers(m.srv.GetClient()); err != nil { - log.Warn("failed to get members", zap.Error(err)) + log.Warn("failed to get members", errs.ZapError(err)) m.members = nil return } diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index 5e07347c5f1..37ba50c1fc7 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -126,5 +126,15 @@ var ( // ioutil error var ( - ErrIORead = errors.Normalize("IO read err", errors.RFCCodeText("PD:ioutil:ErrIORead")) + ErrIORead = errors.Normalize("IO read error", errors.RFCCodeText("PD:ioutil:ErrIORead")) +) + +// netstat error +var ( + ErrNetstatTCPSocks = errors.Normalize("TCP socks error", errors.RFCCodeText("PD:netstat:ErrNetstatTCPSocks")) +) + +// hex error +var ( + ErrHexDecodingString = errors.Normalize("decode string %s error", errors.RFCCodeText("PD:hex:ErrHexDecodingString")) ) diff --git a/pkg/errs/errs.go b/pkg/errs/errs.go index 1509b26af4d..f1b78843fc6 100644 --- a/pkg/errs/errs.go +++ b/pkg/errs/errs.go @@ -21,6 +21,9 @@ import ( // ZapError is used to make the log output eaiser. func ZapError(err error, causeError ...error) zap.Field { + if err == nil { + return zap.Skip() + } if e, ok := err.(*errors.Error); ok { if len(causeError) >= 1 { err = e.Wrap(causeError[0]).FastGenWithCause() diff --git a/pkg/tempurl/check_env_linux.go b/pkg/tempurl/check_env_linux.go index 5c97db8e584..dfe00c84cf1 100644 --- a/pkg/tempurl/check_env_linux.go +++ b/pkg/tempurl/check_env_linux.go @@ -17,13 +17,13 @@ package tempurl import ( "github.com/cakturk/go-netstat/netstat" "github.com/pingcap/log" - "go.uber.org/zap" + "github.com/tikv/pd/pkg/errs" ) func environmentCheck(addr string) bool { valid, err := checkAddr(addr[len("http://"):]) if err != nil { - log.Error("check port status failed", zap.Error(err)) + log.Error("check port status failed", errs.ZapError(err)) return false } return valid @@ -34,7 +34,7 @@ func checkAddr(addr string) (bool, error) { return s.RemoteAddr.String() == addr || s.LocalAddr.String() == addr }) if err != nil { - return false, err + return false, errs.ErrNetstatTCPSocks.Wrap(err).FastGenWithCause() } return len(tabs) < 1, nil } diff --git a/plugin/scheduler_example/evict_leader.go b/plugin/scheduler_example/evict_leader.go index 1cc9d4e0a68..81413bc8df4 100644 --- a/plugin/scheduler_example/evict_leader.go +++ b/plugin/scheduler_example/evict_leader.go @@ -23,6 +23,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" "github.com/tikv/pd/pkg/apiutil" + "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/server/core" "github.com/tikv/pd/server/schedule" "github.com/tikv/pd/server/schedule/filter" @@ -30,7 +31,6 @@ import ( "github.com/tikv/pd/server/schedule/opt" "github.com/tikv/pd/server/schedulers" "github.com/unrolled/render" - "go.uber.org/zap" ) const ( @@ -226,7 +226,7 @@ func (s *evictLeaderScheduler) Schedule(cluster opt.Cluster) []*operator.Operato } op, err := operator.CreateTransferLeaderOperator(EvictLeaderType, cluster, region, region.GetLeader().GetStoreId(), target.GetID(), operator.OpLeader) if err != nil { - log.Debug("fail to create evict leader operator", zap.Error(err)) + log.Debug("fail to create evict leader operator", errs.ZapError(err)) continue } diff --git a/server/cluster/cluster_worker.go b/server/cluster/cluster_worker.go index 4a4ce102a2e..46c49c374a0 100644 --- a/server/cluster/cluster_worker.go +++ b/server/cluster/cluster_worker.go @@ -192,7 +192,7 @@ func (c *RaftCluster) HandleReportSplit(request *pdpb.ReportSplitRequest) (*pdpb log.Warn("report split region is invalid", zap.Stringer("left-region", core.RegionToHexMeta(left)), zap.Stringer("right-region", core.RegionToHexMeta(right)), - zap.Error(err)) + errs.ZapError(err)) return nil, err } @@ -215,7 +215,7 @@ func (c *RaftCluster) HandleBatchReportSplit(request *pdpb.ReportBatchSplitReque if err != nil { log.Warn("report batch split region is invalid", zap.Stringer("region-meta", hrm), - zap.Error(err)) + errs.ZapError(err)) return nil, err } last := len(regions) - 1 diff --git a/server/cluster/coordinator.go b/server/cluster/coordinator.go index 27276e178a1..4c354a52516 100644 --- a/server/cluster/coordinator.go +++ b/server/cluster/coordinator.go @@ -645,7 +645,7 @@ func (c *coordinator) runScheduler(s *scheduleController) { case <-s.Ctx().Done(): log.Info("scheduler has been stopped", zap.String("scheduler-name", s.GetName()), - zap.Error(s.Ctx().Err())) + errs.ZapError(s.Ctx().Err())) return } } diff --git a/server/core/basic_cluster.go b/server/core/basic_cluster.go index a853e322027..e1d0369ba47 100644 --- a/server/core/basic_cluster.go +++ b/server/core/basic_cluster.go @@ -19,6 +19,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/server/schedule/storelimit" "go.uber.org/zap" @@ -329,7 +330,7 @@ func (bc *BasicCluster) PutRegion(region *RegionInfo) []*RegionInfo { func (bc *BasicCluster) CheckAndPutRegion(region *RegionInfo) []*RegionInfo { origin, err := bc.PreCheckPutRegion(region) if err != nil { - log.Debug("region is stale", zap.Error(err), zap.Stringer("origin", origin.GetMeta())) + log.Debug("region is stale", zap.Stringer("origin", origin.GetMeta()), errs.ZapError(err)) // return the state region to delete. return []*RegionInfo{region} } diff --git a/server/election/leadership.go b/server/election/leadership.go index f1211ba9cfc..a4c5a553dfc 100644 --- a/server/election/leadership.go +++ b/server/election/leadership.go @@ -101,7 +101,7 @@ func (ls *Leadership) Campaign(leaseTimeout int64, leaderData string) error { If(clientv3.Compare(clientv3.CreateRevision(ls.leaderKey), "=", 0)). Then(clientv3.OpPut(ls.leaderKey, leaderData, clientv3.WithLease(ls.getLease().ID))). Commit() - log.Info("check campaign resp", zap.Any("resp", resp), zap.Error(err)) + log.Info("check campaign resp", zap.Any("resp", resp)) if err != nil { return errors.WithStack(err) } diff --git a/server/election/lease.go b/server/election/lease.go index a90992006c8..a249b7ae916 100644 --- a/server/election/lease.go +++ b/server/election/lease.go @@ -20,6 +20,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/etcdutil" "go.etcd.io/etcd/clientv3" "go.uber.org/zap" @@ -125,7 +126,7 @@ func (l *lease) keepAliveWorker(ctx context.Context, interval time.Duration) <-c defer cancel() res, err := l.lease.KeepAliveOnce(ctx1, l.ID) if err != nil { - log.Warn("lease keep alive failed", zap.Error(err), zap.String("purpose", l.Purpose)) + log.Warn("lease keep alive failed", zap.String("purpose", l.Purpose), errs.ZapError(err)) return } if res.TTL > 0 { diff --git a/server/handler.go b/server/handler.go index 63ede896460..ee8725f0858 100644 --- a/server/handler.go +++ b/server/handler.go @@ -28,6 +28,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/server/cluster" "github.com/tikv/pd/server/config" "github.com/tikv/pd/server/core" @@ -457,7 +458,7 @@ func (h *Handler) AddTransferLeaderOperator(regionID uint64, storeID uint64) err op, err := operator.CreateTransferLeaderOperator("admin-transfer-leader", c, region, region.GetLeader().GetStoreId(), newLeader.GetStoreId(), operator.OpAdmin) if err != nil { - log.Debug("fail to create transfer leader operator", zap.Error(err)) + log.Debug("fail to create transfer leader operator", errs.ZapError(err)) return err } if ok := c.GetOperatorController().AddOperator(op); !ok { @@ -505,7 +506,7 @@ func (h *Handler) AddTransferRegionOperator(regionID uint64, storeIDs map[uint64 op, err := operator.CreateMoveRegionOperator("admin-move-region", c, region, operator.OpAdmin, peers) if err != nil { - log.Debug("fail to create move region operator", zap.Error(err)) + log.Debug("fail to create move region operator", errs.ZapError(err)) return err } if ok := c.GetOperatorController().AddOperator(op); !ok { @@ -542,7 +543,7 @@ func (h *Handler) AddTransferPeerOperator(regionID uint64, fromStoreID, toStoreI newPeer := &metapb.Peer{StoreId: toStoreID, Role: oldPeer.GetRole()} op, err := operator.CreateMovePeerOperator("admin-move-peer", c, region, operator.OpAdmin, fromStoreID, newPeer) if err != nil { - log.Debug("fail to create move peer operator", zap.Error(err)) + log.Debug("fail to create move peer operator", errs.ZapError(err)) return err } if ok := c.GetOperatorController().AddOperator(op); !ok { @@ -588,7 +589,7 @@ func (h *Handler) AddAddPeerOperator(regionID uint64, toStoreID uint64) error { newPeer := &metapb.Peer{StoreId: toStoreID} op, err := operator.CreateAddPeerOperator("admin-add-peer", c, region, newPeer, operator.OpAdmin) if err != nil { - log.Debug("fail to create add peer operator", zap.Error(err)) + log.Debug("fail to create add peer operator", errs.ZapError(err)) return err } if ok := c.GetOperatorController().AddOperator(op); !ok { @@ -611,7 +612,7 @@ func (h *Handler) AddAddLearnerOperator(regionID uint64, toStoreID uint64) error op, err := operator.CreateAddPeerOperator("admin-add-learner", c, region, newPeer, operator.OpAdmin) if err != nil { - log.Debug("fail to create add learner operator", zap.Error(err)) + log.Debug("fail to create add learner operator", errs.ZapError(err)) return err } if ok := c.GetOperatorController().AddOperator(op); !ok { @@ -638,7 +639,7 @@ func (h *Handler) AddRemovePeerOperator(regionID uint64, fromStoreID uint64) err op, err := operator.CreateRemovePeerOperator("admin-remove-peer", c, operator.OpAdmin, region, fromStoreID) if err != nil { - log.Debug("fail to create move peer operator", zap.Error(err)) + log.Debug("fail to create move peer operator", errs.ZapError(err)) return err } if ok := c.GetOperatorController().AddOperator(op); !ok { @@ -680,7 +681,7 @@ func (h *Handler) AddMergeRegionOperator(regionID uint64, targetID uint64) error ops, err := operator.CreateMergeRegionOperator("admin-merge-region", c, region, target, operator.OpAdmin) if err != nil { - log.Debug("fail to create merge region operator", zap.Error(err)) + log.Debug("fail to create merge region operator", errs.ZapError(err)) return err } if ok := c.GetOperatorController().AddOperator(ops...); !ok { diff --git a/server/heartbeat_streams.go b/server/heartbeat_streams.go index 39caa9a3daf..aa5cd0219fd 100644 --- a/server/heartbeat_streams.go +++ b/server/heartbeat_streams.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/logutil" "github.com/tikv/pd/server/cluster" "github.com/tikv/pd/server/core" @@ -120,7 +121,7 @@ func (s *heartbeatStreams) run() { if err := stream.Send(keepAlive); err != nil { log.Warn("send keepalive message fail, store maybe disconnected", zap.Uint64("target-store-id", storeID), - zap.Error(err)) + errs.ZapError(err)) delete(s.streams, storeID) regionHeartbeatCounter.WithLabelValues(storeAddress, storeLabel, "keepalive", "err").Inc() } else { diff --git a/server/kv/etcd_kv.go b/server/kv/etcd_kv.go index 0a681d017c9..b370ff0e0e8 100644 --- a/server/kv/etcd_kv.go +++ b/server/kv/etcd_kv.go @@ -89,9 +89,9 @@ func (kv *etcdKVBase) Save(key, value string) error { txn := NewSlowLogTxn(kv.client) resp, err := txn.Then(clientv3.OpPut(key, value)).Commit() if err != nil { - err = errs.ErrEtcdKVPut.Wrap(err).GenWithStackByCause() - log.Error("save to etcd meet error", zap.String("key", key), zap.String("value", value), errs.ZapError(err)) - return err + e := errs.ErrEtcdKVPut.Wrap(err).GenWithStackByCause() + log.Error("save to etcd meet error", zap.String("key", key), zap.String("value", value), errs.ZapError(e)) + return e } if !resp.Succeeded { return errs.ErrEtcdTxn.FastGenByArgs() @@ -158,9 +158,9 @@ func (t *SlowLogTxn) Commit() (*clientv3.TxnResponse, error) { cost := time.Since(start) if cost > slowRequestTime { log.Warn("txn runs too slow", - zap.Error(err), zap.Reflect("response", resp), - zap.Duration("cost", cost)) + zap.Duration("cost", cost), + errs.ZapError(err)) } label := "success" if err != nil { diff --git a/server/region_syncer/history_buffer.go b/server/region_syncer/history_buffer.go index 0ee891b1c4e..b2984999304 100644 --- a/server/region_syncer/history_buffer.go +++ b/server/region_syncer/history_buffer.go @@ -18,6 +18,7 @@ import ( "sync" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/server/core" "github.com/tikv/pd/server/kv" "go.uber.org/zap" @@ -151,6 +152,6 @@ func (h *historyBuffer) persist() { regionSyncerStatus.WithLabelValues("last_index").Set(float64(h.nextIndex())) err := h.kv.Save(historyKey, strconv.FormatUint(h.nextIndex(), 10)) if err != nil { - log.Warn("persist history index failed", zap.Uint64("persist-index", h.nextIndex()), zap.Error(err)) + log.Warn("persist history index failed", zap.Uint64("persist-index", h.nextIndex()), errs.ZapError(err)) } } diff --git a/server/replication/replication_mode.go b/server/replication/replication_mode.go index dff8181d69e..ce184cf91c2 100644 --- a/server/replication/replication_mode.go +++ b/server/replication/replication_mode.go @@ -23,6 +23,7 @@ import ( pb "github.com/pingcap/kvproto/pkg/replication_modepb" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/server/config" "github.com/tikv/pd/server/core" "github.com/tikv/pd/server/schedule/opt" @@ -217,7 +218,7 @@ func (m *ModeManager) drSwitchToAsync() error { func (m *ModeManager) drSwitchToAsyncWithLock() error { id, err := m.cluster.AllocID() if err != nil { - log.Warn("failed to switch to async state", zap.String("replicate-mode", modeDRAutoSync), zap.Error(err)) + log.Warn("failed to switch to async state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err)) return err } dr := drAutoSyncStatus{State: drStateAsync, StateID: id} @@ -225,7 +226,7 @@ func (m *ModeManager) drSwitchToAsyncWithLock() error { return err } if err := m.storage.SaveReplicationStatus(modeDRAutoSync, dr); err != nil { - log.Warn("failed to switch to async state", zap.String("replicate-mode", modeDRAutoSync), zap.Error(err)) + log.Warn("failed to switch to async state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err)) return err } m.drAutoSync = dr @@ -242,7 +243,7 @@ func (m *ModeManager) drSwitchToSyncRecover() error { func (m *ModeManager) drSwitchToSyncRecoverWithLock() error { id, err := m.cluster.AllocID() if err != nil { - log.Warn("failed to switch to sync_recover state", zap.String("replicate-mode", modeDRAutoSync), zap.Error(err)) + log.Warn("failed to switch to sync_recover state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err)) return err } dr := drAutoSyncStatus{State: drStateSyncRecover, StateID: id, RecoverStartTime: time.Now()} @@ -250,7 +251,7 @@ func (m *ModeManager) drSwitchToSyncRecoverWithLock() error { return err } if err = m.storage.SaveReplicationStatus(modeDRAutoSync, dr); err != nil { - log.Warn("failed to switch to sync_recover state", zap.String("replicate-mode", modeDRAutoSync), zap.Error(err)) + log.Warn("failed to switch to sync_recover state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err)) return err } m.drAutoSync = dr @@ -264,7 +265,7 @@ func (m *ModeManager) drSwitchToSync() error { defer m.Unlock() id, err := m.cluster.AllocID() if err != nil { - log.Warn("failed to switch to sync state", zap.String("replicate-mode", modeDRAutoSync), zap.Error(err)) + log.Warn("failed to switch to sync state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err)) return err } dr := drAutoSyncStatus{State: drStateSync, StateID: id} @@ -272,7 +273,7 @@ func (m *ModeManager) drSwitchToSync() error { return err } if err := m.storage.SaveReplicationStatus(modeDRAutoSync, dr); err != nil { - log.Warn("failed to switch to sync state", zap.String("replicate-mode", modeDRAutoSync), zap.Error(err)) + log.Warn("failed to switch to sync state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err)) return err } m.drAutoSync = dr @@ -286,7 +287,7 @@ func (m *ModeManager) drPersistStatus(status drAutoSyncStatus) error { defer cancel() data, _ := json.Marshal(status) if err := m.fileReplicater.ReplicateFileToAllMembers(ctx, drStatusFile, data); err != nil { - log.Warn("failed to switch state", zap.String("replicate-mode", modeDRAutoSync), zap.String("new-state", status.State), zap.Error(err)) + log.Warn("failed to switch state", zap.String("replicate-mode", modeDRAutoSync), zap.String("new-state", status.State), errs.ZapError(err)) // Throw away the error to make it possible to switch to async when // primary and dr DC are disconnected. This will result in the // inability to accurately determine whether data is fully diff --git a/server/schedule/checker/learner_checker.go b/server/schedule/checker/learner_checker.go index 0167580e8b4..2034b41d2a8 100644 --- a/server/schedule/checker/learner_checker.go +++ b/server/schedule/checker/learner_checker.go @@ -15,10 +15,10 @@ package checker import ( "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/server/core" "github.com/tikv/pd/server/schedule/operator" "github.com/tikv/pd/server/schedule/opt" - "go.uber.org/zap" ) // LearnerChecker ensures region has a learner will be promoted. @@ -41,7 +41,7 @@ func (l *LearnerChecker) Check(region *core.RegionInfo) *operator.Operator { } op, err := operator.CreatePromoteLearnerOperator("promote-learner", l.cluster, region, p) if err != nil { - log.Debug("fail to create promote learner operator", zap.Error(err)) + log.Debug("fail to create promote learner operator", errs.ZapError(err)) return nil } return op diff --git a/server/schedule/checker/merge_checker.go b/server/schedule/checker/merge_checker.go index 3202f68f1dc..9869464f93d 100644 --- a/server/schedule/checker/merge_checker.go +++ b/server/schedule/checker/merge_checker.go @@ -21,6 +21,7 @@ import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/cache" "github.com/tikv/pd/pkg/codec" + "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/server/core" "github.com/tikv/pd/server/schedule/operator" "github.com/tikv/pd/server/schedule/opt" @@ -121,7 +122,7 @@ func (m *MergeChecker) Check(region *core.RegionInfo) []*operator.Operator { log.Debug("try to merge region", zap.Stringer("from", core.RegionToHexMeta(region.GetMeta())), zap.Stringer("to", core.RegionToHexMeta(target.GetMeta()))) ops, err := operator.CreateMergeRegionOperator("merge-region", m.cluster, region, target, operator.OpMerge) if err != nil { - log.Warn("create merge region operator failed", zap.Error(err)) + log.Warn("create merge region operator failed", errs.ZapError(err)) return nil } checkerCounter.WithLabelValues("merge_checker", "new-operator").Inc() diff --git a/server/schedule/checker/replica_checker.go b/server/schedule/checker/replica_checker.go index 3bfe5f14357..98b926d454a 100644 --- a/server/schedule/checker/replica_checker.go +++ b/server/schedule/checker/replica_checker.go @@ -18,6 +18,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/server/core" "github.com/tikv/pd/server/schedule/operator" "github.com/tikv/pd/server/schedule/opt" @@ -150,7 +151,7 @@ func (r *ReplicaChecker) checkMakeUpReplica(region *core.RegionInfo) *operator.O newPeer := &metapb.Peer{StoreId: target} op, err := operator.CreateAddPeerOperator("make-up-replica", r.cluster, region, newPeer, operator.OpReplica) if err != nil { - log.Debug("create make-up-replica operator fail", zap.Error(err)) + log.Debug("create make-up-replica operator fail", errs.ZapError(err)) return nil } return op diff --git a/server/schedule/checker/rule_checker.go b/server/schedule/checker/rule_checker.go index 0f312e9e165..993641a5afb 100644 --- a/server/schedule/checker/rule_checker.go +++ b/server/schedule/checker/rule_checker.go @@ -18,6 +18,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/server/core" "github.com/tikv/pd/server/schedule/filter" "github.com/tikv/pd/server/schedule/operator" @@ -57,7 +58,7 @@ func (c *RuleChecker) Check(region *core.RegionInfo) *operator.Operator { for _, rf := range fit.RuleFits { op, err := c.fixRulePeer(region, fit, rf) if err != nil { - log.Debug("fail to fix rule peer", zap.Error(err), zap.String("rule-group", rf.Rule.GroupID), zap.String("rule-id", rf.Rule.ID)) + log.Debug("fail to fix rule peer", zap.String("rule-group", rf.Rule.GroupID), zap.String("rule-id", rf.Rule.ID), errs.ZapError(err)) break } if op != nil { @@ -66,7 +67,7 @@ func (c *RuleChecker) Check(region *core.RegionInfo) *operator.Operator { } op, err := c.fixOrphanPeers(region, fit) if err != nil { - log.Debug("fail to fix orphan peer", zap.Error(err)) + log.Debug("fail to fix orphan peer", errs.ZapError(err)) return nil } return op diff --git a/server/schedule/placement/rule_manager.go b/server/schedule/placement/rule_manager.go index 4fadf95b6f3..1d31af674a9 100644 --- a/server/schedule/placement/rule_manager.go +++ b/server/schedule/placement/rule_manager.go @@ -92,22 +92,22 @@ func (m *RuleManager) loadRules() error { err := m.store.LoadRules(func(k, v string) { var r Rule if err := json.Unmarshal([]byte(v), &r); err != nil { - log.Error("failed to unmarshal rule value", zap.String("rule-key", k), zap.String("rule-value", v), zap.Error(errs.ErrLoadRule.FastGenByArgs())) + log.Error("failed to unmarshal rule value", zap.String("rule-key", k), zap.String("rule-value", v), errs.ZapError(errs.ErrLoadRule)) toDelete = append(toDelete, k) return } if err := m.adjustRule(&r); err != nil { - log.Error("rule is in bad format", zap.String("rule-key", k), zap.String("rule-value", v), zap.Error(errs.ErrLoadRule.FastGenByArgs()), zap.NamedError("cause", err)) + log.Error("rule is in bad format", zap.String("rule-key", k), zap.String("rule-value", v), errs.ZapError(errs.ErrLoadRule, err)) toDelete = append(toDelete, k) return } if _, ok := m.ruleConfig.rules[r.Key()]; ok { - log.Error("duplicated rule key", zap.String("rule-key", k), zap.String("rule-value", v), zap.Error(errs.ErrLoadRule.FastGenByArgs())) + log.Error("duplicated rule key", zap.String("rule-key", k), zap.String("rule-value", v), errs.ZapError(errs.ErrLoadRule)) toDelete = append(toDelete, k) return } if k != r.StoreKey() { - log.Error("mismatch data key, need to restore", zap.String("rule-key", k), zap.String("rule-value", v), zap.Error(errs.ErrLoadRule.FastGenByArgs())) + log.Error("mismatch data key, need to restore", zap.String("rule-key", k), zap.String("rule-value", v), errs.ZapError(errs.ErrLoadRule)) toDelete = append(toDelete, k) toSave = append(toSave, &r) } @@ -133,7 +133,7 @@ func (m *RuleManager) loadGroups() error { return m.store.LoadRuleGroups(func(k, v string) { var g RuleGroup if err := json.Unmarshal([]byte(v), &g); err != nil { - log.Error("failed to unmarshal rule group", zap.String("group-id", k), zap.Error(errs.ErrLoadRuleGroup.FastGenByArgs()), zap.NamedError("cause", err)) + log.Error("failed to unmarshal rule group", zap.String("group-id", k), errs.ZapError(errs.ErrLoadRuleGroup, err)) return } m.ruleConfig.groups[g.ID] = &g @@ -145,11 +145,11 @@ func (m *RuleManager) adjustRule(r *Rule) error { var err error r.StartKey, err = hex.DecodeString(r.StartKeyHex) if err != nil { - return errs.ErrRuleContent.FastGenByArgs("start key is not hex format") + return errs.ErrHexDecodingString.FastGenByArgs(r.StartKeyHex) } r.EndKey, err = hex.DecodeString(r.EndKeyHex) if err != nil { - return errs.ErrRuleContent.FastGenByArgs("end key is not hex format") + return errs.ErrHexDecodingString.FastGenByArgs(r.EndKeyHex) } if len(r.EndKey) > 0 && bytes.Compare(r.EndKey, r.StartKey) <= 0 { return errs.ErrRuleContent.FastGenByArgs("endKey should be greater than startKey") diff --git a/server/schedule/region_scatterer.go b/server/schedule/region_scatterer.go index 3c12bef41aa..874ac938ea7 100644 --- a/server/schedule/region_scatterer.go +++ b/server/schedule/region_scatterer.go @@ -21,6 +21,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/server/core" "github.com/tikv/pd/server/schedule/filter" "github.com/tikv/pd/server/schedule/operator" @@ -196,7 +197,7 @@ func (r *RegionScatterer) scatterRegion(region *core.RegionInfo) *operator.Opera op, err := operator.CreateScatterRegionOperator("scatter-region", r.cluster, region, targetPeers, targetLeader) if err != nil { - log.Debug("fail to create scatter region operator", zap.Error(err)) + log.Debug("fail to create scatter region operator", errs.ZapError(err)) return nil } op.SetPriorityLevel(core.HighPriority) diff --git a/server/server.go b/server/server.go index d6dc42e4e0c..0a4003d4bc6 100644 --- a/server/server.go +++ b/server/server.go @@ -571,11 +571,11 @@ func (s *Server) bootstrapCluster(req *pdpb.BootstrapRequest) (*pdpb.BootstrapRe log.Info("bootstrap cluster ok", zap.Uint64("cluster-id", clusterID)) err = s.storage.SaveRegion(req.GetRegion()) if err != nil { - log.Warn("save the bootstrap region failed", zap.Error(err)) + log.Warn("save the bootstrap region failed", errs.ZapError(err)) } err = s.storage.Flush() if err != nil { - log.Warn("flush the bootstrap region failed", zap.Error(err)) + log.Warn("flush the bootstrap region failed", errs.ZapError(err)) } if err := s.cluster.Start(s); err != nil { @@ -1030,7 +1030,7 @@ func (s *Server) SetReplicationModeConfig(cfg config.ReplicationModeConfig) erro if cluster != nil { err := cluster.GetReplicationMode().UpdateConfig(cfg) if err != nil { - log.Warn("failed to update replication mode", zap.Error(err)) + log.Warn("failed to update replication mode", errs.ZapError(err)) // revert to old config // NOTE: since we can't put the 2 storage mutations in a batch, it // is possible that memory and persistent data become different @@ -1207,7 +1207,7 @@ func (s *Server) ReplicateFileToAllMembers(ctx context.Context, name string, dat for _, member := range resp.Members { clientUrls := member.GetClientUrls() if len(clientUrls) == 0 { - log.Warn("failed to replicate file", zap.String("name", name), zap.String("member", member.GetName()), zap.Error(err)) + log.Warn("failed to replicate file", zap.String("name", name), zap.String("member", member.GetName()), errs.ZapError(err)) return errors.Errorf("failed to replicate to member %s: clientUrls is empty", member.GetName()) } url := clientUrls[0] + filepath.Join("/pd/api/v1/admin/persist-file", name) @@ -1215,7 +1215,7 @@ func (s *Server) ReplicateFileToAllMembers(ctx context.Context, name string, dat req.Header.Set("PD-Allow-follower-handle", "true") res, err := s.httpClient.Do(req) if err != nil { - log.Warn("failed to replicate file", zap.String("name", name), zap.String("member", member.GetName()), zap.Error(err)) + log.Warn("failed to replicate file", zap.String("name", name), zap.String("member", member.GetName()), errs.ZapError(err)) return errors.Errorf("failed to replicate to member %s", member.GetName()) } if res.StatusCode != http.StatusOK { diff --git a/server/tso/global_allocator.go b/server/tso/global_allocator.go index bd83a81744f..98da0a70d6d 100644 --- a/server/tso/global_allocator.go +++ b/server/tso/global_allocator.go @@ -104,7 +104,7 @@ func (gta *GlobalTSOAllocator) GenerateTSO(count uint32) (pdpb.Timestamp, error) time.Sleep(200 * time.Millisecond) continue } - log.Error("invalid timestamp", zap.Any("timestamp", current), zap.Error(errs.ErrInvalidTimestamp.FastGenByArgs())) + log.Error("invalid timestamp", zap.Any("timestamp", current), errs.ZapError(errs.ErrInvalidTimestamp)) return pdpb.Timestamp{}, errors.New("can not get timestamp, may be not leader") } @@ -113,7 +113,7 @@ func (gta *GlobalTSOAllocator) GenerateTSO(count uint32) (pdpb.Timestamp, error) if resp.Logical >= maxLogical { log.Error("logical part outside of max logical interval, please check ntp time", zap.Reflect("response", resp), - zap.Int("retry-count", i), zap.Error(errs.ErrLogicOverflow.FastGenByArgs())) + zap.Int("retry-count", i), errs.ZapError(errs.ErrLogicOverflow)) tsoCounter.WithLabelValues("logical_overflow").Inc() time.Sleep(UpdateTimestampStep) continue diff --git a/server/tso/tso.go b/server/tso/tso.go index 9c92e517e4b..d7979ca708b 100644 --- a/server/tso/tso.go +++ b/server/tso/tso.go @@ -116,7 +116,7 @@ func (t *timestampOracle) SyncTimestamp(leadership *election.Leadership) error { // If the current system time minus the saved etcd timestamp is less than `updateTimestampGuard`, // the timestamp allocation will start from the saved etcd timestamp temporarily. if typeutil.SubTimeByWallClock(next, last) < updateTimestampGuard { - log.Error("system time may be incorrect", zap.Time("last", last), zap.Time("next", next), zap.Error(errs.ErrIncorrectSystemTime.FastGenByArgs())) + log.Error("system time may be incorrect", zap.Time("last", last), zap.Time("next", next), errs.ZapError(errs.ErrIncorrectSystemTime)) next = last.Add(updateTimestampGuard) } From 6e08a84f228400afe877217ab7dce3a5a04723b2 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Tue, 1 Sep 2020 11:36:26 +0800 Subject: [PATCH 5/7] address comments Signed-off-by: Ryan Leung --- pkg/errs/errno.go | 4 ++-- server/member/member.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index 37ba50c1fc7..2a7087e0926 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -25,7 +25,7 @@ var ( // member errors var ( - ErretcdLeaderNotFound = errors.Normalize("etcd leader not found", errors.RFCCodeText("PD:member:ErretcdLeaderNotFound")) + ErrEtcdLeaderNotFound = errors.Normalize("etcd leader not found", errors.RFCCodeText("PD:member:ErrEtcdLeaderNotFound")) ErrMarshalLeader = errors.Normalize("marshal leader failed", errors.RFCCodeText("PD:member:ErrMarshalLeader")) ) @@ -88,7 +88,7 @@ var ( // etcd errors var ( - ErrEtcdTxn = errors.Normalize("etcd Txn failed, %s", errors.RFCCodeText("PD:etcd:ErrEtcdKVSave")) + ErrEtcdTxn = errors.Normalize("etcd Txn failed, %s", errors.RFCCodeText("PD:etcd:ErrEtcdTxn")) ErrEtcdKVPut = errors.Normalize("etcd KV put failed", errors.RFCCodeText("PD:etcd:ErrEtcdKVPut")) ErrEtcdKVDelete = errors.Normalize("etcd KV delete failed", errors.RFCCodeText("PD:etcd:ErrEtcdKVDelete")) ErrEtcdKVGet = errors.Normalize("etcd KV get failed", errors.RFCCodeText("PD:etcd:ErrEtcdKVGet")) diff --git a/server/member/member.go b/server/member/member.go index 8a51fcac2ee..9c7ee42ab2a 100644 --- a/server/member/member.go +++ b/server/member/member.go @@ -157,7 +157,7 @@ func (m *Member) IsStillLeader() bool { // CheckLeader checks returns true if it is needed to check later. func (m *Member) CheckLeader(name string) (*pdpb.Member, int64, bool) { if m.GetEtcdLeader() == 0 { - err := errs.ErretcdLeaderNotFound.FastGenByArgs() + err := errs.ErrEtcdLeaderNotFound.FastGenByArgs() log.Error("no etcd leader, check pd leader later", errs.ZapError(err)) time.Sleep(200 * time.Millisecond) return nil, 0, true From b85e4ac4ea6b28b28742751906f744b8135e2c8b Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Tue, 1 Sep 2020 14:00:42 +0800 Subject: [PATCH 6/7] address comments Signed-off-by: Ryan Leung --- pkg/errs/errno.go | 8 +++++++- pkg/etcdutil/etcdutil.go | 8 +++++--- server/election/leadership.go | 4 ++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index 2a7087e0926..08a7263faf6 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -86,12 +86,18 @@ var ( ErrCloseGRPCConn = errors.Normalize("close gRPC connection failed", errors.RFCCodeText("PD:grpc:ErrCloseGRPCConn")) ) +// proto errors +var ( + ErrProtoUnmarshal = errors.Normalize("failed to unmarshal proto", errors.RFCCodeText("PD:proto:ErrProtoUnmarshal")) +) + // etcd errors var ( - ErrEtcdTxn = errors.Normalize("etcd Txn failed, %s", errors.RFCCodeText("PD:etcd:ErrEtcdTxn")) + ErrEtcdTxn = errors.Normalize("etcd Txn failed", errors.RFCCodeText("PD:etcd:ErrEtcdTxn")) ErrEtcdKVPut = errors.Normalize("etcd KV put failed", errors.RFCCodeText("PD:etcd:ErrEtcdKVPut")) ErrEtcdKVDelete = errors.Normalize("etcd KV delete failed", errors.RFCCodeText("PD:etcd:ErrEtcdKVDelete")) ErrEtcdKVGet = errors.Normalize("etcd KV get failed", errors.RFCCodeText("PD:etcd:ErrEtcdKVGet")) + ErrEtcdKVGetResponse = errors.Normalize("etcd invalid get value response %v, must only one", errors.RFCCodeText("PD:etcd:ErrEtcdKVGetResponse")) ErrEtcdGetCluster = errors.Normalize("etcd get cluster from remote peer failed", errors.RFCCodeText("PD:etcd:ErrEtcdGetCluster")) ErrEtcdMoveLeader = errors.Normalize("etcd move leader error", errors.RFCCodeText("PD:etcd:ErrEtcdMoveLeader")) ErrEtcdTLSConfig = errors.Normalize("etcd TLS config error", errors.RFCCodeText("PD:etcd:ErrEtcdTLSConfig")) diff --git a/pkg/etcdutil/etcdutil.go b/pkg/etcdutil/etcdutil.go index a15ef99afca..7cca3cb1dd6 100644 --- a/pkg/etcdutil/etcdutil.go +++ b/pkg/etcdutil/etcdutil.go @@ -110,7 +110,9 @@ func EtcdKVGet(c *clientv3.Client, key string, opts ...clientv3.OpOption) (*clie } if err != nil { - return resp, errs.ErrEtcdKVGet.Wrap(err).GenWithStackByCause() + e := errs.ErrEtcdKVGet.Wrap(err).GenWithStackByCause() + log.Error("load from etcd meet error", zap.String("key", key), errs.ZapError(e)) + return resp, e } return resp, nil } @@ -136,7 +138,7 @@ func get(c *clientv3.Client, key string, opts ...clientv3.OpOption) (*clientv3.G if n := len(resp.Kvs); n == 0 { return nil, nil } else if n > 1 { - return nil, errors.Errorf("invalid get value resp %v, must only one", resp.Kvs) + return nil, errs.ErrEtcdKVGetResponse.FastGenByArgs(resp.Kvs) } return resp, nil } @@ -152,7 +154,7 @@ func GetProtoMsgWithModRev(c *clientv3.Client, key string, msg proto.Message, op } value := resp.Kvs[0].Value if err = proto.Unmarshal(value, msg); err != nil { - return false, 0, errors.WithStack(err) + return false, 0, errs.ErrProtoUnmarshal.Wrap(err).GenWithStackByCause() } return true, resp.Kvs[0].ModRevision, nil } diff --git a/server/election/leadership.go b/server/election/leadership.go index a4c5a553dfc..13cd33b8a0a 100644 --- a/server/election/leadership.go +++ b/server/election/leadership.go @@ -34,7 +34,7 @@ func GetLeader(c *clientv3.Client, leaderPath string) (*pdpb.Member, int64, erro leader := &pdpb.Member{} ok, rev, err := etcdutil.GetProtoMsgWithModRev(c, leaderPath, leader) if err != nil { - return nil, 0, errs.ErrEtcdGetProtoMsgWithModRev.Wrap(err).FastGenWithCause() + return nil, 0, err } if !ok { return nil, 0, nil @@ -141,7 +141,7 @@ func (ls *Leadership) DeleteLeader() error { return errs.ErrEtcdKVDelete.Wrap(err).GenWithStackByCause() } if !resp.Succeeded { - return errs.ErrEtcdTxn.FastGenByArgs("not leader already") + return errs.ErrEtcdTxn.FastGenByArgs() } return nil From 65c448cd4dd75ae6a660c9a8ec571208b8bbd992 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Tue, 1 Sep 2020 14:06:00 +0800 Subject: [PATCH 7/7] address comments Signed-off-by: Ryan Leung --- server/member/member.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/member/member.go b/server/member/member.go index 9c7ee42ab2a..0c591531a91 100644 --- a/server/member/member.go +++ b/server/member/member.go @@ -157,8 +157,7 @@ func (m *Member) IsStillLeader() bool { // CheckLeader checks returns true if it is needed to check later. func (m *Member) CheckLeader(name string) (*pdpb.Member, int64, bool) { if m.GetEtcdLeader() == 0 { - err := errs.ErrEtcdLeaderNotFound.FastGenByArgs() - log.Error("no etcd leader, check pd leader later", errs.ZapError(err)) + log.Error("no etcd leader, check pd leader later", errs.ZapError(errs.ErrEtcdLeaderNotFound)) time.Sleep(200 * time.Millisecond) return nil, 0, true }