From ebaa37f46acffea53e5facf7a035aff17bc37b03 Mon Sep 17 00:00:00 2001 From: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> Date: Wed, 20 Apr 2022 13:14:45 +0800 Subject: [PATCH 01/18] initial implementation of new GC API Signed-off-by: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> --- go.mod | 3 + server/grpc_service.go | 154 +++++++++++++++++++++++ server/server.go | 4 +- server/storage/endpoint/gc_safe_point.go | 133 ++++++++++++++++++++ server/storage/endpoint/key_path.go | 25 ++++ 5 files changed, 318 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 8b86d17439a..96c1d856436 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,9 @@ module github.com/tikv/pd go 1.16 +// TODO: Remove this once kvproto has been updated +replace github.com/pingcap/kvproto => github.com/ystaticy/kvproto v0.0.0-20220419035825-6bb5c11da23d + require ( github.com/AlekSi/gocov-xml v1.0.0 github.com/BurntSushi/toml v0.3.1 diff --git a/server/grpc_service.go b/server/grpc_service.go index 3d43023a66f..2adc84322c3 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -1445,6 +1445,160 @@ func (s *GrpcServer) UpdateServiceGCSafePoint(ctx context.Context, request *pdpb }, nil } +// GetAllServiceGroupGcSafePoint used by RawKV +// returns GCSafePoint for all service groups as well as min GCSafePoint +func (s *GrpcServer) GetAllServiceGroupGcSafePoint(ctx context.Context, request *pdpb.GetAllServiceGroupGcSafePointRequest) (*pdpb.GetAllServiceGroupGcSafePointResponse, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + return pdpb.NewPDClient(client).GetAllServiceGroupGcSafePoint(ctx, request) + } + if rsp, err := s.unaryMiddleware(ctx, request.GetHeader(), fn); err != nil { + return nil, err + } else if rsp != nil { + return rsp.(*pdpb.GetAllServiceGroupGcSafePointResponse), err + } + + rc := s.GetRaftCluster() + if rc == nil { + return &pdpb.GetAllServiceGroupGcSafePointResponse{Header: s.notBootstrappedHeader()}, nil + } + + var storage endpoint.GCSafePointStorage = s.storage + safePoints, err := storage.LoadAllServiceGroupGCSafePoints() + + if err != nil { + return nil, err + } + + return &pdpb.GetAllServiceGroupGcSafePointResponse{ + Header: s.header(), + ServiceGroupSafePoint: safePoints, + }, nil +} + +// UpdateGCSafePointByServiceGroup used by gc_worker to update their gc safe points +func (s *GrpcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, request *pdpb.UpdateGCSafePointByServiceGroupRequest) (*pdpb.UpdateGCSafePointByServiceGroupResponse, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + return pdpb.NewPDClient(client).UpdateGCSafePointByServiceGroup(ctx, request) + } + if rsp, err := s.unaryMiddleware(ctx, request.GetHeader(), fn); err != nil { + return nil, err + } else if rsp != nil { + return rsp.(*pdpb.UpdateGCSafePointByServiceGroupResponse), err + } + + rc := s.GetRaftCluster() + if rc == nil { + return &pdpb.UpdateGCSafePointByServiceGroupResponse{Header: s.notBootstrappedHeader()}, nil + } + + var storage endpoint.GCSafePointStorage = s.storage + serviceGroupID := string(request.ServiceGroupId) + oldSafePoint, err := storage.LoadGCWorkerSafePoint(serviceGroupID) + if err != nil { + return nil, err + } + newSafePoint := &endpoint.GCSafePoint{ + ServiceGroupID: serviceGroupID, + SafePoint: request.SafePoint, + } + + // Only save the safe point if it's greater than the previous one + if newSafePoint.SafePoint > oldSafePoint.SafePoint { + if err := 
storage.SaveGCWorkerSafePoint(serviceGroupID, newSafePoint); err != nil { + return nil, err + } + log.Info("updated gc_worker safe point", + zap.String("service-group-id", serviceGroupID), + zap.Uint64("safe-point", newSafePoint.SafePoint)) + } else if newSafePoint.SafePoint < oldSafePoint.SafePoint { + log.Warn("trying to update gc_worker safe point", + zap.String("service-group-id", serviceGroupID), + zap.Uint64("old-safe-point", oldSafePoint.SafePoint), + zap.Uint64("new-safe-point", newSafePoint.SafePoint)) + newSafePoint = oldSafePoint + } + return &pdpb.UpdateGCSafePointByServiceGroupResponse{ + Header: s.header(), + ServiceGroupId: request.ServiceGroupId, + NewSafePoint: newSafePoint.SafePoint, + }, nil +} + +// UpdateServiceSafePointByServiceGroup for services like CDC/BR/Lightning to update gc safe points in PD +func (s *GrpcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, request *pdpb.UpdateServiceSafePointByServiceGroupRequest) (*pdpb.UpdateServiceSafePointByServiceGroupResponse, error) { + s.serviceGroupSafePointLock.Lock() + defer s.serviceGroupSafePointLock.Unlock() + + fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + return pdpb.NewPDClient(client).UpdateServiceSafePointByServiceGroup(ctx, request) + } + if rsp, err := s.unaryMiddleware(ctx, request.GetHeader(), fn); err != nil { + return nil, err + } else if rsp != nil { + return rsp.(*pdpb.UpdateServiceSafePointByServiceGroupResponse), err + } + + rc := s.GetRaftCluster() + if rc == nil { + return &pdpb.UpdateServiceSafePointByServiceGroupResponse{Header: s.notBootstrappedHeader()}, nil + } + + var storage endpoint.GCSafePointStorage = s.storage + serviceGroupID := string(request.ServiceGroupId) + serviceID := string(request.ServiceId) + // a less than 0 ttl means to remove the safe point + if request.TTL <= 0 { + if err := storage.RemoveServiceSafePointByServiceGroup(serviceGroupID, serviceID); err != nil { + return nil, err + } + } + + nowTSO, err := s.tsoAllocatorManager.HandleTSORequest(tso.GlobalDCLocation, 1) + if err != nil { + return nil, err + } + now, _ := tsoutil.ParseTimestamp(nowTSO) + min, err := storage.LoadMinServiceSafePointByServiceGroup(serviceGroupID, now) + if err != nil { + return nil, err + } + + // min safe point of all services need + if request.TTL > 0 && (min == nil || request.SafePoint >= min.SafePoint) { + ssp := &endpoint.ServiceSafePoint{ + ServiceID: serviceID, + ExpiredAt: now.Unix() + request.TTL, + SafePoint: request.SafePoint, + } + // handles overflow + if math.MaxInt64-now.Unix() <= request.TTL { + ssp.ExpiredAt = math.MaxInt64 + } + if err := storage.SaveServiceSafePointByServiceGroup(serviceGroupID, ssp); err != nil { + return nil, err + } + log.Info("update service safe point by service group", + zap.String("service-group-id", serviceGroupID), + zap.String("service-id", ssp.ServiceID), + zap.Int64("expire-at", ssp.ExpiredAt), + zap.Uint64("safepoint", ssp.SafePoint)) + // If the updated service is the original min, or if originally it's empty, look for min again + // note that this guarantees that min is not nil + if min == nil || serviceID == min.ServiceID { + min, err = storage.LoadMinServiceSafePointByServiceGroup(serviceGroupID, now) + if err != nil { + return nil, err + } + } + } + return &pdpb.UpdateServiceSafePointByServiceGroupResponse{ + Header: s.header(), + ServiceId: []byte(min.ServiceID), + TTL: min.ExpiredAt - now.Unix(), + MinSafePoint: min.SafePoint, + }, nil +} + // GetOperator gets information about the operator 
belonging to the specify region. func (s *GrpcServer) GetOperator(ctx context.Context, request *pdpb.GetOperatorRequest) (*pdpb.GetOperatorResponse, error) { fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { diff --git a/server/server.go b/server/server.go index 1af32fc7cac..5dd36a8c742 100644 --- a/server/server.go +++ b/server/server.go @@ -147,8 +147,10 @@ type Server struct { // serviceSafePointLock is a lock for UpdateServiceGCSafePoint serviceSafePointLock syncutil.Mutex + // Lock for UpdateServiceSafePointByServiceGroup + serviceGroupSafePointLock syncutil.Mutex - // hot region history info storeage + // hot region history info storage hotRegionStorage *storage.HotRegionStorage // Store as map[string]*grpc.ClientConn clientConns sync.Map diff --git a/server/storage/endpoint/gc_safe_point.go b/server/storage/endpoint/gc_safe_point.go index e213eca4ed5..89a9178a25b 100644 --- a/server/storage/endpoint/gc_safe_point.go +++ b/server/storage/endpoint/gc_safe_point.go @@ -18,9 +18,11 @@ import ( "encoding/json" "math" "strconv" + "strings" "time" "github.com/pingcap/errors" + "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" "go.etcd.io/etcd/clientv3" @@ -34,6 +36,12 @@ type ServiceSafePoint struct { SafePoint uint64 `json:"safe_point"` } +// GCSafePoint is gcWorker's safepoint for specific service group +type GCSafePoint struct { + ServiceGroupID string `json:"service_group_id"` + SafePoint uint64 `json:"safe_point"` +} + // GCSafePointStorage defines the storage operations on the GC safe point. type GCSafePointStorage interface { LoadGCSafePoint() (uint64, error) @@ -42,6 +50,13 @@ type GCSafePointStorage interface { LoadAllServiceGCSafePoints() ([]*ServiceSafePoint, error) SaveServiceGCSafePoint(ssp *ServiceSafePoint) error RemoveServiceGCSafePoint(serviceID string) error + + LoadGCWorkerSafePoint(serviceGroupID string) (*GCSafePoint, error) + SaveGCWorkerSafePoint(serviceGroupID string, gcSafePoint *GCSafePoint) error + LoadAllServiceGroupGCSafePoints() ([]*pdpb.ServiceGroupSafepoint, error) + RemoveServiceSafePointByServiceGroup(serviceGroupID, serviceID string) error + LoadMinServiceSafePointByServiceGroup(serviceGroupID string, now time.Time) (*ServiceSafePoint, error) + SaveServiceSafePointByServiceGroup(serviceGroupID string, ssp *ServiceSafePoint) error } var _ GCSafePointStorage = (*StorageEndpoint)(nil) @@ -186,3 +201,121 @@ func (se *StorageEndpoint) RemoveServiceGCSafePoint(serviceID string) error { key := gcSafePointServicePath(serviceID) return se.Remove(key) } + +// LoadGCWorkerSafePoint reads GCSafePoint for the given service group +func (se *StorageEndpoint) LoadGCWorkerSafePoint(serviceGroupID string) (*GCSafePoint, error) { + value, err := se.Load(gcWorkerSafePointPath(serviceGroupID)) + if err != nil || value == "" { + return nil, err + } + gcSafePoint := &GCSafePoint{} + if err := json.Unmarshal([]byte(value), gcSafePoint); err != nil { + return nil, err + } + + return gcSafePoint, nil +} + +// SaveGCWorkerSafePoint saves GCSafePoint under given service group +func (se *StorageEndpoint) SaveGCWorkerSafePoint(serviceGroupID string, gcSafePoint *GCSafePoint) error { + safePoint, err := json.Marshal(gcSafePoint) + if err != nil { + return err + } + return se.Save(gcWorkerSafePointPath(serviceGroupID), string(safePoint)) +} + +// LoadAllServiceGroupGCSafePoints returns a slice contains GCSafePoint for every service group +func (se *StorageEndpoint) LoadAllServiceGroupGCSafePoints() 
([]*pdpb.ServiceGroupSafepoint, error) { + prefix := safePointPrefixPath() + prefixEnd := clientv3.GetPrefixRangeEnd(prefix) + keys, values, err := se.LoadRange(prefix, prefixEnd, 0) + if err != nil { + return nil, err + } + if len(keys) == 0 { + return []*pdpb.ServiceGroupSafepoint{}, nil + } + gcSafePoints := make([]*pdpb.ServiceGroupSafepoint, 0, 2) // there are probably only two service groups + for i := range keys { + // skip service safe point + if !strings.HasSuffix(keys[i], gcWorkerSafePointSuffix()) { + continue + } + + gcSafePoint := &GCSafePoint{} + if err := json.Unmarshal([]byte(values[i]), gcSafePoint); err != nil { + return nil, err + } + serviceGroupSafePoint := &pdpb.ServiceGroupSafepoint{ + ServiceGroupId: []byte(gcSafePoint.ServiceGroupID), + SafePoint: gcSafePoint.SafePoint, + } + gcSafePoints = append(gcSafePoints, serviceGroupSafePoint) + } + + return gcSafePoints, nil +} + +// RemoveServiceSafePointByServiceGroup removes a service safe point +func (se *StorageEndpoint) RemoveServiceSafePointByServiceGroup(serviceGroupID, serviceID string) error { + key := serviceSafePointPath(serviceGroupID, serviceID) + return se.Remove(key) +} + +// SaveServiceSafePointByServiceGroup saves service safe point under given service group +func (se *StorageEndpoint) SaveServiceSafePointByServiceGroup(serviceGroupID string, ssp *ServiceSafePoint) error { + if ssp.ServiceID == "" { + return errors.New("service id of service safepoint cannot be empty") + } + key := serviceSafePointPath(serviceGroupID, ssp.ServiceID) + value, err := json.Marshal(ssp) + if err != nil { + return err + } + + return se.Save(key, string(value)) +} + +// LoadMinServiceSafePointByServiceGroup returns the minimum safepoint for the given service group +// note that gc worker safe point are store separately +func (se *StorageEndpoint) LoadMinServiceSafePointByServiceGroup(serviceGroupID string, now time.Time) (*ServiceSafePoint, error) { + prefix := serviceSafePointPrefixPath(serviceGroupID) + prefixEnd := clientv3.GetPrefixRangeEnd(prefix) + keys, values, err := se.LoadRange(prefix, prefixEnd, 0) + if err != nil { + return nil, err + } + + if len(keys) == 0 { + // the given service group does not have a service safe point yet + return nil, nil + } + + min := &ServiceSafePoint{SafePoint: math.MaxInt64} + for i, key := range keys { + ssp := &ServiceSafePoint{} + if err := json.Unmarshal([]byte(values[i]), ssp); err != nil { + return nil, err + } + + // remove expired safe points + if ssp.ExpiredAt < now.Unix() { + se.Remove(key) + continue + } + + if ssp.SafePoint < min.SafePoint { + min = ssp + } + } + + if min.SafePoint == math.MaxUint64 { + // fail to find a valid service safe point under current service group + // this can be normal behavior if the only safe point just expired + return nil, nil + } + + // successfully found a valid min safe point + return min, nil +} diff --git a/server/storage/endpoint/key_path.go b/server/storage/endpoint/key_path.go index 1f5e05601cf..f3c75e11963 100644 --- a/server/storage/endpoint/key_path.go +++ b/server/storage/endpoint/key_path.go @@ -31,6 +31,7 @@ const ( customScheduleConfigPath = "scheduler_config" gcWorkerServiceSafePointID = "gc_worker" minResolvedTS = "min_resolved_ts" + gcServiceGroupPath = "gc_servicegroup" ) // AppendToRootPath appends the given key to the rootPath. 
@@ -103,3 +104,27 @@ func gcSafePointServicePath(serviceID string) string { func MinResolvedTSPath() string { return path.Join(clusterPath, minResolvedTS) } + +// gcWorkerSafePointPath returns the path of the gc_worker's safe point +// /gc_servicegroup/$service_group_id/safe_point +func gcWorkerSafePointPath(serviceGroupID string) string { + return path.Join(gcServiceGroupPath, serviceGroupID, "safe_point") +} + +// serviceSafePointPath returns the path of services' safe point +// /gc_servicegroup/$service_group_id/service/$service_id +func serviceSafePointPath(serviceGroupID, serviceID string) string { + return path.Join(gcServiceGroupPath, serviceGroupID, "service", serviceID) +} + +func safePointPrefixPath() string { + return path.Join(gcServiceGroupPath) + "/" +} + +func serviceSafePointPrefixPath(serviceGroupID string) string { + return path.Join(gcServiceGroupPath, serviceGroupID, "service") + "/" +} + +func gcWorkerSafePointSuffix() string { + return "/safe_point" +} From 36ba35041c1c33207d664360a6a133440bdb0290 Mon Sep 17 00:00:00 2001 From: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> Date: Fri, 22 Apr 2022 11:32:29 +0800 Subject: [PATCH 02/18] rephrased comments Signed-off-by: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> --- server/grpc_service.go | 5 +++-- server/storage/endpoint/gc_safe_point.go | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/server/grpc_service.go b/server/grpc_service.go index 2adc84322c3..ea8e0db0a11 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -1378,7 +1378,7 @@ func (s *GrpcServer) UpdateGCSafePoint(ctx context.Context, request *pdpb.Update }, nil } -// UpdateServiceGCSafePoint update the safepoint for specific service +// UpdateServiceGCSafePoint update the safe point for specific service func (s *GrpcServer) UpdateServiceGCSafePoint(ctx context.Context, request *pdpb.UpdateServiceGCSafePointRequest) (*pdpb.UpdateServiceGCSafePointResponse, error) { s.serviceSafePointLock.Lock() defer s.serviceSafePointLock.Unlock() @@ -1563,7 +1563,8 @@ func (s *GrpcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, r return nil, err } - // min safe point of all services need + // ToDO: Should this requirement be stronger? 
Like say request.SafePoint > old.SafePoint + // ToDO: Add proper requirements for new service if request.TTL > 0 && (min == nil || request.SafePoint >= min.SafePoint) { ssp := &endpoint.ServiceSafePoint{ ServiceID: serviceID, diff --git a/server/storage/endpoint/gc_safe_point.go b/server/storage/endpoint/gc_safe_point.go index 89a9178a25b..10c8269090d 100644 --- a/server/storage/endpoint/gc_safe_point.go +++ b/server/storage/endpoint/gc_safe_point.go @@ -279,6 +279,7 @@ func (se *StorageEndpoint) SaveServiceSafePointByServiceGroup(serviceGroupID str // LoadMinServiceSafePointByServiceGroup returns the minimum safepoint for the given service group // note that gc worker safe point are store separately +// If no service safe point exist for the given service group or the only service safe point just expired, return nil func (se *StorageEndpoint) LoadMinServiceSafePointByServiceGroup(serviceGroupID string, now time.Time) (*ServiceSafePoint, error) { prefix := serviceSafePointPrefixPath(serviceGroupID) prefixEnd := clientv3.GetPrefixRangeEnd(prefix) @@ -311,8 +312,7 @@ func (se *StorageEndpoint) LoadMinServiceSafePointByServiceGroup(serviceGroupID } if min.SafePoint == math.MaxUint64 { - // fail to find a valid service safe point under current service group - // this can be normal behavior if the only safe point just expired + // the only service safe point just expired return nil, nil } From 9856c2269ac6859ea730beb8e218274fc93b92be Mon Sep 17 00:00:00 2001 From: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> Date: Fri, 22 Apr 2022 12:26:47 +0800 Subject: [PATCH 03/18] updated client code Signed-off-by: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> --- client/client.go | 89 ++++++++++++++++++++++++++++++++++++++++++ client/go.mod | 3 ++ client/go.sum | 1 + client/metrics.go | 62 ++++++++++++++++------------- server/grpc_service.go | 5 +++ 5 files changed, 132 insertions(+), 28 deletions(-) diff --git a/client/client.go b/client/client.go index 07598910af4..daa27f9350a 100644 --- a/client/client.go +++ b/client/client.go @@ -106,6 +106,17 @@ type Client interface { // determine the safepoint for multiple services, it does not trigger a GC // job. Use UpdateGCSafePoint to trigger the GC job if needed. UpdateServiceGCSafePoint(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error) + // UpdateGCSafePointByServiceGroup update GC safe point, the update will only be successful if proposed + // safe point is later than the old one + // returns the new safePoint after the update attempt (may return the old safe point if update rejected) + UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64) (uint64, error) + // UpdateServiceSafePointByServiceGroup update service safe point for specific service under given service group + // pass in a ttl less than 0 to remove the target service safe point instead + // will return the min safePoint of the serviceGroup after the update, + // if no service safePoint exists after the given operation, return 0 + UpdateServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (uint64, error) + // GetAllServiceGroupGcSafePoint returns a list containing gc safe point for each service group + GetAllServiceGroupGcSafePoint(ctx context.Context) ([]*pdpb.ServiceGroupSafepoint, error) // ScatterRegion scatters the specified region. 
Should use it for a batch of regions, // and the distribution of these regions will be dispersed. // NOTICE: This method is the old version of ScatterRegions, you should use the later one as your first choice. @@ -1657,6 +1668,84 @@ func (c *client) UpdateServiceGCSafePoint(ctx context.Context, serviceID string, return resp.GetMinSafePoint(), nil } +func (c *client) UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64) (uint64, error) { + if span := opentracing.SpanFromContext(ctx); span != nil { + span = opentracing.StartSpan("pdclient.UpdateGCSafePointByServiceGroup", opentracing.ChildOf(span.Context())) + defer span.Finish() + } + start := time.Now() + defer func() { cmdDurationUpdateGCSafePointByServiceGroup.Observe(time.Since(start).Seconds()) }() + + ctx, cancel := context.WithTimeout(ctx, c.option.timeout) + req := &pdpb.UpdateGCSafePointByServiceGroupRequest{ + Header: c.requestHeader(), + ServiceGroupId: []byte(serviceGroupID), + SafePoint: safePoint, + } + ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) + resp, err := c.getClient().UpdateGCSafePointByServiceGroup(ctx, req) + cancel() + + if err != nil { + cmdFailedDurationUpdateGCSafePointByServiceGroup.Observe(time.Since(start).Seconds()) + c.ScheduleCheckLeader() + return 0, errors.WithStack(err) + } + return resp.GetNewSafePoint(), nil +} +func (c *client) UpdateServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (uint64, error) { + if span := opentracing.SpanFromContext(ctx); span != nil { + span = opentracing.StartSpan("pdclient.UpdateServiceSafePointByServiceGroup", opentracing.ChildOf(span.Context())) + defer span.Finish() + } + start := time.Now() + defer func() { cmdDurationUpdateServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) }() + ctx, cancel := context.WithTimeout(ctx, c.option.timeout) + req := &pdpb.UpdateServiceSafePointByServiceGroupRequest{ + Header: c.requestHeader(), + ServiceGroupId: []byte(serviceGroupID), + ServiceId: []byte(serviceID), + TTL: ttl, + SafePoint: safePoint, + } + ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) + resp, err := c.getClient().UpdateServiceSafePointByServiceGroup(ctx, req) + cancel() + + if err != nil { + cmdFailedDurationUpdateServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) + c.ScheduleCheckLeader() + return 0, errors.WithStack(err) + } + + return resp.GetMinSafePoint(), nil +} + +func (c *client) GetAllServiceGroupGcSafePoint(ctx context.Context) ([]*pdpb.ServiceGroupSafepoint, error) { + if span := opentracing.SpanFromContext(ctx); span != nil { + span = opentracing.StartSpan("pdclient.GetAllServiceGroupGcSafePoint", opentracing.ChildOf(span.Context())) + defer span.Finish() + } + + start := time.Now() + defer func() { cmdDurationGetAllServiceGroupGcSafePoint.Observe(time.Since(start).Seconds()) }() + ctx, cancel := context.WithTimeout(ctx, c.option.timeout) + req := &pdpb.GetAllServiceGroupGcSafePointRequest{ + Header: c.requestHeader(), + } + ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) + resp, err := c.getClient().GetAllServiceGroupGcSafePoint(ctx, req) + cancel() + + if err != nil { + cmdFailedDurationGetAllServiceGroupGcSafePoint.Observe(time.Since(start).Seconds()) + c.ScheduleCheckLeader() + return nil, errors.WithStack(err) + } + + return resp.GetServiceGroupSafePoint(), nil +} + func (c *client) ScatterRegion(ctx context.Context, regionID uint64) error { if span := 
opentracing.SpanFromContext(ctx); span != nil { span = opentracing.StartSpan("pdclient.ScatterRegion", opentracing.ChildOf(span.Context())) diff --git a/client/go.mod b/client/go.mod index 22ef56aa417..2da9042fce5 100644 --- a/client/go.mod +++ b/client/go.mod @@ -2,6 +2,9 @@ module github.com/tikv/pd/client go 1.16 +// TODO: Remove this once kvproto has been updated +replace github.com/pingcap/kvproto => github.com/ystaticy/kvproto v0.0.0-20220419035825-6bb5c11da23d + require ( github.com/opentracing/opentracing-go v1.2.0 github.com/pingcap/check v0.0.0-20211026125417-57bd13f7b5f0 diff --git a/client/go.sum b/client/go.sum index becfbccfe12..47d8acefd2b 100644 --- a/client/go.sum +++ b/client/go.sum @@ -150,6 +150,7 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/ystaticy/kvproto v0.0.0-20220419035825-6bb5c11da23d/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= diff --git a/client/metrics.go b/client/metrics.go index a085f095406..c92567fb0ed 100644 --- a/client/metrics.go +++ b/client/metrics.go @@ -82,35 +82,41 @@ var ( var ( // WithLabelValues is a heavy operation, define variable to avoid call it every time. - cmdDurationWait = cmdDuration.WithLabelValues("wait") - cmdDurationTSO = cmdDuration.WithLabelValues("tso") - cmdDurationTSOAsyncWait = cmdDuration.WithLabelValues("tso_async_wait") - cmdDurationGetRegion = cmdDuration.WithLabelValues("get_region") - cmdDurationGetAllMembers = cmdDuration.WithLabelValues("get_member_info") - cmdDurationGetPrevRegion = cmdDuration.WithLabelValues("get_prev_region") - cmdDurationGetRegionByID = cmdDuration.WithLabelValues("get_region_byid") - cmdDurationScanRegions = cmdDuration.WithLabelValues("scan_regions") - cmdDurationGetStore = cmdDuration.WithLabelValues("get_store") - cmdDurationGetAllStores = cmdDuration.WithLabelValues("get_all_stores") - cmdDurationUpdateGCSafePoint = cmdDuration.WithLabelValues("update_gc_safe_point") - cmdDurationUpdateServiceGCSafePoint = cmdDuration.WithLabelValues("update_service_gc_safe_point") - cmdDurationScatterRegion = cmdDuration.WithLabelValues("scatter_region") - cmdDurationScatterRegions = cmdDuration.WithLabelValues("scatter_regions") - cmdDurationGetOperator = cmdDuration.WithLabelValues("get_operator") - cmdDurationSplitRegions = cmdDuration.WithLabelValues("split_regions") - cmdDurationSplitAndScatterRegions = cmdDuration.WithLabelValues("split_and_scatter_regions") + cmdDurationWait = cmdDuration.WithLabelValues("wait") + cmdDurationTSO = cmdDuration.WithLabelValues("tso") + cmdDurationTSOAsyncWait = cmdDuration.WithLabelValues("tso_async_wait") + cmdDurationGetRegion = cmdDuration.WithLabelValues("get_region") + cmdDurationGetAllMembers = cmdDuration.WithLabelValues("get_member_info") + cmdDurationGetPrevRegion = cmdDuration.WithLabelValues("get_prev_region") + cmdDurationGetRegionByID = cmdDuration.WithLabelValues("get_region_byid") + cmdDurationScanRegions = cmdDuration.WithLabelValues("scan_regions") + cmdDurationGetStore = 
cmdDuration.WithLabelValues("get_store") + cmdDurationGetAllStores = cmdDuration.WithLabelValues("get_all_stores") + cmdDurationUpdateGCSafePoint = cmdDuration.WithLabelValues("update_gc_safe_point") + cmdDurationUpdateServiceGCSafePoint = cmdDuration.WithLabelValues("update_service_gc_safe_point") + cmdDurationScatterRegion = cmdDuration.WithLabelValues("scatter_region") + cmdDurationScatterRegions = cmdDuration.WithLabelValues("scatter_regions") + cmdDurationGetOperator = cmdDuration.WithLabelValues("get_operator") + cmdDurationSplitRegions = cmdDuration.WithLabelValues("split_regions") + cmdDurationSplitAndScatterRegions = cmdDuration.WithLabelValues("split_and_scatter_regions") + cmdDurationUpdateGCSafePointByServiceGroup = cmdDuration.WithLabelValues("update_gc_safe_point_by_service_group") + cmdDurationUpdateServiceSafePointByServiceGroup = cmdDuration.WithLabelValues("update_service_safe_point_by_service_group") + cmdDurationGetAllServiceGroupGcSafePoint = cmdDuration.WithLabelValues("get_all_service_group_gc_safe_point") - cmdFailDurationGetRegion = cmdFailedDuration.WithLabelValues("get_region") - cmdFailDurationTSO = cmdFailedDuration.WithLabelValues("tso") - cmdFailDurationGetAllMembers = cmdFailedDuration.WithLabelValues("get_member_info") - cmdFailDurationGetPrevRegion = cmdFailedDuration.WithLabelValues("get_prev_region") - cmdFailedDurationGetRegionByID = cmdFailedDuration.WithLabelValues("get_region_byid") - cmdFailedDurationScanRegions = cmdFailedDuration.WithLabelValues("scan_regions") - cmdFailedDurationGetStore = cmdFailedDuration.WithLabelValues("get_store") - cmdFailedDurationGetAllStores = cmdFailedDuration.WithLabelValues("get_all_stores") - cmdFailedDurationUpdateGCSafePoint = cmdFailedDuration.WithLabelValues("update_gc_safe_point") - cmdFailedDurationUpdateServiceGCSafePoint = cmdFailedDuration.WithLabelValues("update_service_gc_safe_point") - requestDurationTSO = requestDuration.WithLabelValues("tso") + cmdFailDurationGetRegion = cmdFailedDuration.WithLabelValues("get_region") + cmdFailDurationTSO = cmdFailedDuration.WithLabelValues("tso") + cmdFailDurationGetAllMembers = cmdFailedDuration.WithLabelValues("get_member_info") + cmdFailDurationGetPrevRegion = cmdFailedDuration.WithLabelValues("get_prev_region") + cmdFailedDurationGetRegionByID = cmdFailedDuration.WithLabelValues("get_region_byid") + cmdFailedDurationScanRegions = cmdFailedDuration.WithLabelValues("scan_regions") + cmdFailedDurationGetStore = cmdFailedDuration.WithLabelValues("get_store") + cmdFailedDurationGetAllStores = cmdFailedDuration.WithLabelValues("get_all_stores") + cmdFailedDurationUpdateGCSafePoint = cmdFailedDuration.WithLabelValues("update_gc_safe_point") + cmdFailedDurationUpdateServiceGCSafePoint = cmdFailedDuration.WithLabelValues("update_service_gc_safe_point") + cmdFailedDurationUpdateGCSafePointByServiceGroup = cmdFailedDuration.WithLabelValues("update_gc_safe_point_by_service_group") + cmdFailedDurationUpdateServiceSafePointByServiceGroup = cmdFailedDuration.WithLabelValues("update_service_safe_point_by_service_group") + cmdFailedDurationGetAllServiceGroupGcSafePoint = cmdFailedDuration.WithLabelValues("get_all_service_group_gc_safe_point") + requestDurationTSO = requestDuration.WithLabelValues("tso") ) func init() { diff --git a/server/grpc_service.go b/server/grpc_service.go index ea8e0db0a11..259abaa65ce 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -1592,6 +1592,11 @@ func (s *GrpcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, r } } } 
+ + // the case where we just deleted the last service safe point + if min == nil { + min = &endpoint.ServiceSafePoint{} + } return &pdpb.UpdateServiceSafePointByServiceGroupResponse{ Header: s.header(), ServiceId: []byte(min.ServiceID), From bdd1f0e653616d2aa4ce76e2902505921725b6e8 Mon Sep 17 00:00:00 2001 From: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> Date: Wed, 27 Apr 2022 15:25:03 +0800 Subject: [PATCH 04/18] server side code Signed-off-by: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> --- client/go.mod | 2 +- client/go.sum | 1 + go.mod | 2 +- go.sum | 1 + server/grpc_service.go | 191 +++++++++++++++++------ server/storage/endpoint/gc_safe_point.go | 168 +++++++++++--------- server/storage/endpoint/key_path.go | 22 ++- 7 files changed, 255 insertions(+), 132 deletions(-) diff --git a/client/go.mod b/client/go.mod index 2da9042fce5..a7a7a22150b 100644 --- a/client/go.mod +++ b/client/go.mod @@ -3,7 +3,7 @@ module github.com/tikv/pd/client go 1.16 // TODO: Remove this once kvproto has been updated -replace github.com/pingcap/kvproto => github.com/ystaticy/kvproto v0.0.0-20220419035825-6bb5c11da23d +replace github.com/pingcap/kvproto => github.com/AmoebaProtozoa/kvproto v0.0.0-20220427045408-abeb7dbc9f22 require ( github.com/opentracing/opentracing-go v1.2.0 diff --git a/client/go.sum b/client/go.sum index 47d8acefd2b..8c119d38db2 100644 --- a/client/go.sum +++ b/client/go.sum @@ -1,5 +1,6 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/AmoebaProtozoa/kvproto v0.0.0-20220427045408-abeb7dbc9f22/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= diff --git a/go.mod b/go.mod index 96c1d856436..740d9e6960a 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/tikv/pd go 1.16 // TODO: Remove this once kvproto has been updated -replace github.com/pingcap/kvproto => github.com/ystaticy/kvproto v0.0.0-20220419035825-6bb5c11da23d +replace github.com/pingcap/kvproto => github.com/AmoebaProtozoa/kvproto v0.0.0-20220427045408-abeb7dbc9f22 require ( github.com/AlekSi/gocov-xml v1.0.0 diff --git a/go.sum b/go.sum index 06d674e395b..80ba50193fb 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,7 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/AlekSi/gocov-xml v1.0.0 h1:4QctJBgXEkbzeKz6PJy6bt3JSPNSN4I2mITYW+eKUoQ= github.com/AlekSi/gocov-xml v1.0.0/go.mod h1:J0qYeZ6tDg4oZubW9mAAgxlqw39PDfoEkzB3HXSbEuA= +github.com/AmoebaProtozoa/kvproto v0.0.0-20220427044528-668e540bb708/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= diff --git a/server/grpc_service.go b/server/grpc_service.go index 259abaa65ce..8afd9f4a47e 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -1445,38 +1445,80 @@ func (s *GrpcServer) UpdateServiceGCSafePoint(ctx context.Context, request *pdpb }, nil } -// GetAllServiceGroupGcSafePoint 
used by RawKV -// returns GCSafePoint for all service groups as well as min GCSafePoint -func (s *GrpcServer) GetAllServiceGroupGcSafePoint(ctx context.Context, request *pdpb.GetAllServiceGroupGcSafePointRequest) (*pdpb.GetAllServiceGroupGcSafePointResponse, error) { +// GetServiceGroup return all service group ids +func (s *GrpcServer) GetServiceGroup(ctx context.Context, request *pdpb.GetServiceGroupRequest) (*pdpb.GetServiceGroupResponse, error) { fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { - return pdpb.NewPDClient(client).GetAllServiceGroupGcSafePoint(ctx, request) + return pdpb.NewPDClient(client).GetServiceGroup(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request.GetHeader(), fn); err != nil { return nil, err } else if rsp != nil { - return rsp.(*pdpb.GetAllServiceGroupGcSafePointResponse), err + return rsp.(*pdpb.GetServiceGroupResponse), err } rc := s.GetRaftCluster() if rc == nil { - return &pdpb.GetAllServiceGroupGcSafePointResponse{Header: s.notBootstrappedHeader()}, nil + return &pdpb.GetServiceGroupResponse{Header: s.notBootstrappedHeader()}, nil } var storage endpoint.GCSafePointStorage = s.storage - safePoints, err := storage.LoadAllServiceGroupGCSafePoints() - + serviceGroupList, err := storage.LoadAllServiceGroup() if err != nil { return nil, err } - return &pdpb.GetAllServiceGroupGcSafePointResponse{ - Header: s.header(), - ServiceGroupSafePoint: safePoints, + return &pdpb.GetServiceGroupResponse{ + Header: s.header(), + ServiceGroupId: serviceGroupList, + }, nil +} + +// GetMinServiceSafePointByServiceGroup returns given service group's min service safe point +func (s *GrpcServer) GetMinServiceSafePointByServiceGroup(ctx context.Context, request *pdpb.GetMinServiceSafePointByServiceGroupRequest) (*pdpb.GetMinServiceSafePointByServiceGroupResponse, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + return pdpb.NewPDClient(client).GetMinServiceSafePointByServiceGroup(ctx, request) + } + if rsp, err := s.unaryMiddleware(ctx, request.GetHeader(), fn); err != nil { + return nil, err + } else if rsp != nil { + return rsp.(*pdpb.GetMinServiceSafePointByServiceGroupResponse), err + } + + rc := s.GetRaftCluster() + if rc == nil { + return &pdpb.GetMinServiceSafePointByServiceGroupResponse{Header: s.notBootstrappedHeader()}, nil + } + + var storage endpoint.GCSafePointStorage = s.storage + serviceGroupID := string(request.ServiceGroupId) + nowTSO, err := s.tsoAllocatorManager.HandleTSORequest(tso.GlobalDCLocation, 1) + if err != nil { + return nil, err + } + now, _ := tsoutil.ParseTimestamp(nowTSO) + min, err := storage.LoadMinServiceSafePointByServiceGroup(serviceGroupID, now) + if err != nil { + return nil, err + } + var returnSafePoint uint64 + if min != nil { + returnSafePoint = min.SafePoint + } + // perform a get operation on a non-existing key to obtain current etcd revision number from response header + rsp, _ := s.client.Get(ctx, "NA") + currentRevision := rsp.Header.GetRevision() + return &pdpb.GetMinServiceSafePointByServiceGroupResponse{ + Header: s.header(), + SafePoint: returnSafePoint, + Revision: currentRevision, }, nil } // UpdateGCSafePointByServiceGroup used by gc_worker to update their gc safe points func (s *GrpcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, request *pdpb.UpdateGCSafePointByServiceGroupRequest) (*pdpb.UpdateGCSafePointByServiceGroupResponse, error) { + s.updateSafePointByServiceGroupLock.Lock() + defer 
s.updateSafePointByServiceGroupLock.Unlock() + fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { return pdpb.NewPDClient(client).UpdateGCSafePointByServiceGroup(ctx, request) } @@ -1492,42 +1534,59 @@ func (s *GrpcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, reques } var storage endpoint.GCSafePointStorage = s.storage - serviceGroupID := string(request.ServiceGroupId) - oldSafePoint, err := storage.LoadGCWorkerSafePoint(serviceGroupID) - if err != nil { - return nil, err + + // check if revision changed since last min calculation + rsp, _ := s.client.Get(ctx, "NA") + currentRevision := rsp.Header.GetRevision() + if currentRevision != request.GetRevision() { + return &pdpb.UpdateGCSafePointByServiceGroupResponse{ + Header: s.header(), + NewSafePoint: 0, + ValidRevision: false, + }, nil } + serviceGroupID := string(request.ServiceGroupId) newSafePoint := &endpoint.GCSafePoint{ ServiceGroupID: serviceGroupID, SafePoint: request.SafePoint, } - // Only save the safe point if it's greater than the previous one - if newSafePoint.SafePoint > oldSafePoint.SafePoint { - if err := storage.SaveGCWorkerSafePoint(serviceGroupID, newSafePoint); err != nil { + prev, err := storage.LoadGCWorkerSafePoint(serviceGroupID) + if err != nil { + return nil, err + } + // if no previous safepoint, treat it as 0 + var oldSafePoint uint64 = 0 + if prev != nil { + oldSafePoint = prev.SafePoint + } + + // Only save the safe point if it's greater than the previous one, or if no previous one exist + if request.SafePoint > oldSafePoint { + if err := storage.SaveGCWorkerSafePoint(newSafePoint); err != nil { return nil, err } log.Info("updated gc_worker safe point", zap.String("service-group-id", serviceGroupID), zap.Uint64("safe-point", newSafePoint.SafePoint)) - } else if newSafePoint.SafePoint < oldSafePoint.SafePoint { + } else if newSafePoint.SafePoint < request.SafePoint { log.Warn("trying to update gc_worker safe point", zap.String("service-group-id", serviceGroupID), - zap.Uint64("old-safe-point", oldSafePoint.SafePoint), + zap.Uint64("old-safe-point", request.SafePoint), zap.Uint64("new-safe-point", newSafePoint.SafePoint)) - newSafePoint = oldSafePoint + newSafePoint.SafePoint = oldSafePoint } return &pdpb.UpdateGCSafePointByServiceGroupResponse{ - Header: s.header(), - ServiceGroupId: request.ServiceGroupId, - NewSafePoint: newSafePoint.SafePoint, + Header: s.header(), + NewSafePoint: newSafePoint.SafePoint, + ValidRevision: true, }, nil } // UpdateServiceSafePointByServiceGroup for services like CDC/BR/Lightning to update gc safe points in PD func (s *GrpcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, request *pdpb.UpdateServiceSafePointByServiceGroupRequest) (*pdpb.UpdateServiceSafePointByServiceGroupResponse, error) { - s.serviceGroupSafePointLock.Lock() - defer s.serviceGroupSafePointLock.Unlock() + s.updateSafePointByServiceGroupLock.Lock() + defer s.updateSafePointByServiceGroupLock.Unlock() fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { return pdpb.NewPDClient(client).UpdateServiceSafePointByServiceGroup(ctx, request) @@ -1546,11 +1605,14 @@ func (s *GrpcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, r var storage endpoint.GCSafePointStorage = s.storage serviceGroupID := string(request.ServiceGroupId) serviceID := string(request.ServiceId) - // a less than 0 ttl means to remove the safe point + // a less than 0 ttl means to remove the safe point, immediately return after the deletion request 
if request.TTL <= 0 { if err := storage.RemoveServiceSafePointByServiceGroup(serviceGroupID, serviceID); err != nil { return nil, err } + return &pdpb.UpdateServiceSafePointByServiceGroupResponse{ + Header: s.header(), + }, nil } nowTSO, err := s.tsoAllocatorManager.HandleTSORequest(tso.GlobalDCLocation, 1) @@ -1558,14 +1620,31 @@ func (s *GrpcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, r return nil, err } now, _ := tsoutil.ParseTimestamp(nowTSO) - min, err := storage.LoadMinServiceSafePointByServiceGroup(serviceGroupID, now) + + sspOld, err := storage.LoadServiceSafePoint(serviceGroupID, serviceID) if err != nil { return nil, err } + gcsp, err := storage.LoadGCWorkerSafePoint(serviceGroupID) + if err != nil { + return nil, err + } + var oldServiceSafePoint, gcSafePoint, newServiceSafePoint uint64 = 0, 0, 0 + if sspOld != nil { + oldServiceSafePoint = sspOld.SafePoint + newServiceSafePoint = oldServiceSafePoint // case where update denied + } + if gcsp != nil { + gcSafePoint = gcsp.SafePoint + } + + // case where there is an old safepoint for the given service, we have to check that + // new safepoint >= old safepoint + caseUpdate := oldServiceSafePoint != 0 && request.SafePoint >= oldServiceSafePoint + // Or if no old safepoint and new safepoint >= gc safepoint + caseInit := oldServiceSafePoint == 0 && request.SafePoint >= gcSafePoint - // ToDO: Should this requirement be stronger? Like say request.SafePoint > old.SafePoint - // ToDO: Add proper requirements for new service - if request.TTL > 0 && (min == nil || request.SafePoint >= min.SafePoint) { + if caseUpdate || caseInit { ssp := &endpoint.ServiceSafePoint{ ServiceID: serviceID, ExpiredAt: now.Unix() + request.TTL, @@ -1578,30 +1657,48 @@ func (s *GrpcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, r if err := storage.SaveServiceSafePointByServiceGroup(serviceGroupID, ssp); err != nil { return nil, err } + newServiceSafePoint = request.SafePoint // case where update performed log.Info("update service safe point by service group", zap.String("service-group-id", serviceGroupID), zap.String("service-id", ssp.ServiceID), zap.Int64("expire-at", ssp.ExpiredAt), zap.Uint64("safepoint", ssp.SafePoint)) - // If the updated service is the original min, or if originally it's empty, look for min again - // note that this guarantees that min is not nil - if min == nil || serviceID == min.ServiceID { - min, err = storage.LoadMinServiceSafePointByServiceGroup(serviceGroupID, now) - if err != nil { - return nil, err - } - } } - // the case where we just deleted the last service safe point - if min == nil { - min = &endpoint.ServiceSafePoint{} - } return &pdpb.UpdateServiceSafePointByServiceGroupResponse{ - Header: s.header(), - ServiceId: []byte(min.ServiceID), - TTL: min.ExpiredAt - now.Unix(), - MinSafePoint: min.SafePoint, + Header: s.header(), + GcSafePoint: gcSafePoint, + OldServiceSafePoint: oldServiceSafePoint, + NewServiceSafePoint: newServiceSafePoint, + }, nil +} + +// GetAllServiceGroupGCSafePoint returns all service group's gc safe point +func (s *GrpcServer) GetAllServiceGroupGCSafePoint(ctx context.Context, request *pdpb.GetAllServiceGroupGCSafePointRequest) (*pdpb.GetAllServiceGroupGCSafePointResponse, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + return pdpb.NewPDClient(client).GetAllServiceGroupGCSafePoint(ctx, request) + } + if rsp, err := s.unaryMiddleware(ctx, request.GetHeader(), fn); err != nil { + return nil, err + } else if rsp != nil { + 
return rsp.(*pdpb.GetAllServiceGroupGCSafePointResponse), err + } + + rc := s.GetRaftCluster() + if rc == nil { + return &pdpb.GetAllServiceGroupGCSafePointResponse{Header: s.notBootstrappedHeader()}, nil + } + + var storage endpoint.GCSafePointStorage = s.storage + safePoints, err := storage.LoadAllServiceGroupGCSafePoints() + + if err != nil { + return nil, err + } + + return &pdpb.GetAllServiceGroupGCSafePointResponse{ + Header: s.header(), + ServiceGroupSafePoint: safePoints, }, nil } diff --git a/server/storage/endpoint/gc_safe_point.go b/server/storage/endpoint/gc_safe_point.go index 10c8269090d..0a7d1903d8d 100644 --- a/server/storage/endpoint/gc_safe_point.go +++ b/server/storage/endpoint/gc_safe_point.go @@ -17,8 +17,8 @@ package endpoint import ( "encoding/json" "math" + "path" "strconv" - "strings" "time" "github.com/pingcap/errors" @@ -51,12 +51,14 @@ type GCSafePointStorage interface { SaveServiceGCSafePoint(ssp *ServiceSafePoint) error RemoveServiceGCSafePoint(serviceID string) error + LoadAllServiceGroup() ([][]byte, error) + LoadMinServiceSafePointByServiceGroup(serviceGroupID string, now time.Time) (*ServiceSafePoint, error) LoadGCWorkerSafePoint(serviceGroupID string) (*GCSafePoint, error) - SaveGCWorkerSafePoint(serviceGroupID string, gcSafePoint *GCSafePoint) error - LoadAllServiceGroupGCSafePoints() ([]*pdpb.ServiceGroupSafepoint, error) + SaveGCWorkerSafePoint(gcSafePoint *GCSafePoint) error RemoveServiceSafePointByServiceGroup(serviceGroupID, serviceID string) error - LoadMinServiceSafePointByServiceGroup(serviceGroupID string, now time.Time) (*ServiceSafePoint, error) + LoadServiceSafePoint(serviceGroupID, serviceID string) (*ServiceSafePoint, error) SaveServiceSafePointByServiceGroup(serviceGroupID string, ssp *ServiceSafePoint) error + LoadAllServiceGroupGCSafePoints() ([]*pdpb.ServiceGroupSafePoint, error) } var _ GCSafePointStorage = (*StorageEndpoint)(nil) @@ -193,7 +195,7 @@ func (se *StorageEndpoint) SaveServiceGCSafePoint(ssp *ServiceSafePoint) error { return se.Save(key, string(value)) } -// RemoveServiceGCSafePoint removes a GC safepoint for the service +// RemoveServiceGCSafePoint removes a GC safeoint for the service func (se *StorageEndpoint) RemoveServiceGCSafePoint(serviceID string) error { if serviceID == gcWorkerServiceSafePointID { return errors.New("cannot remove service safe point of gc_worker") @@ -202,9 +204,70 @@ func (se *StorageEndpoint) RemoveServiceGCSafePoint(serviceID string) error { return se.Remove(key) } +// LoadMinServiceSafePointByServiceGroup returns the minimum safepoint for the given service group +// note that gc worker safe point are store separately +// If no service safe point exist for the given service group or the only service safe point just expired, return nil +func (se *StorageEndpoint) LoadMinServiceSafePointByServiceGroup(serviceGroupID string, now time.Time) (*ServiceSafePoint, error) { + prefix := serviceSafePointPrefixPath(serviceGroupID) + prefixEnd := clientv3.GetPrefixRangeEnd(prefix) + keys, values, err := se.LoadRange(prefix, prefixEnd, 0) + if err != nil { + return nil, err + } + + if len(keys) == 0 { + // the given service group does not have a service safe point yet + return nil, nil + } + + min := &ServiceSafePoint{SafePoint: math.MaxInt64} + for i, key := range keys { + ssp := &ServiceSafePoint{} + if err := json.Unmarshal([]byte(values[i]), ssp); err != nil { + return nil, err + } + + // remove expired safe points + if ssp.ExpiredAt < now.Unix() { + se.Remove(key) + continue + } + + if ssp.SafePoint < 
min.SafePoint { + min = ssp + } + } + + if min.SafePoint == math.MaxInt64 { + // the only service safe point just expired + return nil, nil + } + + // successfully found a valid min safe point + return min, nil +} + +// LoadAllServiceGroup returns a list of all service group IDs +func (se *StorageEndpoint) LoadAllServiceGroup() ([][]byte, error) { + prefix := gcSafePointPrefixPath() + prefixEnd := clientv3.GetPrefixRangeEnd(prefix) + keys, _, err := se.LoadRange(prefix, prefixEnd, 0) + if err != nil { + return nil, err + } + + serviceGroupIDs := make([][]byte, 0, 2) + for _, key := range keys { + _, serviceGroupID := path.Split(key) + serviceGroupIDs = append(serviceGroupIDs, []byte(serviceGroupID)) + } + return serviceGroupIDs, nil +} + // LoadGCWorkerSafePoint reads GCSafePoint for the given service group +// return nil if the safe point does not exist func (se *StorageEndpoint) LoadGCWorkerSafePoint(serviceGroupID string) (*GCSafePoint, error) { - value, err := se.Load(gcWorkerSafePointPath(serviceGroupID)) + value, err := se.Load(gcSafePointPathByServiceGroup(serviceGroupID)) if err != nil || value == "" { return nil, err } @@ -212,49 +275,16 @@ func (se *StorageEndpoint) LoadGCWorkerSafePoint(serviceGroupID string) (*GCSafe if err := json.Unmarshal([]byte(value), gcSafePoint); err != nil { return nil, err } - return gcSafePoint, nil } // SaveGCWorkerSafePoint saves GCSafePoint under given service group -func (se *StorageEndpoint) SaveGCWorkerSafePoint(serviceGroupID string, gcSafePoint *GCSafePoint) error { +func (se *StorageEndpoint) SaveGCWorkerSafePoint(gcSafePoint *GCSafePoint) error { safePoint, err := json.Marshal(gcSafePoint) if err != nil { return err } - return se.Save(gcWorkerSafePointPath(serviceGroupID), string(safePoint)) -} - -// LoadAllServiceGroupGCSafePoints returns a slice contains GCSafePoint for every service group -func (se *StorageEndpoint) LoadAllServiceGroupGCSafePoints() ([]*pdpb.ServiceGroupSafepoint, error) { - prefix := safePointPrefixPath() - prefixEnd := clientv3.GetPrefixRangeEnd(prefix) - keys, values, err := se.LoadRange(prefix, prefixEnd, 0) - if err != nil { - return nil, err - } - if len(keys) == 0 { - return []*pdpb.ServiceGroupSafepoint{}, nil - } - gcSafePoints := make([]*pdpb.ServiceGroupSafepoint, 0, 2) // there are probably only two service groups - for i := range keys { - // skip service safe point - if !strings.HasSuffix(keys[i], gcWorkerSafePointSuffix()) { - continue - } - - gcSafePoint := &GCSafePoint{} - if err := json.Unmarshal([]byte(values[i]), gcSafePoint); err != nil { - return nil, err - } - serviceGroupSafePoint := &pdpb.ServiceGroupSafepoint{ - ServiceGroupId: []byte(gcSafePoint.ServiceGroupID), - SafePoint: gcSafePoint.SafePoint, - } - gcSafePoints = append(gcSafePoints, serviceGroupSafePoint) - } - - return gcSafePoints, nil + return se.Save(gcSafePointPathByServiceGroup(gcSafePoint.ServiceGroupID), string(safePoint)) } // RemoveServiceSafePointByServiceGroup removes a service safe point @@ -263,6 +293,20 @@ func (se *StorageEndpoint) RemoveServiceSafePointByServiceGroup(serviceGroupID, return se.Remove(key) } +// LoadServiceSafePoint reads ServiceSafePoint for the given service group and service name +// return nil if the safe point does not exist +func (se *StorageEndpoint) LoadServiceSafePoint(serviceGroupID, serviceID string) (*ServiceSafePoint, error) { + value, err := se.Load(serviceSafePointPath(serviceGroupID, serviceID)) + if err != nil || value == "" { + return nil, err + } + serviceSafePoint := &ServiceSafePoint{} + if err := 
json.Unmarshal([]byte(value), serviceSafePoint); err != nil { + return nil, err + } + return serviceSafePoint, nil +} + // SaveServiceSafePointByServiceGroup saves service safe point under given service group func (se *StorageEndpoint) SaveServiceSafePointByServiceGroup(serviceGroupID string, ssp *ServiceSafePoint) error { if ssp.ServiceID == "" { @@ -277,45 +321,29 @@ func (se *StorageEndpoint) SaveServiceSafePointByServiceGroup(serviceGroupID str return se.Save(key, string(value)) } -// LoadMinServiceSafePointByServiceGroup returns the minimum safepoint for the given service group -// note that gc worker safe point are store separately -// If no service safe point exist for the given service group or the only service safe point just expired, return nil -func (se *StorageEndpoint) LoadMinServiceSafePointByServiceGroup(serviceGroupID string, now time.Time) (*ServiceSafePoint, error) { - prefix := serviceSafePointPrefixPath(serviceGroupID) +// LoadAllServiceGroupGCSafePoints returns a slice contains GCSafePoint for every service group +func (se *StorageEndpoint) LoadAllServiceGroupGCSafePoints() ([]*pdpb.ServiceGroupSafePoint, error) { + prefix := gcSafePointPrefixPath() prefixEnd := clientv3.GetPrefixRangeEnd(prefix) keys, values, err := se.LoadRange(prefix, prefixEnd, 0) if err != nil { return nil, err } - if len(keys) == 0 { - // the given service group does not have a service safe point yet - return nil, nil + return []*pdpb.ServiceGroupSafePoint{}, nil } - - min := &ServiceSafePoint{SafePoint: math.MaxInt64} - for i, key := range keys { - ssp := &ServiceSafePoint{} - if err := json.Unmarshal([]byte(values[i]), ssp); err != nil { + gcSafePoints := make([]*pdpb.ServiceGroupSafePoint, 0, 2) // there are probably only two service groups + for i := range keys { + gcSafePoint := &GCSafePoint{} + if err := json.Unmarshal([]byte(values[i]), gcSafePoint); err != nil { return nil, err } - - // remove expired safe points - if ssp.ExpiredAt < now.Unix() { - se.Remove(key) - continue - } - - if ssp.SafePoint < min.SafePoint { - min = ssp + serviceGroupSafePoint := &pdpb.ServiceGroupSafePoint{ + ServiceGroupId: []byte(gcSafePoint.ServiceGroupID), + SafePoint: gcSafePoint.SafePoint, } + gcSafePoints = append(gcSafePoints, serviceGroupSafePoint) } - if min.SafePoint == math.MaxUint64 { - // the only service safe point just expired - return nil, nil - } - - // successfully found a valid min safe point - return min, nil + return gcSafePoints, nil } diff --git a/server/storage/endpoint/key_path.go b/server/storage/endpoint/key_path.go index f3c75e11963..e4bbb1e551d 100644 --- a/server/storage/endpoint/key_path.go +++ b/server/storage/endpoint/key_path.go @@ -105,26 +105,22 @@ func MinResolvedTSPath() string { return path.Join(clusterPath, minResolvedTS) } -// gcWorkerSafePointPath returns the path of the gc_worker's safe point -// /gc_servicegroup/$service_group_id/safe_point -func gcWorkerSafePointPath(serviceGroupID string) string { - return path.Join(gcServiceGroupPath, serviceGroupID, "safe_point") +// gcSafePointPathByServiceGroup returns the path of the gc_worker's safe point +// /gc_servicegroup/gc_safepoint/$service_group_id +func gcSafePointPathByServiceGroup(serviceGroupID string) string { + return path.Join(gcServiceGroupPath, "gc_safepoint", serviceGroupID) } // serviceSafePointPath returns the path of services' safe point -// /gc_servicegroup/$service_group_id/service/$service_id +// /gc_servicegroup/service_safepoint/$service_group_id/$service_id func serviceSafePointPath(serviceGroupID, 
serviceID string) string { - return path.Join(gcServiceGroupPath, serviceGroupID, "service", serviceID) -} - -func safePointPrefixPath() string { - return path.Join(gcServiceGroupPath) + "/" + return path.Join(gcServiceGroupPath, "service_safepoint", serviceGroupID, serviceID) } func serviceSafePointPrefixPath(serviceGroupID string) string { - return path.Join(gcServiceGroupPath, serviceGroupID, "service") + "/" + return path.Join(gcServiceGroupPath, "service_safepoint", serviceGroupID) + "/" } -func gcWorkerSafePointSuffix() string { - return "/safe_point" +func gcSafePointPrefixPath() string { + return path.Join(gcServiceGroupPath, "gc_safepoint") + "/" } From 2c3c1a1bd702cd180a8c20fb80bcbe8f23b1ee62 Mon Sep 17 00:00:00 2001 From: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> Date: Wed, 27 Apr 2022 16:56:59 +0800 Subject: [PATCH 05/18] client side code Signed-off-by: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> --- client/client.go | 102 ++++++++++++++++++++++++++++++++++------------ client/metrics.go | 8 +++- 2 files changed, 81 insertions(+), 29 deletions(-) diff --git a/client/client.go b/client/client.go index daa27f9350a..886fbd3ed25 100644 --- a/client/client.go +++ b/client/client.go @@ -106,17 +106,14 @@ type Client interface { // determine the safepoint for multiple services, it does not trigger a GC // job. Use UpdateGCSafePoint to trigger the GC job if needed. UpdateServiceGCSafePoint(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error) - // UpdateGCSafePointByServiceGroup update GC safe point, the update will only be successful if proposed - // safe point is later than the old one - // returns the new safePoint after the update attempt (may return the old safe point if update rejected) - UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64) (uint64, error) - // UpdateServiceSafePointByServiceGroup update service safe point for specific service under given service group - // pass in a ttl less than 0 to remove the target service safe point instead - // will return the min safePoint of the serviceGroup after the update, - // if no service safePoint exists after the given operation, return 0 - UpdateServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (uint64, error) - // GetAllServiceGroupGcSafePoint returns a list containing gc safe point for each service group - GetAllServiceGroupGcSafePoint(ctx context.Context) ([]*pdpb.ServiceGroupSafepoint, error) + + // GC API V2 + GetServiceGroup(ctx context.Context) ([]string, error) + GetMinServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID string) (safePoint uint64, revision int64, err error) + UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64, revision int64) (isSuccessful bool, newSafePoint uint64, validRevision bool, err error) + UpdateServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (isSuccessful bool, gcSafePoint, oldSafePoint, newSafePoint uint64, err error) + GetAllServiceGroupGCSafePoint(ctx context.Context) ([]*pdpb.ServiceGroupSafePoint, error) + // ScatterRegion scatters the specified region. Should use it for a batch of regions, // and the distribution of these regions will be dispersed. // NOTICE: This method is the old version of ScatterRegions, you should use the later one as your first choice. 
@@ -1668,19 +1665,71 @@ func (c *client) UpdateServiceGCSafePoint(ctx context.Context, serviceID string, return resp.GetMinSafePoint(), nil } -func (c *client) UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64) (uint64, error) { +func (c *client) GetServiceGroup(ctx context.Context) ([]string, error) { + if span := opentracing.SpanFromContext(ctx); span != nil { + span = opentracing.StartSpan("pdclient.GetServiceGroup", opentracing.ChildOf(span.Context())) + defer span.Finish() + } + start := time.Now() + defer func() { cmdDurationGetServiceGroup.Observe(time.Since(start).Seconds()) }() + ctx, cancel := context.WithTimeout(ctx, c.option.timeout) + req := &pdpb.GetServiceGroupRequest{ + Header: c.requestHeader(), + } + ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) + resp, err := c.getClient().GetServiceGroup(ctx, req) + cancel() + + if err != nil { + cmdFailedDurationGetServiceGroup.Observe(time.Since(start).Seconds()) + c.ScheduleCheckLeader() + return nil, errors.WithStack(err) + } + + // have to return a slice of string + returnSlice := make([]string, len(resp.ServiceGroupId)) + for _, serviceGroupID := range resp.ServiceGroupId { + returnSlice = append(returnSlice, string(serviceGroupID)) + } + return returnSlice, nil +} +func (c *client) GetMinServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID string) (safePoint uint64, revision int64, err error) { + if span := opentracing.SpanFromContext(ctx); span != nil { + span = opentracing.StartSpan("pdclient.GetMinServiceSafePointByServiceGroup", opentracing.ChildOf(span.Context())) + defer span.Finish() + } + start := time.Now() + defer func() { cmdDurationGetMinServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) }() + ctx, cancel := context.WithTimeout(ctx, c.option.timeout) + req := &pdpb.GetMinServiceSafePointByServiceGroupRequest{ + Header: c.requestHeader(), + ServiceGroupId: []byte(serviceGroupID), + } + ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) + resp, err := c.getClient().GetMinServiceSafePointByServiceGroup(ctx, req) + cancel() + + if err != nil { + cmdFailedDurationGetMinServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) + c.ScheduleCheckLeader() + return 0, 0, errors.WithStack(err) + } + + return resp.SafePoint, resp.Revision, nil +} +func (c *client) UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64, revision int64) (isSuccessful bool, newSafePoint uint64, validRevision bool, err error) { if span := opentracing.SpanFromContext(ctx); span != nil { span = opentracing.StartSpan("pdclient.UpdateGCSafePointByServiceGroup", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() defer func() { cmdDurationUpdateGCSafePointByServiceGroup.Observe(time.Since(start).Seconds()) }() - ctx, cancel := context.WithTimeout(ctx, c.option.timeout) req := &pdpb.UpdateGCSafePointByServiceGroupRequest{ Header: c.requestHeader(), ServiceGroupId: []byte(serviceGroupID), SafePoint: safePoint, + Revision: revision, } ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) resp, err := c.getClient().UpdateGCSafePointByServiceGroup(ctx, req) @@ -1689,11 +1738,12 @@ func (c *client) UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGro if err != nil { cmdFailedDurationUpdateGCSafePointByServiceGroup.Observe(time.Since(start).Seconds()) c.ScheduleCheckLeader() - return 0, errors.WithStack(err) + return false, 0, false, errors.WithStack(err) } - return 
resp.GetNewSafePoint(), nil + // if requested safepoint is the new safepoint, then update succeeded + return safePoint == resp.NewSafePoint, resp.NewSafePoint, resp.ValidRevision, nil } -func (c *client) UpdateServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (uint64, error) { +func (c *client) UpdateServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (isSuccessful bool, gcSafePoint, oldSafePoint, newSafePoint uint64, err error) { if span := opentracing.SpanFromContext(ctx); span != nil { span = opentracing.StartSpan("pdclient.UpdateServiceSafePointByServiceGroup", opentracing.ChildOf(span.Context())) defer span.Finish() @@ -1715,35 +1765,33 @@ func (c *client) UpdateServiceSafePointByServiceGroup(ctx context.Context, servi if err != nil { cmdFailedDurationUpdateServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) c.ScheduleCheckLeader() - return 0, errors.WithStack(err) + return false, 0, 0, 0, errors.WithStack(err) } - return resp.GetMinSafePoint(), nil + return resp.NewServiceSafePoint == safePoint, resp.GcSafePoint, resp.OldServiceSafePoint, resp.NewServiceSafePoint, nil } - -func (c *client) GetAllServiceGroupGcSafePoint(ctx context.Context) ([]*pdpb.ServiceGroupSafepoint, error) { +func (c *client) GetAllServiceGroupGCSafePoint(ctx context.Context) ([]*pdpb.ServiceGroupSafePoint, error) { if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetAllServiceGroupGcSafePoint", opentracing.ChildOf(span.Context())) + span = opentracing.StartSpan("pdclient.GetAllServiceGroupGCSafePoint", opentracing.ChildOf(span.Context())) defer span.Finish() } - start := time.Now() - defer func() { cmdDurationGetAllServiceGroupGcSafePoint.Observe(time.Since(start).Seconds()) }() + defer func() { cmdDurationGetAllServiceGroupGCSafePoint.Observe(time.Since(start).Seconds()) }() ctx, cancel := context.WithTimeout(ctx, c.option.timeout) - req := &pdpb.GetAllServiceGroupGcSafePointRequest{ + req := &pdpb.GetAllServiceGroupGCSafePointRequest{ Header: c.requestHeader(), } ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - resp, err := c.getClient().GetAllServiceGroupGcSafePoint(ctx, req) + resp, err := c.getClient().GetAllServiceGroupGCSafePoint(ctx, req) cancel() if err != nil { - cmdFailedDurationGetAllServiceGroupGcSafePoint.Observe(time.Since(start).Seconds()) + cmdFailedDurationGetAllServiceGroupGCSafePoint.Observe(time.Since(start).Seconds()) c.ScheduleCheckLeader() return nil, errors.WithStack(err) } - return resp.GetServiceGroupSafePoint(), nil + return resp.ServiceGroupSafePoint, nil } func (c *client) ScatterRegion(ctx context.Context, regionID uint64) error { diff --git a/client/metrics.go b/client/metrics.go index c92567fb0ed..0f950421102 100644 --- a/client/metrics.go +++ b/client/metrics.go @@ -99,9 +99,11 @@ var ( cmdDurationGetOperator = cmdDuration.WithLabelValues("get_operator") cmdDurationSplitRegions = cmdDuration.WithLabelValues("split_regions") cmdDurationSplitAndScatterRegions = cmdDuration.WithLabelValues("split_and_scatter_regions") + cmdDurationGetServiceGroup = cmdDuration.WithLabelValues("get_service_group") + cmdDurationGetMinServiceSafePointByServiceGroup = cmdDuration.WithLabelValues("get_min_service_safe_point_by_service_group") cmdDurationUpdateGCSafePointByServiceGroup = cmdDuration.WithLabelValues("update_gc_safe_point_by_service_group") 
cmdDurationUpdateServiceSafePointByServiceGroup = cmdDuration.WithLabelValues("update_service_safe_point_by_service_group") - cmdDurationGetAllServiceGroupGcSafePoint = cmdDuration.WithLabelValues("get_all_service_group_gc_safe_point") + cmdDurationGetAllServiceGroupGCSafePoint = cmdDuration.WithLabelValues("get_all_service_group_gc_safe_point") cmdFailDurationGetRegion = cmdFailedDuration.WithLabelValues("get_region") cmdFailDurationTSO = cmdFailedDuration.WithLabelValues("tso") @@ -113,9 +115,11 @@ var ( cmdFailedDurationGetAllStores = cmdFailedDuration.WithLabelValues("get_all_stores") cmdFailedDurationUpdateGCSafePoint = cmdFailedDuration.WithLabelValues("update_gc_safe_point") cmdFailedDurationUpdateServiceGCSafePoint = cmdFailedDuration.WithLabelValues("update_service_gc_safe_point") + cmdFailedDurationGetServiceGroup = cmdFailedDuration.WithLabelValues("get_service_group") + cmdFailedDurationGetMinServiceSafePointByServiceGroup = cmdFailedDuration.WithLabelValues("get_min_service_safe_point_by_service_group") cmdFailedDurationUpdateGCSafePointByServiceGroup = cmdFailedDuration.WithLabelValues("update_gc_safe_point_by_service_group") cmdFailedDurationUpdateServiceSafePointByServiceGroup = cmdFailedDuration.WithLabelValues("update_service_safe_point_by_service_group") - cmdFailedDurationGetAllServiceGroupGcSafePoint = cmdFailedDuration.WithLabelValues("get_all_service_group_gc_safe_point") + cmdFailedDurationGetAllServiceGroupGCSafePoint = cmdFailedDuration.WithLabelValues("get_all_service_group_gc_safe_point") requestDurationTSO = requestDuration.WithLabelValues("tso") ) From 0094a1e9341ec7c70cc718be93a8b6a6a2094faf Mon Sep 17 00:00:00 2001 From: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> Date: Fri, 29 Apr 2022 14:29:56 +0800 Subject: [PATCH 06/18] update reference Signed-off-by: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> --- client/go.mod | 4 ++-- client/go.sum | 2 ++ go.mod | 4 ++-- go.sum | 4 ++++ 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/client/go.mod b/client/go.mod index a7a7a22150b..d0fbcc2dbe0 100644 --- a/client/go.mod +++ b/client/go.mod @@ -3,14 +3,14 @@ module github.com/tikv/pd/client go 1.16 // TODO: Remove this once kvproto has been updated -replace github.com/pingcap/kvproto => github.com/AmoebaProtozoa/kvproto v0.0.0-20220427045408-abeb7dbc9f22 +replace github.com/pingcap/kvproto => github.com/AmoebaProtozoa/kvproto v0.0.0-20220429023313-555dcb1080e7 require ( github.com/opentracing/opentracing-go v1.2.0 github.com/pingcap/check v0.0.0-20211026125417-57bd13f7b5f0 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 - github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748 + github.com/pingcap/kvproto v0.0.0-20220425052816-e33ae9239820 github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee github.com/prometheus/client_golang v1.11.0 go.uber.org/goleak v1.1.11 diff --git a/client/go.sum b/client/go.sum index 8c119d38db2..0a74f8597d9 100644 --- a/client/go.sum +++ b/client/go.sum @@ -1,6 +1,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/AmoebaProtozoa/kvproto v0.0.0-20220427045408-abeb7dbc9f22/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/AmoebaProtozoa/kvproto v0.0.0-20220429023313-555dcb1080e7 h1:lbd1F9oWMbFRDXpzOehO90j69L4s4JCtEzjbe+NLdC4= 
+github.com/AmoebaProtozoa/kvproto v0.0.0-20220429023313-555dcb1080e7/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= diff --git a/go.mod b/go.mod index 740d9e6960a..2c8a57e55e4 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/tikv/pd go 1.16 // TODO: Remove this once kvproto has been updated -replace github.com/pingcap/kvproto => github.com/AmoebaProtozoa/kvproto v0.0.0-20220427045408-abeb7dbc9f22 +replace github.com/pingcap/kvproto => github.com/AmoebaProtozoa/kvproto v0.0.0-20220429023313-555dcb1080e7 require ( github.com/AlekSi/gocov-xml v1.0.0 @@ -33,7 +33,7 @@ require ( github.com/pingcap/errcode v0.3.0 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce - github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748 + github.com/pingcap/kvproto v0.0.0-20220425052816-e33ae9239820 github.com/pingcap/log v0.0.0-20210906054005-afc726e70354 github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d github.com/pingcap/tidb-dashboard v0.0.0-20220331105802-5ac69661755c diff --git a/go.sum b/go.sum index 80ba50193fb..f5237c93fee 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,10 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT github.com/AlekSi/gocov-xml v1.0.0 h1:4QctJBgXEkbzeKz6PJy6bt3JSPNSN4I2mITYW+eKUoQ= github.com/AlekSi/gocov-xml v1.0.0/go.mod h1:J0qYeZ6tDg4oZubW9mAAgxlqw39PDfoEkzB3HXSbEuA= github.com/AmoebaProtozoa/kvproto v0.0.0-20220427044528-668e540bb708/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/AmoebaProtozoa/kvproto v0.0.0-20220427045408-abeb7dbc9f22 h1:f32Y8ZPmZ0bpGe+FEXRu1XpHb/veNvEX3XsrH0ytMkU= +github.com/AmoebaProtozoa/kvproto v0.0.0-20220427045408-abeb7dbc9f22/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/AmoebaProtozoa/kvproto v0.0.0-20220429023313-555dcb1080e7 h1:lbd1F9oWMbFRDXpzOehO90j69L4s4JCtEzjbe+NLdC4= +github.com/AmoebaProtozoa/kvproto v0.0.0-20220429023313-555dcb1080e7/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= From fd070493d70e9c0b260c4fdcb6860644ad6fa9d3 Mon Sep 17 00:00:00 2001 From: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> Date: Fri, 29 Apr 2022 15:01:08 +0800 Subject: [PATCH 07/18] updated client and server according to new proto Signed-off-by: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> --- client/client.go | 55 ++++++++++++++---------- client/metrics.go | 8 ++-- server/grpc_service.go | 55 +++++++++++++----------- server/storage/endpoint/gc_safe_point.go | 6 +-- 4 files changed, 69 insertions(+), 55 deletions(-) diff --git a/client/client.go b/client/client.go index 886fbd3ed25..e0964528a1f 100644 --- a/client/client.go +++ b/client/client.go @@ -107,12 +107,21 @@ type Client interface { // job. Use UpdateGCSafePoint to trigger the GC job if needed. 
UpdateServiceGCSafePoint(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error) - - // GC API V2 - GetServiceGroup(ctx context.Context) ([]string, error) + // GetAllServiceGroups returns a list containing all service groups that have safe points in PD + GetAllServiceGroups(ctx context.Context) ([]string, error) + // GetMinServiceSafePointByServiceGroup returns the minimum of all service safe points of the given group + // it also returns the current revision of the PD storage, within which the min is valid + // if none is found, it will return 0 as the min GetMinServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID string) (safePoint uint64, revision int64, err error) - UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64, revision int64) (isSuccessful bool, newSafePoint uint64, validRevision bool, err error) - UpdateServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (isSuccessful bool, gcSafePoint, oldSafePoint, newSafePoint uint64, err error) - GetAllServiceGroupGCSafePoint(ctx context.Context) ([]*pdpb.ServiceGroupSafePoint, error) + // UpdateGCSafePointByServiceGroup updates the target safe point, along with the revision obtained previously + // if it fails, the caller should retry from GetMinServiceSafePointByServiceGroup + UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64, revision int64) (succeeded bool, newSafePoint uint64, err error) + // UpdateServiceSafePointByServiceGroup updates the given service's safe point + // pass in a negative ttl to remove it + // if it fails, the caller should retry with a higher safe point + UpdateServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (succeeded bool, gcSafePoint, oldSafePoint, newSafePoint uint64, err error) + // GetAllServiceGroupGCSafePoints returns the GC safe points for all service groups + GetAllServiceGroupGCSafePoints(ctx context.Context) ([]*pdpb.ServiceGroupSafePoint, error) + // ScatterRegion scatters the specified region. Should use it for a batch of regions, // and the distribution of these regions will be dispersed.
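To make the documented contract above concrete, here is a hedged usage sketch for the service side (not part of the patch; it lives in the same illustrative package as the earlier sketch, and the group/service IDs, TTL, and helper name are placeholders). A service such as BR or CDC registers its safe point with a TTL, does its work, and removes the safe point with a negative TTL when finished:

// holdServiceSafePoint registers a service safe point, runs the given work, then removes it.
// When the registration is rejected it returns the group's current GC safe point, so the
// caller can retry with a value at or above it.
func holdServiceSafePoint(ctx context.Context, cli pd.Client, group, service string, safePoint uint64, work func() error) (bool, uint64, error) {
	const ttl = int64(5 * 60) // keep the safe point alive for five minutes per refresh
	succeeded, gcSafePoint, _, _, err := cli.UpdateServiceSafePointByServiceGroup(ctx, group, service, ttl, safePoint)
	if err != nil {
		return false, 0, err
	}
	if !succeeded {
		// Rejected: the requested safe point is too small (e.g. behind the group's GC safe point).
		return false, gcSafePoint, nil
	}
	workErr := work()
	// A negative ttl removes the service safe point once the work is done.
	if _, _, _, _, removeErr := cli.UpdateServiceSafePointByServiceGroup(ctx, group, service, -1, safePoint); removeErr != nil {
		return true, 0, removeErr
	}
	return true, 0, workErr
}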
@@ -1665,23 +1674,23 @@ func (c *client) UpdateServiceGCSafePoint(ctx context.Context, serviceID string, return resp.GetMinSafePoint(), nil } -func (c *client) GetServiceGroup(ctx context.Context) ([]string, error) { +func (c *client) GetAllServiceGroups(ctx context.Context) ([]string, error) { if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetServiceGroup", opentracing.ChildOf(span.Context())) + span = opentracing.StartSpan("pdclient.GetAllServiceGroups", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() - defer func() { cmdDurationGetServiceGroup.Observe(time.Since(start).Seconds()) }() + defer func() { cmdDurationGetAllServiceGroups.Observe(time.Since(start).Seconds()) }() ctx, cancel := context.WithTimeout(ctx, c.option.timeout) - req := &pdpb.GetServiceGroupRequest{ + req := &pdpb.GetAllServiceGroupsRequest{ Header: c.requestHeader(), } ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - resp, err := c.getClient().GetServiceGroup(ctx, req) + resp, err := c.getClient().GetAllServiceGroups(ctx, req) cancel() if err != nil { - cmdFailedDurationGetServiceGroup.Observe(time.Since(start).Seconds()) + cmdFailedDurationGetAllServiceGroups.Observe(time.Since(start).Seconds()) c.ScheduleCheckLeader() return nil, errors.WithStack(err) } @@ -1717,7 +1726,7 @@ func (c *client) GetMinServiceSafePointByServiceGroup(ctx context.Context, servi return resp.SafePoint, resp.Revision, nil } -func (c *client) UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64, revision int64) (isSuccessful bool, newSafePoint uint64, validRevision bool, err error) { +func (c *client) UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64, revision int64) (succeeded bool, newSafePoint uint64, err error) { if span := opentracing.SpanFromContext(ctx); span != nil { span = opentracing.StartSpan("pdclient.UpdateGCSafePointByServiceGroup", opentracing.ChildOf(span.Context())) defer span.Finish() @@ -1738,12 +1747,12 @@ func (c *client) UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGro if err != nil { cmdFailedDurationUpdateGCSafePointByServiceGroup.Observe(time.Since(start).Seconds()) c.ScheduleCheckLeader() - return false, 0, false, errors.WithStack(err) + return false, 0, errors.WithStack(err) } // if requested safepoint is the new safepoint, then update succeeded - return safePoint == resp.NewSafePoint, resp.NewSafePoint, resp.ValidRevision, nil + return resp.Succeeded, resp.NewSafePoint, nil } -func (c *client) UpdateServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (isSuccessful bool, gcSafePoint, oldSafePoint, newSafePoint uint64, err error) { +func (c *client) UpdateServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (succeeded bool, gcSafePoint, oldSafePoint, newSafePoint uint64, err error) { if span := opentracing.SpanFromContext(ctx); span != nil { span = opentracing.StartSpan("pdclient.UpdateServiceSafePointByServiceGroup", opentracing.ChildOf(span.Context())) defer span.Finish() @@ -1768,30 +1777,30 @@ func (c *client) UpdateServiceSafePointByServiceGroup(ctx context.Context, servi return false, 0, 0, 0, errors.WithStack(err) } - return resp.NewServiceSafePoint == safePoint, resp.GcSafePoint, resp.OldServiceSafePoint, resp.NewServiceSafePoint, nil + return resp.Succeeded, resp.GcSafePoint, resp.OldSafePoint, 
resp.NewSafePoint, nil } -func (c *client) GetAllServiceGroupGCSafePoint(ctx context.Context) ([]*pdpb.ServiceGroupSafePoint, error) { +func (c *client) GetAllServiceGroupGCSafePoints(ctx context.Context) ([]*pdpb.ServiceGroupSafePoint, error) { if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetAllServiceGroupGCSafePoint", opentracing.ChildOf(span.Context())) + span = opentracing.StartSpan("pdclient.GetAllServiceGroupGCSafePoints", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() - defer func() { cmdDurationGetAllServiceGroupGCSafePoint.Observe(time.Since(start).Seconds()) }() + defer func() { cmdDurationGetAllServiceGroupGCSafePoints.Observe(time.Since(start).Seconds()) }() ctx, cancel := context.WithTimeout(ctx, c.option.timeout) - req := &pdpb.GetAllServiceGroupGCSafePointRequest{ + req := &pdpb.GetAllServiceGroupGCSafePointsRequest{ Header: c.requestHeader(), } ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - resp, err := c.getClient().GetAllServiceGroupGCSafePoint(ctx, req) + resp, err := c.getClient().GetAllServiceGroupGCSafePoints(ctx, req) cancel() if err != nil { - cmdFailedDurationGetAllServiceGroupGCSafePoint.Observe(time.Since(start).Seconds()) + cmdFailedDurationGetAllServiceGroupGCSafePoints.Observe(time.Since(start).Seconds()) c.ScheduleCheckLeader() return nil, errors.WithStack(err) } - return resp.ServiceGroupSafePoint, nil + return resp.SafePoints, nil } func (c *client) ScatterRegion(ctx context.Context, regionID uint64) error { diff --git a/client/metrics.go b/client/metrics.go index 0f950421102..90ed6f0402e 100644 --- a/client/metrics.go +++ b/client/metrics.go @@ -99,11 +99,11 @@ var ( cmdDurationGetOperator = cmdDuration.WithLabelValues("get_operator") cmdDurationSplitRegions = cmdDuration.WithLabelValues("split_regions") cmdDurationSplitAndScatterRegions = cmdDuration.WithLabelValues("split_and_scatter_regions") - cmdDurationGetServiceGroup = cmdDuration.WithLabelValues("get_service_group") + cmdDurationGetAllServiceGroups = cmdDuration.WithLabelValues("get_all_service_groups") cmdDurationGetMinServiceSafePointByServiceGroup = cmdDuration.WithLabelValues("get_min_service_safe_point_by_service_group") cmdDurationUpdateGCSafePointByServiceGroup = cmdDuration.WithLabelValues("update_gc_safe_point_by_service_group") cmdDurationUpdateServiceSafePointByServiceGroup = cmdDuration.WithLabelValues("update_service_safe_point_by_service_group") - cmdDurationGetAllServiceGroupGCSafePoint = cmdDuration.WithLabelValues("get_all_service_group_gc_safe_point") + cmdDurationGetAllServiceGroupGCSafePoints = cmdDuration.WithLabelValues("get_all_service_group_gc_safe_points") cmdFailDurationGetRegion = cmdFailedDuration.WithLabelValues("get_region") cmdFailDurationTSO = cmdFailedDuration.WithLabelValues("tso") @@ -115,11 +115,11 @@ var ( cmdFailedDurationGetAllStores = cmdFailedDuration.WithLabelValues("get_all_stores") cmdFailedDurationUpdateGCSafePoint = cmdFailedDuration.WithLabelValues("update_gc_safe_point") cmdFailedDurationUpdateServiceGCSafePoint = cmdFailedDuration.WithLabelValues("update_service_gc_safe_point") - cmdFailedDurationGetServiceGroup = cmdFailedDuration.WithLabelValues("get_service_group") + cmdFailedDurationGetAllServiceGroups = cmdFailedDuration.WithLabelValues("get_all_service_groups") cmdFailedDurationGetMinServiceSafePointByServiceGroup = cmdFailedDuration.WithLabelValues("get_min_service_safe_point_by_service_group") cmdFailedDurationUpdateGCSafePointByServiceGroup 
= cmdFailedDuration.WithLabelValues("update_gc_safe_point_by_service_group") cmdFailedDurationUpdateServiceSafePointByServiceGroup = cmdFailedDuration.WithLabelValues("update_service_safe_point_by_service_group") - cmdFailedDurationGetAllServiceGroupGCSafePoint = cmdFailedDuration.WithLabelValues("get_all_service_group_gc_safe_point") + cmdFailedDurationGetAllServiceGroupGCSafePoints = cmdFailedDuration.WithLabelValues("get_all_service_group_gc_safe_points") requestDurationTSO = requestDuration.WithLabelValues("tso") ) diff --git a/server/grpc_service.go b/server/grpc_service.go index 8afd9f4a47e..89712835dd5 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -1446,28 +1446,28 @@ func (s *GrpcServer) UpdateServiceGCSafePoint(ctx context.Context, request *pdpb } // GetServiceGroup return all service group ids -func (s *GrpcServer) GetServiceGroup(ctx context.Context, request *pdpb.GetServiceGroupRequest) (*pdpb.GetServiceGroupResponse, error) { +func (s *GrpcServer) GetAllServiceGroups(ctx context.Context, request *pdpb.GetAllServiceGroupsRequest) (*pdpb.GetAllServiceGroupsResponse, error) { fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { - return pdpb.NewPDClient(client).GetServiceGroup(ctx, request) + return pdpb.NewPDClient(client).GetAllServiceGroups(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request.GetHeader(), fn); err != nil { return nil, err } else if rsp != nil { - return rsp.(*pdpb.GetServiceGroupResponse), err + return rsp.(*pdpb.GetAllServiceGroupsResponse), err } rc := s.GetRaftCluster() if rc == nil { - return &pdpb.GetServiceGroupResponse{Header: s.notBootstrappedHeader()}, nil + return &pdpb.GetAllServiceGroupsResponse{Header: s.notBootstrappedHeader()}, nil } var storage endpoint.GCSafePointStorage = s.storage - serviceGroupList, err := storage.LoadAllServiceGroup() + serviceGroupList, err := storage.LoadAllServiceGroups() if err != nil { return nil, err } - return &pdpb.GetServiceGroupResponse{ + return &pdpb.GetAllServiceGroupsResponse{ Header: s.header(), ServiceGroupId: serviceGroupList, }, nil @@ -1540,9 +1540,9 @@ func (s *GrpcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, reques currentRevision := rsp.Header.GetRevision() if currentRevision != request.GetRevision() { return &pdpb.UpdateGCSafePointByServiceGroupResponse{ - Header: s.header(), - NewSafePoint: 0, - ValidRevision: false, + Header: s.header(), + Succeeded: false, + NewSafePoint: 0, }, nil } serviceGroupID := string(request.ServiceGroupId) @@ -1577,9 +1577,9 @@ func (s *GrpcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, reques newSafePoint.SafePoint = oldSafePoint } return &pdpb.UpdateGCSafePointByServiceGroupResponse{ - Header: s.header(), - NewSafePoint: newSafePoint.SafePoint, - ValidRevision: true, + Header: s.header(), + Succeeded: true, + NewSafePoint: newSafePoint.SafePoint, }, nil } @@ -1611,7 +1611,8 @@ func (s *GrpcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, r return nil, err } return &pdpb.UpdateServiceSafePointByServiceGroupResponse{ - Header: s.header(), + Header: s.header(), + Succeeded: true, }, nil } @@ -1630,6 +1631,8 @@ func (s *GrpcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, r return nil, err } var oldServiceSafePoint, gcSafePoint, newServiceSafePoint uint64 = 0, 0, 0 + succeeded := false + if sspOld != nil { oldServiceSafePoint = sspOld.SafePoint newServiceSafePoint = oldServiceSafePoint // case where update denied @@ -1645,6 +1648,7 @@ func (s 
*GrpcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, r caseInit := oldServiceSafePoint == 0 && request.SafePoint >= gcSafePoint if caseUpdate || caseInit { + succeeded = true ssp := &endpoint.ServiceSafePoint{ ServiceID: serviceID, ExpiredAt: now.Unix() + request.TTL, @@ -1666,27 +1670,28 @@ func (s *GrpcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, r } return &pdpb.UpdateServiceSafePointByServiceGroupResponse{ - Header: s.header(), - GcSafePoint: gcSafePoint, - OldServiceSafePoint: oldServiceSafePoint, - NewServiceSafePoint: newServiceSafePoint, + Header: s.header(), + Succeeded: succeeded, + GcSafePoint: gcSafePoint, + OldSafePoint: oldServiceSafePoint, + NewSafePoint: newServiceSafePoint, }, nil } -// GetAllServiceGroupGCSafePoint returns all service group's gc safe point -func (s *GrpcServer) GetAllServiceGroupGCSafePoint(ctx context.Context, request *pdpb.GetAllServiceGroupGCSafePointRequest) (*pdpb.GetAllServiceGroupGCSafePointResponse, error) { +// GetAllServiceGroupGCSafePoints returns all service group's gc safe point +func (s *GrpcServer) GetAllServiceGroupGCSafePoints(ctx context.Context, request *pdpb.GetAllServiceGroupGCSafePointsRequest) (*pdpb.GetAllServiceGroupGCSafePointsResponse, error) { fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { - return pdpb.NewPDClient(client).GetAllServiceGroupGCSafePoint(ctx, request) + return pdpb.NewPDClient(client).GetAllServiceGroupGCSafePoints(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request.GetHeader(), fn); err != nil { return nil, err } else if rsp != nil { - return rsp.(*pdpb.GetAllServiceGroupGCSafePointResponse), err + return rsp.(*pdpb.GetAllServiceGroupGCSafePointsResponse), err } rc := s.GetRaftCluster() if rc == nil { - return &pdpb.GetAllServiceGroupGCSafePointResponse{Header: s.notBootstrappedHeader()}, nil + return &pdpb.GetAllServiceGroupGCSafePointsResponse{Header: s.notBootstrappedHeader()}, nil } var storage endpoint.GCSafePointStorage = s.storage @@ -1696,9 +1701,9 @@ func (s *GrpcServer) GetAllServiceGroupGCSafePoint(ctx context.Context, request return nil, err } - return &pdpb.GetAllServiceGroupGCSafePointResponse{ - Header: s.header(), - ServiceGroupSafePoint: safePoints, + return &pdpb.GetAllServiceGroupGCSafePointsResponse{ + Header: s.header(), + SafePoints: safePoints, }, nil } diff --git a/server/storage/endpoint/gc_safe_point.go b/server/storage/endpoint/gc_safe_point.go index 0a7d1903d8d..f28154fa607 100644 --- a/server/storage/endpoint/gc_safe_point.go +++ b/server/storage/endpoint/gc_safe_point.go @@ -51,7 +51,7 @@ type GCSafePointStorage interface { SaveServiceGCSafePoint(ssp *ServiceSafePoint) error RemoveServiceGCSafePoint(serviceID string) error - LoadAllServiceGroup() ([][]byte, error) + LoadAllServiceGroups() ([][]byte, error) LoadMinServiceSafePointByServiceGroup(serviceGroupID string, now time.Time) (*ServiceSafePoint, error) LoadGCWorkerSafePoint(serviceGroupID string) (*GCSafePoint, error) SaveGCWorkerSafePoint(gcSafePoint *GCSafePoint) error @@ -247,8 +247,8 @@ func (se *StorageEndpoint) LoadMinServiceSafePointByServiceGroup(serviceGroupID return min, nil } -// LoadAllServiceGroup returns a list of all service group IDs -func (se *StorageEndpoint) LoadAllServiceGroup() ([][]byte, error) { +// LoadAllServiceGroups returns a list of all service group IDs +func (se *StorageEndpoint) LoadAllServiceGroups() ([][]byte, error) { prefix := gcSafePointPrefixPath() prefixEnd := clientv3.GetPrefixRangeEnd(prefix) keys, _, err := 
se.LoadRange(prefix, prefixEnd, 0) From 2db7af13128d2423d37de4b40150feaabd2a7a7e Mon Sep 17 00:00:00 2001 From: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> Date: Thu, 5 May 2022 14:41:41 +0800 Subject: [PATCH 08/18] changed lock name Signed-off-by: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> --- server/grpc_service.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server/grpc_service.go b/server/grpc_service.go index 89712835dd5..65172755a18 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -1516,8 +1516,8 @@ func (s *GrpcServer) GetMinServiceSafePointByServiceGroup(ctx context.Context, r // UpdateGCSafePointByServiceGroup used by gc_worker to update their gc safe points func (s *GrpcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, request *pdpb.UpdateGCSafePointByServiceGroupRequest) (*pdpb.UpdateGCSafePointByServiceGroupResponse, error) { - s.updateSafePointByServiceGroupLock.Lock() - defer s.updateSafePointByServiceGroupLock.Unlock() + s.serviceGroupSafePointLock.Lock() + defer s.serviceGroupSafePointLock.Unlock() fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { return pdpb.NewPDClient(client).UpdateGCSafePointByServiceGroup(ctx, request) @@ -1585,8 +1585,8 @@ func (s *GrpcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, reques // UpdateServiceSafePointByServiceGroup for services like CDC/BR/Lightning to update gc safe points in PD func (s *GrpcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, request *pdpb.UpdateServiceSafePointByServiceGroupRequest) (*pdpb.UpdateServiceSafePointByServiceGroupResponse, error) { - s.updateSafePointByServiceGroupLock.Lock() - defer s.updateSafePointByServiceGroupLock.Unlock() + s.serviceGroupSafePointLock.Lock() + defer s.serviceGroupSafePointLock.Unlock() fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { return pdpb.NewPDClient(client).UpdateServiceSafePointByServiceGroup(ctx, request) From 74d216a35b6aca0920b6480cdda1c139660f50ac Mon Sep 17 00:00:00 2001 From: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> Date: Fri, 6 May 2022 00:03:42 +0800 Subject: [PATCH 09/18] change server and client to use new gcpb Signed-off-by: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> --- client/client.go | 304 ++++++++++++----------- client/go.mod | 2 +- client/go.sum | 2 + go.mod | 2 +- go.sum | 18 +- server/gc_service.go | 292 ++++++++++++++++++++++ server/grpc_service.go | 264 +------------------- server/server.go | 2 + server/storage/endpoint/gc_safe_point.go | 27 +- 9 files changed, 469 insertions(+), 444 deletions(-) create mode 100644 server/gc_service.go diff --git a/client/client.go b/client/client.go index e0964528a1f..4d6b5902aec 100644 --- a/client/client.go +++ b/client/client.go @@ -26,6 +26,7 @@ import ( "github.com/opentracing/opentracing-go" "github.com/pingcap/errors" "github.com/pingcap/failpoint" + "github.com/pingcap/kvproto/pkg/gcpb" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" @@ -106,23 +107,6 @@ type Client interface { // determine the safepoint for multiple services, it does not trigger a GC // job. Use UpdateGCSafePoint to trigger the GC job if needed. 
UpdateServiceGCSafePoint(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error) - - // GetAllServiceGroups returns a list containing all service groups that have safe points in PD - GetAllServiceGroups(ctx context.Context) ([]string, error) - // GetMinServiceSafePointByServiceGroup returns the minimum of all service safe points of the given group - // it also returns the current revision of the PD storage, within which the min is valid - // if none is found, it will return 0 as the min - GetMinServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID string) (safePoint uint64, revision int64, err error) - // UpdateGCSafePointByServiceGroup updates the target safe point, along with the revision obtained previously - // if it fails, the caller should retry from GetMinServiceSafePointByServiceGroup - UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64, revision int64) (succeeded bool, newSafePoint uint64, err error) - // UpdateServiceSafePointByServiceGroup updates the given service's safe point - // pass in a negative ttl to remove it - // if it fails, the caller should retry with a higher safe point - UpdateServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (succeeded bool, gcSafePoint, oldSafePoint, newSafePoint uint64, err error) - // GetAllServiceGroupGCSafePoints returns the GC safe points for all service groups - GetAllServiceGroupGCSafePoints(ctx context.Context) ([]*pdpb.ServiceGroupSafePoint, error) - // ScatterRegion scatters the specified region. Should use it for a batch of regions, // and the distribution of these regions will be dispersed. // NOTICE: This method is the old version of ScatterRegions, you should use the later one as your first choice. @@ -147,6 +131,22 @@ type Client interface { UpdateOption(option DynamicOption, value interface{}) error // Close closes the client. Close() + + // GetAllServiceGroups returns a list containing all service groups that have safe points in PD + GetAllServiceGroups(ctx context.Context) ([]string, error) + // GetMinServiceSafePointByServiceGroup returns the minimum of all service safe points of the given group + // it also returns the current revision of the PD storage, within which the min is valid + // if none is found, it will return 0 as the min + GetMinServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID string) (safePoint uint64, revision int64, err error) + // UpdateGCSafePointByServiceGroup updates the target safe point, along with the revision obtained previously + // if it fails, the caller should retry from GetMinServiceSafePointByServiceGroup + UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64, revision int64) (succeeded bool, newSafePoint uint64, err error) + // UpdateServiceSafePointByServiceGroup updates the given service's safe point + // pass in a negative ttl to remove it + // if it fails, the caller should retry with a higher safe point + UpdateServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (succeeded bool, gcSafePoint, oldSafePoint, newSafePoint uint64, err error) + // GetAllServiceGroupGCSafePoints returns the GC safe points for all service groups + GetAllServiceGroupGCSafePoints(ctx context.Context) ([]*gcpb.ServiceGroupSafePoint, error) } // GetStoreOp represents available options when getting stores.
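As a hedged end-to-end illustration of the final interface above (not part of the patch; it assumes the same illustrative package and imports as the earlier sketches, and the policy of advancing each group's GC safe point straight to its minimum service safe point is the sketch's own assumption, not necessarily gc_worker's):

// advanceAllServiceGroups runs one GC round over every service group known to PD.
func advanceAllServiceGroups(ctx context.Context, cli pd.Client) error {
	groups, err := cli.GetAllServiceGroups(ctx)
	if err != nil {
		return err
	}
	for _, group := range groups {
		min, revision, err := cli.GetMinServiceSafePointByServiceGroup(ctx, group)
		if err != nil {
			return err
		}
		if min == 0 {
			// No service safe point registered for this group yet; nothing to advance.
			continue
		}
		succeeded, _, err := cli.UpdateGCSafePointByServiceGroup(ctx, group, min, revision)
		if err != nil {
			return err
		}
		if !succeeded {
			// Revision mismatch or safe point rollback; this group is retried on the next round.
			continue
		}
	}
	return nil
}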
@@ -1674,135 +1674,6 @@ func (c *client) UpdateServiceGCSafePoint(ctx context.Context, serviceID string, return resp.GetMinSafePoint(), nil } -func (c *client) GetAllServiceGroups(ctx context.Context) ([]string, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetAllServiceGroups", opentracing.ChildOf(span.Context())) - defer span.Finish() - } - start := time.Now() - defer func() { cmdDurationGetAllServiceGroups.Observe(time.Since(start).Seconds()) }() - ctx, cancel := context.WithTimeout(ctx, c.option.timeout) - req := &pdpb.GetAllServiceGroupsRequest{ - Header: c.requestHeader(), - } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - resp, err := c.getClient().GetAllServiceGroups(ctx, req) - cancel() - - if err != nil { - cmdFailedDurationGetAllServiceGroups.Observe(time.Since(start).Seconds()) - c.ScheduleCheckLeader() - return nil, errors.WithStack(err) - } - - // have to return a slice of string - returnSlice := make([]string, len(resp.ServiceGroupId)) - for _, serviceGroupID := range resp.ServiceGroupId { - returnSlice = append(returnSlice, string(serviceGroupID)) - } - return returnSlice, nil -} -func (c *client) GetMinServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID string) (safePoint uint64, revision int64, err error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetMinServiceSafePointByServiceGroup", opentracing.ChildOf(span.Context())) - defer span.Finish() - } - start := time.Now() - defer func() { cmdDurationGetMinServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) }() - ctx, cancel := context.WithTimeout(ctx, c.option.timeout) - req := &pdpb.GetMinServiceSafePointByServiceGroupRequest{ - Header: c.requestHeader(), - ServiceGroupId: []byte(serviceGroupID), - } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - resp, err := c.getClient().GetMinServiceSafePointByServiceGroup(ctx, req) - cancel() - - if err != nil { - cmdFailedDurationGetMinServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) - c.ScheduleCheckLeader() - return 0, 0, errors.WithStack(err) - } - - return resp.SafePoint, resp.Revision, nil -} -func (c *client) UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64, revision int64) (succeeded bool, newSafePoint uint64, err error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.UpdateGCSafePointByServiceGroup", opentracing.ChildOf(span.Context())) - defer span.Finish() - } - start := time.Now() - defer func() { cmdDurationUpdateGCSafePointByServiceGroup.Observe(time.Since(start).Seconds()) }() - ctx, cancel := context.WithTimeout(ctx, c.option.timeout) - req := &pdpb.UpdateGCSafePointByServiceGroupRequest{ - Header: c.requestHeader(), - ServiceGroupId: []byte(serviceGroupID), - SafePoint: safePoint, - Revision: revision, - } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - resp, err := c.getClient().UpdateGCSafePointByServiceGroup(ctx, req) - cancel() - - if err != nil { - cmdFailedDurationUpdateGCSafePointByServiceGroup.Observe(time.Since(start).Seconds()) - c.ScheduleCheckLeader() - return false, 0, errors.WithStack(err) - } - // if requested safepoint is the new safepoint, then update succeeded - return resp.Succeeded, resp.NewSafePoint, nil -} -func (c *client) UpdateServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, 
safePoint uint64) (succeeded bool, gcSafePoint, oldSafePoint, newSafePoint uint64, err error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.UpdateServiceSafePointByServiceGroup", opentracing.ChildOf(span.Context())) - defer span.Finish() - } - start := time.Now() - defer func() { cmdDurationUpdateServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) }() - ctx, cancel := context.WithTimeout(ctx, c.option.timeout) - req := &pdpb.UpdateServiceSafePointByServiceGroupRequest{ - Header: c.requestHeader(), - ServiceGroupId: []byte(serviceGroupID), - ServiceId: []byte(serviceID), - TTL: ttl, - SafePoint: safePoint, - } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - resp, err := c.getClient().UpdateServiceSafePointByServiceGroup(ctx, req) - cancel() - - if err != nil { - cmdFailedDurationUpdateServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) - c.ScheduleCheckLeader() - return false, 0, 0, 0, errors.WithStack(err) - } - - return resp.Succeeded, resp.GcSafePoint, resp.OldSafePoint, resp.NewSafePoint, nil -} -func (c *client) GetAllServiceGroupGCSafePoints(ctx context.Context) ([]*pdpb.ServiceGroupSafePoint, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetAllServiceGroupGCSafePoints", opentracing.ChildOf(span.Context())) - defer span.Finish() - } - start := time.Now() - defer func() { cmdDurationGetAllServiceGroupGCSafePoints.Observe(time.Since(start).Seconds()) }() - ctx, cancel := context.WithTimeout(ctx, c.option.timeout) - req := &pdpb.GetAllServiceGroupGCSafePointsRequest{ - Header: c.requestHeader(), - } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - resp, err := c.getClient().GetAllServiceGroupGCSafePoints(ctx, req) - cancel() - - if err != nil { - cmdFailedDurationGetAllServiceGroupGCSafePoints.Observe(time.Since(start).Seconds()) - c.ScheduleCheckLeader() - return nil, errors.WithStack(err) - } - - return resp.SafePoints, nil -} - func (c *client) ScatterRegion(ctx context.Context, regionID uint64) error { if span := opentracing.SpanFromContext(ctx); span != nil { span = opentracing.StartSpan("pdclient.ScatterRegion", opentracing.ChildOf(span.Context())) @@ -2037,3 +1908,144 @@ func (c *client) WatchGlobalConfig(ctx context.Context) (chan []GlobalConfigItem }() return globalConfigWatcherCh, err } + +func (c *client) gcHeader() *gcpb.RequestHeader { + return &gcpb.RequestHeader{ + ClusterId: c.clusterID, + } +} + +func (c *client) gcClient() gcpb.GCClient { + if cc, ok := c.clientConns.Load(c.GetLeaderAddr()); ok { + return gcpb.NewGCClient(cc.(*grpc.ClientConn)) + } + return nil +} +func (c *client) GetAllServiceGroups(ctx context.Context) ([]string, error) { + if span := opentracing.SpanFromContext(ctx); span != nil { + span = opentracing.StartSpan("pdclient.GetAllServiceGroups", opentracing.ChildOf(span.Context())) + defer span.Finish() + } + start := time.Now() + defer func() { cmdDurationGetAllServiceGroups.Observe(time.Since(start).Seconds()) }() + ctx, cancel := context.WithTimeout(ctx, c.option.timeout) + req := &gcpb.GetAllServiceGroupsRequest{ + Header: c.gcHeader(), + } + ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) + resp, err := c.gcClient().GetAllServiceGroups(ctx, req) + cancel() + + if err != nil { + cmdFailedDurationGetAllServiceGroups.Observe(time.Since(start).Seconds()) + c.ScheduleCheckLeader() + return nil, errors.WithStack(err) + } + + // have to return a slice of string + 
returnSlice := make([]string, len(resp.ServiceGroupId)) + for _, serviceGroupID := range resp.ServiceGroupId { + returnSlice = append(returnSlice, string(serviceGroupID)) + } + return returnSlice, nil +} +func (c *client) GetMinServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID string) (safePoint uint64, revision int64, err error) { + if span := opentracing.SpanFromContext(ctx); span != nil { + span = opentracing.StartSpan("pdclient.GetMinServiceSafePointByServiceGroup", opentracing.ChildOf(span.Context())) + defer span.Finish() + } + start := time.Now() + defer func() { cmdDurationGetMinServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) }() + ctx, cancel := context.WithTimeout(ctx, c.option.timeout) + req := &gcpb.GetMinServiceSafePointByServiceGroupRequest{ + Header: c.gcHeader(), + ServiceGroupId: []byte(serviceGroupID), + } + ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) + resp, err := c.gcClient().GetMinServiceSafePointByServiceGroup(ctx, req) + cancel() + + if err != nil { + cmdFailedDurationGetMinServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) + c.ScheduleCheckLeader() + return 0, 0, errors.WithStack(err) + } + + return resp.SafePoint, resp.Revision, nil +} +func (c *client) UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64, revision int64) (succeeded bool, newSafePoint uint64, err error) { + if span := opentracing.SpanFromContext(ctx); span != nil { + span = opentracing.StartSpan("pdclient.UpdateGCSafePointByServiceGroup", opentracing.ChildOf(span.Context())) + defer span.Finish() + } + start := time.Now() + defer func() { cmdDurationUpdateGCSafePointByServiceGroup.Observe(time.Since(start).Seconds()) }() + ctx, cancel := context.WithTimeout(ctx, c.option.timeout) + req := &gcpb.UpdateGCSafePointByServiceGroupRequest{ + Header: c.gcHeader(), + ServiceGroupId: []byte(serviceGroupID), + SafePoint: safePoint, + Revision: revision, + } + ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) + resp, err := c.gcClient().UpdateGCSafePointByServiceGroup(ctx, req) + cancel() + + if err != nil { + cmdFailedDurationUpdateGCSafePointByServiceGroup.Observe(time.Since(start).Seconds()) + c.ScheduleCheckLeader() + return false, 0, errors.WithStack(err) + } + // if requested safepoint is the new safepoint, then update succeeded + return resp.Succeeded, resp.NewSafePoint, nil +} +func (c *client) UpdateServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (succeeded bool, gcSafePoint, oldSafePoint, newSafePoint uint64, err error) { + if span := opentracing.SpanFromContext(ctx); span != nil { + span = opentracing.StartSpan("pdclient.UpdateServiceSafePointByServiceGroup", opentracing.ChildOf(span.Context())) + defer span.Finish() + } + start := time.Now() + defer func() { cmdDurationUpdateServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) }() + ctx, cancel := context.WithTimeout(ctx, c.option.timeout) + req := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ + Header: c.gcHeader(), + ServiceGroupId: []byte(serviceGroupID), + ServiceId: []byte(serviceID), + TTL: ttl, + SafePoint: safePoint, + } + ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) + resp, err := c.gcClient().UpdateServiceSafePointByServiceGroup(ctx, req) + cancel() + + if err != nil { + cmdFailedDurationUpdateServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) + c.ScheduleCheckLeader() + return false, 0, 0, 0, 
errors.WithStack(err) + } + + return resp.Succeeded, resp.GcSafePoint, resp.OldSafePoint, resp.NewSafePoint, nil +} +func (c *client) GetAllServiceGroupGCSafePoints(ctx context.Context) ([]*gcpb.ServiceGroupSafePoint, error) { + if span := opentracing.SpanFromContext(ctx); span != nil { + span = opentracing.StartSpan("pdclient.GetAllServiceGroupGCSafePoints", opentracing.ChildOf(span.Context())) + defer span.Finish() + } + start := time.Now() + defer func() { cmdDurationGetAllServiceGroupGCSafePoints.Observe(time.Since(start).Seconds()) }() + ctx, cancel := context.WithTimeout(ctx, c.option.timeout) + req := &gcpb.GetAllServiceGroupGCSafePointsRequest{ + Header: c.gcHeader(), + } + ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) + resp, err := c.gcClient().GetAllServiceGroupGCSafePoints(ctx, req) + cancel() + + if err != nil { + cmdFailedDurationGetAllServiceGroupGCSafePoints.Observe(time.Since(start).Seconds()) + c.ScheduleCheckLeader() + return nil, errors.WithStack(err) + } + + return resp.SafePoints, nil +} diff --git a/client/go.mod b/client/go.mod index d0fbcc2dbe0..aa6c6177c6a 100644 --- a/client/go.mod +++ b/client/go.mod @@ -3,7 +3,7 @@ module github.com/tikv/pd/client go 1.16 // TODO: Remove this once kvproto has been updated -replace github.com/pingcap/kvproto => github.com/AmoebaProtozoa/kvproto v0.0.0-20220429023313-555dcb1080e7 +replace github.com/pingcap/kvproto => github.com/AmoebaProtozoa/kvproto v0.0.0-20220505035154-33f7827ec636 require ( github.com/opentracing/opentracing-go v1.2.0 diff --git a/client/go.sum b/client/go.sum index 0a74f8597d9..aa96d8c8221 100644 --- a/client/go.sum +++ b/client/go.sum @@ -3,6 +3,8 @@ cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT github.com/AmoebaProtozoa/kvproto v0.0.0-20220427045408-abeb7dbc9f22/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/AmoebaProtozoa/kvproto v0.0.0-20220429023313-555dcb1080e7 h1:lbd1F9oWMbFRDXpzOehO90j69L4s4JCtEzjbe+NLdC4= github.com/AmoebaProtozoa/kvproto v0.0.0-20220429023313-555dcb1080e7/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/AmoebaProtozoa/kvproto v0.0.0-20220505035154-33f7827ec636 h1:25B7U79UYUqe5qntmMsszD+9nfJ34lK6vyp1Gez0z14= +github.com/AmoebaProtozoa/kvproto v0.0.0-20220505035154-33f7827ec636/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= diff --git a/go.mod b/go.mod index 2c8a57e55e4..5c5ff3fc300 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/tikv/pd go 1.16 // TODO: Remove this once kvproto has been updated -replace github.com/pingcap/kvproto => github.com/AmoebaProtozoa/kvproto v0.0.0-20220429023313-555dcb1080e7 +replace github.com/pingcap/kvproto => github.com/AmoebaProtozoa/kvproto v0.0.0-20220505035154-33f7827ec636 require ( github.com/AlekSi/gocov-xml v1.0.0 diff --git a/go.sum b/go.sum index f5237c93fee..aa435095039 100644 --- a/go.sum +++ b/go.sum @@ -1,11 +1,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/AlekSi/gocov-xml v1.0.0 h1:4QctJBgXEkbzeKz6PJy6bt3JSPNSN4I2mITYW+eKUoQ= github.com/AlekSi/gocov-xml v1.0.0/go.mod h1:J0qYeZ6tDg4oZubW9mAAgxlqw39PDfoEkzB3HXSbEuA= -github.com/AmoebaProtozoa/kvproto v0.0.0-20220427044528-668e540bb708/go.mod 
h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= -github.com/AmoebaProtozoa/kvproto v0.0.0-20220427045408-abeb7dbc9f22 h1:f32Y8ZPmZ0bpGe+FEXRu1XpHb/veNvEX3XsrH0ytMkU= -github.com/AmoebaProtozoa/kvproto v0.0.0-20220427045408-abeb7dbc9f22/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= -github.com/AmoebaProtozoa/kvproto v0.0.0-20220429023313-555dcb1080e7 h1:lbd1F9oWMbFRDXpzOehO90j69L4s4JCtEzjbe+NLdC4= -github.com/AmoebaProtozoa/kvproto v0.0.0-20220429023313-555dcb1080e7/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/AmoebaProtozoa/kvproto v0.0.0-20220505035154-33f7827ec636 h1:25B7U79UYUqe5qntmMsszD+9nfJ34lK6vyp1Gez0z14= +github.com/AmoebaProtozoa/kvproto v0.0.0-20220505035154-33f7827ec636/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= @@ -174,10 +171,8 @@ github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/me github.com/goccy/go-graphviz v0.0.9 h1:s/FMMJ1Joj6La3S5ApO3Jk2cwM4LpXECC2muFx3IPQQ= github.com/goccy/go-graphviz v0.0.9/go.mod h1:wXVsXxmyMQU6TN3zGRttjNn3h+iCAS7xQFC6TlNvLhk= github.com/gofrs/uuid v3.2.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= -github.com/gogo/protobuf v0.0.0-20180717141946-636bf0302bc9/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= -github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt v3.2.1+incompatible h1:73Z+4BJcrTC+KczS6WvTPvRGOp1WmfEP4Q1lOd9Z/+c= @@ -191,7 +186,6 @@ github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4er github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef h1:veQD95Isof8w9/WXiA+pa3tz3fJXkt5B7QaRBrM62gk= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/protobuf v0.0.0-20180814211427-aa810b61a9c7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -297,7 +291,6 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= -github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -403,10 +396,6 @@ github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTm github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg= github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce h1:Y1kCxlCtlPTMtVcOkjUcuQKh+YrluSo7+7YMCQSzy30= github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk= -github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= -github.com/pingcap/kvproto v0.0.0-20200411081810-b85805c9476c/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= -github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748 h1:i4MBe1zGq9/r3BH6rTRunizi4T59fpNk8hvBCrB5UAY= -github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= @@ -699,7 +688,6 @@ golang.org/x/time v0.0.0-20220224211638-0e9765cccd65 h1:M73Iuj3xbbb9Uk1DYhzydths golang.org/x/time v0.0.0-20220224211638-0e9765cccd65/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= @@ -739,11 +727,9 @@ google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9Ywl google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20181004005441-af9cb2a35e7f/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190927181202-20e1ac93f88c h1:hrpEMCZ2O7DR5gC1n2AJGVhrwiEjOi35+jxtIuZpTMo= google.golang.org/genproto v0.0.0-20190927181202-20e1ac93f88c/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= -google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= diff --git a/server/gc_service.go b/server/gc_service.go new file mode 100644 index 
00000000000..5a1b03ea732 --- /dev/null +++ b/server/gc_service.go @@ -0,0 +1,292 @@ +// Copyright 2017 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package server + +import ( + "context" + "fmt" + "math" + + "github.com/pingcap/kvproto/pkg/gcpb" + "github.com/pingcap/log" + "github.com/tikv/pd/pkg/tsoutil" + "github.com/tikv/pd/server/storage/endpoint" + "github.com/tikv/pd/server/tso" + "go.uber.org/zap" +) + +type GcServer struct { + *Server +} + +func (s *GcServer) header() *gcpb.ResponseHeader { + return &gcpb.ResponseHeader{ClusterId: s.clusterID} +} + +func (s *GcServer) errorHeader(err *gcpb.Error) *gcpb.ResponseHeader { + return &gcpb.ResponseHeader{ + ClusterId: s.clusterID, + Error: err, + } +} + +func (s *GcServer) notBootstrappedHeader() *gcpb.ResponseHeader { + return s.errorHeader(&gcpb.Error{ + Type: gcpb.ErrorType_NOT_BOOTSTRAPPED, + Message: "cluster is not bootstrapped", + }) +} + +func (s *GcServer) revisionMismatchHeader(requestRevision, currentRevision int64) *gcpb.ResponseHeader { + return s.errorHeader(&gcpb.Error{ + Type: gcpb.ErrorType_REVISION_MISMATCH, + Message: fmt.Sprintf("revision mismatch, requested revision %v but current revision %v", requestRevision, currentRevision), + }) +} + +func (s *GcServer) safePointRollbackHeader(requestSafePoint, requiredSafePoint uint64) *gcpb.ResponseHeader { + return s.errorHeader(&gcpb.Error{ + Type: gcpb.ErrorType_SAFEPOINT_ROLLBACK, + Message: fmt.Sprintf("safe point rollback, requested safe point %v is less than required safe point %v", requestSafePoint, requiredSafePoint), + }) +} + +// GetAllServiceGroups return all service group ids +func (s *GcServer) GetAllServiceGroups(ctx context.Context, request *gcpb.GetAllServiceGroupsRequest) (*gcpb.GetAllServiceGroupsResponse, error) { + + rc := s.GetRaftCluster() + if rc == nil { + return &gcpb.GetAllServiceGroupsResponse{Header: s.notBootstrappedHeader()}, nil + } + + var storage endpoint.GCSafePointStorage = s.storage + serviceGroupList, err := storage.LoadAllServiceGroups() + if err != nil { + return nil, err + } + + return &gcpb.GetAllServiceGroupsResponse{ + Header: s.header(), + ServiceGroupId: serviceGroupList, + }, nil +} + +// GetMinServiceSafePointByServiceGroup returns given service group's min service safe point +func (s *GcServer) GetMinServiceSafePointByServiceGroup(ctx context.Context, request *gcpb.GetMinServiceSafePointByServiceGroupRequest) (*gcpb.GetMinServiceSafePointByServiceGroupResponse, error) { + + rc := s.GetRaftCluster() + if rc == nil { + return &gcpb.GetMinServiceSafePointByServiceGroupResponse{Header: s.notBootstrappedHeader()}, nil + } + + var storage endpoint.GCSafePointStorage = s.storage + serviceGroupID := string(request.ServiceGroupId) + nowTSO, err := s.tsoAllocatorManager.HandleTSORequest(tso.GlobalDCLocation, 1) + if err != nil { + return nil, err + } + now, _ := tsoutil.ParseTimestamp(nowTSO) + min, err := storage.LoadMinServiceSafePointByServiceGroup(serviceGroupID, now) + if err != nil { + return 
nil, err + } + var returnSafePoint uint64 + if min != nil { + returnSafePoint = min.SafePoint + } + // perform a get operation on a non-existing key to obtain current etcd revision number from response header + rsp, _ := s.client.Get(ctx, "NA") + currentRevision := rsp.Header.GetRevision() + return &gcpb.GetMinServiceSafePointByServiceGroupResponse{ + Header: s.header(), + SafePoint: returnSafePoint, + Revision: currentRevision, + }, nil +} + +// UpdateGCSafePointByServiceGroup used by gc_worker to update their gc safe points +func (s *GcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, request *gcpb.UpdateGCSafePointByServiceGroupRequest) (*gcpb.UpdateGCSafePointByServiceGroupResponse, error) { + s.serviceGroupSafePointLock.Lock() + defer s.serviceGroupSafePointLock.Unlock() + + rc := s.GetRaftCluster() + if rc == nil { + return &gcpb.UpdateGCSafePointByServiceGroupResponse{Header: s.notBootstrappedHeader()}, nil + } + + var storage endpoint.GCSafePointStorage = s.storage + + // check if revision changed since last min calculation + rsp, _ := s.client.Get(ctx, "NA") + currentRevision := rsp.Header.GetRevision() + requestRevision := request.GetRevision() + if currentRevision != requestRevision { + return &gcpb.UpdateGCSafePointByServiceGroupResponse{ + Header: s.revisionMismatchHeader(requestRevision, currentRevision), + Succeeded: false, + NewSafePoint: 0, + }, nil + } + serviceGroupID := string(request.ServiceGroupId) + newSafePoint := &endpoint.GCSafePoint{ + ServiceGroupID: serviceGroupID, + SafePoint: request.SafePoint, + } + + prev, err := storage.LoadGCWorkerSafePoint(serviceGroupID) + if err != nil { + return nil, err + } + // if no previous safepoint, treat it as 0 + var oldSafePoint uint64 = 0 + if prev != nil { + oldSafePoint = prev.SafePoint + } + + response := &gcpb.UpdateGCSafePointByServiceGroupResponse{} + + // fail to store due to safe point rollback + if newSafePoint.SafePoint < oldSafePoint { + log.Warn("trying to update gc_worker safe point", + zap.String("service-group-id", serviceGroupID), + zap.Uint64("old-safe-point", request.SafePoint), + zap.Uint64("new-safe-point", newSafePoint.SafePoint)) + response.Header = s.safePointRollbackHeader(newSafePoint.SafePoint, oldSafePoint) + response.Succeeded = false + response.NewSafePoint = oldSafePoint + return response, nil + } + + // save the safe point to storage + if err := storage.SaveGCWorkerSafePoint(newSafePoint); err != nil { + return nil, err + } + response.Header = s.header() + response.Succeeded = true + response.NewSafePoint = newSafePoint.SafePoint + log.Info("updated gc_worker safe point", + zap.String("service-group-id", serviceGroupID), + zap.Uint64("safe-point", newSafePoint.SafePoint)) + return response, nil +} + +// UpdateServiceSafePointByServiceGroup for services like CDC/BR/Lightning to update gc safe points in PD +func (s *GcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, request *gcpb.UpdateServiceSafePointByServiceGroupRequest) (*gcpb.UpdateServiceSafePointByServiceGroupResponse, error) { + s.serviceGroupSafePointLock.Lock() + defer s.serviceGroupSafePointLock.Unlock() + + rc := s.GetRaftCluster() + if rc == nil { + return &gcpb.UpdateServiceSafePointByServiceGroupResponse{Header: s.notBootstrappedHeader()}, nil + } + + var storage endpoint.GCSafePointStorage = s.storage + serviceGroupID := string(request.ServiceGroupId) + serviceID := string(request.ServiceId) + // a less than 0 ttl means to remove the safe point, immediately return after the deletion request + if 
request.TTL <= 0 { + if err := storage.RemoveServiceSafePointByServiceGroup(serviceGroupID, serviceID); err != nil { + return nil, err + } + return &gcpb.UpdateServiceSafePointByServiceGroupResponse{ + Header: s.header(), + Succeeded: true, + }, nil + } + + nowTSO, err := s.tsoAllocatorManager.HandleTSORequest(tso.GlobalDCLocation, 1) + if err != nil { + return nil, err + } + now, _ := tsoutil.ParseTimestamp(nowTSO) + + sspOld, err := storage.LoadServiceSafePoint(serviceGroupID, serviceID) + if err != nil { + return nil, err + } + gcsp, err := storage.LoadGCWorkerSafePoint(serviceGroupID) + if err != nil { + return nil, err + } + + response := &gcpb.UpdateServiceSafePointByServiceGroupResponse{} + // safePointLowerBound is the minimum request.SafePoint for update request to succeed + // it is oldServiceSafePoint if oldServiceSafePoint exists, else gcSafePoint if it exists + // otherwise it's set to 0, indicate all safePoint accepted + var safePointLowerBound uint64 = 0 + if gcsp != nil { + safePointLowerBound = gcsp.SafePoint + response.GcSafePoint = gcsp.SafePoint + } + if sspOld != nil { + safePointLowerBound = sspOld.SafePoint + response.OldSafePoint = sspOld.SafePoint + } + + // request.SafePoint smaller than safePointLowerBound, we have a safePointRollBack + if request.SafePoint < safePointLowerBound { + response.Header = s.safePointRollbackHeader(request.SafePoint, safePointLowerBound) + response.Succeeded = false + return response, nil + } + + response.Succeeded = true + response.NewSafePoint = request.SafePoint + ssp := &endpoint.ServiceSafePoint{ + ServiceID: serviceID, + ExpiredAt: now.Unix() + request.TTL, + SafePoint: request.SafePoint, + } + // handles overflow + if math.MaxInt64-now.Unix() <= request.TTL { + ssp.ExpiredAt = math.MaxInt64 + } + if err := storage.SaveServiceSafePointByServiceGroup(serviceGroupID, ssp); err != nil { + return nil, err + } + log.Info("update service safe point by service group", + zap.String("service-group-id", serviceGroupID), + zap.String("service-id", ssp.ServiceID), + zap.Int64("expire-at", ssp.ExpiredAt), + zap.Uint64("safepoint", ssp.SafePoint)) + return response, nil +} + +// GetAllServiceGroupGCSafePoints returns all service group's gc safe point +func (s *GcServer) GetAllServiceGroupGCSafePoints(ctx context.Context, request *gcpb.GetAllServiceGroupGCSafePointsRequest) (*gcpb.GetAllServiceGroupGCSafePointsResponse, error) { + + rc := s.GetRaftCluster() + if rc == nil { + return &gcpb.GetAllServiceGroupGCSafePointsResponse{Header: s.notBootstrappedHeader()}, nil + } + + var storage endpoint.GCSafePointStorage = s.storage + serviceIDs, gcSafePoints, err := storage.LoadAllServiceGroupGCSafePoints() + if err != nil { + return nil, err + } + + safePoints := make([]*gcpb.ServiceGroupSafePoint, 0, 2) + for i := range serviceIDs { + safePoints = append(safePoints, &gcpb.ServiceGroupSafePoint{ + ServiceGroupId: serviceIDs[i], + SafePoint: gcSafePoints[i], + }) + } + return &gcpb.GetAllServiceGroupGCSafePointsResponse{ + Header: s.header(), + SafePoints: safePoints, + }, nil +} diff --git a/server/grpc_service.go b/server/grpc_service.go index 65172755a18..3d43023a66f 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -1378,7 +1378,7 @@ func (s *GrpcServer) UpdateGCSafePoint(ctx context.Context, request *pdpb.Update }, nil } -// UpdateServiceGCSafePoint update the safe point for specific service +// UpdateServiceGCSafePoint update the safepoint for specific service func (s *GrpcServer) UpdateServiceGCSafePoint(ctx context.Context, 
request *pdpb.UpdateServiceGCSafePointRequest) (*pdpb.UpdateServiceGCSafePointResponse, error) { s.serviceSafePointLock.Lock() defer s.serviceSafePointLock.Unlock() @@ -1445,268 +1445,6 @@ func (s *GrpcServer) UpdateServiceGCSafePoint(ctx context.Context, request *pdpb }, nil } -// GetServiceGroup return all service group ids -func (s *GrpcServer) GetAllServiceGroups(ctx context.Context, request *pdpb.GetAllServiceGroupsRequest) (*pdpb.GetAllServiceGroupsResponse, error) { - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { - return pdpb.NewPDClient(client).GetAllServiceGroups(ctx, request) - } - if rsp, err := s.unaryMiddleware(ctx, request.GetHeader(), fn); err != nil { - return nil, err - } else if rsp != nil { - return rsp.(*pdpb.GetAllServiceGroupsResponse), err - } - - rc := s.GetRaftCluster() - if rc == nil { - return &pdpb.GetAllServiceGroupsResponse{Header: s.notBootstrappedHeader()}, nil - } - - var storage endpoint.GCSafePointStorage = s.storage - serviceGroupList, err := storage.LoadAllServiceGroups() - if err != nil { - return nil, err - } - - return &pdpb.GetAllServiceGroupsResponse{ - Header: s.header(), - ServiceGroupId: serviceGroupList, - }, nil -} - -// GetMinServiceSafePointByServiceGroup returns given service group's min service safe point -func (s *GrpcServer) GetMinServiceSafePointByServiceGroup(ctx context.Context, request *pdpb.GetMinServiceSafePointByServiceGroupRequest) (*pdpb.GetMinServiceSafePointByServiceGroupResponse, error) { - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { - return pdpb.NewPDClient(client).GetMinServiceSafePointByServiceGroup(ctx, request) - } - if rsp, err := s.unaryMiddleware(ctx, request.GetHeader(), fn); err != nil { - return nil, err - } else if rsp != nil { - return rsp.(*pdpb.GetMinServiceSafePointByServiceGroupResponse), err - } - - rc := s.GetRaftCluster() - if rc == nil { - return &pdpb.GetMinServiceSafePointByServiceGroupResponse{Header: s.notBootstrappedHeader()}, nil - } - - var storage endpoint.GCSafePointStorage = s.storage - serviceGroupID := string(request.ServiceGroupId) - nowTSO, err := s.tsoAllocatorManager.HandleTSORequest(tso.GlobalDCLocation, 1) - if err != nil { - return nil, err - } - now, _ := tsoutil.ParseTimestamp(nowTSO) - min, err := storage.LoadMinServiceSafePointByServiceGroup(serviceGroupID, now) - if err != nil { - return nil, err - } - var returnSafePoint uint64 - if min != nil { - returnSafePoint = min.SafePoint - } - // perform a get operation on a non-existing key to obtain current etcd revision number from response header - rsp, _ := s.client.Get(ctx, "NA") - currentRevision := rsp.Header.GetRevision() - return &pdpb.GetMinServiceSafePointByServiceGroupResponse{ - Header: s.header(), - SafePoint: returnSafePoint, - Revision: currentRevision, - }, nil -} - -// UpdateGCSafePointByServiceGroup used by gc_worker to update their gc safe points -func (s *GrpcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, request *pdpb.UpdateGCSafePointByServiceGroupRequest) (*pdpb.UpdateGCSafePointByServiceGroupResponse, error) { - s.serviceGroupSafePointLock.Lock() - defer s.serviceGroupSafePointLock.Unlock() - - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { - return pdpb.NewPDClient(client).UpdateGCSafePointByServiceGroup(ctx, request) - } - if rsp, err := s.unaryMiddleware(ctx, request.GetHeader(), fn); err != nil { - return nil, err - } else if rsp != nil { - return 
rsp.(*pdpb.UpdateGCSafePointByServiceGroupResponse), err - } - - rc := s.GetRaftCluster() - if rc == nil { - return &pdpb.UpdateGCSafePointByServiceGroupResponse{Header: s.notBootstrappedHeader()}, nil - } - - var storage endpoint.GCSafePointStorage = s.storage - - // check if revision changed since last min calculation - rsp, _ := s.client.Get(ctx, "NA") - currentRevision := rsp.Header.GetRevision() - if currentRevision != request.GetRevision() { - return &pdpb.UpdateGCSafePointByServiceGroupResponse{ - Header: s.header(), - Succeeded: false, - NewSafePoint: 0, - }, nil - } - serviceGroupID := string(request.ServiceGroupId) - newSafePoint := &endpoint.GCSafePoint{ - ServiceGroupID: serviceGroupID, - SafePoint: request.SafePoint, - } - - prev, err := storage.LoadGCWorkerSafePoint(serviceGroupID) - if err != nil { - return nil, err - } - // if no previous safepoint, treat it as 0 - var oldSafePoint uint64 = 0 - if prev != nil { - oldSafePoint = prev.SafePoint - } - - // Only save the safe point if it's greater than the previous one, or if no previous one exist - if request.SafePoint > oldSafePoint { - if err := storage.SaveGCWorkerSafePoint(newSafePoint); err != nil { - return nil, err - } - log.Info("updated gc_worker safe point", - zap.String("service-group-id", serviceGroupID), - zap.Uint64("safe-point", newSafePoint.SafePoint)) - } else if newSafePoint.SafePoint < request.SafePoint { - log.Warn("trying to update gc_worker safe point", - zap.String("service-group-id", serviceGroupID), - zap.Uint64("old-safe-point", request.SafePoint), - zap.Uint64("new-safe-point", newSafePoint.SafePoint)) - newSafePoint.SafePoint = oldSafePoint - } - return &pdpb.UpdateGCSafePointByServiceGroupResponse{ - Header: s.header(), - Succeeded: true, - NewSafePoint: newSafePoint.SafePoint, - }, nil -} - -// UpdateServiceSafePointByServiceGroup for services like CDC/BR/Lightning to update gc safe points in PD -func (s *GrpcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, request *pdpb.UpdateServiceSafePointByServiceGroupRequest) (*pdpb.UpdateServiceSafePointByServiceGroupResponse, error) { - s.serviceGroupSafePointLock.Lock() - defer s.serviceGroupSafePointLock.Unlock() - - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { - return pdpb.NewPDClient(client).UpdateServiceSafePointByServiceGroup(ctx, request) - } - if rsp, err := s.unaryMiddleware(ctx, request.GetHeader(), fn); err != nil { - return nil, err - } else if rsp != nil { - return rsp.(*pdpb.UpdateServiceSafePointByServiceGroupResponse), err - } - - rc := s.GetRaftCluster() - if rc == nil { - return &pdpb.UpdateServiceSafePointByServiceGroupResponse{Header: s.notBootstrappedHeader()}, nil - } - - var storage endpoint.GCSafePointStorage = s.storage - serviceGroupID := string(request.ServiceGroupId) - serviceID := string(request.ServiceId) - // a less than 0 ttl means to remove the safe point, immediately return after the deletion request - if request.TTL <= 0 { - if err := storage.RemoveServiceSafePointByServiceGroup(serviceGroupID, serviceID); err != nil { - return nil, err - } - return &pdpb.UpdateServiceSafePointByServiceGroupResponse{ - Header: s.header(), - Succeeded: true, - }, nil - } - - nowTSO, err := s.tsoAllocatorManager.HandleTSORequest(tso.GlobalDCLocation, 1) - if err != nil { - return nil, err - } - now, _ := tsoutil.ParseTimestamp(nowTSO) - - sspOld, err := storage.LoadServiceSafePoint(serviceGroupID, serviceID) - if err != nil { - return nil, err - } - gcsp, err := 
storage.LoadGCWorkerSafePoint(serviceGroupID) - if err != nil { - return nil, err - } - var oldServiceSafePoint, gcSafePoint, newServiceSafePoint uint64 = 0, 0, 0 - succeeded := false - - if sspOld != nil { - oldServiceSafePoint = sspOld.SafePoint - newServiceSafePoint = oldServiceSafePoint // case where update denied - } - if gcsp != nil { - gcSafePoint = gcsp.SafePoint - } - - // case where there is an old safepoint for the given service, we have to check that - // new safepoint >= old safepoint - caseUpdate := oldServiceSafePoint != 0 && request.SafePoint >= oldServiceSafePoint - // Or if no old safepoint and new safepoint >= gc safepoint - caseInit := oldServiceSafePoint == 0 && request.SafePoint >= gcSafePoint - - if caseUpdate || caseInit { - succeeded = true - ssp := &endpoint.ServiceSafePoint{ - ServiceID: serviceID, - ExpiredAt: now.Unix() + request.TTL, - SafePoint: request.SafePoint, - } - // handles overflow - if math.MaxInt64-now.Unix() <= request.TTL { - ssp.ExpiredAt = math.MaxInt64 - } - if err := storage.SaveServiceSafePointByServiceGroup(serviceGroupID, ssp); err != nil { - return nil, err - } - newServiceSafePoint = request.SafePoint // case where update performed - log.Info("update service safe point by service group", - zap.String("service-group-id", serviceGroupID), - zap.String("service-id", ssp.ServiceID), - zap.Int64("expire-at", ssp.ExpiredAt), - zap.Uint64("safepoint", ssp.SafePoint)) - } - - return &pdpb.UpdateServiceSafePointByServiceGroupResponse{ - Header: s.header(), - Succeeded: succeeded, - GcSafePoint: gcSafePoint, - OldSafePoint: oldServiceSafePoint, - NewSafePoint: newServiceSafePoint, - }, nil -} - -// GetAllServiceGroupGCSafePoints returns all service group's gc safe point -func (s *GrpcServer) GetAllServiceGroupGCSafePoints(ctx context.Context, request *pdpb.GetAllServiceGroupGCSafePointsRequest) (*pdpb.GetAllServiceGroupGCSafePointsResponse, error) { - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { - return pdpb.NewPDClient(client).GetAllServiceGroupGCSafePoints(ctx, request) - } - if rsp, err := s.unaryMiddleware(ctx, request.GetHeader(), fn); err != nil { - return nil, err - } else if rsp != nil { - return rsp.(*pdpb.GetAllServiceGroupGCSafePointsResponse), err - } - - rc := s.GetRaftCluster() - if rc == nil { - return &pdpb.GetAllServiceGroupGCSafePointsResponse{Header: s.notBootstrappedHeader()}, nil - } - - var storage endpoint.GCSafePointStorage = s.storage - safePoints, err := storage.LoadAllServiceGroupGCSafePoints() - - if err != nil { - return nil, err - } - - return &pdpb.GetAllServiceGroupGCSafePointsResponse{ - Header: s.header(), - SafePoints: safePoints, - }, nil -} - // GetOperator gets information about the operator belonging to the specify region. 
func (s *GrpcServer) GetOperator(ctx context.Context, request *pdpb.GetOperatorRequest) (*pdpb.GetOperatorResponse, error) { fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { diff --git a/server/server.go b/server/server.go index 5dd36a8c742..ca336182ca8 100644 --- a/server/server.go +++ b/server/server.go @@ -35,6 +35,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/diagnosticspb" + "github.com/pingcap/kvproto/pkg/gcpb" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" @@ -276,6 +277,7 @@ func CreateServer(ctx context.Context, cfg *config.Config, serviceBuilders ...Ha etcdCfg.ServiceRegister = func(gs *grpc.Server) { pdpb.RegisterPDServer(gs, &GrpcServer{Server: s}) diagnosticspb.RegisterDiagnosticsServer(gs, s) + gcpb.RegisterGCServer(gs, &GcServer{Server: s}) } s.etcdCfg = etcdCfg if EnableZap { diff --git a/server/storage/endpoint/gc_safe_point.go b/server/storage/endpoint/gc_safe_point.go index f28154fa607..98f0b56de49 100644 --- a/server/storage/endpoint/gc_safe_point.go +++ b/server/storage/endpoint/gc_safe_point.go @@ -22,7 +22,6 @@ import ( "time" "github.com/pingcap/errors" - "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" "go.etcd.io/etcd/clientv3" @@ -58,7 +57,7 @@ type GCSafePointStorage interface { RemoveServiceSafePointByServiceGroup(serviceGroupID, serviceID string) error LoadServiceSafePoint(serviceGroupID, serviceID string) (*ServiceSafePoint, error) SaveServiceSafePointByServiceGroup(serviceGroupID string, ssp *ServiceSafePoint) error - LoadAllServiceGroupGCSafePoints() ([]*pdpb.ServiceGroupSafePoint, error) + LoadAllServiceGroupGCSafePoints() ([][]byte, []uint64, error) } var _ GCSafePointStorage = (*StorageEndpoint)(nil) @@ -321,29 +320,23 @@ func (se *StorageEndpoint) SaveServiceSafePointByServiceGroup(serviceGroupID str return se.Save(key, string(value)) } -// LoadAllServiceGroupGCSafePoints returns a slice contains GCSafePoint for every service group -func (se *StorageEndpoint) LoadAllServiceGroupGCSafePoints() ([]*pdpb.ServiceGroupSafePoint, error) { +// LoadAllServiceGroupGCSafePoints returns two slices of ServiceGroupIDs and their corresponding safe points +func (se *StorageEndpoint) LoadAllServiceGroupGCSafePoints() ([][]byte, []uint64, error) { prefix := gcSafePointPrefixPath() prefixEnd := clientv3.GetPrefixRangeEnd(prefix) keys, values, err := se.LoadRange(prefix, prefixEnd, 0) if err != nil { - return nil, err - } - if len(keys) == 0 { - return []*pdpb.ServiceGroupSafePoint{}, nil + return nil, nil, err } - gcSafePoints := make([]*pdpb.ServiceGroupSafePoint, 0, 2) // there are probably only two service groups + serviceIDs := make([][]byte, 0, 2) // there are probably only two service groups + safePoints := make([]uint64, 0, 2) for i := range keys { gcSafePoint := &GCSafePoint{} if err := json.Unmarshal([]byte(values[i]), gcSafePoint); err != nil { - return nil, err - } - serviceGroupSafePoint := &pdpb.ServiceGroupSafePoint{ - ServiceGroupId: []byte(gcSafePoint.ServiceGroupID), - SafePoint: gcSafePoint.SafePoint, + return nil, nil, err } - gcSafePoints = append(gcSafePoints, serviceGroupSafePoint) + serviceIDs = append(serviceIDs, []byte(gcSafePoint.ServiceGroupID)) + safePoints = append(safePoints, gcSafePoint.SafePoint) } - - return gcSafePoints, nil + return serviceIDs, safePoints, nil } From 911a091c3efe64ef2f75987fcd352c19f8007847 Mon Sep 17 00:00:00 2001 From: 
AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> Date: Fri, 6 May 2022 11:47:47 +0800 Subject: [PATCH 10/18] update reference, remove go mod replace Signed-off-by: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> --- client/go.mod | 5 +---- client/go.sum | 10 ++-------- go.mod | 5 +---- go.sum | 13 +++++++++++-- 4 files changed, 15 insertions(+), 18 deletions(-) diff --git a/client/go.mod b/client/go.mod index aa6c6177c6a..7545e94ea21 100644 --- a/client/go.mod +++ b/client/go.mod @@ -2,15 +2,12 @@ module github.com/tikv/pd/client go 1.16 -// TODO: Remove this once kvproto has been updated -replace github.com/pingcap/kvproto => github.com/AmoebaProtozoa/kvproto v0.0.0-20220505035154-33f7827ec636 - require ( github.com/opentracing/opentracing-go v1.2.0 github.com/pingcap/check v0.0.0-20211026125417-57bd13f7b5f0 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 - github.com/pingcap/kvproto v0.0.0-20220425052816-e33ae9239820 + github.com/pingcap/kvproto v0.0.0-20220506032820-55094d91343e github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee github.com/prometheus/client_golang v1.11.0 go.uber.org/goleak v1.1.11 diff --git a/client/go.sum b/client/go.sum index aa96d8c8221..e4d6245dd7f 100644 --- a/client/go.sum +++ b/client/go.sum @@ -1,10 +1,5 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -github.com/AmoebaProtozoa/kvproto v0.0.0-20220427045408-abeb7dbc9f22/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= -github.com/AmoebaProtozoa/kvproto v0.0.0-20220429023313-555dcb1080e7 h1:lbd1F9oWMbFRDXpzOehO90j69L4s4JCtEzjbe+NLdC4= -github.com/AmoebaProtozoa/kvproto v0.0.0-20220429023313-555dcb1080e7/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= -github.com/AmoebaProtozoa/kvproto v0.0.0-20220505035154-33f7827ec636 h1:25B7U79UYUqe5qntmMsszD+9nfJ34lK6vyp1Gez0z14= -github.com/AmoebaProtozoa/kvproto v0.0.0-20220505035154-33f7827ec636/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= @@ -111,8 +106,8 @@ github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTm github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZFh4N3vQ5HEtld3S+Y+StULhWVvumU0= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= -github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748 h1:i4MBe1zGq9/r3BH6rTRunizi4T59fpNk8hvBCrB5UAY= -github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20220506032820-55094d91343e h1:iquj/SVNullS8+llCooL3Pk2DWQPW/HDDpF1EHwsnq0= +github.com/pingcap/kvproto v0.0.0-20220506032820-55094d91343e/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee 
h1:VO2t6IBpfvW34TdtD/G10VvnGqjLic1jzOuHjUb5VqM= github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= @@ -155,7 +150,6 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/ystaticy/kvproto v0.0.0-20220419035825-6bb5c11da23d/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= diff --git a/go.mod b/go.mod index 5c5ff3fc300..7a337434c1b 100644 --- a/go.mod +++ b/go.mod @@ -2,9 +2,6 @@ module github.com/tikv/pd go 1.16 -// TODO: Remove this once kvproto has been updated -replace github.com/pingcap/kvproto => github.com/AmoebaProtozoa/kvproto v0.0.0-20220505035154-33f7827ec636 - require ( github.com/AlekSi/gocov-xml v1.0.0 github.com/BurntSushi/toml v0.3.1 @@ -33,7 +30,7 @@ require ( github.com/pingcap/errcode v0.3.0 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce - github.com/pingcap/kvproto v0.0.0-20220425052816-e33ae9239820 + github.com/pingcap/kvproto v0.0.0-20220506032820-55094d91343e github.com/pingcap/log v0.0.0-20210906054005-afc726e70354 github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d github.com/pingcap/tidb-dashboard v0.0.0-20220331105802-5ac69661755c diff --git a/go.sum b/go.sum index aa435095039..2e35d8f5901 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,6 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/AlekSi/gocov-xml v1.0.0 h1:4QctJBgXEkbzeKz6PJy6bt3JSPNSN4I2mITYW+eKUoQ= github.com/AlekSi/gocov-xml v1.0.0/go.mod h1:J0qYeZ6tDg4oZubW9mAAgxlqw39PDfoEkzB3HXSbEuA= -github.com/AmoebaProtozoa/kvproto v0.0.0-20220505035154-33f7827ec636 h1:25B7U79UYUqe5qntmMsszD+9nfJ34lK6vyp1Gez0z14= -github.com/AmoebaProtozoa/kvproto v0.0.0-20220505035154-33f7827ec636/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= @@ -171,8 +169,10 @@ github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/me github.com/goccy/go-graphviz v0.0.9 h1:s/FMMJ1Joj6La3S5ApO3Jk2cwM4LpXECC2muFx3IPQQ= github.com/goccy/go-graphviz v0.0.9/go.mod h1:wXVsXxmyMQU6TN3zGRttjNn3h+iCAS7xQFC6TlNvLhk= github.com/gofrs/uuid v3.2.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= +github.com/gogo/protobuf v0.0.0-20180717141946-636bf0302bc9/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= +github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod 
h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt v3.2.1+incompatible h1:73Z+4BJcrTC+KczS6WvTPvRGOp1WmfEP4Q1lOd9Z/+c= @@ -186,6 +186,7 @@ github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4er github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef h1:veQD95Isof8w9/WXiA+pa3tz3fJXkt5B7QaRBrM62gk= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v0.0.0-20180814211427-aa810b61a9c7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -291,6 +292,7 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= +github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -396,6 +398,10 @@ github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTm github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg= github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce h1:Y1kCxlCtlPTMtVcOkjUcuQKh+YrluSo7+7YMCQSzy30= github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk= +github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= +github.com/pingcap/kvproto v0.0.0-20200411081810-b85805c9476c/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= +github.com/pingcap/kvproto v0.0.0-20220506032820-55094d91343e h1:iquj/SVNullS8+llCooL3Pk2DWQPW/HDDpF1EHwsnq0= +github.com/pingcap/kvproto v0.0.0-20220506032820-55094d91343e/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= @@ -688,6 +694,7 @@ golang.org/x/time v0.0.0-20220224211638-0e9765cccd65 h1:M73Iuj3xbbb9Uk1DYhzydths golang.org/x/time v0.0.0-20220224211638-0e9765cccd65/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools 
v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= @@ -727,9 +734,11 @@ google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9Ywl google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20181004005441-af9cb2a35e7f/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190927181202-20e1ac93f88c h1:hrpEMCZ2O7DR5gC1n2AJGVhrwiEjOi35+jxtIuZpTMo= google.golang.org/genproto v0.0.0-20190927181202-20e1ac93f88c/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= +google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= From 1e682fa6e9e12fc5695ff16198aaaacc42604eca Mon Sep 17 00:00:00 2001 From: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> Date: Fri, 6 May 2022 15:43:40 +0800 Subject: [PATCH 11/18] added unit tests for storage endpoint Signed-off-by: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> --- server/storage/endpoint/gc_safe_point.go | 8 +- server/storage/endpoint/key_path.go | 4 +- server/storage/storage_test.go | 183 +++++++++++++++++++++++ 3 files changed, 189 insertions(+), 6 deletions(-) diff --git a/server/storage/endpoint/gc_safe_point.go b/server/storage/endpoint/gc_safe_point.go index 98f0b56de49..443a6220caa 100644 --- a/server/storage/endpoint/gc_safe_point.go +++ b/server/storage/endpoint/gc_safe_point.go @@ -219,7 +219,7 @@ func (se *StorageEndpoint) LoadMinServiceSafePointByServiceGroup(serviceGroupID return nil, nil } - min := &ServiceSafePoint{SafePoint: math.MaxInt64} + min := &ServiceSafePoint{SafePoint: math.MaxUint64} for i, key := range keys { ssp := &ServiceSafePoint{} if err := json.Unmarshal([]byte(values[i]), ssp); err != nil { @@ -288,14 +288,14 @@ func (se *StorageEndpoint) SaveGCWorkerSafePoint(gcSafePoint *GCSafePoint) error // RemoveServiceSafePointByServiceGroup removes a service safe point func (se *StorageEndpoint) RemoveServiceSafePointByServiceGroup(serviceGroupID, serviceID string) error { - key := serviceSafePointPath(serviceGroupID, serviceID) + key := ServiceSafePointPath(serviceGroupID, serviceID) return se.Remove(key) } // LoadServiceSafePoint reads ServiceSafePoint for the given service group and service name // return nil if no safepoint not exist func (se *StorageEndpoint) LoadServiceSafePoint(serviceGroupID, serviceID string) (*ServiceSafePoint, error) { - value, err := se.Load(serviceSafePointPath(serviceGroupID, serviceID)) + value, err := se.Load(ServiceSafePointPath(serviceGroupID, serviceID)) if err != nil || value == "" { return nil, err } @@ -311,7 +311,7 @@ func (se *StorageEndpoint) 
SaveServiceSafePointByServiceGroup(serviceGroupID str if ssp.ServiceID == "" { return errors.New("service id of service safepoint cannot be empty") } - key := serviceSafePointPath(serviceGroupID, ssp.ServiceID) + key := ServiceSafePointPath(serviceGroupID, ssp.ServiceID) value, err := json.Marshal(ssp) if err != nil { return err diff --git a/server/storage/endpoint/key_path.go b/server/storage/endpoint/key_path.go index e4bbb1e551d..24dccde2346 100644 --- a/server/storage/endpoint/key_path.go +++ b/server/storage/endpoint/key_path.go @@ -111,9 +111,9 @@ func gcSafePointPathByServiceGroup(serviceGroupID string) string { return path.Join(gcServiceGroupPath, "gc_safepoint", serviceGroupID) } -// serviceSafePointPath returns the path of services' safe point +// ServiceSafePointPath returns the path of services' safe point // /gc_servicegroup/service_safepoint/$service_group_id/$service_id -func serviceSafePointPath(serviceGroupID, serviceID string) string { +func ServiceSafePointPath(serviceGroupID, serviceID string) string { return path.Join(gcServiceGroupPath, "service_safepoint", serviceGroupID, serviceID) } diff --git a/server/storage/storage_test.go b/server/storage/storage_test.go index 51870a62133..106efe42408 100644 --- a/server/storage/storage_test.go +++ b/server/storage/storage_test.go @@ -279,3 +279,186 @@ func (s *testStorageSuite) TestLoadRegionsExceedRangeLimit(c *C) { } c.Assert(failpoint.Disable("github.com/tikv/pd/server/storage/kv/withRangeLimit"), IsNil) } + +func testGCSafePoints() []*endpoint.GCSafePoint { + return []*endpoint.GCSafePoint{ + { + ServiceGroupID: "testServiceGroup1", + SafePoint: 0, + }, + { + ServiceGroupID: "testServiceGroup2", + SafePoint: 1, + }, + { + ServiceGroupID: "testServiceGroup3", + SafePoint: 4396, + }, + { + ServiceGroupID: "testServiceGroup4", + SafePoint: 23333333333, + }, + { + ServiceGroupID: "testServiceGroup5", + SafePoint: math.MaxUint64, + }, + } +} + +func (s *testStorageSuite) TestLoadGCWorkerSafePoint(c *C) { + storage := NewStorageWithMemoryBackend() + testData := testGCSafePoints() + r, e := storage.LoadGCWorkerSafePoint("testServiceGroup") + c.Assert(r, IsNil) + c.Assert(e, IsNil) + for _, safePoint := range testData { + err := storage.SaveGCWorkerSafePoint(safePoint) + c.Assert(err, IsNil) + loaded, err := storage.LoadGCWorkerSafePoint(safePoint.ServiceGroupID) + c.Assert(err, IsNil) + c.Assert(safePoint.ServiceGroupID, Equals, loaded.ServiceGroupID) + c.Assert(safePoint.SafePoint, Equals, loaded.SafePoint) + } +} + +func (s *testStorageSuite) TestLoadAllServiceGroupGCSafePoints(c *C) { + storage := NewStorageWithMemoryBackend() + testData := testGCSafePoints() + for _, safePoint := range testData { + err := storage.SaveGCWorkerSafePoint(safePoint) + c.Assert(err, IsNil) + } + serviceGroupIDs, safePoints, err := storage.LoadAllServiceGroupGCSafePoints() + c.Assert(err, IsNil) + for i, safePoint := range testData { + c.Assert(string(serviceGroupIDs[i]), Equals, safePoint.ServiceGroupID) + c.Assert(safePoints[i], Equals, safePoint.SafePoint) + } +} + +func (s *testStorageSuite) TestLoadAllServiceGroup(c *C) { + storage := NewStorageWithMemoryBackend() + testData := testGCSafePoints() + for _, safePoint := range testData { + err := storage.SaveGCWorkerSafePoint(safePoint) + c.Assert(err, IsNil) + } + serviceGroups, err := storage.LoadAllServiceGroups() + c.Assert(err, IsNil) + for i, safePoint := range testData { + c.Assert(string(serviceGroups[i]), Equals, safePoint.ServiceGroupID) + } +} + +func (s *testStorageSuite) 
TestLoadServiceSafePointByServiceGroup(c *C) { + storage := NewStorageWithMemoryBackend() + expireAt := time.Now().Add(100 * time.Second).Unix() + serviceSafePoints := []*endpoint.ServiceSafePoint{ + {ServiceID: "1", ExpiredAt: expireAt, SafePoint: 1}, + {ServiceID: "2", ExpiredAt: expireAt, SafePoint: 2}, + {ServiceID: "3", ExpiredAt: expireAt, SafePoint: 3}, + } + serviceGroups := []string{ + "serviceGroup1", + "serviceGroup2", + "serviceGroup3", + } + + for _, serviceGroup := range serviceGroups { + for _, serviceSafePoint := range serviceSafePoints { + c.Assert(storage.SaveServiceSafePointByServiceGroup(serviceGroup, serviceSafePoint), IsNil) + } + } + for _, serviceGroup := range serviceGroups { + for _, serviceSafePoint := range serviceSafePoints { + key := endpoint.ServiceSafePointPath(serviceGroup, serviceSafePoint.ServiceID) + value, err := storage.Load(key) + c.Assert(err, IsNil) + ssp := &endpoint.ServiceSafePoint{} + c.Assert(json.Unmarshal([]byte(value), ssp), IsNil) + c.Assert(ssp.ServiceID, Equals, serviceSafePoint.ServiceID) + c.Assert(ssp.ExpiredAt, Equals, serviceSafePoint.ExpiredAt) + c.Assert(ssp.SafePoint, Equals, serviceSafePoint.SafePoint) + + } + } +} + +func (s *testStorageSuite) TestRemoveServiceSafePointByServiceGroup(c *C) { + storage := NewStorageWithMemoryBackend() + expireAt := time.Now().Add(100 * time.Second).Unix() + + serviceSafePoints := []*endpoint.ServiceSafePoint{ + {ServiceID: "1", ExpiredAt: expireAt, SafePoint: 1}, + {ServiceID: "2", ExpiredAt: expireAt, SafePoint: 2}, + {ServiceID: "3", ExpiredAt: expireAt, SafePoint: 3}, + } + serviceGroups := []string{ + "serviceGroup1", + "serviceGroup2", + "serviceGroup3", + } + // save service safe points + for _, serviceGroup := range serviceGroups { + for _, serviceSafePoint := range serviceSafePoints { + c.Assert(storage.SaveServiceSafePointByServiceGroup(serviceGroup, serviceSafePoint), IsNil) + } + } + + // remove service safe points + for _, serviceGroup := range serviceGroups { + for _, serviceSafePoint := range serviceSafePoints { + c.Assert(storage.RemoveServiceSafePointByServiceGroup(serviceGroup, serviceSafePoint.ServiceID), IsNil) + } + } + + // check that service safe points are empty + for _, serviceGroup := range serviceGroups { + for _, serviceSafePoint := range serviceSafePoints { + safepoint, err := storage.LoadServiceSafePoint(serviceGroup, serviceSafePoint.ServiceID) + c.Assert(err, IsNil) + c.Assert(safepoint, IsNil) + } + } +} + +func (s *testStorageSuite) TestLoadMinServiceSafePointByServiceGroup(c *C) { + storage := NewStorageWithMemoryBackend() + currentTime := time.Now() + expireAt1 := currentTime.Add(100 * time.Second).Unix() + expireAt2 := currentTime.Add(200 * time.Second).Unix() + expireAt3 := currentTime.Add(300 * time.Second).Unix() + + serviceSafePoints := []*endpoint.ServiceSafePoint{ + {ServiceID: "1", ExpiredAt: expireAt1, SafePoint: 100}, + {ServiceID: "2", ExpiredAt: expireAt2, SafePoint: 200}, + {ServiceID: "3", ExpiredAt: expireAt3, SafePoint: 300}, + } + + for _, serviceSafePoint := range serviceSafePoints { + c.Assert(storage.SaveServiceSafePointByServiceGroup("testServiceGroup1", serviceSafePoint), IsNil) + } + minSafePoint, err := storage.LoadMinServiceSafePointByServiceGroup("testServiceGroup1", currentTime) + c.Assert(err, IsNil) + c.Assert(minSafePoint.ServiceID, Equals, serviceSafePoints[0].ServiceID) + c.Assert(minSafePoint.ExpiredAt, Equals, serviceSafePoints[0].ExpiredAt) + c.Assert(minSafePoint.SafePoint, Equals, serviceSafePoints[0].SafePoint) + + // this 
should remove safePoint with ServiceID 1 due to expiration + // and find the safePoint with ServiceID 2 + minSafePoint2, err := storage.LoadMinServiceSafePointByServiceGroup("testServiceGroup1", currentTime.Add(150*time.Second)) + c.Assert(err, IsNil) + c.Assert(minSafePoint2.ServiceID, Equals, serviceSafePoints[1].ServiceID) + c.Assert(minSafePoint2.ExpiredAt, Equals, serviceSafePoints[1].ExpiredAt) + c.Assert(minSafePoint2.SafePoint, Equals, serviceSafePoints[1].SafePoint) + // verify that one with ServiceID 1 has been removed + ssp, err := storage.LoadServiceSafePoint("testServiceGroup1", "1") + c.Assert(err, IsNil) + c.Assert(ssp, IsNil) + + // this should remove all service safe points + // and return nil + ssp, err = storage.LoadMinServiceSafePointByServiceGroup("testServiceGroup1", currentTime.Add(500*time.Second)) + c.Assert(err, IsNil) + c.Assert(ssp, IsNil) +} From d4d757c4ca52ab06d669d232efc9286019d01d6f Mon Sep 17 00:00:00 2001 From: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> Date: Fri, 6 May 2022 16:26:44 +0800 Subject: [PATCH 12/18] typo fix Signed-off-by: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> --- server/storage/endpoint/gc_safe_point.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/storage/endpoint/gc_safe_point.go b/server/storage/endpoint/gc_safe_point.go index 443a6220caa..27c463c9b03 100644 --- a/server/storage/endpoint/gc_safe_point.go +++ b/server/storage/endpoint/gc_safe_point.go @@ -194,7 +194,7 @@ func (se *StorageEndpoint) SaveServiceGCSafePoint(ssp *ServiceSafePoint) error { return se.Save(key, string(value)) } -// RemoveServiceGCSafePoint removes a GC safeoint for the service +// RemoveServiceGCSafePoint removes a GC safepoint for the service func (se *StorageEndpoint) RemoveServiceGCSafePoint(serviceID string) error { if serviceID == gcWorkerServiceSafePointID { return errors.New("cannot remove service safe point of gc_worker") From f67792f49fe50cb44e809b4f7a8d465e34489bba Mon Sep 17 00:00:00 2001 From: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> Date: Fri, 6 May 2022 18:04:33 +0800 Subject: [PATCH 13/18] add serviceGroupServicePath and serviceGroupGCPath as constant to key_path.go Signed-off-by: AmoebaProtozoa <8039876+AmoebaProtozoa@users.noreply.github.com> --- server/storage/endpoint/key_path.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/server/storage/endpoint/key_path.go b/server/storage/endpoint/key_path.go index 24dccde2346..26838437a03 100644 --- a/server/storage/endpoint/key_path.go +++ b/server/storage/endpoint/key_path.go @@ -31,7 +31,8 @@ const ( customScheduleConfigPath = "scheduler_config" gcWorkerServiceSafePointID = "gc_worker" minResolvedTS = "min_resolved_ts" - gcServiceGroupPath = "gc_servicegroup" + serviceGroupServicePath = "gc_servicegroup/service_safepoint" + serviceGroupGCPath = "gc_servicegroup/gc_safepoint" ) // AppendToRootPath appends the given key to the rootPath. 
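A minimal sketch (not part of the diff) of the etcd key layout these path helpers produce, assuming a hypothetical service group "rawkv_default" and service "br"; real keys are additionally prefixed with the cluster root path via AppendToRootPath:

package keypathsketch

import (
	"fmt"
	"path"
)

// Mirrors the constants introduced in key_path.go above.
const (
	serviceGroupServicePath = "gc_servicegroup/service_safepoint"
	serviceGroupGCPath      = "gc_servicegroup/gc_safepoint"
)

func Example() {
	// gc_servicegroup/gc_safepoint/rawkv_default
	fmt.Println(path.Join(serviceGroupGCPath, "rawkv_default"))
	// gc_servicegroup/service_safepoint/rawkv_default/br
	fmt.Println(path.Join(serviceGroupServicePath, "rawkv_default", "br"))
	// gc_servicegroup/service_safepoint/rawkv_default/  (prefix used for range scans)
	fmt.Println(path.Join(serviceGroupServicePath, "rawkv_default") + "/")
}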
@@ -108,19 +109,19 @@ func MinResolvedTSPath() string { // gcSafePointPathByServiceGroup returns the path of the gc_worker's safe point // /gc_servicegroup/gc_safepoint/$service_group_id func gcSafePointPathByServiceGroup(serviceGroupID string) string { - return path.Join(gcServiceGroupPath, "gc_safepoint", serviceGroupID) + return path.Join(serviceGroupGCPath, serviceGroupID) } // ServiceSafePointPath returns the path of services' safe point // /gc_servicegroup/service_safepoint/$service_group_id/$service_id func ServiceSafePointPath(serviceGroupID, serviceID string) string { - return path.Join(gcServiceGroupPath, "service_safepoint", serviceGroupID, serviceID) + return path.Join(serviceGroupServicePath, serviceGroupID, serviceID) } func serviceSafePointPrefixPath(serviceGroupID string) string { - return path.Join(gcServiceGroupPath, "service_safepoint", serviceGroupID) + "/" + return path.Join(serviceGroupServicePath, serviceGroupID) + "/" } func gcSafePointPrefixPath() string { - return path.Join(gcServiceGroupPath, "gc_safepoint") + "/" + return serviceGroupGCPath + "/" } From 7429f2eaa5e02706d44803fd22a9b06c6854b208 Mon Sep 17 00:00:00 2001 From: pingyu Date: Mon, 9 May 2022 20:08:27 +0800 Subject: [PATCH 14/18] server, endpoint, client: Add PD APIs for RawKV GC Issue Number: #4865 Signed-off-by: pingyu --- client/client.go | 65 ++++--- client/metrics.go | 78 ++++---- server/gc_service.go | 102 ++++++---- server/server.go | 2 +- server/storage/endpoint/gc_safe_point.go | 157 +--------------- server/storage/endpoint/gc_service_group.go | 154 +++++++++++++++ server/storage/endpoint/key_path.go | 39 ++-- server/storage/storage_gc_test.go | 197 ++++++++++++++++++++ server/storage/storage_test.go | 183 ------------------ tests/client/go.mod | 2 +- tests/client/go.sum | 4 +- 11 files changed, 527 insertions(+), 456 deletions(-) create mode 100644 server/storage/endpoint/gc_service_group.go create mode 100644 server/storage/storage_gc_test.go diff --git a/client/client.go b/client/client.go index 4d6b5902aec..1d6dce01171 100644 --- a/client/client.go +++ b/client/client.go @@ -132,21 +132,21 @@ type Client interface { // Close closes the client. 
Close() - // GetAllServiceGroups returns a list containing all service groups that has safe point in pd - GetAllServiceGroups(ctx context.Context) ([]string, error) - // GetMinServiceSafePointByServiceGroup return the minimum of all service safe point of the given group - // it also returns the current revision of the pd storage, with in which the min is valid - // if none is found, it will return 0 as min - GetMinServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID string) (safePoint uint64, revision int64, err error) + // GetGCAllServiceGroups returns a list containing all service groups that has safe point in pd + GetGCAllServiceGroups(ctx context.Context) ([]string, error) + // GetGCMinServiceSafePointByServiceGroup return the minimum of all service safe point of the given group + // It also returns the current revision of the pd storage, with in which the min is valid + // If none is found, it will return 0 as min + GetGCMinServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID string) (safePoint uint64, revision int64, err error) // UpdateGCSafePointByServiceGroup update the target safe point, along with revision obtained previously - // if failed, caller should retry form GetMinServiceSafePointByServiceGroup + // If failed, caller should retry from GetGCMinServiceSafePointByServiceGroup UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64, revision int64) (succeeded bool, newSafePoint uint64, err error) - // UpdateServiceSafePointByServiceGroup update the given service's safe point - // pass in a negative ttl to remove it - // if failed, caller should retry with higher safe point - UpdateServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (succeeded bool, gcSafePoint, oldSafePoint, newSafePoint uint64, err error) - // GetAllServiceGroupGCSafePoints returns GC safe point for all service groups - GetAllServiceGroupGCSafePoints(ctx context.Context) ([]*gcpb.ServiceGroupSafePoint, error) + // UpdateGCServiceSafePointByServiceGroup update the given service's safe point + // Pass in a negative ttl to remove it + // If failed, caller should retry with higher safe point + UpdateGCServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (succeeded bool, gcSafePoint, oldSafePoint, newSafePoint uint64, err error) + // GetGCAllServiceGroupSafePoints returns GC safe point for all service groups + GetGCAllServiceGroupSafePoints(ctx context.Context) ([]*gcpb.ServiceGroupSafePoint, error) } // GetStoreOp represents available options when getting stores. 
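A minimal, hypothetical sketch of the gc_worker flow these methods are written for, assuming the client package is imported as pd and "rawkv_default" is an invented service-group ID: fetch the minimum service safe point together with the storage revision it was computed at, then try to advance the GC safe point with that revision, retrying on a revision mismatch as the interface comments advise.

package gcworkersketch

import (
	"context"

	pd "github.com/tikv/pd/client"
)

// advanceGCSafePoint tries to push the group's GC safe point up to the current
// minimum of its service safe points, using the revision as an optimistic check.
func advanceGCSafePoint(ctx context.Context, cli pd.Client) (uint64, error) {
	const group = "rawkv_default" // hypothetical service group ID
	for {
		// Minimum service safe point plus the storage revision it was computed at.
		min, rev, err := cli.GetGCMinServiceSafePointByServiceGroup(ctx, group)
		if err != nil {
			return 0, err
		}
		// PD rejects the update if service safe points changed after that revision.
		ok, newSafePoint, err := cli.UpdateGCSafePointByServiceGroup(ctx, group, min, rev)
		if err != nil {
			return 0, err
		}
		if ok {
			return newSafePoint, nil
		}
		// Revision mismatch: start over from GetGCMinServiceSafePointByServiceGroup.
	}
}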
@@ -1921,13 +1921,14 @@ func (c *client) gcClient() gcpb.GCClient { } return nil } -func (c *client) GetAllServiceGroups(ctx context.Context) ([]string, error) { + +func (c *client) GetGCAllServiceGroups(ctx context.Context) ([]string, error) { if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetAllServiceGroups", opentracing.ChildOf(span.Context())) + span = opentracing.StartSpan("pdclient.GetGCAllServiceGroups", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() - defer func() { cmdDurationGetAllServiceGroups.Observe(time.Since(start).Seconds()) }() + defer func() { cmdDurationGetGCAllServiceGroups.Observe(time.Since(start).Seconds()) }() ctx, cancel := context.WithTimeout(ctx, c.option.timeout) req := &gcpb.GetAllServiceGroupsRequest{ Header: c.gcHeader(), @@ -1937,25 +1938,26 @@ func (c *client) GetAllServiceGroups(ctx context.Context) ([]string, error) { cancel() if err != nil { - cmdFailedDurationGetAllServiceGroups.Observe(time.Since(start).Seconds()) + cmdFailedDurationGetGCAllServiceGroups.Observe(time.Since(start).Seconds()) c.ScheduleCheckLeader() return nil, errors.WithStack(err) } // have to return a slice of string - returnSlice := make([]string, len(resp.ServiceGroupId)) + returnSlice := make([]string, 0, len(resp.ServiceGroupId)) for _, serviceGroupID := range resp.ServiceGroupId { returnSlice = append(returnSlice, string(serviceGroupID)) } return returnSlice, nil } -func (c *client) GetMinServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID string) (safePoint uint64, revision int64, err error) { + +func (c *client) GetGCMinServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID string) (safePoint uint64, revision int64, err error) { if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetMinServiceSafePointByServiceGroup", opentracing.ChildOf(span.Context())) + span = opentracing.StartSpan("pdclient.GetGCMinServiceSafePointByServiceGroup", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() - defer func() { cmdDurationGetMinServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) }() + defer func() { cmdDurationGetGCMinServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) }() ctx, cancel := context.WithTimeout(ctx, c.option.timeout) req := &gcpb.GetMinServiceSafePointByServiceGroupRequest{ Header: c.gcHeader(), @@ -1966,13 +1968,14 @@ func (c *client) GetMinServiceSafePointByServiceGroup(ctx context.Context, servi cancel() if err != nil { - cmdFailedDurationGetMinServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) + cmdFailedDurationGetGCMinServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) c.ScheduleCheckLeader() return 0, 0, errors.WithStack(err) } return resp.SafePoint, resp.Revision, nil } + func (c *client) UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGroupID string, safePoint uint64, revision int64) (succeeded bool, newSafePoint uint64, err error) { if span := opentracing.SpanFromContext(ctx); span != nil { span = opentracing.StartSpan("pdclient.UpdateGCSafePointByServiceGroup", opentracing.ChildOf(span.Context())) @@ -1999,13 +2002,14 @@ func (c *client) UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGro // if requested safepoint is the new safepoint, then update succeeded return resp.Succeeded, resp.NewSafePoint, nil } -func (c *client) UpdateServiceSafePointByServiceGroup(ctx context.Context, 
serviceGroupID, serviceID string, ttl int64, safePoint uint64) (succeeded bool, gcSafePoint, oldSafePoint, newSafePoint uint64, err error) { + +func (c *client) UpdateGCServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID, serviceID string, ttl int64, safePoint uint64) (succeeded bool, gcSafePoint, oldSafePoint, newSafePoint uint64, err error) { if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.UpdateServiceSafePointByServiceGroup", opentracing.ChildOf(span.Context())) + span = opentracing.StartSpan("pdclient.UpdateGCServiceSafePointByServiceGroup", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() - defer func() { cmdDurationUpdateServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) }() + defer func() { cmdDurationUpdateGCServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) }() ctx, cancel := context.WithTimeout(ctx, c.option.timeout) req := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ Header: c.gcHeader(), @@ -2019,20 +2023,21 @@ func (c *client) UpdateServiceSafePointByServiceGroup(ctx context.Context, servi cancel() if err != nil { - cmdFailedDurationUpdateServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) + cmdFailedDurationUpdateGCServiceSafePointByServiceGroup.Observe(time.Since(start).Seconds()) c.ScheduleCheckLeader() return false, 0, 0, 0, errors.WithStack(err) } return resp.Succeeded, resp.GcSafePoint, resp.OldSafePoint, resp.NewSafePoint, nil } -func (c *client) GetAllServiceGroupGCSafePoints(ctx context.Context) ([]*gcpb.ServiceGroupSafePoint, error) { + +func (c *client) GetGCAllServiceGroupSafePoints(ctx context.Context) ([]*gcpb.ServiceGroupSafePoint, error) { if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetAllServiceGroupGCSafePoints", opentracing.ChildOf(span.Context())) + span = opentracing.StartSpan("pdclient.GetGCAllServiceGroupSafePoints", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() - defer func() { cmdDurationGetAllServiceGroupGCSafePoints.Observe(time.Since(start).Seconds()) }() + defer func() { cmdDurationGetGCAllServiceGroupSafePoints.Observe(time.Since(start).Seconds()) }() ctx, cancel := context.WithTimeout(ctx, c.option.timeout) req := &gcpb.GetAllServiceGroupGCSafePointsRequest{ Header: c.gcHeader(), @@ -2042,7 +2047,7 @@ func (c *client) GetAllServiceGroupGCSafePoints(ctx context.Context) ([]*gcpb.Se cancel() if err != nil { - cmdFailedDurationGetAllServiceGroupGCSafePoints.Observe(time.Since(start).Seconds()) + cmdFailedDurationGetGCAllServiceGroupSafePoints.Observe(time.Since(start).Seconds()) c.ScheduleCheckLeader() return nil, errors.WithStack(err) } diff --git a/client/metrics.go b/client/metrics.go index 90ed6f0402e..1e9eb5fe80b 100644 --- a/client/metrics.go +++ b/client/metrics.go @@ -82,45 +82,47 @@ var ( var ( // WithLabelValues is a heavy operation, define variable to avoid call it every time. 
- cmdDurationWait = cmdDuration.WithLabelValues("wait") - cmdDurationTSO = cmdDuration.WithLabelValues("tso") - cmdDurationTSOAsyncWait = cmdDuration.WithLabelValues("tso_async_wait") - cmdDurationGetRegion = cmdDuration.WithLabelValues("get_region") - cmdDurationGetAllMembers = cmdDuration.WithLabelValues("get_member_info") - cmdDurationGetPrevRegion = cmdDuration.WithLabelValues("get_prev_region") - cmdDurationGetRegionByID = cmdDuration.WithLabelValues("get_region_byid") - cmdDurationScanRegions = cmdDuration.WithLabelValues("scan_regions") - cmdDurationGetStore = cmdDuration.WithLabelValues("get_store") - cmdDurationGetAllStores = cmdDuration.WithLabelValues("get_all_stores") - cmdDurationUpdateGCSafePoint = cmdDuration.WithLabelValues("update_gc_safe_point") - cmdDurationUpdateServiceGCSafePoint = cmdDuration.WithLabelValues("update_service_gc_safe_point") - cmdDurationScatterRegion = cmdDuration.WithLabelValues("scatter_region") - cmdDurationScatterRegions = cmdDuration.WithLabelValues("scatter_regions") - cmdDurationGetOperator = cmdDuration.WithLabelValues("get_operator") - cmdDurationSplitRegions = cmdDuration.WithLabelValues("split_regions") - cmdDurationSplitAndScatterRegions = cmdDuration.WithLabelValues("split_and_scatter_regions") - cmdDurationGetAllServiceGroups = cmdDuration.WithLabelValues("get_all_service_groups") - cmdDurationGetMinServiceSafePointByServiceGroup = cmdDuration.WithLabelValues("get_min_service_safe_point_by_service_group") - cmdDurationUpdateGCSafePointByServiceGroup = cmdDuration.WithLabelValues("update_gc_safe_point_by_service_group") - cmdDurationUpdateServiceSafePointByServiceGroup = cmdDuration.WithLabelValues("update_service_safe_point_by_service_group") - cmdDurationGetAllServiceGroupGCSafePoints = cmdDuration.WithLabelValues("get_all_service_group_gc_safe_points") + cmdDurationWait = cmdDuration.WithLabelValues("wait") + cmdDurationTSO = cmdDuration.WithLabelValues("tso") + cmdDurationTSOAsyncWait = cmdDuration.WithLabelValues("tso_async_wait") + cmdDurationGetRegion = cmdDuration.WithLabelValues("get_region") + cmdDurationGetAllMembers = cmdDuration.WithLabelValues("get_member_info") + cmdDurationGetPrevRegion = cmdDuration.WithLabelValues("get_prev_region") + cmdDurationGetRegionByID = cmdDuration.WithLabelValues("get_region_byid") + cmdDurationScanRegions = cmdDuration.WithLabelValues("scan_regions") + cmdDurationGetStore = cmdDuration.WithLabelValues("get_store") + cmdDurationGetAllStores = cmdDuration.WithLabelValues("get_all_stores") + cmdDurationUpdateGCSafePoint = cmdDuration.WithLabelValues("update_gc_safe_point") + cmdDurationUpdateServiceGCSafePoint = cmdDuration.WithLabelValues("update_service_gc_safe_point") + cmdDurationScatterRegion = cmdDuration.WithLabelValues("scatter_region") + cmdDurationScatterRegions = cmdDuration.WithLabelValues("scatter_regions") + cmdDurationGetOperator = cmdDuration.WithLabelValues("get_operator") + cmdDurationSplitRegions = cmdDuration.WithLabelValues("split_regions") + cmdDurationSplitAndScatterRegions = cmdDuration.WithLabelValues("split_and_scatter_regions") - cmdFailDurationGetRegion = cmdFailedDuration.WithLabelValues("get_region") - cmdFailDurationTSO = cmdFailedDuration.WithLabelValues("tso") - cmdFailDurationGetAllMembers = cmdFailedDuration.WithLabelValues("get_member_info") - cmdFailDurationGetPrevRegion = cmdFailedDuration.WithLabelValues("get_prev_region") - cmdFailedDurationGetRegionByID = cmdFailedDuration.WithLabelValues("get_region_byid") - cmdFailedDurationScanRegions = 
cmdFailedDuration.WithLabelValues("scan_regions") - cmdFailedDurationGetStore = cmdFailedDuration.WithLabelValues("get_store") - cmdFailedDurationGetAllStores = cmdFailedDuration.WithLabelValues("get_all_stores") - cmdFailedDurationUpdateGCSafePoint = cmdFailedDuration.WithLabelValues("update_gc_safe_point") - cmdFailedDurationUpdateServiceGCSafePoint = cmdFailedDuration.WithLabelValues("update_service_gc_safe_point") - cmdFailedDurationGetAllServiceGroups = cmdFailedDuration.WithLabelValues("get_all_service_groups") - cmdFailedDurationGetMinServiceSafePointByServiceGroup = cmdFailedDuration.WithLabelValues("get_min_service_safe_point_by_service_group") - cmdFailedDurationUpdateGCSafePointByServiceGroup = cmdFailedDuration.WithLabelValues("update_gc_safe_point_by_service_group") - cmdFailedDurationUpdateServiceSafePointByServiceGroup = cmdFailedDuration.WithLabelValues("update_service_safe_point_by_service_group") - cmdFailedDurationGetAllServiceGroupGCSafePoints = cmdFailedDuration.WithLabelValues("get_all_service_group_gc_safe_points") - requestDurationTSO = requestDuration.WithLabelValues("tso") + cmdDurationGetGCAllServiceGroups = cmdDuration.WithLabelValues("get_gc_all_service_groups") + cmdDurationGetGCMinServiceSafePointByServiceGroup = cmdDuration.WithLabelValues("get_gc_min_service_safe_point_by_service_group") + cmdDurationUpdateGCSafePointByServiceGroup = cmdDuration.WithLabelValues("update_gc_safe_point_by_service_group") + cmdDurationUpdateGCServiceSafePointByServiceGroup = cmdDuration.WithLabelValues("update_gc_service_safe_point_by_service_group") + cmdDurationGetGCAllServiceGroupSafePoints = cmdDuration.WithLabelValues("get_gc_all_service_group_safe_points") + + cmdFailDurationGetRegion = cmdFailedDuration.WithLabelValues("get_region") + cmdFailDurationTSO = cmdFailedDuration.WithLabelValues("tso") + cmdFailDurationGetAllMembers = cmdFailedDuration.WithLabelValues("get_member_info") + cmdFailDurationGetPrevRegion = cmdFailedDuration.WithLabelValues("get_prev_region") + cmdFailedDurationGetRegionByID = cmdFailedDuration.WithLabelValues("get_region_byid") + cmdFailedDurationScanRegions = cmdFailedDuration.WithLabelValues("scan_regions") + cmdFailedDurationGetStore = cmdFailedDuration.WithLabelValues("get_store") + cmdFailedDurationGetAllStores = cmdFailedDuration.WithLabelValues("get_all_stores") + cmdFailedDurationUpdateGCSafePoint = cmdFailedDuration.WithLabelValues("update_gc_safe_point") + cmdFailedDurationUpdateServiceGCSafePoint = cmdFailedDuration.WithLabelValues("update_service_gc_safe_point") + requestDurationTSO = requestDuration.WithLabelValues("tso") + + cmdFailedDurationGetGCAllServiceGroups = cmdFailedDuration.WithLabelValues("get_gc_all_service_groups") + cmdFailedDurationGetGCMinServiceSafePointByServiceGroup = cmdFailedDuration.WithLabelValues("get_gc_min_service_safe_point_by_service_group") + cmdFailedDurationUpdateGCSafePointByServiceGroup = cmdFailedDuration.WithLabelValues("update_gc_safe_point_by_service_group") + cmdFailedDurationUpdateGCServiceSafePointByServiceGroup = cmdFailedDuration.WithLabelValues("update_gc_service_safe_point_by_service_group") + cmdFailedDurationGetGCAllServiceGroupSafePoints = cmdFailedDuration.WithLabelValues("get_gc_all_service_group_safe_points") ) func init() { diff --git a/server/gc_service.go b/server/gc_service.go index 5a1b03ea732..f60f924b712 100644 --- a/server/gc_service.go +++ b/server/gc_service.go @@ -1,4 +1,4 @@ -// Copyright 2017 TiKV Project Authors. +// Copyright 2022 TiKV Project Authors. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ import ( "go.uber.org/zap" ) +// GcServer wraps Server to provide garbage collection service. type GcServer struct { *Server } @@ -63,9 +64,8 @@ func (s *GcServer) safePointRollbackHeader(requestSafePoint, requiredSafePoint u }) } -// GetAllServiceGroups return all service group ids +// GetAllServiceGroups return all service group IDs. func (s *GcServer) GetAllServiceGroups(ctx context.Context, request *gcpb.GetAllServiceGroupsRequest) (*gcpb.GetAllServiceGroupsResponse, error) { - rc := s.GetRaftCluster() if rc == nil { return &gcpb.GetAllServiceGroupsResponse{Header: s.notBootstrappedHeader()}, nil @@ -77,14 +77,37 @@ func (s *GcServer) GetAllServiceGroups(ctx context.Context, request *gcpb.GetAll return nil, err } + serviceGroupIDs := make([][]byte, 0, len(serviceGroupList)) + for _, sg := range serviceGroupList { + serviceGroupIDs = append(serviceGroupIDs, []byte(sg)) + } + return &gcpb.GetAllServiceGroupsResponse{ Header: s.header(), - ServiceGroupId: serviceGroupList, + ServiceGroupId: serviceGroupIDs, }, nil } -// GetMinServiceSafePointByServiceGroup returns given service group's min service safe point +// getServiceRevisionByServiceGroup return etcd ModRevision of given service group. +// It's used to detect new service safe point between `GetMinServiceSafePointByServiceGroup` & `UpdateGCSafePointByServiceGroup`. +// Return -1 if the service group is not existed. +func (s *GcServer) getServiceRevisionByServiceGroup(ctx context.Context, serviceGroupID string) (int64, error) { + servicePath := endpoint.GCServiceSafePointPrefixPathByServiceGroup(serviceGroupID) + rsp, err := s.client.Get(ctx, servicePath) + if err != nil { + return -1, err + } + if rsp == nil { + return -1, nil + } + return rsp.Kvs[0].ModRevision, nil +} + +// GetMinServiceSafePointByServiceGroup returns given service group's min service safe point. func (s *GcServer) GetMinServiceSafePointByServiceGroup(ctx context.Context, request *gcpb.GetMinServiceSafePointByServiceGroupRequest) (*gcpb.GetMinServiceSafePointByServiceGroupResponse, error) { + // Lock to ensure that there is no other change between `min` and `currentRevison`. + s.serviceGroupSafePointLock.Lock() + defer s.serviceGroupSafePointLock.Unlock() rc := s.GetRaftCluster() if rc == nil { @@ -106,9 +129,12 @@ func (s *GcServer) GetMinServiceSafePointByServiceGroup(ctx context.Context, req if min != nil { returnSafePoint = min.SafePoint } - // perform a get operation on a non-existing key to obtain current etcd revision number from response header - rsp, _ := s.client.Get(ctx, "NA") - currentRevision := rsp.Header.GetRevision() + + currentRevision, err := s.getServiceRevisionByServiceGroup(ctx, serviceGroupID) + if err != nil { + return nil, err + } + return &gcpb.GetMinServiceSafePointByServiceGroupResponse{ Header: s.header(), SafePoint: returnSafePoint, @@ -116,7 +142,7 @@ func (s *GcServer) GetMinServiceSafePointByServiceGroup(ctx context.Context, req }, nil } -// UpdateGCSafePointByServiceGroup used by gc_worker to update their gc safe points +// UpdateGCSafePointByServiceGroup used by gc_worker to update their gc safe points. 
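+// The expected flow is: call GetMinServiceSafePointByServiceGroup to obtain the current min service
+// safe point together with a revision, then pass that revision back in this request. If a new service
+// safe point was registered in between, the revision check fails and the update is rejected with
+// Succeeded set to false, in which case gc_worker should fetch the min again and retry.
+// A rough client-side sketch (illustrative only, using the pd client API in this patch):
+//
+//	safePoint, rev, err := cli.GetGCMinServiceSafePointByServiceGroup(ctx, serviceGroupID)
+//	// ... choose the gc safe point to advance to, bounded by safePoint ...
+//	ok, _, err := cli.UpdateGCSafePointByServiceGroup(ctx, serviceGroupID, safePoint, rev)
+//	// !ok means a revision mismatch or a safe point rollback; fetch the min and retry.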
func (s *GcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, request *gcpb.UpdateGCSafePointByServiceGroupRequest) (*gcpb.UpdateGCSafePointByServiceGroupResponse, error) { s.serviceGroupSafePointLock.Lock() defer s.serviceGroupSafePointLock.Unlock() @@ -127,10 +153,13 @@ func (s *GcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, request } var storage endpoint.GCSafePointStorage = s.storage + serviceGroupID := string(request.ServiceGroupId) - // check if revision changed since last min calculation - rsp, _ := s.client.Get(ctx, "NA") - currentRevision := rsp.Header.GetRevision() + // check if revision changed since last min calculation. + currentRevision, err := s.getServiceRevisionByServiceGroup(ctx, serviceGroupID) + if err != nil { + return nil, err + } requestRevision := request.GetRevision() if currentRevision != requestRevision { return &gcpb.UpdateGCSafePointByServiceGroupResponse{ @@ -139,17 +168,16 @@ func (s *GcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, request NewSafePoint: 0, }, nil } - serviceGroupID := string(request.ServiceGroupId) - newSafePoint := &endpoint.GCSafePoint{ + + newSafePoint := &endpoint.ServiceGroupGCSafePoint{ ServiceGroupID: serviceGroupID, SafePoint: request.SafePoint, } - - prev, err := storage.LoadGCWorkerSafePoint(serviceGroupID) + prev, err := storage.LoadGCSafePointByServiceGroup(serviceGroupID) if err != nil { return nil, err } - // if no previous safepoint, treat it as 0 + // if no previous safepoint, treat it as 0. var oldSafePoint uint64 = 0 if prev != nil { oldSafePoint = prev.SafePoint @@ -157,7 +185,7 @@ func (s *GcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, request response := &gcpb.UpdateGCSafePointByServiceGroupResponse{} - // fail to store due to safe point rollback + // fail to store due to safe point rollback. if newSafePoint.SafePoint < oldSafePoint { log.Warn("trying to update gc_worker safe point", zap.String("service-group-id", serviceGroupID), @@ -169,8 +197,8 @@ func (s *GcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, request return response, nil } - // save the safe point to storage - if err := storage.SaveGCWorkerSafePoint(newSafePoint); err != nil { + // save the safe point to storage. + if err := storage.SaveGCSafePointByServiceGroup(newSafePoint); err != nil { return nil, err } response.Header = s.header() @@ -178,11 +206,12 @@ func (s *GcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, request response.NewSafePoint = newSafePoint.SafePoint log.Info("updated gc_worker safe point", zap.String("service-group-id", serviceGroupID), - zap.Uint64("safe-point", newSafePoint.SafePoint)) + zap.Uint64("safe-point", newSafePoint.SafePoint), + zap.Uint64("old-safe-point", oldSafePoint)) return response, nil } -// UpdateServiceSafePointByServiceGroup for services like CDC/BR/Lightning to update gc safe points in PD +// UpdateServiceSafePointByServiceGroup for services like CDC/BR/Lightning to update gc safe points in PD. 
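+// A TTL <= 0 removes the service's safe point and returns immediately. Otherwise the safe point
+// expires at now+TTL (clamped to math.MaxInt64 on overflow), and the requested safe point must not
+// be smaller than the service's previous safe point (or, if the service has none, the service
+// group's gc safe point), otherwise the request fails with a safe point rollback error.
+// Illustrative call through the pd client API in this patch:
+//
+//	ok, gcSafePoint, oldSafePoint, newSafePoint, err :=
+//		cli.UpdateGCServiceSafePointByServiceGroup(ctx, serviceGroupID, serviceID, ttl, safePoint)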
func (s *GcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, request *gcpb.UpdateServiceSafePointByServiceGroupRequest) (*gcpb.UpdateServiceSafePointByServiceGroupResponse, error) { s.serviceGroupSafePointLock.Lock() defer s.serviceGroupSafePointLock.Unlock() @@ -195,7 +224,7 @@ func (s *GcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, req var storage endpoint.GCSafePointStorage = s.storage serviceGroupID := string(request.ServiceGroupId) serviceID := string(request.ServiceId) - // a less than 0 ttl means to remove the safe point, immediately return after the deletion request + // a less than 0 ttl means to remove the safe point, immediately return after the deletion request. if request.TTL <= 0 { if err := storage.RemoveServiceSafePointByServiceGroup(serviceGroupID, serviceID); err != nil { return nil, err @@ -212,19 +241,19 @@ func (s *GcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, req } now, _ := tsoutil.ParseTimestamp(nowTSO) - sspOld, err := storage.LoadServiceSafePoint(serviceGroupID, serviceID) + sspOld, err := storage.LoadServiceSafePointByServiceGroup(serviceGroupID, serviceID) if err != nil { return nil, err } - gcsp, err := storage.LoadGCWorkerSafePoint(serviceGroupID) + gcsp, err := storage.LoadGCSafePointByServiceGroup(serviceGroupID) if err != nil { return nil, err } response := &gcpb.UpdateServiceSafePointByServiceGroupResponse{} - // safePointLowerBound is the minimum request.SafePoint for update request to succeed - // it is oldServiceSafePoint if oldServiceSafePoint exists, else gcSafePoint if it exists - // otherwise it's set to 0, indicate all safePoint accepted + // safePointLowerBound is the minimum request.SafePoint for update request to succeed. + // It is oldServiceSafePoint if oldServiceSafePoint exists, else gcSafePoint if it exists. + // Otherwise it's set to 0, indicate all safePoint accepted. var safePointLowerBound uint64 = 0 if gcsp != nil { safePointLowerBound = gcsp.SafePoint @@ -235,7 +264,7 @@ func (s *GcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, req response.OldSafePoint = sspOld.SafePoint } - // request.SafePoint smaller than safePointLowerBound, we have a safePointRollBack + // request.SafePoint smaller than safePointLowerBound, we have a safePointRollBack. if request.SafePoint < safePointLowerBound { response.Header = s.safePointRollbackHeader(request.SafePoint, safePointLowerBound) response.Succeeded = false @@ -249,7 +278,7 @@ func (s *GcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, req ExpiredAt: now.Unix() + request.TTL, SafePoint: request.SafePoint, } - // handles overflow + // Handles overflow. if math.MaxInt64-now.Unix() <= request.TTL { ssp.ExpiredAt = math.MaxInt64 } @@ -264,25 +293,24 @@ func (s *GcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, req return response, nil } -// GetAllServiceGroupGCSafePoints returns all service group's gc safe point +// GetAllServiceGroupGCSafePoints returns all service group's gc safe point. 
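+// Each entry in the response pairs a service group ID with the gc safe point currently persisted for
+// that group; service groups whose gc safe point has never been saved do not appear in the result.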
func (s *GcServer) GetAllServiceGroupGCSafePoints(ctx context.Context, request *gcpb.GetAllServiceGroupGCSafePointsRequest) (*gcpb.GetAllServiceGroupGCSafePointsResponse, error) { - rc := s.GetRaftCluster() if rc == nil { return &gcpb.GetAllServiceGroupGCSafePointsResponse{Header: s.notBootstrappedHeader()}, nil } var storage endpoint.GCSafePointStorage = s.storage - serviceIDs, gcSafePoints, err := storage.LoadAllServiceGroupGCSafePoints() + gcSafePoints, err := storage.LoadAllServiceGroupGCSafePoints() if err != nil { return nil, err } - safePoints := make([]*gcpb.ServiceGroupSafePoint, 0, 2) - for i := range serviceIDs { + safePoints := make([]*gcpb.ServiceGroupSafePoint, 0, len(gcSafePoints)) + for _, sp := range gcSafePoints { safePoints = append(safePoints, &gcpb.ServiceGroupSafePoint{ - ServiceGroupId: serviceIDs[i], - SafePoint: gcSafePoints[i], + ServiceGroupId: []byte(sp.ServiceGroupID), + SafePoint: sp.SafePoint, }) } return &gcpb.GetAllServiceGroupGCSafePointsResponse{ diff --git a/server/server.go b/server/server.go index 5f0beb20aff..27ccd8b2d7b 100644 --- a/server/server.go +++ b/server/server.go @@ -274,8 +274,8 @@ func CreateServer(ctx context.Context, cfg *config.Config, serviceBuilders ...Ha } etcdCfg.ServiceRegister = func(gs *grpc.Server) { pdpb.RegisterPDServer(gs, &GrpcServer{Server: s}) - diagnosticspb.RegisterDiagnosticsServer(gs, s) gcpb.RegisterGCServer(gs, &GcServer{Server: s}) + diagnosticspb.RegisterDiagnosticsServer(gs, s) } s.etcdCfg = etcdCfg if EnableZap { diff --git a/server/storage/endpoint/gc_safe_point.go b/server/storage/endpoint/gc_safe_point.go index 27c463c9b03..ffbfe9cdcd5 100644 --- a/server/storage/endpoint/gc_safe_point.go +++ b/server/storage/endpoint/gc_safe_point.go @@ -17,7 +17,6 @@ package endpoint import ( "encoding/json" "math" - "path" "strconv" "time" @@ -35,8 +34,8 @@ type ServiceSafePoint struct { SafePoint uint64 `json:"safe_point"` } -// GCSafePoint is gcWorker's safepoint for specific service group -type GCSafePoint struct { +// ServiceGroupGCSafePoint is gcWorker's safepoint for specific service group +type ServiceGroupGCSafePoint struct { ServiceGroupID string `json:"service_group_id"` SafePoint uint64 `json:"safe_point"` } @@ -50,14 +49,16 @@ type GCSafePointStorage interface { SaveServiceGCSafePoint(ssp *ServiceSafePoint) error RemoveServiceGCSafePoint(serviceID string) error - LoadAllServiceGroups() ([][]byte, error) + LoadAllServiceGroups() ([]string, error) + // Service safe point interfaces. + SaveServiceSafePointByServiceGroup(serviceGroupID string, ssp *ServiceSafePoint) error + LoadServiceSafePointByServiceGroup(serviceGroupID, serviceID string) (*ServiceSafePoint, error) LoadMinServiceSafePointByServiceGroup(serviceGroupID string, now time.Time) (*ServiceSafePoint, error) - LoadGCWorkerSafePoint(serviceGroupID string) (*GCSafePoint, error) - SaveGCWorkerSafePoint(gcSafePoint *GCSafePoint) error RemoveServiceSafePointByServiceGroup(serviceGroupID, serviceID string) error - LoadServiceSafePoint(serviceGroupID, serviceID string) (*ServiceSafePoint, error) - SaveServiceSafePointByServiceGroup(serviceGroupID string, ssp *ServiceSafePoint) error - LoadAllServiceGroupGCSafePoints() ([][]byte, []uint64, error) + // GC safe point interfaces. 
+ SaveGCSafePointByServiceGroup(gcSafePoint *ServiceGroupGCSafePoint) error + LoadGCSafePointByServiceGroup(serviceGroupID string) (*ServiceGroupGCSafePoint, error) + LoadAllServiceGroupGCSafePoints() ([]*ServiceGroupGCSafePoint, error) } var _ GCSafePointStorage = (*StorageEndpoint)(nil) @@ -202,141 +203,3 @@ func (se *StorageEndpoint) RemoveServiceGCSafePoint(serviceID string) error { key := gcSafePointServicePath(serviceID) return se.Remove(key) } - -// LoadMinServiceSafePointByServiceGroup returns the minimum safepoint for the given service group -// note that gc worker safe point are store separately -// If no service safe point exist for the given service group or the only service safe point just expired, return nil -func (se *StorageEndpoint) LoadMinServiceSafePointByServiceGroup(serviceGroupID string, now time.Time) (*ServiceSafePoint, error) { - prefix := serviceSafePointPrefixPath(serviceGroupID) - prefixEnd := clientv3.GetPrefixRangeEnd(prefix) - keys, values, err := se.LoadRange(prefix, prefixEnd, 0) - if err != nil { - return nil, err - } - - if len(keys) == 0 { - // the given service group does not have a service safe point yet - return nil, nil - } - - min := &ServiceSafePoint{SafePoint: math.MaxUint64} - for i, key := range keys { - ssp := &ServiceSafePoint{} - if err := json.Unmarshal([]byte(values[i]), ssp); err != nil { - return nil, err - } - - // remove expired safe points - if ssp.ExpiredAt < now.Unix() { - se.Remove(key) - continue - } - - if ssp.SafePoint < min.SafePoint { - min = ssp - } - } - - if min.SafePoint == math.MaxUint64 { - // the only service safe point just expired - return nil, nil - } - - // successfully found a valid min safe point - return min, nil -} - -// LoadAllServiceGroups returns a list of all service group IDs -func (se *StorageEndpoint) LoadAllServiceGroups() ([][]byte, error) { - prefix := gcSafePointPrefixPath() - prefixEnd := clientv3.GetPrefixRangeEnd(prefix) - keys, _, err := se.LoadRange(prefix, prefixEnd, 0) - if err != nil { - return nil, err - } - - serviceGroupIDs := make([][]byte, 0, 2) - for _, key := range keys { - _, serviceGroupID := path.Split(key) - serviceGroupIDs = append(serviceGroupIDs, []byte(serviceGroupID)) - } - return serviceGroupIDs, nil -} - -// LoadGCWorkerSafePoint reads GCSafePoint for the given service group -// return nil if no safepoint not exist -func (se *StorageEndpoint) LoadGCWorkerSafePoint(serviceGroupID string) (*GCSafePoint, error) { - value, err := se.Load(gcSafePointPathByServiceGroup(serviceGroupID)) - if err != nil || value == "" { - return nil, err - } - gcSafePoint := &GCSafePoint{} - if err := json.Unmarshal([]byte(value), gcSafePoint); err != nil { - return nil, err - } - return gcSafePoint, nil -} - -// SaveGCWorkerSafePoint saves GCSafePoint under given service group -func (se *StorageEndpoint) SaveGCWorkerSafePoint(gcSafePoint *GCSafePoint) error { - safePoint, err := json.Marshal(gcSafePoint) - if err != nil { - return err - } - return se.Save(gcSafePointPathByServiceGroup(gcSafePoint.ServiceGroupID), string(safePoint)) -} - -// RemoveServiceSafePointByServiceGroup removes a service safe point -func (se *StorageEndpoint) RemoveServiceSafePointByServiceGroup(serviceGroupID, serviceID string) error { - key := ServiceSafePointPath(serviceGroupID, serviceID) - return se.Remove(key) -} - -// LoadServiceSafePoint reads ServiceSafePoint for the given service group and service name -// return nil if no safepoint not exist -func (se *StorageEndpoint) LoadServiceSafePoint(serviceGroupID, serviceID 
string) (*ServiceSafePoint, error) { - value, err := se.Load(ServiceSafePointPath(serviceGroupID, serviceID)) - if err != nil || value == "" { - return nil, err - } - serviceSafePoint := &ServiceSafePoint{} - if err := json.Unmarshal([]byte(value), serviceSafePoint); err != nil { - return nil, err - } - return serviceSafePoint, nil -} - -// SaveServiceSafePointByServiceGroup saves service safe point under given service group -func (se *StorageEndpoint) SaveServiceSafePointByServiceGroup(serviceGroupID string, ssp *ServiceSafePoint) error { - if ssp.ServiceID == "" { - return errors.New("service id of service safepoint cannot be empty") - } - key := ServiceSafePointPath(serviceGroupID, ssp.ServiceID) - value, err := json.Marshal(ssp) - if err != nil { - return err - } - - return se.Save(key, string(value)) -} - -// LoadAllServiceGroupGCSafePoints returns two slices of ServiceGroupIDs and their corresponding safe points -func (se *StorageEndpoint) LoadAllServiceGroupGCSafePoints() ([][]byte, []uint64, error) { - prefix := gcSafePointPrefixPath() - prefixEnd := clientv3.GetPrefixRangeEnd(prefix) - keys, values, err := se.LoadRange(prefix, prefixEnd, 0) - if err != nil { - return nil, nil, err - } - serviceIDs := make([][]byte, 0, 2) // there are probably only two service groups - safePoints := make([]uint64, 0, 2) - for i := range keys { - gcSafePoint := &GCSafePoint{} - if err := json.Unmarshal([]byte(values[i]), gcSafePoint); err != nil { - return nil, nil, err - } - serviceIDs = append(serviceIDs, []byte(gcSafePoint.ServiceGroupID)) - safePoints = append(safePoints, gcSafePoint.SafePoint) - } - return serviceIDs, safePoints, nil -} diff --git a/server/storage/endpoint/gc_service_group.go b/server/storage/endpoint/gc_service_group.go new file mode 100644 index 00000000000..57547c16224 --- /dev/null +++ b/server/storage/endpoint/gc_service_group.go @@ -0,0 +1,154 @@ +// Copyright 2022 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package endpoint + +import ( + "encoding/json" + "math" + "time" + + "github.com/pingcap/errors" + "go.etcd.io/etcd/clientv3" +) + +// Predefine service groups. More service groups would come from "Multi-tenant". +const ( + // ServiceGroupRawKVDefault is service group ID for RawKV. + ServiceGroupRawKVDefault = "default_rawkv" +) + +// LoadAllServiceGroups returns a list of all service group IDs. +// We have only predefine service groups by now. +// More service groups would come from "Multi-tenant". +func (se *StorageEndpoint) LoadAllServiceGroups() ([]string, error) { + serviceGroupIDs := []string{ + ServiceGroupRawKVDefault, + } + + return serviceGroupIDs, nil +} + +// SaveServiceSafePointByServiceGroup saves service safe point under given service group. 
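+// Each service keeps its own key under the prefix of its service group
+// (see GCServiceSafePointPathByServiceGroup in key_path.go):
+//
+//	gc_servicegroup/service_safepoint/$service_group_id/$service_id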
+func (se *StorageEndpoint) SaveServiceSafePointByServiceGroup(serviceGroupID string, ssp *ServiceSafePoint) error {
+	if ssp.ServiceID == "" {
+		return errors.New("service id of service safepoint cannot be empty")
+	}
+	key := GCServiceSafePointPathByServiceGroup(serviceGroupID, ssp.ServiceID)
+	value, err := json.Marshal(ssp)
+	if err != nil {
+		return err
+	}
+	return se.Save(key, string(value))
+}
+
+// LoadServiceSafePointByServiceGroup reads the ServiceSafePoint for the given service group and service name.
+// Return nil if no safe point exists.
+func (se *StorageEndpoint) LoadServiceSafePointByServiceGroup(serviceGroupID, serviceID string) (*ServiceSafePoint, error) {
+	value, err := se.Load(GCServiceSafePointPathByServiceGroup(serviceGroupID, serviceID))
+	if err != nil || value == "" {
+		return nil, err
+	}
+	ssp := &ServiceSafePoint{}
+	if err := json.Unmarshal([]byte(value), ssp); err != nil {
+		return nil, err
+	}
+	return ssp, nil
+}
+
+// LoadMinServiceSafePointByServiceGroup returns the minimum safe point for the given service group.
+// Note that the gc_worker safe point is stored separately.
+// If no service safe point exists for the given service group, or all of its service safe points have expired, return nil.
+func (se *StorageEndpoint) LoadMinServiceSafePointByServiceGroup(serviceGroupID string, now time.Time) (*ServiceSafePoint, error) {
+	prefix := GCServiceSafePointPrefixPathByServiceGroup(serviceGroupID)
+	prefixEnd := clientv3.GetPrefixRangeEnd(prefix)
+	keys, values, err := se.LoadRange(prefix, prefixEnd, 0)
+	if err != nil {
+		return nil, err
+	}
+
+	min := &ServiceSafePoint{SafePoint: math.MaxUint64}
+	for i, key := range keys {
+		ssp := &ServiceSafePoint{}
+		if err := json.Unmarshal([]byte(values[i]), ssp); err != nil {
+			return nil, err
+		}
+
+		// remove expired safe points.
+		if ssp.ExpiredAt < now.Unix() {
+			se.Remove(key)
+			continue
+		}
+
+		if ssp.SafePoint < min.SafePoint {
+			min = ssp
+		}
+	}
+
+	if min.SafePoint == math.MaxUint64 {
+		// no service safe point or all of them are expired.
+		return nil, nil
+	}
+
+	// successfully found a valid min safe point.
+	return min, nil
+}
+
+// RemoveServiceSafePointByServiceGroup removes a service safe point.
+func (se *StorageEndpoint) RemoveServiceSafePointByServiceGroup(serviceGroupID, serviceID string) error {
+	key := GCServiceSafePointPathByServiceGroup(serviceGroupID, serviceID)
+	return se.Remove(key)
+}
+
+// SaveGCSafePointByServiceGroup saves the ServiceGroupGCSafePoint of the given service group.
+func (se *StorageEndpoint) SaveGCSafePointByServiceGroup(gcSafePoint *ServiceGroupGCSafePoint) error {
+	safePoint, err := json.Marshal(gcSafePoint)
+	if err != nil {
+		return err
+	}
+	return se.Save(gcSafePointPathByServiceGroup(gcSafePoint.ServiceGroupID), string(safePoint))
+}
+
+// LoadGCSafePointByServiceGroup reads the ServiceGroupGCSafePoint for the given service group.
+// Return nil if no safe point exists.
+func (se *StorageEndpoint) LoadGCSafePointByServiceGroup(serviceGroupID string) (*ServiceGroupGCSafePoint, error) {
+	value, err := se.Load(gcSafePointPathByServiceGroup(serviceGroupID))
+	if err != nil || value == "" {
+		return nil, err
+	}
+	gcSafePoint := &ServiceGroupGCSafePoint{}
+	if err := json.Unmarshal([]byte(value), gcSafePoint); err != nil {
+		return nil, err
+	}
+	return gcSafePoint, nil
+}
+
+// LoadAllServiceGroupGCSafePoints returns the GC safe points of all service groups.
+func (se *StorageEndpoint) LoadAllServiceGroupGCSafePoints() ([]*ServiceGroupGCSafePoint, error) {
+	prefix := gcServiceGroupGCSafePointPrefixPath()
+	prefixEnd := clientv3.GetPrefixRangeEnd(prefix)
+	_, values, err := se.LoadRange(prefix, prefixEnd, 0)
+	if err != nil {
+		return nil, err
+	}
+	safePoints := make([]*ServiceGroupGCSafePoint, 0, len(values))
+	for _, value := range values {
+		gcSafePoint := &ServiceGroupGCSafePoint{}
+		if err := json.Unmarshal([]byte(value), gcSafePoint); err != nil {
+			return nil, err
+		}
+		safePoints = append(safePoints, gcSafePoint)
+	}
+	return safePoints, nil
+}
diff --git a/server/storage/endpoint/key_path.go b/server/storage/endpoint/key_path.go
index 26838437a03..d0b5edd144a 100644
--- a/server/storage/endpoint/key_path.go
+++ b/server/storage/endpoint/key_path.go
@@ -31,8 +31,9 @@ const (
 	customScheduleConfigPath   = "scheduler_config"
 	gcWorkerServiceSafePointID = "gc_worker"
 	minResolvedTS              = "min_resolved_ts"
-	serviceGroupServicePath    = "gc_servicegroup/service_safepoint"
-	serviceGroupGCPath         = "gc_servicegroup/gc_safepoint"
+
+	gcServiceGroupGCSafePointPath      = "gc_servicegroup/gc_safepoint"
+	gcServiceGroupServiceSafePointPath = "gc_servicegroup/service_safepoint"
 )
 
 // AppendToRootPath appends the given key to the rootPath.
@@ -101,27 +102,31 @@ func gcSafePointServicePath(serviceID string) string {
 	return path.Join(gcSafePointPath(), "service", serviceID)
 }
 
-// MinResolvedTSPath returns the min resolved ts path
-func MinResolvedTSPath() string {
-	return path.Join(clusterPath, minResolvedTS)
+// gcSafePointPathByServiceGroup returns the path of the gc safe point of the specified service group.
+// Path: /gc_servicegroup/gc_safepoint/$service_group_id
+func gcSafePointPathByServiceGroup(serviceGroupID string) string {
+	return path.Join(gcServiceGroupGCSafePointPath, serviceGroupID)
 }
 
-// gcSafePointPathByServiceGroup returns the path of the gc_worker's safe point
-// /gc_servicegroup/gc_safepoint/$service_group_id
-func gcSafePointPathByServiceGroup(serviceGroupID string) string {
-	return path.Join(serviceGroupGCPath, serviceGroupID)
+// GCServiceSafePointPrefixPathByServiceGroup returns the prefix path of the service safe points of the specified service group.
+// Path: /gc_servicegroup/service_safepoint/$service_group_id
+func GCServiceSafePointPrefixPathByServiceGroup(serviceGroupID string) string {
+	return path.Join(gcServiceGroupServiceSafePointPath, serviceGroupID) + "/"
 }
 
-// ServiceSafePointPath returns the path of services' safe point
-// /gc_servicegroup/service_safepoint/$service_group_id/$service_id
-func ServiceSafePointPath(serviceGroupID, serviceID string) string {
-	return path.Join(serviceGroupServicePath, serviceGroupID, serviceID)
+// GCServiceSafePointPathByServiceGroup returns the path of a service's safe point of the specified service group.
+// Path: /gc_servicegroup/service_safepoint/$service_group_id/$service_id
+func GCServiceSafePointPathByServiceGroup(serviceGroupID, serviceID string) string {
+	return path.Join(GCServiceSafePointPrefixPathByServiceGroup(serviceGroupID), serviceID)
 }
 
-func serviceSafePointPrefixPath(serviceGroupID string) string {
-	return path.Join(serviceGroupServicePath, serviceGroupID) + "/"
+// gcServiceGroupGCSafePointPrefixPath returns the prefix path of the gc safe points of all service groups.
+// Path: /gc_servicegroup/gc_safepoint/ +func gcServiceGroupGCSafePointPrefixPath() string { + return gcServiceGroupGCSafePointPath + "/" } -func gcSafePointPrefixPath() string { - return serviceGroupGCPath + "/" +// MinResolvedTSPath returns the min resolved ts path +func MinResolvedTSPath() string { + return path.Join(clusterPath, minResolvedTS) } diff --git a/server/storage/storage_gc_test.go b/server/storage/storage_gc_test.go new file mode 100644 index 00000000000..b624fc5d508 --- /dev/null +++ b/server/storage/storage_gc_test.go @@ -0,0 +1,197 @@ +// Copyright 2022 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "encoding/json" + "math" + "time" + + . "github.com/pingcap/check" + "github.com/tikv/pd/server/storage/endpoint" +) + +var _ = Suite(&testStorageFopGCSuite{}) + +type testStorageFopGCSuite struct { +} + +func testGCSafePoints() []*endpoint.ServiceGroupGCSafePoint { + return []*endpoint.ServiceGroupGCSafePoint{ + { + ServiceGroupID: "testServiceGroup1", + SafePoint: 0, + }, + { + ServiceGroupID: "testServiceGroup2", + SafePoint: 1, + }, + { + ServiceGroupID: "testServiceGroup3", + SafePoint: 4396, + }, + { + ServiceGroupID: "testServiceGroup4", + SafePoint: 23333333333, + }, + { + ServiceGroupID: "testServiceGroup5", + SafePoint: math.MaxUint64, + }, + } +} + +func (s *testStorageFopGCSuite) TestLoadGCWorkerSafePoint(c *C) { + storage := NewStorageWithMemoryBackend() + testData := testGCSafePoints() + r, e := storage.LoadGCSafePointByServiceGroup("testServiceGroup") + c.Assert(r, IsNil) + c.Assert(e, IsNil) + for _, safePoint := range testData { + err := storage.SaveGCSafePointByServiceGroup(safePoint) + c.Assert(err, IsNil) + loaded, err := storage.LoadGCSafePointByServiceGroup(safePoint.ServiceGroupID) + c.Assert(err, IsNil) + c.Assert(safePoint, DeepEquals, loaded) + } +} + +func (s *testStorageFopGCSuite) TestLoadAllServiceGroupGCSafePoints(c *C) { + storage := NewStorageWithMemoryBackend() + testData := testGCSafePoints() + for _, safePoint := range testData { + err := storage.SaveGCSafePointByServiceGroup(safePoint) + c.Assert(err, IsNil) + } + safePoints, err := storage.LoadAllServiceGroupGCSafePoints() + c.Assert(err, IsNil) + for i, safePoint := range testData { + c.Assert(safePoints[i], DeepEquals, safePoint) + } +} + +func (s *testStorageFopGCSuite) TestLoadAllServiceGroup(c *C) { + storage := NewStorageWithMemoryBackend() + serviceGroups, err := storage.LoadAllServiceGroups() + c.Assert(err, IsNil) + c.Assert(serviceGroups, DeepEquals, []string{"default_rawkv"}) +} + +func (s *testStorageFopGCSuite) TestLoadServiceSafePointByServiceGroup(c *C) { + storage := NewStorageWithMemoryBackend() + expireAt := time.Now().Add(100 * time.Second).Unix() + serviceSafePoints := []*endpoint.ServiceSafePoint{ + {ServiceID: "1", ExpiredAt: expireAt, SafePoint: 1}, + {ServiceID: "2", ExpiredAt: expireAt, SafePoint: 2}, + {ServiceID: "3", ExpiredAt: expireAt, SafePoint: 3}, + } + serviceGroups := []string{ + 
"serviceGroup1", + "serviceGroup2", + "serviceGroup3", + } + + for _, serviceGroup := range serviceGroups { + for _, serviceSafePoint := range serviceSafePoints { + c.Assert(storage.SaveServiceSafePointByServiceGroup(serviceGroup, serviceSafePoint), IsNil) + } + } + for _, serviceGroup := range serviceGroups { + for _, serviceSafePoint := range serviceSafePoints { + key := endpoint.GCServiceSafePointPathByServiceGroup(serviceGroup, serviceSafePoint.ServiceID) + value, err := storage.Load(key) + c.Assert(err, IsNil) + ssp := &endpoint.ServiceSafePoint{} + c.Assert(json.Unmarshal([]byte(value), ssp), IsNil) + c.Assert(ssp, DeepEquals, serviceSafePoint) + } + } +} + +func (s *testStorageFopGCSuite) TestRemoveServiceSafePointByServiceGroup(c *C) { + storage := NewStorageWithMemoryBackend() + expireAt := time.Now().Add(100 * time.Second).Unix() + + serviceSafePoints := []*endpoint.ServiceSafePoint{ + {ServiceID: "1", ExpiredAt: expireAt, SafePoint: 1}, + {ServiceID: "2", ExpiredAt: expireAt, SafePoint: 2}, + {ServiceID: "3", ExpiredAt: expireAt, SafePoint: 3}, + } + serviceGroups := []string{ + "serviceGroup1", + "serviceGroup2", + "serviceGroup3", + } + // save service safe points + for _, serviceGroup := range serviceGroups { + for _, serviceSafePoint := range serviceSafePoints { + c.Assert(storage.SaveServiceSafePointByServiceGroup(serviceGroup, serviceSafePoint), IsNil) + } + } + + // remove service safe points + for _, serviceGroup := range serviceGroups { + for _, serviceSafePoint := range serviceSafePoints { + c.Assert(storage.RemoveServiceSafePointByServiceGroup(serviceGroup, serviceSafePoint.ServiceID), IsNil) + } + } + + // check that service safe points are empty + for _, serviceGroup := range serviceGroups { + for _, serviceSafePoint := range serviceSafePoints { + safepoint, err := storage.LoadServiceSafePointByServiceGroup(serviceGroup, serviceSafePoint.ServiceID) + c.Assert(err, IsNil) + c.Assert(safepoint, IsNil) + } + } +} + +func (s *testStorageFopGCSuite) TestLoadMinServiceSafePointByServiceGroup(c *C) { + storage := NewStorageWithMemoryBackend() + currentTime := time.Now() + expireAt1 := currentTime.Add(100 * time.Second).Unix() + expireAt2 := currentTime.Add(200 * time.Second).Unix() + expireAt3 := currentTime.Add(300 * time.Second).Unix() + + serviceSafePoints := []*endpoint.ServiceSafePoint{ + {ServiceID: "1", ExpiredAt: expireAt1, SafePoint: 100}, + {ServiceID: "2", ExpiredAt: expireAt2, SafePoint: 200}, + {ServiceID: "3", ExpiredAt: expireAt3, SafePoint: 300}, + } + + for _, serviceSafePoint := range serviceSafePoints { + c.Assert(storage.SaveServiceSafePointByServiceGroup("testServiceGroup1", serviceSafePoint), IsNil) + } + minSafePoint, err := storage.LoadMinServiceSafePointByServiceGroup("testServiceGroup1", currentTime) + c.Assert(err, IsNil) + c.Assert(minSafePoint, DeepEquals, serviceSafePoints[0]) + + // this should remove safePoint with ServiceID 1 due to expiration + // and find the safePoint with ServiceID 2 + minSafePoint2, err := storage.LoadMinServiceSafePointByServiceGroup("testServiceGroup1", currentTime.Add(150*time.Second)) + c.Assert(err, IsNil) + c.Assert(minSafePoint2, DeepEquals, serviceSafePoints[1]) + + // verify that one with ServiceID 1 has been removed + ssp, err := storage.LoadServiceSafePointByServiceGroup("testServiceGroup1", "1") + c.Assert(err, IsNil) + c.Assert(ssp, IsNil) + + // this should remove all service safe points + // and return nil + ssp, err = storage.LoadMinServiceSafePointByServiceGroup("testServiceGroup1", 
currentTime.Add(500*time.Second)) + c.Assert(err, IsNil) + c.Assert(ssp, IsNil) +} diff --git a/server/storage/storage_test.go b/server/storage/storage_test.go index 106efe42408..51870a62133 100644 --- a/server/storage/storage_test.go +++ b/server/storage/storage_test.go @@ -279,186 +279,3 @@ func (s *testStorageSuite) TestLoadRegionsExceedRangeLimit(c *C) { } c.Assert(failpoint.Disable("github.com/tikv/pd/server/storage/kv/withRangeLimit"), IsNil) } - -func testGCSafePoints() []*endpoint.GCSafePoint { - return []*endpoint.GCSafePoint{ - { - ServiceGroupID: "testServiceGroup1", - SafePoint: 0, - }, - { - ServiceGroupID: "testServiceGroup2", - SafePoint: 1, - }, - { - ServiceGroupID: "testServiceGroup3", - SafePoint: 4396, - }, - { - ServiceGroupID: "testServiceGroup4", - SafePoint: 23333333333, - }, - { - ServiceGroupID: "testServiceGroup5", - SafePoint: math.MaxUint64, - }, - } -} - -func (s *testStorageSuite) TestLoadGCWorkerSafePoint(c *C) { - storage := NewStorageWithMemoryBackend() - testData := testGCSafePoints() - r, e := storage.LoadGCWorkerSafePoint("testServiceGroup") - c.Assert(r, IsNil) - c.Assert(e, IsNil) - for _, safePoint := range testData { - err := storage.SaveGCWorkerSafePoint(safePoint) - c.Assert(err, IsNil) - loaded, err := storage.LoadGCWorkerSafePoint(safePoint.ServiceGroupID) - c.Assert(err, IsNil) - c.Assert(safePoint.ServiceGroupID, Equals, loaded.ServiceGroupID) - c.Assert(safePoint.SafePoint, Equals, loaded.SafePoint) - } -} - -func (s *testStorageSuite) TestLoadAllServiceGroupGCSafePoints(c *C) { - storage := NewStorageWithMemoryBackend() - testData := testGCSafePoints() - for _, safePoint := range testData { - err := storage.SaveGCWorkerSafePoint(safePoint) - c.Assert(err, IsNil) - } - serviceGroupIDs, safePoints, err := storage.LoadAllServiceGroupGCSafePoints() - c.Assert(err, IsNil) - for i, safePoint := range testData { - c.Assert(string(serviceGroupIDs[i]), Equals, safePoint.ServiceGroupID) - c.Assert(safePoints[i], Equals, safePoint.SafePoint) - } -} - -func (s *testStorageSuite) TestLoadAllServiceGroup(c *C) { - storage := NewStorageWithMemoryBackend() - testData := testGCSafePoints() - for _, safePoint := range testData { - err := storage.SaveGCWorkerSafePoint(safePoint) - c.Assert(err, IsNil) - } - serviceGroups, err := storage.LoadAllServiceGroups() - c.Assert(err, IsNil) - for i, safePoint := range testData { - c.Assert(string(serviceGroups[i]), Equals, safePoint.ServiceGroupID) - } -} - -func (s *testStorageSuite) TestLoadServiceSafePointByServiceGroup(c *C) { - storage := NewStorageWithMemoryBackend() - expireAt := time.Now().Add(100 * time.Second).Unix() - serviceSafePoints := []*endpoint.ServiceSafePoint{ - {ServiceID: "1", ExpiredAt: expireAt, SafePoint: 1}, - {ServiceID: "2", ExpiredAt: expireAt, SafePoint: 2}, - {ServiceID: "3", ExpiredAt: expireAt, SafePoint: 3}, - } - serviceGroups := []string{ - "serviceGroup1", - "serviceGroup2", - "serviceGroup3", - } - - for _, serviceGroup := range serviceGroups { - for _, serviceSafePoint := range serviceSafePoints { - c.Assert(storage.SaveServiceSafePointByServiceGroup(serviceGroup, serviceSafePoint), IsNil) - } - } - for _, serviceGroup := range serviceGroups { - for _, serviceSafePoint := range serviceSafePoints { - key := endpoint.ServiceSafePointPath(serviceGroup, serviceSafePoint.ServiceID) - value, err := storage.Load(key) - c.Assert(err, IsNil) - ssp := &endpoint.ServiceSafePoint{} - c.Assert(json.Unmarshal([]byte(value), ssp), IsNil) - c.Assert(ssp.ServiceID, Equals, 
serviceSafePoint.ServiceID) - c.Assert(ssp.ExpiredAt, Equals, serviceSafePoint.ExpiredAt) - c.Assert(ssp.SafePoint, Equals, serviceSafePoint.SafePoint) - - } - } -} - -func (s *testStorageSuite) TestRemoveServiceSafePointByServiceGroup(c *C) { - storage := NewStorageWithMemoryBackend() - expireAt := time.Now().Add(100 * time.Second).Unix() - - serviceSafePoints := []*endpoint.ServiceSafePoint{ - {ServiceID: "1", ExpiredAt: expireAt, SafePoint: 1}, - {ServiceID: "2", ExpiredAt: expireAt, SafePoint: 2}, - {ServiceID: "3", ExpiredAt: expireAt, SafePoint: 3}, - } - serviceGroups := []string{ - "serviceGroup1", - "serviceGroup2", - "serviceGroup3", - } - // save service safe points - for _, serviceGroup := range serviceGroups { - for _, serviceSafePoint := range serviceSafePoints { - c.Assert(storage.SaveServiceSafePointByServiceGroup(serviceGroup, serviceSafePoint), IsNil) - } - } - - // remove service safe points - for _, serviceGroup := range serviceGroups { - for _, serviceSafePoint := range serviceSafePoints { - c.Assert(storage.RemoveServiceSafePointByServiceGroup(serviceGroup, serviceSafePoint.ServiceID), IsNil) - } - } - - // check that service safe points are empty - for _, serviceGroup := range serviceGroups { - for _, serviceSafePoint := range serviceSafePoints { - safepoint, err := storage.LoadServiceSafePoint(serviceGroup, serviceSafePoint.ServiceID) - c.Assert(err, IsNil) - c.Assert(safepoint, IsNil) - } - } -} - -func (s *testStorageSuite) TestLoadMinServiceSafePointByServiceGroup(c *C) { - storage := NewStorageWithMemoryBackend() - currentTime := time.Now() - expireAt1 := currentTime.Add(100 * time.Second).Unix() - expireAt2 := currentTime.Add(200 * time.Second).Unix() - expireAt3 := currentTime.Add(300 * time.Second).Unix() - - serviceSafePoints := []*endpoint.ServiceSafePoint{ - {ServiceID: "1", ExpiredAt: expireAt1, SafePoint: 100}, - {ServiceID: "2", ExpiredAt: expireAt2, SafePoint: 200}, - {ServiceID: "3", ExpiredAt: expireAt3, SafePoint: 300}, - } - - for _, serviceSafePoint := range serviceSafePoints { - c.Assert(storage.SaveServiceSafePointByServiceGroup("testServiceGroup1", serviceSafePoint), IsNil) - } - minSafePoint, err := storage.LoadMinServiceSafePointByServiceGroup("testServiceGroup1", currentTime) - c.Assert(err, IsNil) - c.Assert(minSafePoint.ServiceID, Equals, serviceSafePoints[0].ServiceID) - c.Assert(minSafePoint.ExpiredAt, Equals, serviceSafePoints[0].ExpiredAt) - c.Assert(minSafePoint.SafePoint, Equals, serviceSafePoints[0].SafePoint) - - // this should remove safePoint with ServiceID 1 due to expiration - // and find the safePoint with ServiceID 2 - minSafePoint2, err := storage.LoadMinServiceSafePointByServiceGroup("testServiceGroup1", currentTime.Add(150*time.Second)) - c.Assert(err, IsNil) - c.Assert(minSafePoint2.ServiceID, Equals, serviceSafePoints[1].ServiceID) - c.Assert(minSafePoint2.ExpiredAt, Equals, serviceSafePoints[1].ExpiredAt) - c.Assert(minSafePoint2.SafePoint, Equals, serviceSafePoints[1].SafePoint) - // verify that one with ServiceID 1 has been removed - ssp, err := storage.LoadServiceSafePoint("testServiceGroup1", "1") - c.Assert(err, IsNil) - c.Assert(ssp, IsNil) - - // this should remove all service safe points - // and return nil - ssp, err = storage.LoadMinServiceSafePointByServiceGroup("testServiceGroup1", currentTime.Add(500*time.Second)) - c.Assert(err, IsNil) - c.Assert(ssp, IsNil) -} diff --git a/tests/client/go.mod b/tests/client/go.mod index 1fc23575b2a..dc3a0512eb0 100644 --- a/tests/client/go.mod +++ b/tests/client/go.mod @@ 
-7,7 +7,7 @@ require ( github.com/golang/protobuf v1.5.2 // indirect github.com/pingcap/check v0.0.0-20211026125417-57bd13f7b5f0 github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 - github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748 + github.com/pingcap/kvproto v0.0.0-20220506032820-55094d91343e github.com/tikv/pd v0.0.0-00010101000000-000000000000 github.com/tikv/pd/client v0.0.0-00010101000000-000000000000 go.etcd.io/etcd v0.5.0-alpha.5.0.20191023171146-3cf2f69b5738 diff --git a/tests/client/go.sum b/tests/client/go.sum index 1e4ba31add1..049da2ca0a4 100644 --- a/tests/client/go.sum +++ b/tests/client/go.sum @@ -408,8 +408,8 @@ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= github.com/pingcap/kvproto v0.0.0-20200411081810-b85805c9476c/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= -github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748 h1:i4MBe1zGq9/r3BH6rTRunizi4T59fpNk8hvBCrB5UAY= -github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20220506032820-55094d91343e h1:iquj/SVNullS8+llCooL3Pk2DWQPW/HDDpF1EHwsnq0= +github.com/pingcap/kvproto v0.0.0-20220506032820-55094d91343e/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= From 58c4c77756e27c2959f8671efcea05f1cc1e9452 Mon Sep 17 00:00:00 2001 From: pingyu Date: Mon, 9 May 2022 23:15:20 +0800 Subject: [PATCH 15/18] bugfix & add integration test Signed-off-by: pingyu --- pkg/testutil/testutil.go | 9 ++ server/gc_service.go | 26 ++-- server/storage/kv/etcd_kv.go | 13 +- server/storage/kv/kv.go | 3 + server/storage/kv/kv_test.go | 47 ++++++- server/storage/kv/levedb_kv.go | 5 + server/storage/kv/mem_kv.go | 5 + tests/cluster.go | 8 ++ tests/server/gc/gc_test.go | 250 +++++++++++++++++++++++++++++++++ tools/pd-tso-bench/go.sum | 4 +- 10 files changed, 352 insertions(+), 18 deletions(-) create mode 100644 tests/server/gc/gc_test.go diff --git a/pkg/testutil/testutil.go b/pkg/testutil/testutil.go index c3c917d7b3a..07037d03c1f 100644 --- a/pkg/testutil/testutil.go +++ b/pkg/testutil/testutil.go @@ -20,6 +20,7 @@ import ( "time" "github.com/pingcap/check" + "github.com/pingcap/kvproto/pkg/gcpb" "github.com/pingcap/kvproto/pkg/pdpb" "google.golang.org/grpc" ) @@ -86,6 +87,14 @@ func MustNewGrpcClient(c *check.C, addr string) pdpb.PDClient { return pdpb.NewPDClient(conn) } +// MustNewGCClient must create a new GC client. +func MustNewGCClient(c *check.C, addr string) gcpb.GCClient { + conn, err := grpc.Dial(strings.TrimPrefix(addr, "http://"), grpc.WithInsecure()) + + c.Assert(err, check.IsNil) + return gcpb.NewGCClient(conn) +} + // CleanServer is used to clean data directory. 
func CleanServer(dataDir string) { // Clean data directory diff --git a/server/gc_service.go b/server/gc_service.go index f60f924b712..22b3c173c87 100644 --- a/server/gc_service.go +++ b/server/gc_service.go @@ -93,14 +93,15 @@ func (s *GcServer) GetAllServiceGroups(ctx context.Context, request *gcpb.GetAll // Return -1 if the service group is not existed. func (s *GcServer) getServiceRevisionByServiceGroup(ctx context.Context, serviceGroupID string) (int64, error) { servicePath := endpoint.GCServiceSafePointPrefixPathByServiceGroup(serviceGroupID) - rsp, err := s.client.Get(ctx, servicePath) - if err != nil { - return -1, err - } - if rsp == nil { - return -1, nil - } - return rsp.Kvs[0].ModRevision, nil + _, revision, err := s.storage.LoadRevision(servicePath) + return revision, err +} + +// touchServiceRevisionByServiceGroup advance revision service group path. +// It's used when new service safe point is saved. +func (s *GcServer) touchServiceRevisionByServiceGroup(ctx context.Context, serviceGroupID string) error { + servicePath := endpoint.GCServiceSafePointPrefixPathByServiceGroup(serviceGroupID) + return s.storage.Save(servicePath, "") } // GetMinServiceSafePointByServiceGroup returns given service group's min service safe point. @@ -282,6 +283,15 @@ func (s *GcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, req if math.MaxInt64-now.Unix() <= request.TTL { ssp.ExpiredAt = math.MaxInt64 } + + if sspOld == nil { + // Touch service revision to advance revision, for indicating that a new service safe point is added. + // Should be invoked before `SaveServiceSafePointByServiceGroup`, to avoid touch fail after new service safe point is saved. + if err := s.touchServiceRevisionByServiceGroup(ctx, serviceGroupID); err != nil { + return nil, err + } + } + if err := storage.SaveServiceSafePointByServiceGroup(serviceGroupID, ssp); err != nil { return nil, err } diff --git a/server/storage/kv/etcd_kv.go b/server/storage/kv/etcd_kv.go index e30b5f7b462..00ff55c8763 100644 --- a/server/storage/kv/etcd_kv.go +++ b/server/storage/kv/etcd_kv.go @@ -48,18 +48,23 @@ func NewEtcdKVBase(client *clientv3.Client, rootPath string) *etcdKVBase { } func (kv *etcdKVBase) Load(key string) (string, error) { + value, _, err := kv.LoadRevision(key) + return value, err +} + +func (kv *etcdKVBase) LoadRevision(key string) (string, int64, error) { key = path.Join(kv.rootPath, key) resp, err := etcdutil.EtcdKVGet(kv.client, key) if err != nil { - return "", err + return "", REVISION_UNAVAILABLE, err } if n := len(resp.Kvs); n == 0 { - return "", nil + return "", REVISION_UNAVAILABLE, nil } else if n > 1 { - return "", errs.ErrEtcdKVGetResponse.GenWithStackByArgs(resp.Kvs) + return "", REVISION_UNAVAILABLE, errs.ErrEtcdKVGetResponse.GenWithStackByArgs(resp.Kvs) } - return string(resp.Kvs[0].Value), nil + return string(resp.Kvs[0].Value), resp.Kvs[0].ModRevision, nil } func (kv *etcdKVBase) LoadRange(key, endKey string, limit int) ([]string, []string, error) { diff --git a/server/storage/kv/kv.go b/server/storage/kv/kv.go index 2f1fa06e144..7eefa7f640a 100644 --- a/server/storage/kv/kv.go +++ b/server/storage/kv/kv.go @@ -14,9 +14,12 @@ package kv +const REVISION_UNAVAILABLE = -1 + // Base is an abstract interface for load/save pd cluster data. 
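+// LoadRevision behaves like Load but additionally reports the revision of the key: the etcd
+// backend returns the key's ModRevision, while backends without MVCC metadata (memory, LevelDB)
+// return REVISION_UNAVAILABLE.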
type Base interface { Load(key string) (string, error) + LoadRevision(key string) (string, int64, error) LoadRange(key, endKey string, limit int) (keys []string, values []string, err error) Save(key, value string) error Remove(key string) error diff --git a/server/storage/kv/kv_test.go b/server/storage/kv/kv_test.go index 51c90e8a1d1..1360972a5ae 100644 --- a/server/storage/kv/kv_test.go +++ b/server/storage/kv/kv_test.go @@ -52,7 +52,7 @@ func (s *testKVSuite) TestEtcd(c *C) { rootPath := path.Join("/pd", strconv.FormatUint(100, 10)) kv := NewEtcdKVBase(client, rootPath) - s.testReadWrite(c, kv) + s.testReadWrite(c, kv, true) s.testRange(c, kv) } @@ -63,30 +63,69 @@ func (s *testKVSuite) TestLevelDB(c *C) { kv, err := NewLevelDBKV(dir) c.Assert(err, IsNil) - s.testReadWrite(c, kv) + s.testReadWrite(c, kv, false) s.testRange(c, kv) } func (s *testKVSuite) TestMemKV(c *C) { kv := NewMemoryKV() - s.testReadWrite(c, kv) + s.testReadWrite(c, kv, false) s.testRange(c, kv) } -func (s *testKVSuite) testReadWrite(c *C, kv Base) { +func (s *testKVSuite) testReadWrite(c *C, kv Base, isEtcd bool) { + rev := int64(-1) + nextRevision := func() { + if isEtcd { + if rev == -1 { + rev = 1 + } + rev += 1 + } + } + v, err := kv.Load("key") c.Assert(err, IsNil) c.Assert(v, Equals, "") + + v, revision, err := kv.LoadRevision("key") + c.Assert(err, IsNil) + c.Assert(revision, Equals, int64(-1)) + c.Assert(v, Equals, "") + err = kv.Save("key", "value") c.Assert(err, IsNil) + nextRevision() + v, err = kv.Load("key") c.Assert(err, IsNil) c.Assert(v, Equals, "value") + + v, revision, err = kv.LoadRevision("key") + c.Assert(err, IsNil) + c.Assert(v, Equals, "value") + c.Assert(revision, Equals, rev) + + err = kv.Save("key", "value1") + c.Assert(err, IsNil) + nextRevision() + v, revision, err = kv.LoadRevision("key") + c.Assert(err, IsNil) + c.Assert(v, Equals, "value1") + c.Assert(revision, Equals, rev) + err = kv.Remove("key") c.Assert(err, IsNil) + v, err = kv.Load("key") c.Assert(err, IsNil) c.Assert(v, Equals, "") + + v, revision, err = kv.LoadRevision("key") + c.Assert(err, IsNil) + c.Assert(revision, Equals, int64(-1)) + c.Assert(v, Equals, "") + err = kv.Remove("key") c.Assert(err, IsNil) } diff --git a/server/storage/kv/levedb_kv.go b/server/storage/kv/levedb_kv.go index 7f134709bd1..89d59fe1046 100644 --- a/server/storage/kv/levedb_kv.go +++ b/server/storage/kv/levedb_kv.go @@ -49,6 +49,11 @@ func (kv *LevelDBKV) Load(key string) (string, error) { return string(v), err } +func (kv *LevelDBKV) LoadRevision(key string) (string, int64, error) { + value, err := kv.Load(key) + return value, REVISION_UNAVAILABLE, err +} + // LoadRange gets a range of value for a given key range. 
func (kv *LevelDBKV) LoadRange(startKey, endKey string, limit int) ([]string, []string, error) { iter := kv.NewIterator(&util.Range{Start: []byte(startKey), Limit: []byte(endKey)}, nil) diff --git a/server/storage/kv/mem_kv.go b/server/storage/kv/mem_kv.go index b74cab84b11..05400451ff8 100644 --- a/server/storage/kv/mem_kv.go +++ b/server/storage/kv/mem_kv.go @@ -51,6 +51,11 @@ func (kv *memoryKV) Load(key string) (string, error) { return item.(memoryKVItem).value, nil } +func (kv *memoryKV) LoadRevision(key string) (string, int64, error) { + value, err := kv.Load(key) + return value, REVISION_UNAVAILABLE, err +} + func (kv *memoryKV) LoadRange(key, endKey string, limit int) ([]string, []string, error) { failpoint.Inject("withRangeLimit", func(val failpoint.Value) { rangeLimit, ok := val.(int) diff --git a/tests/cluster.go b/tests/cluster.go index 2061668f393..1aa9471ea4b 100644 --- a/tests/cluster.go +++ b/tests/cluster.go @@ -64,6 +64,7 @@ type TestServer struct { sync.RWMutex server *server.Server grpcServer *server.GrpcServer + gcServer *server.GcServer state int32 } @@ -91,6 +92,7 @@ func NewTestServer(ctx context.Context, cfg *config.Config) (*TestServer, error) return &TestServer{ server: svr, grpcServer: &server.GrpcServer{Server: svr}, + gcServer: &server.GcServer{Server: svr}, state: Initial, }, nil } @@ -358,6 +360,12 @@ func (s *TestServer) GetStoreRegions(storeID uint64) []*core.RegionInfo { return s.server.GetRaftCluster().GetStoreRegions(storeID) } +func (s *TestServer) GetGCService() *server.GcServer { + s.RLock() + defer s.RUnlock() + return s.gcServer +} + // BootstrapCluster is used to bootstrap the cluster. func (s *TestServer) BootstrapCluster() error { bootstrapReq := &pdpb.BootstrapRequest{ diff --git a/tests/server/gc/gc_test.go b/tests/server/gc/gc_test.go new file mode 100644 index 00000000000..c532e5082ac --- /dev/null +++ b/tests/server/gc/gc_test.go @@ -0,0 +1,250 @@ +// Copyright 2022 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gc_test + +import ( + "context" + "math" + "testing" + + . "github.com/pingcap/check" + "github.com/pingcap/kvproto/pkg/gcpb" + "github.com/tikv/pd/pkg/testutil" + "github.com/tikv/pd/server" + "github.com/tikv/pd/tests" + "go.uber.org/goleak" +) + +func Test(t *testing.T) { + TestingT(t) +} + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m, testutil.LeakOptions...) 
+} + +var _ = Suite(&testGCSuite{}) + +func newRequestHeader(clusterID uint64) *gcpb.RequestHeader { + return &gcpb.RequestHeader{ + ClusterId: clusterID, + } +} + +type testGCSuite struct { + ctx context.Context + cancel context.CancelFunc +} + +func (s *testGCSuite) SetUpSuite(c *C) { + s.ctx, s.cancel = context.WithCancel(context.Background()) + server.EnableZap = true +} + +func (s *testGCSuite) TearDownSuite(c *C) { + s.cancel() +} + +func (s *testGCSuite) mustNewGCService(c *C) (gcSvc *server.GcServer, cli gcpb.GCClient, cluster *tests.TestCluster, clusterID uint64) { + var err error + cluster, err = tests.NewTestCluster(s.ctx, 1) + c.Assert(err, IsNil) + + err = cluster.RunInitialServers() + c.Assert(err, IsNil) + cluster.WaitLeader() + leader := cluster.GetServer(cluster.GetLeader()) + c.Assert(leader.BootstrapCluster(), IsNil) + + clusterID = leader.GetClusterID() + gcSvc = leader.GetGCService() + + cli = testutil.MustNewGCClient(c, leader.GetAddr()) + + return +} + +func (s *testGCSuite) TestXxx(c *C) { + _, cli, cluster, clusterID := s.mustNewGCService(c) + defer cluster.Destroy() + + serviceGroupRawKV := []byte("default_rawkv") + serviceGroupTxnKV := []byte("default_txnkv") + serviceID1 := []byte("svc1") + serviceID2 := []byte("svc2") + + { + req := &gcpb.GetAllServiceGroupsRequest{ + Header: newRequestHeader(clusterID), + } + resp, err := cli.GetAllServiceGroups(s.ctx, req) + c.Assert(err, IsNil) + c.Assert(resp.ServiceGroupId, DeepEquals, [][]byte{serviceGroupRawKV}) + } + + // Update service safe point + { + req := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupRawKV, + ServiceId: serviceID1, + TTL: math.MaxInt64, + SafePoint: 100, + } + resp, err := cli.UpdateServiceSafePointByServiceGroup(s.ctx, req) + c.Assert(err, IsNil) + expected := &gcpb.UpdateServiceSafePointByServiceGroupResponse{ + Header: resp.GetHeader(), + Succeeded: true, + GcSafePoint: 0, + OldSafePoint: 0, + NewSafePoint: 100, + } + c.Assert(resp, DeepEquals, expected) + + // Safe point roll back + req.SafePoint = 99 + resp, err = cli.UpdateServiceSafePointByServiceGroup(s.ctx, req) + c.Assert(err, IsNil) + c.Assert(resp.GetHeader().GetError().GetType(), Equals, gcpb.ErrorType_SAFEPOINT_ROLLBACK) + c.Assert(resp.GetSucceeded(), IsFalse) + } + + // Update GC safe point with revision mismatch + { + reqGc := &gcpb.GetMinServiceSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupRawKV, + } + respGc, err := cli.GetMinServiceSafePointByServiceGroup(s.ctx, reqGc) + c.Assert(err, IsNil) + c.Assert(respGc.SafePoint, Equals, uint64(100)) + // c.Assert(respGc.Revision, Equals, int64(12)) + + reqSvc := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupRawKV, + ServiceId: serviceID2, + TTL: math.MaxInt64, + SafePoint: 50, + } + respSvc, err := cli.UpdateServiceSafePointByServiceGroup(s.ctx, reqSvc) + c.Assert(err, IsNil) + expected := &gcpb.UpdateServiceSafePointByServiceGroupResponse{ + Header: respSvc.GetHeader(), + Succeeded: true, + GcSafePoint: 0, + OldSafePoint: 0, + NewSafePoint: 50, + } + c.Assert(respSvc, DeepEquals, expected) + + reqUpdate := &gcpb.UpdateGCSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupRawKV, + SafePoint: 100, + Revision: respGc.Revision, + } + respUpdate, err := cli.UpdateGCSafePointByServiceGroup(s.ctx, reqUpdate) + c.Assert(err, IsNil) + 
c.Assert(respUpdate.Succeeded, IsFalse) + c.Assert(respUpdate.GetHeader().GetError().GetType(), Equals, gcpb.ErrorType_REVISION_MISMATCH) + } + + // Retry update GC safe point + { + reqGc := &gcpb.GetMinServiceSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupRawKV, + } + respGc, err := cli.GetMinServiceSafePointByServiceGroup(s.ctx, reqGc) + c.Assert(err, IsNil) + c.Assert(respGc.SafePoint, Equals, uint64(50)) + // c.Assert(respGc.Revision, Equals, int64(12)) + + reqSvc := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupRawKV, + ServiceId: serviceID2, + TTL: math.MaxInt64, + SafePoint: 80, + } + respSvc, err := cli.UpdateServiceSafePointByServiceGroup(s.ctx, reqSvc) + c.Assert(err, IsNil) + expected := &gcpb.UpdateServiceSafePointByServiceGroupResponse{ + Header: respSvc.GetHeader(), + Succeeded: true, + GcSafePoint: 0, + OldSafePoint: 50, + NewSafePoint: 80, + } + c.Assert(respSvc, DeepEquals, expected) + + reqUpdate := &gcpb.UpdateGCSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupRawKV, + SafePoint: 50, + Revision: respGc.Revision, + } + respUpdate, err := cli.UpdateGCSafePointByServiceGroup(s.ctx, reqUpdate) + c.Assert(err, IsNil) + c.Assert(respUpdate.Succeeded, IsTrue) + c.Assert(respUpdate.GetNewSafePoint(), Equals, uint64(50)) + + // GC safe point roll back + reqUpdate.SafePoint = 49 + respUpdate, err = cli.UpdateGCSafePointByServiceGroup(s.ctx, reqUpdate) + c.Assert(err, IsNil) + c.Assert(respUpdate.Succeeded, IsFalse) + c.Assert(respUpdate.GetHeader().GetError().GetType(), Equals, gcpb.ErrorType_SAFEPOINT_ROLLBACK) + } + + // Another service group with no service safe point + { + reqGc := &gcpb.GetMinServiceSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupTxnKV, + } + respGc, err := cli.GetMinServiceSafePointByServiceGroup(s.ctx, reqGc) + c.Assert(err, IsNil) + c.Assert(respGc.SafePoint, Equals, uint64(0)) + c.Assert(respGc.Revision, Equals, int64(-1)) + + reqUpdate := &gcpb.UpdateGCSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupTxnKV, + SafePoint: 100, + Revision: -1, + } + respUpdate, err := cli.UpdateGCSafePointByServiceGroup(s.ctx, reqUpdate) + c.Assert(err, IsNil) + c.Assert(respUpdate.Succeeded, IsTrue) + c.Assert(respUpdate.GetNewSafePoint(), Equals, uint64(100)) + } + + // Get all service group GC safe points + { + req := &gcpb.GetAllServiceGroupGCSafePointsRequest{ + Header: newRequestHeader(clusterID), + } + resp, err := cli.GetAllServiceGroupGCSafePoints(s.ctx, req) + c.Assert(err, IsNil) + expected := []*gcpb.ServiceGroupSafePoint{ + {ServiceGroupId: serviceGroupRawKV, SafePoint: 50}, + {ServiceGroupId: serviceGroupTxnKV, SafePoint: 100}, + } + c.Assert(resp.GetSafePoints(), DeepEquals, expected) + } +} diff --git a/tools/pd-tso-bench/go.sum b/tools/pd-tso-bench/go.sum index 1e0eaf8b258..4a6f5f8c38d 100644 --- a/tools/pd-tso-bench/go.sum +++ b/tools/pd-tso-bench/go.sum @@ -108,8 +108,8 @@ github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTm github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZFh4N3vQ5HEtld3S+Y+StULhWVvumU0= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod 
h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= -github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748 h1:i4MBe1zGq9/r3BH6rTRunizi4T59fpNk8hvBCrB5UAY= -github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20220506032820-55094d91343e h1:iquj/SVNullS8+llCooL3Pk2DWQPW/HDDpF1EHwsnq0= +github.com/pingcap/kvproto v0.0.0-20220506032820-55094d91343e/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee h1:VO2t6IBpfvW34TdtD/G10VvnGqjLic1jzOuHjUb5VqM= github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= From 3b196871930f474cd26ede830d3df50b503e5689 Mon Sep 17 00:00:00 2001 From: pingyu Date: Tue, 10 May 2022 10:06:16 +0800 Subject: [PATCH 16/18] fix CI error Signed-off-by: pingyu --- server/gc_service.go | 10 +++++----- server/storage/kv/etcd_kv.go | 6 +++--- server/storage/kv/kv.go | 4 +++- server/storage/kv/levedb_kv.go | 3 ++- server/storage/kv/mem_kv.go | 2 +- tests/cluster.go | 1 + 6 files changed, 15 insertions(+), 11 deletions(-) diff --git a/server/gc_service.go b/server/gc_service.go index 22b3c173c87..fbc9e866c51 100644 --- a/server/gc_service.go +++ b/server/gc_service.go @@ -91,7 +91,7 @@ func (s *GcServer) GetAllServiceGroups(ctx context.Context, request *gcpb.GetAll // getServiceRevisionByServiceGroup return etcd ModRevision of given service group. // It's used to detect new service safe point between `GetMinServiceSafePointByServiceGroup` & `UpdateGCSafePointByServiceGroup`. // Return -1 if the service group is not existed. -func (s *GcServer) getServiceRevisionByServiceGroup(ctx context.Context, serviceGroupID string) (int64, error) { +func (s *GcServer) getServiceRevisionByServiceGroup(serviceGroupID string) (int64, error) { servicePath := endpoint.GCServiceSafePointPrefixPathByServiceGroup(serviceGroupID) _, revision, err := s.storage.LoadRevision(servicePath) return revision, err @@ -99,7 +99,7 @@ func (s *GcServer) getServiceRevisionByServiceGroup(ctx context.Context, service // touchServiceRevisionByServiceGroup advance revision service group path. // It's used when new service safe point is saved. -func (s *GcServer) touchServiceRevisionByServiceGroup(ctx context.Context, serviceGroupID string) error { +func (s *GcServer) touchServiceRevisionByServiceGroup(serviceGroupID string) error { servicePath := endpoint.GCServiceSafePointPrefixPathByServiceGroup(serviceGroupID) return s.storage.Save(servicePath, "") } @@ -131,7 +131,7 @@ func (s *GcServer) GetMinServiceSafePointByServiceGroup(ctx context.Context, req returnSafePoint = min.SafePoint } - currentRevision, err := s.getServiceRevisionByServiceGroup(ctx, serviceGroupID) + currentRevision, err := s.getServiceRevisionByServiceGroup(serviceGroupID) if err != nil { return nil, err } @@ -157,7 +157,7 @@ func (s *GcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, request serviceGroupID := string(request.ServiceGroupId) // check if revision changed since last min calculation. 
- currentRevision, err := s.getServiceRevisionByServiceGroup(ctx, serviceGroupID) + currentRevision, err := s.getServiceRevisionByServiceGroup(serviceGroupID) if err != nil { return nil, err } @@ -287,7 +287,7 @@ func (s *GcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, req if sspOld == nil { // Touch service revision to advance revision, for indicating that a new service safe point is added. // Should be invoked before `SaveServiceSafePointByServiceGroup`, to avoid touch fail after new service safe point is saved. - if err := s.touchServiceRevisionByServiceGroup(ctx, serviceGroupID); err != nil { + if err := s.touchServiceRevisionByServiceGroup(serviceGroupID); err != nil { return nil, err } } diff --git a/server/storage/kv/etcd_kv.go b/server/storage/kv/etcd_kv.go index 00ff55c8763..1b2eda439af 100644 --- a/server/storage/kv/etcd_kv.go +++ b/server/storage/kv/etcd_kv.go @@ -57,12 +57,12 @@ func (kv *etcdKVBase) LoadRevision(key string) (string, int64, error) { resp, err := etcdutil.EtcdKVGet(kv.client, key) if err != nil { - return "", REVISION_UNAVAILABLE, err + return "", RevisionUnavailable, err } if n := len(resp.Kvs); n == 0 { - return "", REVISION_UNAVAILABLE, nil + return "", RevisionUnavailable, nil } else if n > 1 { - return "", REVISION_UNAVAILABLE, errs.ErrEtcdKVGetResponse.GenWithStackByArgs(resp.Kvs) + return "", RevisionUnavailable, errs.ErrEtcdKVGetResponse.GenWithStackByArgs(resp.Kvs) } return string(resp.Kvs[0].Value), resp.Kvs[0].ModRevision, nil } diff --git a/server/storage/kv/kv.go b/server/storage/kv/kv.go index 7eefa7f640a..60ca030596b 100644 --- a/server/storage/kv/kv.go +++ b/server/storage/kv/kv.go @@ -14,7 +14,9 @@ package kv -const REVISION_UNAVAILABLE = -1 +// RevisionUnavailable is the value of an unavailable revision, +// returned when the key does not exist (etcd_kv) or revisions are not supported (mem_kv & leveldb_kv). +const RevisionUnavailable = -1 // Base is an abstract interface for load/save pd cluster data. type Base interface { diff --git a/server/storage/kv/levedb_kv.go b/server/storage/kv/levedb_kv.go index 89d59fe1046..38c086e71ae 100644 --- a/server/storage/kv/levedb_kv.go +++ b/server/storage/kv/levedb_kv.go @@ -49,9 +49,10 @@ func (kv *LevelDBKV) Load(key string) (string, error) { return string(v), err } +// LoadRevision gets a value along with revision. The revision is unavailable for `LevelDBKV`. func (kv *LevelDBKV) LoadRevision(key string) (string, int64, error) { value, err := kv.Load(key) - return value, REVISION_UNAVAILABLE, err + return value, RevisionUnavailable, err } // LoadRange gets a range of value for a given key range. diff --git a/server/storage/kv/mem_kv.go b/server/storage/kv/mem_kv.go index 05400451ff8..826db5819b1 100644 --- a/server/storage/kv/mem_kv.go +++ b/server/storage/kv/mem_kv.go @@ -53,7 +53,7 @@ func (kv *memoryKV) Load(key string) (string, error) { func (kv *memoryKV) LoadRevision(key string) (string, int64, error) { value, err := kv.Load(key) - return value, REVISION_UNAVAILABLE, err + return value, RevisionUnavailable, err } func (kv *memoryKV) LoadRange(key, endKey string, limit int) ([]string, []string, error) { diff --git a/tests/cluster.go b/tests/cluster.go index 1aa9471ea4b..326eec7b2a6 100644 --- a/tests/cluster.go +++ b/tests/cluster.go @@ -360,6 +360,7 @@ func (s *TestServer) GetStoreRegions(storeID uint64) []*core.RegionInfo { return s.server.GetRaftCluster().GetStoreRegions(storeID) } +// GetGCService returns the gc service.
func (s *TestServer) GetGCService() *server.GcServer { s.RLock() defer s.RUnlock() From 230c3453ad398e0eb94c121f095e5562ad7b4b2f Mon Sep 17 00:00:00 2001 From: pingyu Date: Tue, 10 May 2022 14:22:33 +0800 Subject: [PATCH 17/18] improve integration test & other polish Signed-off-by: pingyu --- client/client.go | 4 +- server/gc_service.go | 41 ++++--- server/server.go | 4 +- server/storage/kv/kv_test.go | 12 +- server/storage/storage_gc_test.go | 2 +- tests/server/gc/gc_test.go | 176 +++++++++++++++++++++++++++++- 6 files changed, 208 insertions(+), 31 deletions(-) diff --git a/client/client.go b/client/client.go index 1d6dce01171..a781050565c 100644 --- a/client/client.go +++ b/client/client.go @@ -134,7 +134,7 @@ type Client interface { // GetGCAllServiceGroups returns a list containing all service groups that has safe point in pd GetGCAllServiceGroups(ctx context.Context) ([]string, error) - // GetGCMinServiceSafePointByServiceGroup return the minimum of all service safe point of the given group + // GetGCMinServiceSafePointByServiceGroup returns the minimum of all service safe points of the given group // It also returns the current revision of the pd storage, with in which the min is valid // If none is found, it will return 0 as min GetGCMinServiceSafePointByServiceGroup(ctx context.Context, serviceGroupID string) (safePoint uint64, revision int64, err error) @@ -1943,7 +1943,6 @@ func (c *client) GetGCAllServiceGroups(ctx context.Context) ([]string, error) { return nil, errors.WithStack(err) } - // have to return a slice of string returnSlice := make([]string, 0, len(resp.ServiceGroupId)) for _, serviceGroupID := range resp.ServiceGroupId { returnSlice = append(returnSlice, string(serviceGroupID)) @@ -1999,7 +1998,6 @@ func (c *client) UpdateGCSafePointByServiceGroup(ctx context.Context, serviceGro c.ScheduleCheckLeader() return false, 0, errors.WithStack(err) } - // if requested safepoint is the new safepoint, then update succeeded return resp.Succeeded, resp.NewSafePoint, nil } diff --git a/server/gc_service.go b/server/gc_service.go index fbc9e866c51..6b6d50191f7 100644 --- a/server/gc_service.go +++ b/server/gc_service.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "math" + "time" "github.com/pingcap/kvproto/pkg/gcpb" "github.com/pingcap/log" @@ -78,8 +79,8 @@ func (s *GcServer) GetAllServiceGroups(ctx context.Context, request *gcpb.GetAll } serviceGroupIDs := make([][]byte, 0, len(serviceGroupList)) - for _, sg := range serviceGroupList { - serviceGroupIDs = append(serviceGroupIDs, []byte(sg)) + for _, sgid := range serviceGroupList { + serviceGroupIDs = append(serviceGroupIDs, []byte(sgid)) } return &gcpb.GetAllServiceGroupsResponse{ @@ -90,25 +91,35 @@ func (s *GcServer) GetAllServiceGroups(ctx context.Context, request *gcpb.GetAll // getServiceRevisionByServiceGroup return etcd ModRevision of given service group. // It's used to detect new service safe point between `GetMinServiceSafePointByServiceGroup` & `UpdateGCSafePointByServiceGroup`. -// Return -1 if the service group is not existed. +// Return `kv.RevisionUnavailable` if the service group does not exist. func (s *GcServer) getServiceRevisionByServiceGroup(serviceGroupID string) (int64, error) { servicePath := endpoint.GCServiceSafePointPrefixPathByServiceGroup(serviceGroupID) _, revision, err := s.storage.LoadRevision(servicePath) return revision, err } -// touchServiceRevisionByServiceGroup advance revision service group path. +// touchServiceRevisionByServiceGroup advances revision of service group path.
// It's used when new service safe point is saved. func (s *GcServer) touchServiceRevisionByServiceGroup(serviceGroupID string) error { servicePath := endpoint.GCServiceSafePointPrefixPathByServiceGroup(serviceGroupID) return s.storage.Save(servicePath, "") } +func (s *GcServer) getNow() (time.Time, error) { + nowTSO, err := s.tsoAllocatorManager.HandleTSORequest(tso.GlobalDCLocation, 1) + if err != nil { + return time.Time{}, err + } + now, _ := tsoutil.ParseTimestamp(nowTSO) + return now, err +} + // GetMinServiceSafePointByServiceGroup returns given service group's min service safe point. func (s *GcServer) GetMinServiceSafePointByServiceGroup(ctx context.Context, request *gcpb.GetMinServiceSafePointByServiceGroupRequest) (*gcpb.GetMinServiceSafePointByServiceGroupResponse, error) { - // Lock to ensure that there is no other change between `min` and `currentRevison`. - s.serviceGroupSafePointLock.Lock() - defer s.serviceGroupSafePointLock.Unlock() + // Lock to ensure that there is no other change between `min` and `currentRevision`. + // Also note that `storage.LoadMinServiceSafePointByServiceGroup` is not thread-safe. + s.gcServiceGroupLock.Lock() + defer s.gcServiceGroupLock.Unlock() rc := s.GetRaftCluster() if rc == nil { @@ -117,11 +128,12 @@ func (s *GcServer) GetMinServiceSafePointByServiceGroup(ctx context.Context, req var storage endpoint.GCSafePointStorage = s.storage serviceGroupID := string(request.ServiceGroupId) - nowTSO, err := s.tsoAllocatorManager.HandleTSORequest(tso.GlobalDCLocation, 1) + + now, err := s.getNow() if err != nil { return nil, err } - now, _ := tsoutil.ParseTimestamp(nowTSO) + min, err := storage.LoadMinServiceSafePointByServiceGroup(serviceGroupID, now) if err != nil { return nil, err @@ -145,8 +157,8 @@ func (s *GcServer) GetMinServiceSafePointByServiceGroup(ctx context.Context, req // UpdateGCSafePointByServiceGroup used by gc_worker to update their gc safe points. func (s *GcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, request *gcpb.UpdateGCSafePointByServiceGroupRequest) (*gcpb.UpdateGCSafePointByServiceGroupResponse, error) { - s.serviceGroupSafePointLock.Lock() - defer s.serviceGroupSafePointLock.Unlock() + s.gcServiceGroupLock.Lock() + defer s.gcServiceGroupLock.Unlock() rc := s.GetRaftCluster() if rc == nil { @@ -214,8 +226,8 @@ func (s *GcServer) UpdateGCSafePointByServiceGroup(ctx context.Context, request // UpdateServiceSafePointByServiceGroup for services like CDC/BR/Lightning to update gc safe points in PD. 
func (s *GcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, request *gcpb.UpdateServiceSafePointByServiceGroupRequest) (*gcpb.UpdateServiceSafePointByServiceGroupResponse, error) { - s.serviceGroupSafePointLock.Lock() - defer s.serviceGroupSafePointLock.Unlock() + s.gcServiceGroupLock.Lock() + defer s.gcServiceGroupLock.Unlock() rc := s.GetRaftCluster() if rc == nil { @@ -236,11 +248,10 @@ func (s *GcServer) UpdateServiceSafePointByServiceGroup(ctx context.Context, req }, nil } - nowTSO, err := s.tsoAllocatorManager.HandleTSORequest(tso.GlobalDCLocation, 1) + now, err := s.getNow() if err != nil { return nil, err } - now, _ := tsoutil.ParseTimestamp(nowTSO) sspOld, err := storage.LoadServiceSafePointByServiceGroup(serviceGroupID, serviceID) if err != nil { diff --git a/server/server.go b/server/server.go index 27ccd8b2d7b..ec2b5dc7539 100644 --- a/server/server.go +++ b/server/server.go @@ -147,8 +147,8 @@ type Server struct { // serviceSafePointLock is a lock for UpdateServiceGCSafePoint serviceSafePointLock syncutil.Mutex - // Lock for UpdateServiceSafePointByServiceGroup - serviceGroupSafePointLock syncutil.Mutex + // Lock for GC service group interfaces + gcServiceGroupLock syncutil.Mutex // hot region history info storage hotRegionStorage *storage.HotRegionStorage diff --git a/server/storage/kv/kv_test.go b/server/storage/kv/kv_test.go index 1360972a5ae..a3657732162 100644 --- a/server/storage/kv/kv_test.go +++ b/server/storage/kv/kv_test.go @@ -74,13 +74,13 @@ func (s *testKVSuite) TestMemKV(c *C) { } func (s *testKVSuite) testReadWrite(c *C, kv Base, isEtcd bool) { - rev := int64(-1) + Rev := int64(-1) nextRevision := func() { if isEtcd { - if rev == -1 { - rev = 1 + if Rev == -1 { + Rev = 1 } - rev += 1 + Rev += 1 } } @@ -104,7 +104,7 @@ func (s *testKVSuite) testReadWrite(c *C, kv Base, isEtcd bool) { v, revision, err = kv.LoadRevision("key") c.Assert(err, IsNil) c.Assert(v, Equals, "value") - c.Assert(revision, Equals, rev) + c.Assert(revision, Equals, Rev) err = kv.Save("key", "value1") c.Assert(err, IsNil) @@ -112,7 +112,7 @@ func (s *testKVSuite) testReadWrite(c *C, kv Base, isEtcd bool) { v, revision, err = kv.LoadRevision("key") c.Assert(err, IsNil) c.Assert(v, Equals, "value1") - c.Assert(revision, Equals, rev) + c.Assert(revision, Equals, Rev) err = kv.Remove("key") c.Assert(err, IsNil) diff --git a/server/storage/storage_gc_test.go b/server/storage/storage_gc_test.go index b624fc5d508..916bbb387e7 100644 --- a/server/storage/storage_gc_test.go +++ b/server/storage/storage_gc_test.go @@ -86,7 +86,7 @@ func (s *testStorageFopGCSuite) TestLoadAllServiceGroup(c *C) { storage := NewStorageWithMemoryBackend() serviceGroups, err := storage.LoadAllServiceGroups() c.Assert(err, IsNil) - c.Assert(serviceGroups, DeepEquals, []string{"default_rawkv"}) + c.Assert(serviceGroups, DeepEquals, []string{endpoint.ServiceGroupRawKVDefault}) } func (s *testStorageFopGCSuite) TestLoadServiceSafePointByServiceGroup(c *C) { diff --git a/tests/server/gc/gc_test.go b/tests/server/gc/gc_test.go index c532e5082ac..99d9665b748 100644 --- a/tests/server/gc/gc_test.go +++ b/tests/server/gc/gc_test.go @@ -16,13 +16,16 @@ package gc_test import ( "context" + "fmt" "math" + "sync" "testing" . 
"github.com/pingcap/check" "github.com/pingcap/kvproto/pkg/gcpb" "github.com/tikv/pd/pkg/testutil" "github.com/tikv/pd/server" + "github.com/tikv/pd/server/storage/endpoint" "github.com/tikv/pd/tests" "go.uber.org/goleak" ) @@ -76,11 +79,11 @@ func (s *testGCSuite) mustNewGCService(c *C) (gcSvc *server.GcServer, cli gcpb.G return } -func (s *testGCSuite) TestXxx(c *C) { +func (s *testGCSuite) TestGCService(c *C) { _, cli, cluster, clusterID := s.mustNewGCService(c) defer cluster.Destroy() - serviceGroupRawKV := []byte("default_rawkv") + serviceGroupRawKV := []byte(endpoint.ServiceGroupRawKVDefault) serviceGroupTxnKV := []byte("default_txnkv") serviceID1 := []byte("svc1") serviceID2 := []byte("svc2") @@ -121,6 +124,7 @@ func (s *testGCSuite) TestXxx(c *C) { c.Assert(resp.GetHeader().GetError().GetType(), Equals, gcpb.ErrorType_SAFEPOINT_ROLLBACK) c.Assert(resp.GetSucceeded(), IsFalse) } + // now: svc1: 100 // Update GC safe point with revision mismatch { @@ -131,7 +135,7 @@ func (s *testGCSuite) TestXxx(c *C) { respGc, err := cli.GetMinServiceSafePointByServiceGroup(s.ctx, reqGc) c.Assert(err, IsNil) c.Assert(respGc.SafePoint, Equals, uint64(100)) - // c.Assert(respGc.Revision, Equals, int64(12)) + // c.Assert(respGc.Revision, Equals, ?): Revision value is not stable. Don't check it. reqSvc := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ Header: newRequestHeader(clusterID), @@ -162,6 +166,7 @@ func (s *testGCSuite) TestXxx(c *C) { c.Assert(respUpdate.Succeeded, IsFalse) c.Assert(respUpdate.GetHeader().GetError().GetType(), Equals, gcpb.ErrorType_REVISION_MISMATCH) } + // now: svc1: 100, svc2: 50 // Retry update GC safe point { @@ -172,7 +177,6 @@ func (s *testGCSuite) TestXxx(c *C) { respGc, err := cli.GetMinServiceSafePointByServiceGroup(s.ctx, reqGc) c.Assert(err, IsNil) c.Assert(respGc.SafePoint, Equals, uint64(50)) - // c.Assert(respGc.Revision, Equals, int64(12)) reqSvc := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ Header: newRequestHeader(clusterID), @@ -210,6 +214,48 @@ func (s *testGCSuite) TestXxx(c *C) { c.Assert(respUpdate.Succeeded, IsFalse) c.Assert(respUpdate.GetHeader().GetError().GetType(), Equals, gcpb.ErrorType_SAFEPOINT_ROLLBACK) } + // now: svc1: 100, svc2: 80, gc: 50 + + // Remove svc2 + { + reqSvc := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupRawKV, + ServiceId: serviceID2, + TTL: 0, + } + respSvc, err := cli.UpdateServiceSafePointByServiceGroup(s.ctx, reqSvc) + c.Assert(err, IsNil) + expected := &gcpb.UpdateServiceSafePointByServiceGroupResponse{ + Header: respSvc.GetHeader(), + Succeeded: true, + } + c.Assert(respSvc, DeepEquals, expected) + + reqGc := &gcpb.GetMinServiceSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupRawKV, + } + respGc, err := cli.GetMinServiceSafePointByServiceGroup(s.ctx, reqGc) + c.Assert(err, IsNil) + c.Assert(respGc.SafePoint, Equals, uint64(100)) + } + // now: svc1: 100, gc: 50 + + // Add svc2 with safe point roll back + { + reqSvc := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupRawKV, + ServiceId: serviceID2, + TTL: math.MaxInt64, + SafePoint: 49, + } + respSvc, err := cli.UpdateServiceSafePointByServiceGroup(s.ctx, reqSvc) + c.Assert(err, IsNil) + c.Assert(respSvc.Succeeded, IsFalse) + c.Assert(respSvc.GetHeader().GetError().GetType(), Equals, gcpb.ErrorType_SAFEPOINT_ROLLBACK) + } // Another service group with no service 
safe point { @@ -248,3 +294,125 @@ func (s *testGCSuite) TestXxx(c *C) { c.Assert(resp.GetSafePoints(), DeepEquals, expected) } } + +func (s *testGCSuite) TestConcurrency(c *C) { + count := 500 + concurrency := 10 + + svc, _, cluster, clusterID := s.mustNewGCService(c) + defer cluster.Destroy() + + serviceGroupID := []byte(endpoint.ServiceGroupRawKVDefault) + closeCh := make(chan struct{}) + + updateGcSafePoint := func(safepoint uint64, revision int64) { + reqUpdate := &gcpb.UpdateGCSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupID, + SafePoint: safepoint, + Revision: revision, + } + _, err := svc.UpdateGCSafePointByServiceGroup(s.ctx, reqUpdate) + c.Assert(err, IsNil) + + } + updateGcSafePoint(0, -1) + + gcWorkerThread := func() { + for { + reqMin := &gcpb.GetMinServiceSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupID, + } + respMin, err := svc.GetMinServiceSafePointByServiceGroup(s.ctx, reqMin) + c.Assert(err, IsNil) + + if respMin.SafePoint == 0 { + continue + } + + updateGcSafePoint(respMin.SafePoint, respMin.Revision) + + select { + case <-closeCh: + return + default: + } + } + } + + updateSvcSafePoint := func(svcName string, safepoint uint64) { + reqSvc := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupID, + ServiceId: []byte(svcName), + TTL: math.MaxInt64, + SafePoint: safepoint, + } + respSvc, err := svc.UpdateServiceSafePointByServiceGroup(s.ctx, reqSvc) + c.Assert(err, IsNil) + c.Assert(respSvc.Succeeded, IsTrue) + } + + svcThread := func(svcName string) { + for i := 1; i <= count; i++ { + updateSvcSafePoint(svcName, uint64(i*10)) + } + } + + tikvThread := func() { + for { + reqGc := &gcpb.GetAllServiceGroupGCSafePointsRequest{ + Header: newRequestHeader(clusterID), + } + respGc, err := svc.GetAllServiceGroupGCSafePoints(s.ctx, reqGc) + c.Assert(err, IsNil) + c.Assert(len(respGc.GetSafePoints()), Equals, 1) + + gcSafePoint := respGc.GetSafePoints()[0].SafePoint + + reqMin := &gcpb.GetMinServiceSafePointByServiceGroupRequest{ + Header: newRequestHeader(clusterID), + ServiceGroupId: serviceGroupID, + } + respMin, err := svc.GetMinServiceSafePointByServiceGroup(s.ctx, reqMin) + c.Assert(err, IsNil) + + c.Assert(gcSafePoint <= respMin.SafePoint, IsTrue) + + select { + case <-closeCh: + return + default: + } + } + } + + wgSvc := sync.WaitGroup{} + wgGc := sync.WaitGroup{} + + for i := 0; i < concurrency; i++ { + i := i + wgSvc.Add(1) + go func() { + defer wgSvc.Done() + svcThread(fmt.Sprintf("svc_%v", i)) + }() + } + + wgGc.Add(1) + go func() { + defer wgGc.Done() + gcWorkerThread() + }() + + wgGc.Add(1) + go func() { + defer wgGc.Done() + tikvThread() + }() + + wgSvc.Wait() + close(closeCh) + wgGc.Wait() +} From 2965b74dfbd1fd7a8ac9ddef94acb4c5040d5832 Mon Sep 17 00:00:00 2001 From: pingyu Date: Tue, 10 May 2022 16:02:54 +0800 Subject: [PATCH 18/18] polish GC integration test codes Signed-off-by: pingyu --- tests/server/gc/gc_test.go | 299 +++++++++++++++---------------------- 1 file changed, 122 insertions(+), 177 deletions(-) diff --git a/tests/server/gc/gc_test.go b/tests/server/gc/gc_test.go index 99d9665b748..40ab8133367 100644 --- a/tests/server/gc/gc_test.go +++ b/tests/server/gc/gc_test.go @@ -60,7 +60,7 @@ func (s *testGCSuite) TearDownSuite(c *C) { s.cancel() } -func (s *testGCSuite) mustNewGCService(c *C) (gcSvc *server.GcServer, cli gcpb.GCClient, cluster *tests.TestCluster, clusterID uint64) { 
+func (s *testGCSuite) mustNewGCService(c *C) (addr string, cluster *tests.TestCluster, clusterID uint64) { var err error cluster, err = tests.NewTestCluster(s.ctx, 1) c.Assert(err, IsNil) @@ -72,42 +72,91 @@ func (s *testGCSuite) mustNewGCService(c *C) (gcSvc *server.GcServer, cli gcpb.G c.Assert(leader.BootstrapCluster(), IsNil) clusterID = leader.GetClusterID() - gcSvc = leader.GetGCService() + addr = leader.GetAddr() + return +} - cli = testutil.MustNewGCClient(c, leader.GetAddr()) +type testClient struct { + cli gcpb.GCClient + clusterID uint64 + c *C + ctx context.Context +} - return +func (c *testClient) mustGetAllServiceGroups() [][]byte { + req := &gcpb.GetAllServiceGroupsRequest{ + Header: newRequestHeader(c.clusterID), + } + resp, err := c.cli.GetAllServiceGroups(c.ctx, req) + c.c.Assert(err, IsNil) + return resp.ServiceGroupId +} + +func (c *testClient) mustUpdateServiceSafePoint(serviceGroupID []byte, serviceID []byte, ttl int64, safepoint uint64) *gcpb.UpdateServiceSafePointByServiceGroupResponse { + req := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ + Header: newRequestHeader(c.clusterID), + ServiceGroupId: serviceGroupID, + ServiceId: serviceID, + TTL: ttl, + SafePoint: safepoint, + } + resp, err := c.cli.UpdateServiceSafePointByServiceGroup(c.ctx, req) + c.c.Assert(err, IsNil) + return resp +} + +func (c *testClient) mustGetMinServiceSafePoint(serviceGroupID []byte) (safepoint uint64, revision int64) { + req := &gcpb.GetMinServiceSafePointByServiceGroupRequest{ + Header: newRequestHeader(c.clusterID), + ServiceGroupId: serviceGroupID, + } + resp, err := c.cli.GetMinServiceSafePointByServiceGroup(c.ctx, req) + c.c.Assert(err, IsNil) + return resp.GetSafePoint(), resp.GetRevision() +} + +func (c *testClient) mustUpdateGCSafePoint(serviceGroupID []byte, safepoint uint64, revision int64) *gcpb.UpdateGCSafePointByServiceGroupResponse { + req := &gcpb.UpdateGCSafePointByServiceGroupRequest{ + Header: newRequestHeader(c.clusterID), + ServiceGroupId: serviceGroupID, + SafePoint: safepoint, + Revision: revision, + } + resp, err := c.cli.UpdateGCSafePointByServiceGroup(c.ctx, req) + c.c.Assert(err, IsNil) + return resp +} + +func (c *testClient) mustGetAllGCSafePoint() []*gcpb.ServiceGroupSafePoint { + req := &gcpb.GetAllServiceGroupGCSafePointsRequest{ + Header: newRequestHeader(c.clusterID), + } + resp, err := c.cli.GetAllServiceGroupGCSafePoints(c.ctx, req) + c.c.Assert(err, IsNil) + return resp.GetSafePoints() } func (s *testGCSuite) TestGCService(c *C) { - _, cli, cluster, clusterID := s.mustNewGCService(c) + addr, cluster, clusterID := s.mustNewGCService(c) defer cluster.Destroy() + client := testClient{ + cli: testutil.MustNewGCClient(c, addr), + clusterID: clusterID, + c: c, + ctx: s.ctx, + } + serviceGroupRawKV := []byte(endpoint.ServiceGroupRawKVDefault) serviceGroupTxnKV := []byte("default_txnkv") serviceID1 := []byte("svc1") serviceID2 := []byte("svc2") - { - req := &gcpb.GetAllServiceGroupsRequest{ - Header: newRequestHeader(clusterID), - } - resp, err := cli.GetAllServiceGroups(s.ctx, req) - c.Assert(err, IsNil) - c.Assert(resp.ServiceGroupId, DeepEquals, [][]byte{serviceGroupRawKV}) - } + c.Assert(client.mustGetAllServiceGroups(), DeepEquals, [][]byte{serviceGroupRawKV}) // Update service safe point { - req := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupRawKV, - ServiceId: serviceID1, - TTL: math.MaxInt64, - SafePoint: 100, - } - resp, err := 
cli.UpdateServiceSafePointByServiceGroup(s.ctx, req) - c.Assert(err, IsNil) + resp := client.mustUpdateServiceSafePoint(serviceGroupRawKV, serviceID1, math.MaxInt64, 100) expected := &gcpb.UpdateServiceSafePointByServiceGroupResponse{ Header: resp.GetHeader(), Succeeded: true, @@ -118,9 +167,7 @@ func (s *testGCSuite) TestGCService(c *C) { c.Assert(resp, DeepEquals, expected) // Safe point roll back - req.SafePoint = 99 - resp, err = cli.UpdateServiceSafePointByServiceGroup(s.ctx, req) - c.Assert(err, IsNil) + resp = client.mustUpdateServiceSafePoint(serviceGroupRawKV, serviceID1, math.MaxInt64, 99) c.Assert(resp.GetHeader().GetError().GetType(), Equals, gcpb.ErrorType_SAFEPOINT_ROLLBACK) c.Assert(resp.GetSucceeded(), IsFalse) } @@ -128,24 +175,12 @@ func (s *testGCSuite) TestGCService(c *C) { // Update GC safe point with revision mismatch { - reqGc := &gcpb.GetMinServiceSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupRawKV, - } - respGc, err := cli.GetMinServiceSafePointByServiceGroup(s.ctx, reqGc) - c.Assert(err, IsNil) - c.Assert(respGc.SafePoint, Equals, uint64(100)) - // c.Assert(respGc.Revision, Equals, ?): Revision value is not stable. Don't check it. - - reqSvc := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupRawKV, - ServiceId: serviceID2, - TTL: math.MaxInt64, - SafePoint: 50, - } - respSvc, err := cli.UpdateServiceSafePointByServiceGroup(s.ctx, reqSvc) - c.Assert(err, IsNil) + safepoint, revision := client.mustGetMinServiceSafePoint(serviceGroupRawKV) + c.Assert(safepoint, Equals, uint64(100)) + // c.Assert(revision, Equals, ?): Revision value is not stable. Don't check it. + + // Add a new service safe point + respSvc := client.mustUpdateServiceSafePoint(serviceGroupRawKV, serviceID2, math.MaxInt64, 50) expected := &gcpb.UpdateServiceSafePointByServiceGroupResponse{ Header: respSvc.GetHeader(), Succeeded: true, @@ -155,14 +190,8 @@ func (s *testGCSuite) TestGCService(c *C) { } c.Assert(respSvc, DeepEquals, expected) - reqUpdate := &gcpb.UpdateGCSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupRawKV, - SafePoint: 100, - Revision: respGc.Revision, - } - respUpdate, err := cli.UpdateGCSafePointByServiceGroup(s.ctx, reqUpdate) - c.Assert(err, IsNil) + // Revision mismatch + respUpdate := client.mustUpdateGCSafePoint(serviceGroupRawKV, 100, revision) c.Assert(respUpdate.Succeeded, IsFalse) c.Assert(respUpdate.GetHeader().GetError().GetType(), Equals, gcpb.ErrorType_REVISION_MISMATCH) } @@ -170,23 +199,10 @@ func (s *testGCSuite) TestGCService(c *C) { // Retry update GC safe point { - reqGc := &gcpb.GetMinServiceSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupRawKV, - } - respGc, err := cli.GetMinServiceSafePointByServiceGroup(s.ctx, reqGc) - c.Assert(err, IsNil) - c.Assert(respGc.SafePoint, Equals, uint64(50)) - - reqSvc := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupRawKV, - ServiceId: serviceID2, - TTL: math.MaxInt64, - SafePoint: 80, - } - respSvc, err := cli.UpdateServiceSafePointByServiceGroup(s.ctx, reqSvc) - c.Assert(err, IsNil) + safepoint, revision := client.mustGetMinServiceSafePoint(serviceGroupRawKV) + c.Assert(safepoint, Equals, uint64(50)) + + respSvc := client.mustUpdateServiceSafePoint(serviceGroupRawKV, serviceID2, math.MaxInt64, 80) expected := 
&gcpb.UpdateServiceSafePointByServiceGroupResponse{ Header: respSvc.GetHeader(), Succeeded: true, @@ -196,21 +212,12 @@ func (s *testGCSuite) TestGCService(c *C) { } c.Assert(respSvc, DeepEquals, expected) - reqUpdate := &gcpb.UpdateGCSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupRawKV, - SafePoint: 50, - Revision: respGc.Revision, - } - respUpdate, err := cli.UpdateGCSafePointByServiceGroup(s.ctx, reqUpdate) - c.Assert(err, IsNil) + respUpdate := client.mustUpdateGCSafePoint(serviceGroupRawKV, 50, revision) c.Assert(respUpdate.Succeeded, IsTrue) c.Assert(respUpdate.GetNewSafePoint(), Equals, uint64(50)) // GC safe point roll back - reqUpdate.SafePoint = 49 - respUpdate, err = cli.UpdateGCSafePointByServiceGroup(s.ctx, reqUpdate) - c.Assert(err, IsNil) + respUpdate = client.mustUpdateGCSafePoint(serviceGroupRawKV, 49, revision) c.Assert(respUpdate.Succeeded, IsFalse) c.Assert(respUpdate.GetHeader().GetError().GetType(), Equals, gcpb.ErrorType_SAFEPOINT_ROLLBACK) } @@ -218,80 +225,44 @@ func (s *testGCSuite) TestGCService(c *C) { // Remove svc2 { - reqSvc := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupRawKV, - ServiceId: serviceID2, - TTL: 0, - } - respSvc, err := cli.UpdateServiceSafePointByServiceGroup(s.ctx, reqSvc) - c.Assert(err, IsNil) + respSvc := client.mustUpdateServiceSafePoint(serviceGroupRawKV, serviceID2, 0, 0) expected := &gcpb.UpdateServiceSafePointByServiceGroupResponse{ Header: respSvc.GetHeader(), Succeeded: true, } c.Assert(respSvc, DeepEquals, expected) - reqGc := &gcpb.GetMinServiceSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupRawKV, - } - respGc, err := cli.GetMinServiceSafePointByServiceGroup(s.ctx, reqGc) - c.Assert(err, IsNil) - c.Assert(respGc.SafePoint, Equals, uint64(100)) + safepoint, _ := client.mustGetMinServiceSafePoint(serviceGroupRawKV) + c.Assert(safepoint, Equals, uint64(100)) } // now: svc1: 100, gc: 50 // Add svc2 with safe point roll back { - reqSvc := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupRawKV, - ServiceId: serviceID2, - TTL: math.MaxInt64, - SafePoint: 49, - } - respSvc, err := cli.UpdateServiceSafePointByServiceGroup(s.ctx, reqSvc) - c.Assert(err, IsNil) + respSvc := client.mustUpdateServiceSafePoint(serviceGroupRawKV, serviceID2, math.MaxInt64, 49) c.Assert(respSvc.Succeeded, IsFalse) c.Assert(respSvc.GetHeader().GetError().GetType(), Equals, gcpb.ErrorType_SAFEPOINT_ROLLBACK) } // Another service group with no service safe point { - reqGc := &gcpb.GetMinServiceSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupTxnKV, - } - respGc, err := cli.GetMinServiceSafePointByServiceGroup(s.ctx, reqGc) - c.Assert(err, IsNil) - c.Assert(respGc.SafePoint, Equals, uint64(0)) - c.Assert(respGc.Revision, Equals, int64(-1)) - - reqUpdate := &gcpb.UpdateGCSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupTxnKV, - SafePoint: 100, - Revision: -1, - } - respUpdate, err := cli.UpdateGCSafePointByServiceGroup(s.ctx, reqUpdate) - c.Assert(err, IsNil) + safepoint, revision := client.mustGetMinServiceSafePoint(serviceGroupTxnKV) + c.Assert(safepoint, Equals, uint64(0)) + c.Assert(revision, Equals, int64(-1)) + + respUpdate := client.mustUpdateGCSafePoint(serviceGroupTxnKV, 100, -1) 
c.Assert(respUpdate.Succeeded, IsTrue) c.Assert(respUpdate.GetNewSafePoint(), Equals, uint64(100)) } // Get all service group GC safe points { - req := &gcpb.GetAllServiceGroupGCSafePointsRequest{ - Header: newRequestHeader(clusterID), - } - resp, err := cli.GetAllServiceGroupGCSafePoints(s.ctx, req) - c.Assert(err, IsNil) + safepoints := client.mustGetAllGCSafePoint() expected := []*gcpb.ServiceGroupSafePoint{ {ServiceGroupId: serviceGroupRawKV, SafePoint: 50}, {ServiceGroupId: serviceGroupTxnKV, SafePoint: 100}, } - c.Assert(resp.GetSafePoints(), DeepEquals, expected) + c.Assert(safepoints, DeepEquals, expected) } } @@ -299,39 +270,35 @@ func (s *testGCSuite) TestConcurrency(c *C) { count := 500 concurrency := 10 - svc, _, cluster, clusterID := s.mustNewGCService(c) + addr, cluster, clusterID := s.mustNewGCService(c) defer cluster.Destroy() + newClient := func() testClient { + return testClient{ + cli: testutil.MustNewGCClient(c, addr), + clusterID: clusterID, + c: c, + ctx: s.ctx, + } + } + serviceGroupID := []byte(endpoint.ServiceGroupRawKVDefault) closeCh := make(chan struct{}) - updateGcSafePoint := func(safepoint uint64, revision int64) { - reqUpdate := &gcpb.UpdateGCSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupID, - SafePoint: safepoint, - Revision: revision, - } - _, err := svc.UpdateGCSafePointByServiceGroup(s.ctx, reqUpdate) - c.Assert(err, IsNil) - + { // Initialize GC safe point to make sure that tikvThread will get a valid safe point. + client := newClient() + client.mustUpdateGCSafePoint(serviceGroupID, 0, -1) } - updateGcSafePoint(0, -1) gcWorkerThread := func() { + client := newClient() for { - reqMin := &gcpb.GetMinServiceSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupID, - } - respMin, err := svc.GetMinServiceSafePointByServiceGroup(s.ctx, reqMin) - c.Assert(err, IsNil) - - if respMin.SafePoint == 0 { + safepoint, revision := client.mustGetMinServiceSafePoint(serviceGroupID) + if safepoint == 0 { continue } - updateGcSafePoint(respMin.SafePoint, respMin.Revision) + client.mustUpdateGCSafePoint(serviceGroupID, safepoint, revision) select { case <-closeCh: @@ -341,44 +308,22 @@ func (s *testGCSuite) TestConcurrency(c *C) { } } - updateSvcSafePoint := func(svcName string, safepoint uint64) { - reqSvc := &gcpb.UpdateServiceSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupID, - ServiceId: []byte(svcName), - TTL: math.MaxInt64, - SafePoint: safepoint, - } - respSvc, err := svc.UpdateServiceSafePointByServiceGroup(s.ctx, reqSvc) - c.Assert(err, IsNil) - c.Assert(respSvc.Succeeded, IsTrue) - } - svcThread := func(svcName string) { + client := newClient() for i := 1; i <= count; i++ { - updateSvcSafePoint(svcName, uint64(i*10)) + client.mustUpdateServiceSafePoint(serviceGroupID, []byte(svcName), math.MaxInt64, uint64(i*10)) } } tikvThread := func() { + client := newClient() for { - reqGc := &gcpb.GetAllServiceGroupGCSafePointsRequest{ - Header: newRequestHeader(clusterID), - } - respGc, err := svc.GetAllServiceGroupGCSafePoints(s.ctx, reqGc) - c.Assert(err, IsNil) - c.Assert(len(respGc.GetSafePoints()), Equals, 1) - - gcSafePoint := respGc.GetSafePoints()[0].SafePoint - - reqMin := &gcpb.GetMinServiceSafePointByServiceGroupRequest{ - Header: newRequestHeader(clusterID), - ServiceGroupId: serviceGroupID, - } - respMin, err := svc.GetMinServiceSafePointByServiceGroup(s.ctx, reqMin) - c.Assert(err, IsNil) + safepoints := 
client.mustGetAllGCSafePoint() + c.Assert(len(safepoints), Equals, 1) + gcSafePoint := safepoints[0].SafePoint - c.Assert(gcSafePoint <= respMin.SafePoint, IsTrue) + svcSafePoint, _ := client.mustGetMinServiceSafePoint(serviceGroupID) + c.Assert(gcSafePoint <= svcSafePoint, IsTrue) select { case <-closeCh: