Skip to content

Commit

Permalink
tikvclient: add metrics for gRPC connection transient failure (#12084) (
Browse files Browse the repository at this point in the history
  • Loading branch information
lonng authored and sre-bot committed Sep 9, 2019
1 parent f396ce1 commit ebf7d8f
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 2 deletions.
27 changes: 27 additions & 0 deletions metrics/gprc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package metrics

import "github.com/prometheus/client_golang/prometheus"

// Metrics for monitoring the gRPC service.
var (
	// GRPCConnTransientFailureCounter is a counter vector for gRPC connection
	// transient failures. Each observation is partitioned by two label values,
	// supplied in this order: LblAddress, then LblStore.
	GRPCConnTransientFailureCounter = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Help:      "Counter of gRPC connection transient failure",
			Name:      "connection_transient_failure_count",
			Subsystem: "grpc",
			Namespace: "tidb",
		},
		[]string{LblAddress, LblStore},
	)
)
1 change: 1 addition & 0 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,5 @@ func RegisterMetrics() {
prometheus.MustRegister(TotalQueryProcHistogram)
prometheus.MustRegister(TotalCopProcHistogram)
prometheus.MustRegister(TotalCopWaitHistogram)
prometheus.MustRegister(GRPCConnTransientFailureCounter)
}
2 changes: 2 additions & 0 deletions metrics/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,6 @@ const (
LblSQLType = "sql_type"
LblGeneral = "general"
LblInternal = "internal"
LblStore = "store"
LblAddress = "address"
)
2 changes: 1 addition & 1 deletion metrics/tikvclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ var (
Name: "request_seconds",
Help: "Bucketed histogram of sending request duration.",
Buckets: prometheus.ExponentialBuckets(0.0005, 2, 20), // 0.5ms ~ 524s
}, []string{LblType, "store"})
}, []string{LblType, LblStore})

TiKVCoprocessorHistogram = prometheus.NewHistogram(
prometheus.HistogramOpts{
Expand Down
8 changes: 7 additions & 1 deletion store/tikv/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
"github.com/pingcap/tidb/util/logutil"
"golang.org/x/net/context"
"google.golang.org/grpc"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/keepalive"
)
Expand Down Expand Up @@ -251,7 +252,12 @@ func (c *rpcClient) SendRequest(ctx context.Context, addr string, req *tikvrpc.R
if err != nil {
return nil, errors.Trace(err)
}
client := tikvpb.NewTikvClient(connArray.Get())

clientConn := connArray.Get()
if state := clientConn.GetState(); state == connectivity.TransientFailure {
metrics.GRPCConnTransientFailureCounter.WithLabelValues(addr, storeID).Inc()
}
client := tikvpb.NewTikvClient(clientConn)

if req.Type != tikvrpc.CmdCopStream {
ctx1, cancel := context.WithTimeout(ctx, timeout)
Expand Down

0 comments on commit ebf7d8f

Please sign in to comment.