Skip to content

Commit

Permalink
feat: db and collection name tags in traces (#402)
Browse files Browse the repository at this point in the history
  • Loading branch information
pboros committed Aug 9, 2022
1 parent 401dcea commit 8046e7e
Show file tree
Hide file tree
Showing 23 changed files with 475 additions and 350 deletions.
9 changes: 9 additions & 0 deletions api/server/v1/api.go
Expand Up @@ -27,3 +27,12 @@ type Request interface {
// Response is satisfied by any type that implements proto.Message.
type Response interface {
proto.Message
}

// RequestWithDb is implemented by requests that carry a database name.
type RequestWithDb interface {
// GetDb returns the database name of the request.
GetDb() string
}

// RequestWithDbAndCollection is implemented by requests that carry both a
// database name and a collection name.
type RequestWithDbAndCollection interface {
// GetDb returns the database name of the request.
GetDb() string
// GetCollection returns the collection name of the request.
GetCollection() string
}
28 changes: 0 additions & 28 deletions server/config/options.go
Expand Up @@ -34,7 +34,6 @@ type Config struct {
Search SearchConfig `yaml:"search" json:"search"`
Tracing TracingConfig `yaml:"tracing" json:"tracing"`
Profiling ProfilingConfig `yaml:"profiling" json:"profiling"`
Metrics MetricsConfig
FoundationDB FoundationDBConfig
Quota QuotaConfig
}
Expand Down Expand Up @@ -78,15 +77,6 @@ type ProfilingConfig struct {
EnableGoroutine bool `mapstructure:"enable_goroutine" yaml:"enable_goroutine" json:"enable_goroutine"`
}

// MetricsConfig holds the global metrics switch together with the
// per-group settings for gRPC, FDB and search metrics.
type MetricsConfig struct {
// Global switch
Enabled bool `mapstructure:"enabled" yaml:"enabled" json:"enabled"`
// Individual metric group configs
Grpc GrpcMetricsConfig
Fdb FdbMetricsConfig
Search SearchMetricsConfig
}

type GrpcMetricsConfig struct {
Enabled bool `mapstructure:"enabled" yaml:"enabled" json:"enabled"`
Counters bool `mapstructure:"counters" yaml:"counters" json:"counters"`
Expand Down Expand Up @@ -145,24 +135,6 @@ var DefaultConfig = Config{
EnableCPU: true,
EnableHeap: true,
},
Metrics: MetricsConfig{
Enabled: true,
Grpc: GrpcMetricsConfig{
Enabled: true,
Counters: true,
ResponseTime: true,
},
Fdb: FdbMetricsConfig{
Enabled: true,
Counters: true,
ResponseTime: true,
},
Search: SearchMetricsConfig{
Enabled: true,
Counters: true,
ResponseTime: true,
},
},
Quota: QuotaConfig{
Enabled: false,
RateLimit: 1000, // requests per second
Expand Down
2 changes: 1 addition & 1 deletion server/main.go
Expand Up @@ -53,7 +53,7 @@ func main() {
log.Info().Str("version", util.Version).Msgf("Starting server")

var kvStore kv.KeyValueStore
if config.DefaultConfig.Metrics.Fdb.Enabled {
if config.DefaultConfig.Tracing.Enabled {
kvStore, err = kv.NewKeyValueStoreWithMetrics(&config.DefaultConfig.FoundationDB)
} else {
kvStore, err = kv.NewKeyValueStore(&config.DefaultConfig.FoundationDB)
Expand Down
7 changes: 5 additions & 2 deletions server/metrics/fdb.go
Expand Up @@ -16,12 +16,14 @@ package metrics

import (
"context"

"github.com/uber-go/tally"
)

var (
FdbRequests tally.Scope
FdbErrorRequests tally.Scope
FdbRespTime tally.Scope
MeasuredFdbRequests = []string{
"Delete",
"DeleteRange",
Expand All @@ -43,15 +45,15 @@ var (
// getFdbReqTags builds the tag set for an FDB request metric: the request
// method name plus a tigris_tenant tag holding the "unknown" placeholder.
func getFdbReqTags(reqMethodName string) map[string]string {
return map[string]string{
"method": reqMethodName,
"tigris_tenant": DefaultReportedTigrisTenant,
"tigris_tenant": UnknownValue,
}
}

// getFdbReqSpecificErrorTags builds the tag set for an FDB error metric: the
// request method name, the error code, and a tigris_tenant tag holding the
// "unknown" placeholder.
func getFdbReqSpecificErrorTags(reqMethodName string, code string) map[string]string {
return map[string]string{
"method": reqMethodName,
"error_code": code,
"tigris_tenant": DefaultReportedTigrisTenant,
"tigris_tenant": UnknownValue,
}
}

Expand All @@ -66,4 +68,5 @@ func GetFdbSpecificErrorTags(ctx context.Context, reqMethodName string, code str
// InitializeFdbScopes creates the FDB metric sub-scopes. All of them derive
// from FdbMetrics, so FdbMetrics has to be assigned before this is called.
func InitializeFdbScopes() {
FdbRequests = FdbMetrics.SubScope("requests")
// The error and resptime scopes nest under the requests scope.
FdbErrorRequests = FdbRequests.SubScope("error")
FdbRespTime = FdbRequests.SubScope("resptime")
}
4 changes: 1 addition & 3 deletions server/metrics/fdb_test.go
Expand Up @@ -23,7 +23,7 @@ import (
)

func TestFdbMetrics(t *testing.T) {
config.DefaultConfig.Metrics.Fdb.Enabled = true
config.DefaultConfig.Tracing.Enabled = true
InitializeMetrics()

ctx := context.Background()
Expand All @@ -40,7 +40,6 @@ func TestFdbMetrics(t *testing.T) {
GetFdbSpecificErrorTags(ctx, "Insert", "3"),
}

config.DefaultConfig.Metrics.Fdb.Counters = true
t.Run("Test FDB counters", func(t *testing.T) {
for _, tags := range testNormalTags {
FdbRequests.Tagged(tags).Counter("ok").Inc(1)
Expand All @@ -51,7 +50,6 @@ func TestFdbMetrics(t *testing.T) {
}
})

config.DefaultConfig.Metrics.Fdb.ResponseTime = true
t.Run("Test FDB histograms", func(t *testing.T) {
testHistogramTags := GetFdbTags(ctx, "Insert")
defer FdbMetrics.Tagged(testHistogramTags).Histogram("histogram", tally.DefaultBuckets).Start().Stop()
Expand Down
23 changes: 11 additions & 12 deletions server/metrics/metrics.go
Expand Up @@ -29,9 +29,9 @@ import (
var (
root tally.Scope
Reporter promreporter.Reporter
server tally.Scope
// GRPC and HTTP related metric scopes
Requests tally.Scope
OkRequests tally.Scope
ErrorRequests tally.Scope
RequestsRespTime tally.Scope
// Fdb related metric scopes
Expand All @@ -41,11 +41,14 @@ var (
)

// GetGlobalTags returns the service, env and version tags.
// An empty util.Version is reported as "dev".
func GetGlobalTags() map[string]string {
return map[string]string{
res := map[string]string{
"service": util.Service,
"env": config.GetEnvironment(),
"version": util.Version,
}
if res["version"] = util.Version; res["version"] == "" {
res["version"] = "dev"
}
return res
}

func InitializeMetrics() io.Closer {
Expand All @@ -59,18 +62,14 @@ func InitializeMetrics() io.Closer {
// Panics with .
Separator: promreporter.DefaultSeparator,
}, 1*time.Second)
// Request level metrics (HTTP and GRPC)
// metric names: tigris_server
if config.DefaultConfig.Metrics.Grpc.Enabled {
if config.DefaultConfig.Tracing.Enabled {
// Request level metrics (HTTP and GRPC)
Requests = root.SubScope("requests")
InitializeRequestScopes()
}
// FDB level metrics
if config.DefaultConfig.Metrics.Fdb.Enabled {
// FDB level metrics
FdbMetrics = root.SubScope("fdb")
InitializeFdbScopes()
}
// Search level metrics
if config.DefaultConfig.Metrics.Search.Enabled {
// Search level metrics
SearchMetrics = root.SubScope("search")
InitializeSearchScopes()
}
Expand Down
26 changes: 15 additions & 11 deletions server/metrics/requests.go
Expand Up @@ -25,9 +25,9 @@ import (
)

// Well-known service and tenant names, plus the placeholder used for tag
// values that are not known.
const (
AdminServiceName = "tigrisdata.admin.v1.Admin"
SystemTigrisTenantName = "system"
DefaultReportedTigrisTenant = "unknown"
AdminServiceName = "tigrisdata.admin.v1.Admin"
SystemTigrisTenantName = "system"
UnknownValue = "unknown"
)

type RequestEndpointMetadata struct {
Expand All @@ -40,7 +40,7 @@ func getNamespaceName(ctx context.Context) string {
if namespace, _ := request.GetNamespace(ctx); namespace != "" {
return namespace
}
return DefaultReportedTigrisTenant
return UnknownValue
}

func newRequestEndpointMetadata(ctx context.Context, serviceName string, methodInfo grpc.MethodInfo) RequestEndpointMetadata {
Expand All @@ -66,13 +66,17 @@ func (r *RequestEndpointMetadata) GetTags() map[string]string {
"grpc_service": r.serviceName,
"tigris_tenant": r.namespaceName,
"grpc_service_type": r.GetServiceType(),
"db": UnknownValue,
"collection": UnknownValue,
}
} else {
return map[string]string{
"grpc_method": r.methodInfo.Name,
"grpc_service": r.serviceName,
"tigris_tenant": r.namespaceName,
"grpc_service_type": r.GetServiceType(),
"db": UnknownValue,
"collection": UnknownValue,
}
}
}
Expand All @@ -85,14 +89,17 @@ func (r *RequestEndpointMetadata) GetSpecificErrorTags(source string, code strin
"error_source": source,
"error_code": code,
"tigris_tenant": SystemTigrisTenantName,
"db": UnknownValue,
"collection": UnknownValue,
}
} else {
return map[string]string{
"grpc_method": r.methodInfo.Name,
"grpc_service": r.serviceName,
"error_source": source,
"error_code": code,
"tigris_tenant": DefaultReportedTigrisTenant,
"tigris_tenant": UnknownValue,
"db": UnknownValue,
}
}
}
Expand Down Expand Up @@ -123,15 +130,12 @@ func GetGrpcEndPointMetadataFromFullMethod(ctx context.Context, fullMethod strin
}

// InitializeRequestScopes creates the request-level metric scopes: the
// request counters, the error counters and the response times.
func InitializeRequestScopes() {
server = root.SubScope("server")
// metric names: tigris_server_requests
if config.DefaultConfig.Metrics.Grpc.Enabled && config.DefaultConfig.Metrics.Grpc.Counters {
Requests = server.SubScope("requests")
if config.DefaultConfig.Tracing.Enabled {
// NOTE(review): the search counterpart (SearchOkRequests) uses the
// sub-scope name "ok"; confirm that "requests" is intended here.
OkRequests = Requests.SubScope("requests")
// metric names: tigris_server_requests_errors
ErrorRequests = Requests.SubScope("error")
}
if config.DefaultConfig.Metrics.Grpc.Enabled && config.DefaultConfig.Metrics.Grpc.ResponseTime {
// metric names: tigris_server_requests_resptime
RequestsRespTime = server.SubScope("resptime")
RequestsRespTime = Requests.SubScope("resptime")
}
}
21 changes: 1 addition & 20 deletions server/metrics/requests_test.go
Expand Up @@ -26,9 +26,7 @@ import (

func TestGRPCMetrics(t *testing.T) {
InitializeMetrics()
config.DefaultConfig.Metrics.Grpc.Enabled = true
config.DefaultConfig.Metrics.Grpc.Counters = true
config.DefaultConfig.Metrics.Grpc.ResponseTime = true
config.DefaultConfig.Tracing.Enabled = true

svcName := "tigrisdata.v1.Tigris"
unaryMethodName := "TestUnaryMethod"
Expand Down Expand Up @@ -58,23 +56,6 @@ func TestGRPCMetrics(t *testing.T) {
assert.Equal(t, streamMetadata, streamingEndpointMetadata)
})

t.Run("Test GetPreinitializedTagsFromFullMethod", func(t *testing.T) {
unaryTags := unaryEndPointMetadata.GetTags()
assert.Equal(t, unaryTags, map[string]string{
"grpc_method": unaryMethodInfo.Name,
"grpc_service": "tigrisdata.v1.Tigris",
"grpc_service_type": "unary",
"tigris_tenant": DefaultReportedTigrisTenant,
})
streamTags := streamingEndpointMetadata.GetTags()
assert.Equal(t, streamTags, map[string]string{
"grpc_method": streamingMethodInfo.Name,
"grpc_service": "tigrisdata.v1.Tigris",
"grpc_service_type": "stream",
"tigris_tenant": DefaultReportedTigrisTenant,
})
})

t.Run("Test full method names", func(t *testing.T) {
assert.Equal(t, unaryEndPointMetadata.getFullMethod(), fmt.Sprintf("/%s/%s", svcName, unaryMethodName))
assert.Equal(t, streamingEndpointMetadata.getFullMethod(), fmt.Sprintf("/%s/%s", svcName, streamingMethodName))
Expand Down
7 changes: 6 additions & 1 deletion server/metrics/search.go
Expand Up @@ -16,17 +16,22 @@ package metrics

import (
"context"

"github.com/uber-go/tally"
)

// Metric scopes for search; assigned by InitializeSearchScopes.
var (
SearchRequests tally.Scope
SearchOkRequests tally.Scope
SearchErrorRequests tally.Scope
SearchRespTime tally.Scope
)

// InitializeSearchScopes creates the search metric sub-scopes. All of them
// derive from SearchMetrics, so SearchMetrics has to be assigned before this
// is called.
func InitializeSearchScopes() {
SearchRequests = SearchMetrics.SubScope("requests")
// The ok, error and resptime scopes nest under the requests scope.
SearchOkRequests = SearchRequests.SubScope("ok")
SearchErrorRequests = SearchRequests.SubScope("error")
SearchRespTime = SearchRequests.SubScope("resptime")
}

func GetSearchTags(ctx context.Context, reqMethodName string) map[string]string {
Expand All @@ -36,6 +41,6 @@ func GetSearchTags(ctx context.Context, reqMethodName string) map[string]string
// getSearchReqTags builds the tag set for a search request metric: the
// request method name plus a tigris_tenant tag holding the "unknown"
// placeholder.
func getSearchReqTags(reqMethodName string) map[string]string {
return map[string]string{
"method": reqMethodName,
"tigris_tenant": DefaultReportedTigrisTenant,
"tigris_tenant": UnknownValue,
}
}
7 changes: 3 additions & 4 deletions server/metrics/search_test.go
Expand Up @@ -16,13 +16,14 @@ package metrics

import (
"context"
"testing"

"github.com/tigrisdata/tigris/server/config"
"github.com/uber-go/tally"
"testing"
)

func TestSearchMetrics(t *testing.T) {
config.DefaultConfig.Metrics.Search.Enabled = true
config.DefaultConfig.Tracing.Enabled = true
InitializeMetrics()

ctx := context.Background()
Expand All @@ -36,15 +37,13 @@ func TestSearchMetrics(t *testing.T) {
GetSearchTags(ctx, "Search"),
}

config.DefaultConfig.Metrics.Search.Counters = true
t.Run("Test Search counters", func(t *testing.T) {
for _, tags := range testNormalTags {
SearchRequests.Tagged(tags).Counter("ok").Inc(1)
SearchErrorRequests.Tagged(tags).Counter("unknown").Inc(1)
}
})

config.DefaultConfig.Metrics.Search.ResponseTime = true
t.Run("Test Search histograms", func(t *testing.T) {
testHistogramTags := GetSearchTags(ctx, "IndexDocuments")
defer SearchMetrics.Tagged(testHistogramTags).Histogram("histogram", tally.DefaultBuckets).Start().Stop()
Expand Down

0 comments on commit 8046e7e

Please sign in to comment.