/
remotewrite.go
185 lines (161 loc) · 5.27 KB
/
remotewrite.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
package cortex
import (
"bytes"
"context"
"fmt"
"net/http"
"strings"
"github.com/cortexproject/cortex/pkg/cortexpb"
"github.com/rancher/opni/pkg/auth/cluster"
"github.com/rancher/opni/pkg/auth/session"
"github.com/rancher/opni/pkg/config/v1beta1"
"github.com/rancher/opni/pkg/metrics"
"github.com/rancher/opni/pkg/util"
"github.com/rancher/opni/plugins/metrics/apis/remotewrite"
metricsutil "github.com/rancher/opni/plugins/metrics/pkg/util"
"github.com/weaveworks/common/user"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/trace"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"google.golang.org/protobuf/types/known/emptypb"
"log/slog"
)
// RemoteWriteForwarder implements remotewrite.RemoteWriteServer. Its Push
// method forwards write requests to the Cortex distributor and its SyncRules
// method posts rule payloads to the Cortex ruler HTTP API.
//
// A forwarder must be set up with Initialize before use; until then Push and
// SyncRules return an Unavailable status error.
type RemoteWriteForwarder struct {
// Embedded generated server type from the remotewrite API package.
remotewrite.UnsafeRemoteWriteServer
// Embedded configuration; populated by Initialize.
RemoteWriteForwarderConfig
// interceptors maps a session attribute name to the interceptor that Push
// uses when the stream carries an attribute with that name.
interceptors map[string]RequestInterceptor
// Supplies the InitOnce and Initialized methods used by this type.
util.Initializer
}
// Compile-time check that *RemoteWriteForwarder satisfies
// remotewrite.RemoteWriteServer.
var _ remotewrite.RemoteWriteServer = (*RemoteWriteForwarder)(nil)
// RemoteWriteForwarderConfig holds the dependencies of a RemoteWriteForwarder.
// Initialize validates the struct with metricsutil.Validate and panics if
// validation fails; every field carries a `validate:"required"` tag.
type RemoteWriteForwarderConfig struct {
// CortexClientSet provides the distributor client used by Push and the HTTP
// client used by SyncRules.
CortexClientSet ClientSet `validate:"required"`
// Config is the gateway configuration; SyncRules reads
// Config.Cortex.Ruler.HTTPAddress from it.
Config *v1beta1.GatewayConfigSpec `validate:"required"`
// Logger is used by SyncRules to report failed rule syncs.
Logger *slog.Logger `validate:"required"`
}
// Initialize validates conf and installs it on the forwarder together with
// the built-in interceptor table. The setup function is handed to InitOnce
// (presumably so that it runs at most once — confirm against
// util.Initializer).
//
// It panics if conf fails validation by metricsutil.Validate.
func (f *RemoteWriteForwarder) Initialize(conf RemoteWriteForwarderConfig) {
	setup := func() {
		err := metricsutil.Validate.Struct(conf)
		if err != nil {
			panic(err)
		}

		f.RemoteWriteForwarderConfig = conf

		// Streams carrying a "local" session attribute are routed through a
		// federating interceptor keyed on the impersonation label.
		federating := NewFederatingInterceptor(InterceptorConfig{
			IdLabelName: metrics.LabelImpersonateAs,
		})
		f.interceptors = map[string]RequestInterceptor{
			"local": federating,
		}
	}
	f.InitOnce(setup)
}
// passthrough is the interceptor Push falls back to when none of the stream's
// session attributes matches an entry in the forwarder's interceptor table.
var passthrough = &passthroughInterceptor{}
// Push forwards a remote-write request from the stream's authorized cluster
// to the Cortex distributor.
//
// The cluster ID taken from ctx is injected as the Cortex org ID. If one of
// the stream's session attributes names a registered interceptor, the request
// is sent through that interceptor; otherwise the passthrough interceptor is
// used. After the push, the request's Timeseries slice is handed back to
// cortexpb.ReuseSlice, so callers must not use it afterwards.
//
// Metrics: the payload size is added to cIngestBytesByID, and every call
// increments cRemoteWriteRequests labelled with the final status code.
//
// Errors:
//   - Unavailable if the forwarder has not been initialized.
//   - Internal if the org ID cannot be injected into the gRPC context.
//   - Otherwise the interceptor's error, except that certain code-400 errors
//     are treated as success (see the deferred handler below).
func (f *RemoteWriteForwarder) Push(ctx context.Context, writeReq *cortexpb.WriteRequest) (_ *cortexpb.WriteResponse, pushErr error) {
	if !f.Initialized() {
		return nil, util.StatusError(codes.Unavailable)
	}

	clusterId := cluster.StreamAuthorizedID(ctx)

	// Pick the first interceptor registered under one of the session's
	// attribute names; default to passthrough.
	var interceptor RequestInterceptor = passthrough
	attributes := session.StreamAuthorizedAttributes(ctx)
	for _, attr := range attributes {
		if i, ok := f.interceptors[attr.Name()]; ok {
			interceptor = i
			break
		}
	}

	// Deferred calls run last-in-first-out, so this counter observes pushErr
	// after the soft-error handler further down has had a chance to clear it.
	defer func() {
		code := status.Code(pushErr)
		cRemoteWriteRequests.Add(ctx, 1,
			metric.WithAttributes(
				attribute.String("cluster_id", clusterId),
				attribute.Int("code", int(code)),
				attribute.String("code_text", code.String()),
			),
		)
	}()

	payloadSize := int64(writeReq.Size())
	cIngestBytesByID.Add(ctx, payloadSize,
		metric.WithAttributes(
			attribute.String("cluster_id", clusterId),
		),
	)

	ctx, span := otel.Tracer("plugin_metrics").Start(ctx, "remoteWriteForwarder.Push",
		trace.WithAttributes(attribute.String("clusterId", clusterId)))
	defer span.End()

	defer func() {
		if pushErr == nil {
			return
		}
		s, ok := status.FromError(pushErr)
		if !ok || s.Code() != http.StatusBadRequest {
			return
		}
		// As a special case, status code 400 may indicate a success.
		// Cortex handles a variety of cases where prometheus would normally
		// return an error, such as duplicate or out of order samples. Cortex
		// will return code 400 to prometheus, which prometheus will treat as
		// a non-retriable error. In this case, the remote write status
		// condition will be cleared as if the request succeeded.
		message := s.Message()
		if strings.Contains(message, "out of bounds") ||
			strings.Contains(message, "out of order sample") ||
			strings.Contains(message, "duplicate sample for timestamp") ||
			strings.Contains(message, "exemplars not ingested because series not already present") {
			// clear the soft error
			pushErr = nil
		}
	}()

	var err error
	ctx, err = user.InjectIntoGRPCRequest(user.InjectOrgID(ctx, clusterId))
	if err != nil {
		// Use status.Error rather than status.Errorf: the error text is data,
		// not a format string, and may legitimately contain '%'.
		return nil, status.Error(codes.Internal, err.Error())
	}

	wr, err := interceptor.Intercept(ctx, writeReq, f.CortexClientSet.Distributor().Push)

	cortexpb.ReuseSlice(writeReq.Timeseries)
	return wr, err
}
// SyncRules posts the rule payload of the stream's authorized cluster to the
// Cortex ruler HTTP API under the "synced" namespace.
//
// The cluster ID from ctx is injected into the request as the org ID, and
// every entry of payload.Headers is added as a request header. Any error
// returned is also logged through the configured logger together with the
// cluster ID.
//
// Errors:
//   - Unavailable if the forwarder has not been initialized.
//   - The underlying error if the request cannot be built, tagged with the
//     org ID, or sent.
//   - Internal if the ruler answers with a non-2xx status.
func (f *RemoteWriteForwarder) SyncRules(ctx context.Context, payload *remotewrite.Payload) (_ *emptypb.Empty, syncErr error) {
	if !f.Initialized() {
		return nil, util.StatusError(codes.Unavailable)
	}

	clusterId := cluster.StreamAuthorizedID(ctx)

	tracer := otel.Tracer("plugin_metrics")
	ctx, span := tracer.Start(ctx, "remoteWriteForwarder.SyncRules",
		trace.WithAttributes(attribute.String("clusterId", clusterId)))
	defer span.End()

	// Report whatever error this call ends up returning.
	defer func() {
		if syncErr == nil {
			return
		}
		logger := f.Logger.With(
			"err", syncErr,
			"clusterId", clusterId,
		)
		logger.Error("error syncing rules to cortex")
	}()

	// set the namespace to synced to differentiate from user rules
	const namespace = "synced"
	endpoint := fmt.Sprintf(
		"https://%s/api/v1/rules/%s",
		f.Config.Cortex.Ruler.HTTPAddress,
		namespace,
	)

	body := bytes.NewReader(payload.Contents)
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, body)
	if err != nil {
		return nil, err
	}

	err = user.InjectOrgIDIntoHTTPRequest(user.InjectOrgID(ctx, clusterId), httpReq)
	if err != nil {
		return nil, err
	}

	for name, value := range payload.Headers {
		httpReq.Header.Add(name, value)
	}

	httpResp, err := f.CortexClientSet.HTTP().Do(httpReq)
	if err != nil {
		return nil, err
	}
	defer httpResp.Body.Close()

	// Anything outside the 2xx range counts as a failed sync.
	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
		return nil, status.Errorf(codes.Internal, "failed to sync rules: %v", httpResp.Status)
	}
	return &emptypb.Empty{}, nil
}