-
Notifications
You must be signed in to change notification settings - Fork 387
/
attribution.go
361 lines (338 loc) · 10.3 KB
/
attribution.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package satellitedb
import (
"context"
"database/sql"
"errors"
"time"
"github.com/zeebo/errs"
"storj.io/common/uuid"
"storj.io/storj/satellite/attribution"
"storj.io/storj/satellite/satellitedb/dbx"
)
const (
// I can see how keeping around the query to only get attribution for one partner might be good,
// but that means we need to maintain both queries. Maybe we should get rid of this one.
valueAttrQuery = `
-- A union of both the storage tally and bandwidth rollups.
-- Should be 1 row per project/bucket by partner within the timeframe specified
SELECT
o.partner_id as partner_id,
o.user_agent as user_agent,
o.project_id as project_id,
o.bucket_name as bucket_name,
SUM(o.total) as total_byte_hours,
SUM(o.remote) as remote_byte_hours,
SUM(o.inline) as inline_byte_hours,
SUM(o.segments) as segment_hours,
SUM(o.objects) as object_hours,
SUM(o.settled) as settled,
COALESCE(SUM(o.hours),0) as hours
FROM
(
-- SUM the storage and hours
-- Hours are used to calculate byte hours above
SELECT
bsti.partner_id as partner_id,
bsti.user_agent as user_agent,
bsto.project_id as project_id,
bsto.bucket_name as bucket_name,
SUM(bsto.total_bytes) as total,
SUM(bsto.remote) as remote,
SUM(bsto.inline) as inline,
SUM(bsto.total_segments_count) as segments,
SUM(bsto.object_count) as objects,
0 as settled,
count(1) as hours
FROM
(
-- Collapse entries by the latest record in the hour
-- If there are more than 1 records within the hour, only the latest will be considered
SELECT
va.partner_id,
va.user_agent,
date_trunc('hour', bst.interval_start) as hours,
bst.project_id,
bst.bucket_name,
MAX(bst.interval_start) as max_interval
FROM
bucket_storage_tallies bst
RIGHT JOIN value_attributions va ON (
bst.project_id = va.project_id
AND bst.bucket_name = va.bucket_name
)
WHERE
va.partner_id = ?
AND va.user_agent = ?
AND bst.interval_start >= ?
AND bst.interval_start < ?
GROUP BY
va.partner_id,
va.user_agent,
bst.project_id,
bst.bucket_name,
date_trunc('hour', bst.interval_start)
ORDER BY
max_interval DESC
) bsti
INNER JOIN bucket_storage_tallies bsto ON (
bsto.project_id = bsti.project_id
AND bsto.bucket_name = bsti.bucket_name
AND bsto.interval_start = bsti.max_interval
)
GROUP BY
bsti.partner_id,
bsti.user_agent,
bsto.project_id,
bsto.bucket_name
UNION
-- SUM the bandwidth for the timeframe specified grouping by the partner_id, user_agent, project_id, and bucket_name
SELECT
va.partner_id as partner_id,
va.user_agent as user_agent,
bbr.project_id as project_id,
bbr.bucket_name as bucket_name,
0 as total,
0 as remote,
0 as inline,
0 as segments,
0 as objects,
SUM(settled)::integer as settled,
NULL as hours
FROM
bucket_bandwidth_rollups bbr
INNER JOIN value_attributions va ON (
bbr.project_id = va.project_id
AND bbr.bucket_name = va.bucket_name
)
WHERE
va.partner_id = ?
AND va.user_agent = ?
AND bbr.interval_start >= ?
AND bbr.interval_start < ?
-- action 2 is GET
AND bbr.action = 2
GROUP BY
va.partner_id,
va.user_agent,
bbr.project_id,
bbr.bucket_name
) AS o
GROUP BY
o.partner_id,
o.user_agent,
o.project_id,
o.bucket_name;
`
allValueAttrQuery = `
-- A union of both the storage tally and bandwidth rollups.
-- Should be 1 row per project/bucket by partner within the timeframe specified
SELECT
o.partner_id as partner_id,
o.user_agent as user_agent,
o.project_id as project_id,
o.bucket_name as bucket_name,
SUM(o.total) as total_byte_hours,
SUM(o.remote) as remote_byte_hours,
SUM(o.inline) as inline_byte_hours,
SUM(o.segments) as segment_hours,
SUM(o.objects) as object_hours,
SUM(o.settled) as settled,
COALESCE(SUM(o.hours),0) as hours
FROM
(
-- SUM the storage and hours
-- Hours are used to calculate byte hours above
SELECT
bsti.partner_id as partner_id,
bsti.user_agent as user_agent,
bsto.project_id as project_id,
bsto.bucket_name as bucket_name,
SUM(bsto.total_bytes) as total,
SUM(bsto.remote) as remote,
SUM(bsto.inline) as inline,
SUM(bsto.total_segments_count) as segments,
SUM(bsto.object_count) as objects,
0 as settled,
count(1) as hours
FROM
(
-- Collapse entries by the latest record in the hour
-- If there are more than 1 records within the hour, only the latest will be considered
SELECT
va.partner_id,
va.user_agent,
date_trunc('hour', bst.interval_start) as hours,
bst.project_id,
bst.bucket_name,
MAX(bst.interval_start) as max_interval
FROM
bucket_storage_tallies bst
INNER JOIN value_attributions va ON (
bst.project_id = va.project_id
AND bst.bucket_name = va.bucket_name
)
WHERE
bst.interval_start >= $1
AND bst.interval_start < $2
GROUP BY
va.partner_id,
va.user_agent,
bst.project_id,
bst.bucket_name,
date_trunc('hour', bst.interval_start)
ORDER BY
max_interval DESC
) bsti
INNER JOIN bucket_storage_tallies bsto ON (
bsto.project_id = bsti.project_id
AND bsto.bucket_name = bsti.bucket_name
AND bsto.interval_start = bsti.max_interval
)
GROUP BY
bsti.partner_id,
bsti.user_agent,
bsto.project_id,
bsto.bucket_name
UNION
-- SUM the bandwidth for the timeframe specified grouping by the partner_id, user_agent, project_id, and bucket_name
SELECT
va.partner_id as partner_id,
va.user_agent as user_agent,
bbr.project_id as project_id,
bbr.bucket_name as bucket_name,
0 as total,
0 as remote,
0 as inline,
0 as segments,
0 as objects,
SUM(settled)::integer as settled,
null as hours
FROM
bucket_bandwidth_rollups bbr
INNER JOIN value_attributions va ON (
bbr.project_id = va.project_id
AND bbr.bucket_name = va.bucket_name
)
WHERE
bbr.interval_start >= $1
AND bbr.interval_start < $2
-- action 2 is GET
AND bbr.action = 2
GROUP BY
va.partner_id,
va.user_agent,
bbr.project_id,
bbr.bucket_name
) AS o
GROUP BY
o.partner_id,
o.user_agent,
o.project_id,
o.bucket_name;
`
)
type attributionDB struct {
db *satelliteDB
}
// Get reads the partner info.
func (keys *attributionDB) Get(ctx context.Context, projectID uuid.UUID, bucketName []byte) (info *attribution.Info, err error) {
defer mon.Task()(&ctx)(&err)
dbxInfo, err := keys.db.Get_ValueAttribution_By_ProjectId_And_BucketName(ctx,
dbx.ValueAttribution_ProjectId(projectID[:]),
dbx.ValueAttribution_BucketName(bucketName),
)
if errors.Is(err, sql.ErrNoRows) {
return nil, attribution.ErrBucketNotAttributed.New("%q", bucketName)
}
if err != nil {
return nil, Error.Wrap(err)
}
return attributionFromDBX(dbxInfo)
}
// Insert implements create partner info.
func (keys *attributionDB) Insert(ctx context.Context, info *attribution.Info) (_ *attribution.Info, err error) {
defer mon.Task()(&ctx)(&err)
err = keys.db.QueryRowContext(ctx, `
INSERT INTO value_attributions (project_id, bucket_name, partner_id, user_agent, last_updated)
VALUES ($1, $2, $3, $4, now())
ON CONFLICT (project_id, bucket_name) DO NOTHING
RETURNING last_updated
`, info.ProjectID[:], info.BucketName, info.PartnerID[:], info.UserAgent).Scan(&info.CreatedAt)
// TODO when sql.ErrNoRows is returned then CreatedAt is not set
if errors.Is(err, sql.ErrNoRows) {
return info, nil
}
if err != nil {
return nil, Error.Wrap(err)
}
return info, nil
}
// QueryAttribution queries partner bucket attribution data.
func (keys *attributionDB) QueryAttribution(ctx context.Context, partnerID uuid.UUID, userAgent []byte, start time.Time, end time.Time) (_ []*attribution.BucketUsage, err error) {
defer mon.Task()(&ctx)(&err)
rows, err := keys.db.DB.QueryContext(ctx, keys.db.Rebind(valueAttrQuery), partnerID[:], userAgent, start.UTC(), end.UTC(), partnerID[:], userAgent, start.UTC(), end.UTC())
if err != nil {
return nil, Error.Wrap(err)
}
defer func() { err = errs.Combine(err, rows.Close()) }()
results := []*attribution.BucketUsage{}
for rows.Next() {
r := &attribution.BucketUsage{}
var inline, remote float64
err := rows.Scan(&r.PartnerID, &r.UserAgent, &r.ProjectID, &r.BucketName, &r.ByteHours, &inline, &remote, &r.SegmentHours, &r.ObjectHours, &r.EgressData, &r.Hours)
if err != nil {
return results, Error.Wrap(err)
}
if r.ByteHours == 0 {
r.ByteHours = inline + remote
}
results = append(results, r)
}
return results, Error.Wrap(rows.Err())
}
// QueryAllAttribution queries all partner bucket attribution data.
func (keys *attributionDB) QueryAllAttribution(ctx context.Context, start time.Time, end time.Time) (_ []*attribution.BucketUsage, err error) {
defer mon.Task()(&ctx)(&err)
rows, err := keys.db.DB.QueryContext(ctx, keys.db.Rebind(allValueAttrQuery), start.UTC(), end.UTC())
if err != nil {
return nil, Error.Wrap(err)
}
defer func() { err = errs.Combine(err, rows.Close()) }()
results := []*attribution.BucketUsage{}
for rows.Next() {
r := &attribution.BucketUsage{}
var inline, remote float64
err := rows.Scan(&r.PartnerID, &r.UserAgent, &r.ProjectID, &r.BucketName, &r.ByteHours, &inline, &remote, &r.SegmentHours, &r.ObjectHours, &r.EgressData, &r.Hours)
if err != nil {
return results, Error.Wrap(err)
}
if r.ByteHours == 0 {
r.ByteHours = inline + remote
}
results = append(results, r)
}
return results, Error.Wrap(rows.Err())
}
func attributionFromDBX(info *dbx.ValueAttribution) (*attribution.Info, error) {
partnerID, err := uuid.FromBytes(info.PartnerId)
if err != nil {
return nil, Error.Wrap(err)
}
userAgent := info.UserAgent
if err != nil {
return nil, Error.Wrap(err)
}
projectID, err := uuid.FromBytes(info.ProjectId)
if err != nil {
return nil, Error.Wrap(err)
}
return &attribution.Info{
ProjectID: projectID,
BucketName: info.BucketName,
PartnerID: partnerID,
UserAgent: userAgent,
CreatedAt: info.LastUpdated,
}, nil
}