Skip to content

Commit

Permalink
Add /v1/admin/asset-resize-errors endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
talal committed Mar 25, 2020
1 parent a027292 commit 7aae894
Show file tree
Hide file tree
Showing 7 changed files with 171 additions and 19 deletions.
37 changes: 37 additions & 0 deletions docs/api-spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ This document uses the terminology defined in the [README.md](../README.md#termi
* [GET /v1/operations/recently-succeeded](#get-v1operationsrecently-succeeded)
* [GET /v1/admin/resource-scrape-errors](#get-v1adminresource-scrape-errors)
* [GET /v1/admin/asset-scrape-errors](#get-v1adminasset-scrape-errors)
* [GET /v1/admin/asset-resize-errors](#get-v1adminasset-resize-errors)

## GET /v1/projects/:id

Expand Down Expand Up @@ -494,3 +495,39 @@ fields:
this asset belongs. `project_id` is only shown for non-domain resources.

For each asset, at most one error will be listed (the most recent one).

## GET /v1/admin/asset-resize-errors

Shows information about asset resize errors. This is intended to give operators
a view of resize errors for all assets across all resources.

Returns `200` on success and a JSON response body like this:

```json
{
"asset_resize_errors": [
{
"asset_id": "c991eb08-e14e-4559-94d6-c9c390c18776",
"asset_type": "nfs-shares",
"domain_id": "481b2af2-d816-4453-8743-a05382e7d1ce",
"finished": {
"at": 1557144789,
"error": "datacenter is on fire"
},
"new_size": 1025,
"old_size": 1024,
"project_id": "89b76fc7-78fa-454c-b23b-674bd7589390"
}
]
}
```

Most fields on the top level have the same meaning as for `GET
/v1/projects/:id/assets/:type/:id` (see above), except for the following
additional fields:

- `asset_id` identifies the asset concerned.
- `asset_type`, `project_id` and `domain_id` identify the resource to which
this asset belongs. `project_id` is only shown for non-domain resources.

For each asset, at most one error will be listed (the most recent one).
3 changes: 3 additions & 0 deletions internal/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ func (h *handler) BuildRouter() http.Handler {
router.Methods("GET").
Path(`/v1/admin/asset-scrape-errors`).
HandlerFunc(h.GetAssetScrapeErrors)
router.Methods("GET").
Path(`/v1/admin/asset-resize-errors`).
HandlerFunc(h.GetAssetResizeErrors)

return sre.Instrument(router)
}
Expand Down
2 changes: 1 addition & 1 deletion internal/api/assets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ func TestGetAsset(baseT *testing.T) {
},
{
"reason": "critical",
"state": "failed",
"state": "errored",
"old_size": 1024,
"new_size": 1025,
"created": assert.JSONObject{
Expand Down
94 changes: 84 additions & 10 deletions internal/api/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,20 @@ type ResourceScrapeError struct {
Checked Checked `json:"checked"`
}

// AssetScrapeError is how a resource's scrape error appears in API.
type AssetScrapeError struct {
AssetUUID string `json:"asset_id"`
ProjectUUID string `json:"project_id,omitempty"`
DomainUUID string `json:"domain_id"`
AssetType string `json:"asset_type"`
Checked Checked `json:"checked"`
// AssetError is how an asset's scrape or resize error appears in the API.
// The same type serves both error reports; the fields that belong to only one
// of them carry the omitempty tag so that they are left out of the other one.
type AssetError struct {
AssetUUID string `json:"asset_id"`
ProjectUUID string `json:"project_id,omitempty"`
DomainUUID string `json:"domain_id"`
AssetType string `json:"asset_type"`

// this field is only used in scrape errors
Checked *Checked `json:"checked,omitempty"`

// these fields are only used in resize errors
// NOTE(review): because of omitempty, a size of 0 is dropped from the JSON
// output as well; confirm that a resize from or to 0 cannot occur.
OldSize uint64 `json:"old_size,omitempty"`
NewSize uint64 `json:"new_size,omitempty"`
Finished *Checked `json:"finished,omitempty"`
}

///////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -102,7 +109,7 @@ func (h handler) GetAssetScrapeErrors(w http.ResponseWriter, r *http.Request) {
}

var result struct {
AssetScrapeErrors []AssetScrapeError `json:"asset_scrape_errors"`
AssetScrapeErrors []AssetError `json:"asset_scrape_errors"`
}

var dbResources []db.Resource
Expand Down Expand Up @@ -131,12 +138,12 @@ func (h handler) GetAssetScrapeErrors(w http.ResponseWriter, r *http.Request) {

for _, a := range dbAssets {
result.AssetScrapeErrors = append(result.AssetScrapeErrors,
AssetScrapeError{
AssetError{
AssetUUID: a.UUID,
ProjectUUID: projectID,
DomainUUID: res.DomainUUID,
AssetType: string(res.AssetType),
Checked: Checked{
Checked: &Checked{
AtUnix: a.CheckedAt.Unix(),
ErrorMessage: a.ScrapeErrorMessage,
},
Expand All @@ -146,3 +153,70 @@ func (h handler) GetAssetScrapeErrors(w http.ResponseWriter, r *http.Request) {

respondwith.JSON(w, http.StatusOK, result)
}

// GetAssetResizeErrors handles GET /v1/admin/asset-resize-errors.
//
// The request must carry a token that passes the "cluster:access" check;
// otherwise the handler returns without writing a result of its own. On
// success it responds with status 200 and a JSON document whose
// "asset_resize_errors" list contains, for each asset, at most one entry: the
// most recently finished operation with outcome db.OperationOutcomeErrored.
// Entries are grouped by resource (in resource ID order) and ordered by asset
// ID within a resource. Any database error is handed to
// respondwith.ErrorText and aborts the request.
func (h handler) GetAssetResizeErrors(w http.ResponseWriter, r *http.Request) {
	sre.IdentifyEndpoint(r, "/v1/admin/asset-resize-errors")
	_, token := h.CheckToken(w, r)
	if token == nil {
		return
	}
	if !token.Require(w, "cluster:access") {
		return
	}

	var result struct {
		AssetResizeErrors []AssetError `json:"asset_resize_errors"`
	}

	var dbResources []db.Resource
	_, err := h.DB.Select(&dbResources,
		`SELECT * FROM resources ORDER BY id`)
	if respondwith.ErrorText(w, err) {
		return
	}

	for _, res := range dbResources {
		//We are only interested in the outcome "errored", so filter in SQL.
		//The ordering puts the most recent operation of each asset first and
		//makes the response deterministic.
		var ops []db.FinishedOperation
		_, err := h.DB.Select(&ops, `
			SELECT o.* FROM finished_operations o
			  JOIN assets a ON a.id = o.asset_id
			 WHERE a.resource_id = $1 AND o.outcome = $2
			 ORDER BY o.asset_id, o.finished_at DESC
		`, res.ID, db.OperationOutcomeErrored)
		if respondwith.ErrorText(w, err) {
			return
		}
		if len(ops) == 0 {
			//nothing to report for this resource, so skip the UUID lookup
			continue
		}

		projectID := ""
		// res.ScopeUUID is either a domain- or project UUID.
		if res.ScopeUUID != res.DomainUUID {
			projectID = res.ScopeUUID
		}

		//find asset UUIDs
		assetUUIDs, err := h.getAssetUUIDMap(res)
		if respondwith.ErrorText(w, err) {
			return
		}

		for idx, o := range ops {
			//Rows of the same asset are adjacent and sorted newest-first, so
			//every row after the first one of an asset is an older error.
			if idx > 0 && ops[idx-1].AssetID == o.AssetID {
				continue
			}
			result.AssetResizeErrors = append(result.AssetResizeErrors,
				AssetError{
					AssetUUID:   assetUUIDs[o.AssetID],
					ProjectUUID: projectID,
					DomainUUID:  res.DomainUUID,
					AssetType:   string(res.AssetType),
					OldSize:     o.OldSize,
					NewSize:     o.NewSize,
					Finished: &Checked{
						AtUnix:       o.FinishedAt.Unix(),
						ErrorMessage: o.ErrorMessage,
					},
				})
		}
	}

	respondwith.JSON(w, http.StatusOK, result)
}
38 changes: 38 additions & 0 deletions internal/api/errors_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,41 @@ func TestGetAssetScrapeErrors(baseT *testing.T) {
}.Check(t.T, hh)
})
}

// TestGetAssetResizeErrors checks that GET /v1/admin/asset-resize-errors is
// refused when "cluster:access" is forbidden, and that it otherwise returns
// the expected list of resize errors.
func TestGetAssetResizeErrors(baseT *testing.T) {
	const endpoint = "/v1/admin/asset-resize-errors"
	t := test.T{T: baseT}

	withHandler(t, nil, func(_ *handler, router http.Handler, validator *MockValidator, _ []db.Resource, _ []db.Asset) {
		//without cluster access, the request must be rejected
		validator.Forbid("cluster:access")
		forbiddenRequest := assert.HTTPRequest{
			Method:       "GET",
			Path:         endpoint,
			ExpectStatus: http.StatusForbidden,
		}
		forbiddenRequest.Check(t.T, router)

		//with cluster access, the errored operation must be reported
		validator.Allow("cluster:access")
		expectedError := assert.JSONObject{
			"asset_id":   "fooasset1",
			"asset_type": "foo",
			"domain_id":  "domain1",
			"finished": assert.JSONObject{
				"at":    53,
				"error": "datacenter is on fire",
			},
			"new_size":   1025,
			"old_size":   1024,
			"project_id": "project1",
		}
		allowedRequest := assert.HTTPRequest{
			Method:       "GET",
			Path:         endpoint,
			ExpectStatus: http.StatusOK,
			ExpectBody: assert.JSONObject{
				"asset_resize_errors": []assert.JSONObject{expectedError},
			},
		}
		allowedRequest.Check(t.T, router)
	})
}
2 changes: 1 addition & 1 deletion internal/api/fixtures/start-data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ INSERT INTO assets (id, resource_id, uuid, size, usage_percent, scraped_at, expe

INSERT INTO finished_operations (asset_id, reason, outcome, old_size, new_size, usage_percent, created_at, confirmed_at, greenlit_at, greenlit_by_user_uuid, finished_at, error_message) VALUES (1, 'low', 'cancelled', 1000, 900, 20, UNIX(31), NULL, NULL, NULL, UNIX(32), '');
INSERT INTO finished_operations (asset_id, reason, outcome, old_size, new_size, usage_percent, created_at, confirmed_at, greenlit_at, greenlit_by_user_uuid, finished_at, error_message) VALUES (1, 'high', 'succeeded', 1023, 1024, 80, UNIX(41), UNIX(42), UNIX(43), 'user2', UNIX(44), '');
INSERT INTO finished_operations (asset_id, reason, outcome, old_size, new_size, usage_percent, created_at, confirmed_at, greenlit_at, greenlit_by_user_uuid, finished_at, error_message) VALUES (1, 'critical', 'failed', 1024, 1025, 97, UNIX(51), UNIX(52), UNIX(52), NULL, UNIX(53), 'datacenter is on fire');
INSERT INTO finished_operations (asset_id, reason, outcome, old_size, new_size, usage_percent, created_at, confirmed_at, greenlit_at, greenlit_by_user_uuid, finished_at, error_message) VALUES (1, 'critical', 'errored', 1024, 1025, 97, UNIX(51), UNIX(52), UNIX(52), NULL, UNIX(53), 'datacenter is on fire');
14 changes: 7 additions & 7 deletions internal/api/operations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,12 @@ func TestGetPendingOperationsForResource(baseT *testing.T) {
}

// withEitherFailedOrErroredOperation runs action with the "errored" and the
// "failed" operation outcome, so that a test body can be repeated for both
// outcomes.
func withEitherFailedOrErroredOperation(action func(db.OperationOutcome)) {
//start-data.sql has a FinishedOperation with outcome "failed". This helper
//function enables us to re-run tests concerning this "failed" operation with
//its outcome changed to "errored", to check that "errored" behaves like
//"failed" for the operations report endpoints.
action(db.OperationOutcomeFailed)
//start-data.sql has a FinishedOperation with outcome "errored". This helper
//function enables us to re-run tests concerning this "errored" operation with
//its outcome changed to "failed", to check that "failed" behaves like
//"errored" for the operations report endpoints.
action(db.OperationOutcomeErrored)
action(db.OperationOutcomeFailed)
}

func TestGetRecentlyFailedOperationsForResource(baseT *testing.T) {
Expand All @@ -130,7 +130,7 @@ func TestGetRecentlyFailedOperationsForResource(baseT *testing.T) {
"/v1/projects/%s/resources/%s/operations/recently-failed")

t.MustExec(h.DB, `UPDATE finished_operations SET outcome = $1 WHERE outcome = $2`,
failedOperationOutcome, db.OperationOutcomeFailed,
failedOperationOutcome, db.OperationOutcomeErrored,
)

//start-data.sql has a recently failed critical operation for fooasset1, but
Expand Down Expand Up @@ -249,7 +249,7 @@ func TestGetRecentlySucceededOperationsForResource(baseT *testing.T) {
"/v1/projects/%s/resources/%s/operations/recently-succeeded")

t.MustExec(h.DB, `UPDATE finished_operations SET outcome = $1 WHERE outcome = $2`,
failedOperationOutcome, db.OperationOutcomeFailed,
failedOperationOutcome, db.OperationOutcomeErrored,
)

//start-data.sql has a succeeded operation, but also a failed/errored one on the same
Expand Down

0 comments on commit 7aae894

Please sign in to comment.