This repository has been archived by the owner on Oct 9, 2023. It is now read-only.
/
executions_cache.go
162 lines (133 loc) · 5.31 KB
/
executions_cache.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
package presto
import (
"context"
"k8s.io/client-go/util/workqueue"
"github.com/lyft/flytestdlib/cache"
"github.com/lyft/flyteplugins/go/tasks/errors"
stdErrors "github.com/lyft/flytestdlib/errors"
"github.com/lyft/flyteplugins/go/tasks/plugins/presto/client"
"github.com/lyft/flyteplugins/go/tasks/plugins/presto/config"
"github.com/lyft/flytestdlib/logger"
"github.com/lyft/flytestdlib/promutils"
)
const (
// BadPrestoReturnCodeError is the error code attached to the errors that StatusToExecutionPhase returns when
// the status reported by Presto is Unknown or is not one of the statuses it handles.
BadPrestoReturnCodeError stdErrors.ErrorCode = "PRESTO_RETURNED_UNKNOWN"
)
// ExecutionsCache bundles an auto-refresh cache with the Presto client that SyncPrestoQuery uses to refresh the
// cached items.
type ExecutionsCache struct {
// AutoRefresh is the embedded cache; NewPrestoExecutionsCache sets it once the cache has been created.
cache.AutoRefresh
// prestoClient is asked for the status of each cached command during a sync.
prestoClient client.PrestoClient
// scope is the promutils scope that was passed to the auto-refresh cache on construction.
scope promutils.Scope
// cfg is the plugin configuration the cache was built from.
cfg *config.Config
}
// NewPrestoExecutionsCache builds an ExecutionsCache around the given Presto client and registers its
// SyncPrestoQuery method as the sync callback of a new auto-refresh cache configured from
// cfg.RefreshCacheConfig.
//
// If the auto-refresh cache cannot be created, the failure is logged and returned wrapped with the CacheFailed
// code; the returned ExecutionsCache then has no AutoRefresh set.
func NewPrestoExecutionsCache(
	ctx context.Context,
	prestoClient client.PrestoClient,
	cfg *config.Config,
	scope promutils.Scope) (ExecutionsCache, error) {

	executionsCache := ExecutionsCache{
		cfg:          cfg,
		scope:        scope,
		prestoClient: prestoClient,
	}

	refresher, createErr := cache.NewAutoRefreshCache(
		cfg.RefreshCacheConfig.Name,
		executionsCache.SyncPrestoQuery,
		workqueue.DefaultControllerRateLimiter(),
		cfg.RefreshCacheConfig.SyncPeriod.Duration,
		cfg.RefreshCacheConfig.Workers,
		cfg.RefreshCacheConfig.LruCacheSize,
		scope,
	)
	if createErr != nil {
		logger.Errorf(ctx, "Could not create AutoRefreshCache in Executor. [%s]", createErr)
		return executionsCache, errors.Wrapf(errors.CacheFailed, createErr, "Error creating AutoRefreshCache")
	}

	executionsCache.AutoRefresh = refresher
	return executionsCache, nil
}
// ExecutionStateCacheItem is the value kept in the executions cache: an ExecutionState together with the key it
// is cached under.
type ExecutionStateCacheItem struct {
ExecutionState
// This ID is the cache key and so will need to be unique across all objects in the cache (it will probably be
// unique across all of Flyte) and needs to be deterministic.
// This will also be used as the allocation token for now.
Identifier string `json:"id"`
}
// ID returns the item's Identifier, i.e. the key under which it is stored in the cache.
func (item ExecutionStateCacheItem) ID() string {
	return item.Identifier
}
// SyncPrestoQuery is the sync callback registered with the auto-refresh cache. For each item in the batch it
// grabs an updated status from the Presto client and reports the result back to the cache.
// All other handling should be in the synchronous loop.
//
// Per-item outcome:
//   - blank CommandID, or InTerminalState reports true: returned as cache.Unchanged without calling Presto;
//   - the Presto client returns an error: SyncFailureCount is incremented and the item is returned as
//     cache.Update;
//   - the new phase is greater than CurrentPhase: PreviousPhase/CurrentPhase are updated and the item is
//     returned as cache.Update;
//   - otherwise: no response entry is emitted for the item.
//
// A batch entry that is not an ExecutionStateCacheItem, or a status that StatusToExecutionPhase rejects, aborts
// the whole batch: a nil slice and an error are returned.
func (p *ExecutionsCache) SyncPrestoQuery(ctx context.Context, batch cache.Batch) (
	updatedBatch []cache.ItemSyncResponse, err error) {

	resp := make([]cache.ItemSyncResponse, 0, len(batch))
	for _, query := range batch {
		// Cast the item back to the thing we want to work with.
		executionStateCacheItem, ok := query.GetItem().(ExecutionStateCacheItem)
		if !ok {
			logger.Errorf(ctx, "Sync loop - Error casting cache object into ExecutionState")
			// Report the ID of the entry that actually failed, not the first entry of the batch.
			return nil, errors.Errorf(errors.CacheFailed, "Failed to cast [%v]", query.GetID())
		}

		if executionStateCacheItem.CommandID == "" {
			logger.Warnf(ctx, "Sync loop - CommandID is blank for [%s] skipping", executionStateCacheItem.Identifier)
			resp = append(resp, cache.ItemSyncResponse{
				ID:     query.GetID(),
				Item:   query.GetItem(),
				Action: cache.Unchanged,
			})
			continue
		}

		logger.Debugf(ctx, "Sync loop - processing Presto job [%s] - cache key [%s]",
			executionStateCacheItem.CommandID, executionStateCacheItem.Identifier)

		if InTerminalState(executionStateCacheItem.ExecutionState) {
			// Log the phase in the "terminal state" slot; the cache key gets its own placeholder.
			logger.Debugf(ctx, "Sync loop - Presto id [%s] in terminal state [%s] - cache key [%s]",
				executionStateCacheItem.CommandID, executionStateCacheItem.CurrentPhase,
				executionStateCacheItem.Identifier)
			resp = append(resp, cache.ItemSyncResponse{
				ID:     query.GetID(),
				Item:   query.GetItem(),
				Action: cache.Unchanged,
			})
			continue
		}

		// Get an updated status from Presto
		logger.Debugf(ctx, "Querying Presto for %s - %s", executionStateCacheItem.CommandID, executionStateCacheItem.Identifier)
		// Distinct error names avoid shadowing the named result err.
		commandStatus, statusErr := p.prestoClient.GetCommandStatus(ctx, executionStateCacheItem.CommandID)
		if statusErr != nil {
			logger.Errorf(ctx, "Error from Presto command %s: %v", executionStateCacheItem.CommandID, statusErr)
			executionStateCacheItem.SyncFailureCount++
			// Make sure we don't return nil for the first argument, because that deletes it from the cache.
			resp = append(resp, cache.ItemSyncResponse{
				ID:     query.GetID(),
				Item:   executionStateCacheItem,
				Action: cache.Update,
			})
			continue
		}

		newExecutionPhase, phaseErr := StatusToExecutionPhase(commandStatus)
		if phaseErr != nil {
			return nil, phaseErr
		}

		// Only ever move the phase forward; an equal or lower phase produces no response entry for this item.
		if newExecutionPhase > executionStateCacheItem.CurrentPhase {
			logger.Infof(ctx, "Moving ExecutionPhase for %s %s from %s to %s", executionStateCacheItem.CommandID,
				executionStateCacheItem.Identifier, executionStateCacheItem.CurrentPhase, newExecutionPhase)

			executionStateCacheItem.PreviousPhase = executionStateCacheItem.CurrentPhase
			executionStateCacheItem.CurrentPhase = newExecutionPhase

			resp = append(resp, cache.ItemSyncResponse{
				ID:     query.GetID(),
				Item:   executionStateCacheItem,
				Action: cache.Update,
			})
		}
	}

	return resp, nil
}
// StatusToExecutionPhase translates a status reported by Presto into the plugin's ExecutionPhase.
//
// Waiting and Running map to PhaseSubmitted, Finished to PhaseQuerySucceeded, and Cancelled and Failed to
// PhaseQueryFailed. An Unknown status, or any status not listed here, yields PhaseQueryFailed together with a
// BadPrestoReturnCodeError error.
func StatusToExecutionPhase(s client.PrestoStatus) (ExecutionPhase, error) {
	switch s {
	case client.PrestoStatusWaiting, client.PrestoStatusRunning:
		return PhaseSubmitted, nil
	case client.PrestoStatusFinished:
		return PhaseQuerySucceeded, nil
	case client.PrestoStatusCancelled, client.PrestoStatusFailed:
		return PhaseQueryFailed, nil
	case client.PrestoStatusUnknown:
		return PhaseQueryFailed, errors.Errorf(BadPrestoReturnCodeError, "Presto returned status Unknown")
	}

	// Anything that is not an explicitly handled status ends up here.
	return PhaseQueryFailed, errors.Errorf(BadPrestoReturnCodeError, "default fallthrough case")
}