-
Notifications
You must be signed in to change notification settings - Fork 0
/
worker.go
204 lines (171 loc) · 5.29 KB
/
worker.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
package main
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"time"
)
// Job identifies one staged upload for a Worker to process.
type Job struct {
	Root string // staging root directory on disk
	Dir  string // upload directory relative to Root; parsed by parseUploadPath (starts with "node-<id>/")
}
// Worker uploads staged data/meta file pairs via Uploader and optionally
// cleans up the staged files afterwards.
type Worker struct {
	Uploader               FileUploader // destination for uploaded files (interface declared elsewhere in the package)
	DeleteFilesAfterUpload bool         // when true, Process removes staged files and empty parent dirs after a successful upload
}
// MetaData describes a single uploaded file, as populated from the staged
// "meta" JSON file by readMetaFile.
type MetaData struct {
	Name      string            // record name; readMetaFile always sets this to "upload"
	Timestamp time.Time         // taken from the JSON "ts" field, or the deprecated "timestamp" field
	Shasum    *string           // optional checksum of the data file; nil when absent from the meta file
	Meta      map[string]string // key/value metadata; guaranteed by readMetaFile to contain a nonempty "filename"
	Value     string            // NOTE(review): never set or read in this file — presumably used elsewhere; confirm
}
// UploadInfo holds the routing fields parsed from an upload directory path
// by parseUploadPath.
type UploadInfo struct {
	NodeID    string // node identifier with any leading "node-" prefix stripped
	Namespace string // plugin namespace; defaults to "sage" when the path omits it
	Name      string // plugin name
	Version   string // plugin version
}
// Run consumes jobs until the jobs channel is closed or ctx is canceled,
// processing each job and sending a human-readable result line on results.
// It returns promptly on cancellation whether blocked on a receive or a send.
func (w *Worker) Run(ctx context.Context, jobs <-chan Job, results chan<- string) {
	for {
		// Receive the next job, but stop immediately if ctx is canceled.
		// (A bare "for job := range jobs" would block here forever after
		// cancellation if the jobs channel is never closed.)
		var job Job
		select {
		case <-ctx.Done():
			return
		case j, ok := <-jobs:
			if !ok {
				return
			}
			job = j
		}

		var result string
		if err := w.Process(job); err != nil {
			result = fmt.Sprintf("job error: %+v %s", job, err.Error())
		} else {
			result = fmt.Sprintf("job ok: %+v", job)
		}

		// Likewise, don't block forever on a full results channel once the
		// consumer has gone away.
		select {
		case results <- result:
		case <-ctx.Done():
			return
		}
	}
}
// parseUploadPath extracts upload routing info from a slash-separated staged
// upload directory path. Two layouts are accepted: a 6-field path carrying an
// explicit namespace, and a 5-field path without one, in which case the
// namespace defaults to "sage". Any other field count is an error.
func parseUploadPath(dir string) (*UploadInfo, error) {
	fields := strings.Split(dir, "/")

	info := &UploadInfo{
		Namespace: "sage", // sage is the default in cases where no namespace was given
	}

	// Validate the overall shape before picking out positional fields.
	switch len(fields) {
	case 6:
		info.Namespace = fields[2]
		info.Name = fields[3]
		info.Version = fields[4]
	case 5: // namespace is missing
		info.Name = fields[2]
		info.Version = fields[3]
	default:
		return nil, fmt.Errorf("could not parse path %s", dir)
	}

	info.NodeID = strings.TrimPrefix(fields[0], "node-")
	return info, nil
}
// Process handles a single staged upload: it parses routing info from the
// job's directory, reads the staged meta file, uploads the data and meta
// files to their S3-style destination path, writes a done flag file, and —
// when DeleteFilesAfterUpload is set — removes the staged files and any
// now-empty parent directories.
//
// Errors are wrapped with %w so callers can inspect the underlying cause
// with errors.Is / errors.As (the original %s formatting broke the chain).
func (w *Worker) Process(job Job) error {
	p, err := parseUploadPath(job.Dir)
	if err != nil {
		return fmt.Errorf("error parsing upload path: %w", err)
	}

	dataPath := filepath.Join(job.Root, job.Dir, "data")
	metaPath := filepath.Join(job.Root, job.Dir, "meta")
	donePath := filepath.Join(job.Root, job.Dir, DoneFilename)

	var meta MetaData
	if err := readMetaFile(metaPath, &meta); err != nil {
		return fmt.Errorf("error reading meta file: %w", err)
	}

	// Add info extracted from path.
	meta.Meta["node"] = strings.ToLower(p.NodeID)
	// TODO(sean) reconcile namespace and job usage
	meta.Meta["plugin"] = p.Namespace + "/" + p.Name + ":" + p.Version

	// Target object names are prefixed with the nanosecond timestamp so they
	// sort chronologically and cannot collide across uploads of the same file.
	labelFilename := meta.Meta["filename"]
	targetNameData := fmt.Sprintf("%d-%s", meta.Timestamp.UnixNano(), labelFilename)
	targetNameMeta := fmt.Sprintf("%d-%s.meta", meta.Timestamp.UnixNano(), labelFilename)

	s3path := fmt.Sprintf("node-data/%s/sage-%s-%s/%s", p.Namespace, p.Name, p.Version, p.NodeID)

	if err := w.Uploader.UploadFile(dataPath, filepath.Join(s3path, targetNameData), &meta); err != nil {
		return fmt.Errorf("error uploading data file: %w", err)
	}
	if err := w.Uploader.UploadFile(metaPath, filepath.Join(s3path, targetNameMeta), &meta); err != nil {
		return fmt.Errorf("error uploading meta file: %w", err)
	}

	// Mark the upload as complete so it is not picked up again.
	if err := os.WriteFile(donePath, []byte{}, 0o644); err != nil {
		return fmt.Errorf("error creating flag file: %w", err)
	}

	// TODO(sean) If we see the need to support various clean up strategies,
	// we should just make this step plugable. For example, maybe instead of
	// deleting, we want to move files to a done directory.
	if w.DeleteFilesAfterUpload {
		// Clean up data, meta and done files.
		for _, name := range []string{dataPath, metaPath, donePath} {
			if err := os.Remove(name); err != nil {
				return fmt.Errorf("error cleaning up %s: %w", name, err)
			}
		}

		// Attempt to clean up parent directories up to root/node-xyz/uploads.
		//
		// NOTE(sean) There is a possible race condition with the upload agent here.
		//
		// It's possible that the upload agent creates the parent paths which are removed
		// before we can upload. In this case, that particular rsync will fail and then
		// will be tried again later.
		//
		// In order for this to happen, the OSN loader would have to upload and clean up the
		// last staged item for a task right when that task is posting a new upload. This seems
		// potentially rare enough that I'd opt for simpler, more robust cleanup logic for now.
		//
		// os.Remove only succeeds on empty directories, so the loop naturally
		// stops (via break) at the first directory still in use.
		for p := filepath.Dir(dataPath); filepath.Base(p) != "uploads"; p = filepath.Dir(p) {
			if err := os.Remove(p); err != nil {
				break
			}
		}
	}

	return nil
}
// readMetaFile decodes the JSON meta file at name into m. It accepts both
// the current field names ("ts", "meta") and the deprecated ones
// ("timestamp", "labels"), preferring the current ones when both appear.
// An error is returned when the file cannot be read, when no timestamp or
// meta map is present, or when the "filename" metadata entry is missing
// or empty.
func readMetaFile(name string, m *MetaData) error {
	var raw struct {
		EpochNano    *int64            `json:"ts"`
		EpochNanoOld *int64            `json:"timestamp"` // only for reading (deprecated soon), keep it backwards compatible
		Shasum       *string           `json:"shasum"`
		Meta         map[string]string `json:"meta"`
		MetaOld      map[string]string `json:"labels"` // only read (will write to meta)
	}
	if err := readJSONFile(name, &raw); err != nil {
		return err
	}

	m.Name = "upload"

	// detect timestamp, preferring the current "ts" field
	if raw.EpochNano != nil {
		m.Timestamp = time.Unix(0, *raw.EpochNano)
	} else if raw.EpochNanoOld != nil {
		m.Timestamp = time.Unix(0, *raw.EpochNanoOld)
	} else {
		return fmt.Errorf("meta file is missing timestamp")
	}

	// detect meta, preferring the current "meta" field
	if raw.Meta != nil {
		m.Meta = raw.Meta
	} else if raw.MetaOld != nil {
		m.Meta = raw.MetaOld
	} else {
		return fmt.Errorf("meta file is missing meta fields")
	}

	if m.Meta["filename"] == "" {
		return fmt.Errorf("filename metadata must exist and be nonempty")
	}

	if raw.Shasum != nil {
		m.Shasum = raw.Shasum
	}
	return nil
}
func readJSONFile(name string, v interface{}) error {
f, err := os.Open(name)
if err != nil {
return err
}
defer f.Close()
return json.NewDecoder(f).Decode(v)
}