-
Notifications
You must be signed in to change notification settings - Fork 566
/
amazon_client.go
332 lines (316 loc) · 10.7 KB
/
amazon_client.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
package obj
import (
"bytes"
"context"
"crypto/tls"
"crypto/x509"
"encoding/pem"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/cloudfront/sign"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
"github.com/aws/aws-sdk-go/service/storagegateway"
"github.com/pachyderm/pachyderm/v2/src/internal/backoff"
"github.com/pachyderm/pachyderm/v2/src/internal/errors"
"github.com/pachyderm/pachyderm/v2/src/internal/errutil"
"github.com/pachyderm/pachyderm/v2/src/internal/log"
"github.com/pachyderm/pachyderm/v2/src/internal/pacherr"
"github.com/pachyderm/pachyderm/v2/src/internal/pctx"
"github.com/pachyderm/pachyderm/v2/src/internal/promutil"
"github.com/pachyderm/pachyderm/v2/src/internal/tracing"
"go.uber.org/zap"
)
// amazonClient is the object-storage client backed by Amazon S3 (or an
// S3-compatible endpoint), optionally reading objects through CloudFront.
type amazonClient struct {
// bucket is the name of the S3 bucket that every operation targets.
bucket string
// cloudfrontDistribution, when non-empty, makes Get fetch objects from
// "<distribution>.cloudfront.net" instead of reading from S3 directly.
cloudfrontDistribution string
// cloudfrontURLSigner, when non-nil, is used by Get to sign cloudfront URLs.
cloudfrontURLSigner *sign.URLSigner
// s3 is the S3 API client used for list, get, delete and head requests.
s3 *s3.S3
// uploader performs the uploads issued by Put.
uploader *s3manager.Uploader
// advancedConfig holds the tuning options the client was built with; Put
// reads its UploadACL.
advancedConfig *AmazonAdvancedConfiguration
}
// AmazonCreds holds the options that apply specifically to pachd's
// credentials in an AWS deployment.
type AmazonCreds struct {
// Direct credentials. Only applicable if Pachyderm is given its own permanent
// AWS credentials. When ID is empty, newAmazonClient does not configure
// static credentials.
ID string // Access Key ID
Secret string // Secret Access Key
Token string // Access token (if using temporary security credentials)
}
// parseLogOptions converts a comma-separated list of S3 logging option names
// into an AWS SDK log level bitmask.
//
// Recognized names are "debug", "signing", "httpbody", "requestretries",
// "requesterrors", "eventstreambody" and "all". Matching is case-insensitive
// and ignores whitespace around each entry, so "Debug, Signing" is accepted.
// Empty entries are skipped; unrecognized names are logged and otherwise
// ignored.
//
// It returns nil when optstring is empty, so the caller can leave the log
// level unset. Otherwise it logs the effective flags and returns a pointer to
// the combined level (which is zero if no entry was recognized).
func parseLogOptions(ctx context.Context, optstring string) *aws.LogLevelType {
	if optstring == "" {
		return nil
	}
	toLevel := map[string]aws.LogLevelType{
		"debug":           aws.LogDebug,
		"signing":         aws.LogDebugWithSigning,
		"httpbody":        aws.LogDebugWithHTTPBody,
		"requestretries":  aws.LogDebugWithRequestRetries,
		"requesterrors":   aws.LogDebugWithRequestErrors,
		"eventstreambody": aws.LogDebugWithEventStreamBody,
		"all": aws.LogDebugWithSigning |
			aws.LogDebugWithHTTPBody |
			aws.LogDebugWithRequestRetries |
			aws.LogDebugWithRequestErrors |
			aws.LogDebugWithEventStreamBody,
	}

	var result aws.LogLevelType
	for _, optStr := range strings.Split(optstring, ",") {
		// Tolerate "a, b" style lists and stray commas.
		key := strings.ToLower(strings.TrimSpace(optStr))
		if key == "" {
			continue
		}
		level, ok := toLevel[key]
		if !ok {
			// Surface typos instead of silently dropping them.
			log.Info(ctx, "ignoring unrecognized S3 logging option", zap.String("option", optStr))
			continue
		}
		result |= level
	}

	// Build the log message separately, as the log flags have overlapping
	// definitions: a flag is reported whenever all of its bits are set.
	var msg strings.Builder
	msg.WriteString("using S3 logging flags: ")
	for _, optStr := range []string{"Debug", "Signing", "HTTPBody", "RequestRetries", "RequestErrors", "EventStreamBody"} {
		optBits := toLevel[strings.ToLower(optStr)]
		if (result & optBits) == optBits {
			msg.WriteString(optStr)
			msg.WriteString(",")
		}
	}
	log.Info(ctx, msg.String())
	return &result
}
// newAmazonClient builds an amazonClient for the given region and bucket.
//
// Parameters:
//   - creds: static credentials. When creds is nil or creds.ID is empty no
//     static credentials are configured and the SDK's own credential
//     resolution applies (e.g. the EC2 metadata service).
//   - cloudfrontDistribution: optional cloudfront distribution name. When it
//     is non-empty after trimming whitespace, the cloudfront private key and
//     key pair ID are read via readSecretFile and used to build a URL signer.
//   - endpoint: optional custom S3 endpoint; when set, path-style addressing
//     is forced.
//   - advancedConfig: timeout, retry, SSL, logging and upload tuning options.
//     It must be non-nil.
//
// It returns an error if the timeout cannot be parsed, the AWS session cannot
// be created, or the cloudfront secrets cannot be read or parsed.
func newAmazonClient(ctx context.Context, region, bucket string, creds *AmazonCreds, cloudfrontDistribution string, endpoint string, advancedConfig *AmazonAdvancedConfiguration) (*amazonClient, error) {
	// Set up the AWS config, including credentials.
	timeout, err := time.ParseDuration(advancedConfig.Timeout)
	if err != nil {
		return nil, errors.EnsureStack(err)
	}
	httpClient := &http.Client{Timeout: timeout}
	// If NoVerifySSL is true, then configure the transport to skip ssl
	// verification (enables self-signed certificates).
	if advancedConfig.NoVerifySSL {
		transport := http.DefaultTransport.(*http.Transport).Clone()
		transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
		httpClient.Transport = transport
	}
	httpClient.Transport = promutil.InstrumentRoundTripper("s3", httpClient.Transport)
	awsConfig := &aws.Config{
		Region:     aws.String(region),
		MaxRetries: aws.Int(advancedConfig.Retries),
		HTTPClient: httpClient,
		DisableSSL: aws.Bool(advancedConfig.DisableSSL),
		LogLevel:   parseLogOptions(ctx, advancedConfig.LogOptions),
		Logger:     log.NewAmazonLogger(ctx),
	}
	// Guard against nil creds so that "no credentials" never panics.
	if creds != nil && creds.ID != "" {
		awsConfig.Credentials = credentials.NewStaticCredentials(creds.ID, creds.Secret, creds.Token)
	}
	// Set custom endpoint for a custom deployment.
	if endpoint != "" {
		awsConfig.Endpoint = aws.String(endpoint)
		awsConfig.S3ForcePathStyle = aws.Bool(true)
	}
	// Create a new session using awsConfig. The variable is deliberately not
	// called "session" so that it does not shadow the session package.
	sess, err := session.NewSession(awsConfig)
	if err != nil {
		return nil, errors.EnsureStack(err)
	}
	awsClient := &amazonClient{
		bucket: bucket,
		s3:     s3.New(sess),
		uploader: s3manager.NewUploader(sess, func(u *s3manager.Uploader) {
			u.PartSize = advancedConfig.PartSize
			u.MaxUploadParts = advancedConfig.MaxUploadParts
		}),
		advancedConfig: advancedConfig,
	}
	// Set awsClient.cloudfrontURLSigner and cloudfrontDistribution (if Pachd is
	// using cloudfront). The trimmed value is what Get later checks, so gate on
	// it here too: a whitespace-only setting means "no cloudfront".
	awsClient.cloudfrontDistribution = strings.TrimSpace(cloudfrontDistribution)
	if awsClient.cloudfrontDistribution != "" {
		rawCloudfrontPrivateKey, err := readSecretFile("/cloudfrontPrivateKey")
		if err != nil {
			return nil, err
		}
		cloudfrontKeyPairID, err := readSecretFile("/cloudfrontKeyPairId")
		if err != nil {
			return nil, err
		}
		block, _ := pem.Decode(bytes.TrimSpace([]byte(rawCloudfrontPrivateKey)))
		// pem.Decode yields a nil block when no PEM data is found; check that
		// separately so block.Type is never read through a nil pointer.
		if block == nil {
			return nil, errors.Errorf("block undefined or wrong type: no PEM data found, should be (RSA PRIVATE KEY)")
		}
		if block.Type != "RSA PRIVATE KEY" {
			return nil, errors.Errorf("block undefined or wrong type: type is (%v) should be (RSA PRIVATE KEY)", block.Type)
		}
		cloudfrontPrivateKey, err := x509.ParsePKCS1PrivateKey(block.Bytes)
		if err != nil {
			return nil, errors.EnsureStack(err)
		}
		awsClient.cloudfrontURLSigner = sign.NewURLSigner(cloudfrontKeyPairID, cloudfrontPrivateKey)
		log.Info(ctx, "Using cloudfront security credentials to sign cloudfront URLs", zap.String("keypairID", string(cloudfrontKeyPairID)))
	}
	return awsClient, nil
}
// Put uploads everything read from r to the object called name in the
// client's bucket, using the ACL from the advanced configuration. Any error is
// passed through transformError before being returned.
func (c *amazonClient) Put(ctx context.Context, name string, r io.Reader) (retErr error) {
	defer func() {
		retErr = c.transformError(retErr, name)
	}()

	// The derived context is cancelled as soon as Put returns.
	uploadCtx, cancel := pctx.WithCancel(ctx)
	defer cancel()

	input := &s3manager.UploadInput{
		Bucket:          aws.String(c.bucket),
		Key:             aws.String(name),
		Body:            r,
		ACL:             aws.String(c.advancedConfig.UploadACL),
		ContentEncoding: aws.String("application/octet-stream"),
	}
	if _, err := c.uploader.UploadWithContext(uploadCtx, input); err != nil {
		return errors.EnsureStack(err)
	}
	return nil
}
// Walk lists the objects in the client's bucket whose key starts with name and
// calls fn with each key, page by page. Listing stops at the first error
// returned by fn, and that error is returned. Any error is passed through
// transformError before being returned.
func (c *amazonClient) Walk(ctx context.Context, name string, fn func(name string) error) (retErr error) {
	defer func() {
		retErr = c.transformError(retErr, name)
	}()

	input := &s3.ListObjectsInput{
		Bucket: aws.String(c.bucket),
		Prefix: aws.String(name),
	}

	// callbackErr carries fn's error out of the paging callback, which can
	// only signal "stop" through its boolean result.
	var callbackErr error
	visitPage := func(page *s3.ListObjectsOutput, _ bool) bool {
		for _, obj := range page.Contents {
			objKey := *obj.Key
			if !strings.HasPrefix(objKey, name) {
				continue
			}
			if callbackErr = fn(objKey); callbackErr != nil {
				return false
			}
		}
		return true
	}

	if err := c.s3.ListObjectsPagesWithContext(ctx, input, visitPage); err != nil {
		return errors.EnsureStack(err)
	}
	return callbackErr
}
// Get copies the contents of the object called name into w.
//
// When a cloudfront distribution is configured the object is fetched over HTTP
// from "<distribution>.cloudfront.net", using a URL signed for one hour if a
// signer is available; connection errors that errutil.IsNetRetryable reports
// as retryable are retried with exponential backoff. Otherwise the object is
// read directly from S3.
//
// The response body is always closed, including when cloudfront answers with
// an error status. Any error is passed through transformError before being
// returned.
func (c *amazonClient) Get(ctx context.Context, name string, w io.Writer) (retErr error) {
	defer func() { retErr = c.transformError(retErr, name) }()
	var reader io.ReadCloser
	if c.cloudfrontDistribution != "" {
		var resp *http.Response
		var connErr error
		// NOTE(review): the URL uses plain http rather than https — confirm
		// this is intentional.
		url := fmt.Sprintf("http://%v.cloudfront.net/%v", c.cloudfrontDistribution, name)
		if c.cloudfrontURLSigner != nil {
			signedURL, err := c.cloudfrontURLSigner.Sign(url, time.Now().Add(1*time.Hour))
			if err != nil {
				return errors.EnsureStack(err)
			}
			url = strings.TrimSpace(signedURL)
		}
		req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
		if err != nil {
			return errors.EnsureStack(err)
		}
		// The retry result is ignored on purpose: the outcome of the last
		// attempt is carried in connErr, which is checked below.
		backoff.RetryNotify(func() (retErr error) { //nolint:errcheck
			span, _ := tracing.AddSpanToAnyExisting(ctx, "/Amazon.Cloudfront/Get")
			defer func() {
				tracing.FinishAnySpan(span, "err", retErr)
			}()
			resp, connErr = http.DefaultClient.Do(req)
			// Only retryable network errors trigger another attempt; any
			// other error ends the loop and is reported through connErr.
			if connErr != nil && errutil.IsNetRetryable(connErr) {
				return errors.EnsureStack(connErr)
			}
			return nil
		}, backoff.NewExponentialBackOff(), func(err error, d time.Duration) error {
			log.Info(ctx, "Error connecting; retrying", zap.String("url", url), zap.Duration("retryAfter", d), zap.Error(err))
			return nil
		})
		if connErr != nil {
			return errors.EnsureStack(connErr)
		}
		if resp.StatusCode >= 300 {
			// Cloudfront returns 200s, and 206s as success codes.
			// Close the body before bailing out so the underlying connection
			// is released; the status error is what the caller needs, so a
			// close failure here is deliberately ignored.
			resp.Body.Close() //nolint:errcheck
			return errors.Errorf("cloudfront returned HTTP error code %v for url %v", resp.Status, url)
		}
		reader = resp.Body
	} else {
		objIn := &s3.GetObjectInput{
			Bucket: aws.String(c.bucket),
			Key:    aws.String(name),
		}
		getObjectOutput, err := c.s3.GetObjectWithContext(ctx, objIn)
		if err != nil {
			return errors.EnsureStack(err)
		}
		reader = getObjectOutput.Body
	}
	defer func() {
		// Report a close failure only if nothing else has failed already.
		if err := reader.Close(); retErr == nil {
			retErr = err
		}
	}()
	_, err := io.Copy(w, reader)
	return errors.EnsureStack(err)
}
// Delete removes the object called name from the client's bucket. Any error is
// passed through transformError before being returned.
func (c *amazonClient) Delete(ctx context.Context, name string) (retErr error) {
	defer func() {
		retErr = c.transformError(retErr, name)
	}()

	input := &s3.DeleteObjectInput{
		Bucket: aws.String(c.bucket),
		Key:    aws.String(name),
	}
	if _, err := c.s3.DeleteObjectWithContext(ctx, input); err != nil {
		return errors.EnsureStack(err)
	}
	return nil
}
// Exists reports whether the object called name is present in the client's
// bucket, determined with a HEAD request. A "does not exist" failure (as
// classified by transformError) yields (false, nil); any other failure yields
// false together with the transformed error.
func (c *amazonClient) Exists(ctx context.Context, name string) (bool, error) {
	input := &s3.HeadObjectInput{
		Bucket: aws.String(c.bucket),
		Key:    aws.String(name),
	}
	_, headErr := c.s3.HeadObjectWithContext(ctx, input)
	// The raw (untransformed) error is what gets tagged on the span.
	tracing.TagAnySpan(ctx, "err", headErr)
	if headErr == nil {
		return true, nil
	}
	if transformed := c.transformError(headErr, name); !pacherr.IsNotExist(transformed) {
		return false, transformed
	}
	return false, nil
}
// BucketURL returns the object-store URL of the client's bucket, with the
// "s3" scheme.
func (c *amazonClient) BucketURL() ObjectStoreURL {
	var bucketURL ObjectStoreURL
	bucketURL.Scheme = "s3"
	bucketURL.Bucket = c.bucket
	return bucketURL
}
// transformError maps an error from S3 or cloudfront onto the error kinds the
// rest of the package understands. objectPath names the object the failed
// operation was about and is used when building a "not exist" error.
//
// The checks run in a fixed order: message-based checks on any error first,
// then checks that require the error to be an awserr.Error. A nil err yields
// nil, and an error that matches nothing is returned unchanged.
func (c *amazonClient) transformError(err error, objectPath string) error {
	// minWait is handed to pacherr.WrapTransient for every transient error;
	// presumably it is the minimum delay before a retry.
	const minWait = 250 * time.Millisecond
	if err == nil {
		return nil
	}

	text := err.Error()
	switch {
	case c.cloudfrontDistribution != "" && strings.Contains(text, "error code 403"):
		// cloudfront returns forbidden error for nonexisting data
		return pacherr.NewNotExist(c.bucket, objectPath)
	case strings.Contains(text, "unexpected EOF"):
		return pacherr.WrapTransient(err, minWait)
	case strings.Contains(text, "Not Found"):
		return pacherr.NewNotExist(c.bucket, objectPath)
	}

	var awsErr awserr.Error
	if !errors.As(err, &awsErr) {
		return err
	}

	switch {
	case strings.Contains(awsErr.Error(), "RequestCanceled"):
		// errors.Is is unable to correctly identify context.Cancel with the
		// amazon error types
		return context.Canceled
	case strings.Contains(awsErr.Message(), "SlowDown:"):
		return pacherr.WrapTransient(err, minWait)
	}

	switch awsErr.Code() {
	case s3.ErrCodeNoSuchKey:
		return pacherr.NewNotExist(c.bucket, objectPath)
	case storagegateway.ErrorCodeServiceUnavailable,
		storagegateway.ErrorCodeInternalError,
		storagegateway.ErrorCodeGatewayInternalError:
		return pacherr.WrapTransient(err, minWait)
	}
	return err
}