-
Notifications
You must be signed in to change notification settings - Fork 6
/
provider.go
159 lines (146 loc) · 4.21 KB
/
provider.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
// Package content provides a unified way of turning a URL into a struct that provides an array of bytes.
package content
import (
"bytes"
"context"
"encoding/hex"
"errors"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"os"
"strings"
"time"
"cloud.google.com/go/storage"
"github.com/googleapis/google-cloud-go-testing/storage/stiface"
"github.com/m-lab/uuid-annotator/metrics"
)
// Errors that might be returned outside the package.
var (
	// ErrUnsupportedURLScheme is returned by FromURL when the URL's scheme is
	// not one of the schemes FromURL knows how to handle.
	ErrUnsupportedURLScheme = errors.New("Unsupported URL scheme")
	// ErrNoChange is returned by a Provider's Get (together with nil data)
	// when the provider detects that the underlying data has not changed
	// since the last time it was successfully loaded.
	ErrNoChange = errors.New("Data is unchanged")
)
// Provider is the interface implemented by everything that can return raw files.
type Provider interface {
	// Get returns the raw file []byte read from the latest copy of the provider
	// URL. It may be called multiple times. Caching is left up to the individual
	// Provider implementation.
	//
	// The gs and file implementations in this file return ErrNoChange, with
	// nil data, when the source has not changed since their last successful
	// Get, so callers should treat ErrNoChange as "keep using what you have"
	// rather than as a failure.
	Get(ctx context.Context) ([]byte, error)
}
// gcsProvider gets zip files from Google Cloud Storage.
//
// It remembers the MD5 of the object it last loaded so that repeated calls to
// Get can skip the download when the object has not changed.
type gcsProvider struct {
	bucket, filename string
	client           stiface.Client
	// md5 is the MD5 reported by GCS for the most recently loaded object. It
	// is nil until the first successful Get.
	md5 []byte
}

// Get returns the current contents of the configured GCS object.
//
// It first fetches the object's attributes. If a previous Get succeeded and
// the object's MD5 is the same as it was then, Get returns (nil, ErrNoChange)
// without downloading anything. Otherwise the whole object is read into
// memory, the remembered MD5 is updated, and the GCSFilesLoaded metric is
// updated to reflect the newly loaded version.
//
// Errors from fetching attributes, opening the reader, or reading the data are
// returned unmodified; in those cases the remembered MD5 is left untouched so
// the next call retries the download.
func (g *gcsProvider) Get(ctx context.Context) ([]byte, error) {
	o := g.client.Bucket(g.bucket).Object(g.filename)
	oa, err := o.Attrs(ctx)
	if err != nil {
		return nil, err
	}
	if g.md5 != nil && bytes.Equal(g.md5, oa.MD5) {
		return nil, ErrNoChange
	}

	// Otherwise, we know that either g.md5 == nil || g.md5 != oa.MD5.
	// Reload data only if the object changed or the data was never loaded in the first place.
	r, err := o.NewReader(ctx)
	if err != nil {
		return nil, err
	}
	// The reader holds an open connection to GCS; it must be closed on every
	// path or each reload leaks one.
	defer r.Close()

	data, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, err
	}

	// Flip the per-MD5 gauge: the previously loaded version (if any) goes to
	// 0 and the version just loaded goes to 1.
	if g.md5 != nil {
		metrics.GCSFilesLoaded.WithLabelValues(hex.EncodeToString(g.md5)).Set(0)
	}
	g.md5 = oa.MD5
	metrics.GCSFilesLoaded.WithLabelValues(hex.EncodeToString(g.md5)).Set(1)
	return data, nil
}
// fileProvider gets files from the local disk.
//
// It remembers the modification time of the file at the last successful Get
// so that repeated calls can skip re-reading an unchanged file.
type fileProvider struct {
	filename string
	// mtime is the file's modification time as of the last successful Get.
	// It is the zero time until then.
	mtime time.Time
}

// Get returns the current contents of the configured file.
//
// The file is stat'ed first. If its modification time is the same instant as
// the one recorded by the last successful Get, Get returns (nil, ErrNoChange)
// without reading the file. Otherwise the whole file is read and the recorded
// modification time is updated.
//
// A failure to stat the file is returned wrapped (so errors.Is/As still see
// the underlying error); a failure to read it is returned unmodified. ctx is
// accepted to satisfy the Provider interface and is not otherwise used.
func (f *fileProvider) Get(ctx context.Context) ([]byte, error) {
	s, err := os.Stat(f.filename)
	if err != nil {
		return nil, fmt.Errorf("Could not os.Stat(%q): %w", f.filename, err)
	}
	newtime := s.ModTime()
	// Compare instants with Equal rather than ==: == on time.Time also
	// compares the Location, so two values describing the same moment can
	// compare as different.
	if newtime.Equal(f.mtime) {
		return nil, ErrNoChange
	}
	b, err := ioutil.ReadFile(f.filename)
	if err != nil {
		return nil, err
	}
	// Only remember the new mtime once the read has succeeded, so a failed
	// read is retried on the next call.
	f.mtime = newtime
	return b, nil
}
// httpsProvider gets files from public HTTPS URLs (i.e. no authentication).
type httpsProvider struct {
	u url.URL
	// timeout bounds each individual Get, covering both the request and
	// reading the response body.
	timeout time.Duration
	client  *http.Client
}

// Get downloads the configured URL and returns the response body.
//
// Every call performs a fresh GET request; nothing is cached and ErrNoChange
// is never returned. The request is bound to ctx and additionally limited by
// the provider's timeout.
//
// An error is returned if the request cannot be built or sent, if the server
// answers with any status other than 200 OK, or if reading the body fails.
func (h *httpsProvider) Get(ctx context.Context) ([]byte, error) {
	reqCtx, cancel := context.WithTimeout(ctx, h.timeout)
	defer cancel()
	r, err := http.NewRequestWithContext(reqCtx, http.MethodGet, h.u.String(), nil)
	if err != nil {
		return nil, err
	}
	resp, err := h.client.Do(r)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	// Client.Do reports no error for 4xx/5xx responses. Without this check an
	// HTML error page would be handed back to the caller as if it were data.
	if resp.StatusCode != http.StatusOK {
		// Only host and path are reported: the query string may carry keys.
		return nil, fmt.Errorf("unexpected HTTP status %q from %s%s", resp.Status, h.u.Host, h.u.Path)
	}
	return ioutil.ReadAll(resp.Body)
}
// FromURL returns a new Provider based on the passed-in URL. Supported URL
// schemes are currently: gs://bucket/filename, file:localpath, and https://.
// Whether the path contained in the URL is valid isn't known until the Get()
// method of the returned Provider is called. Unsupported URL schemes cause
// this to return ErrUnsupportedURLScheme.
//
// For gs URLs a Cloud Storage client is created using ctx; if the URL has no
// object name, or the client cannot be created, a nil Provider and the error
// are returned. On any error the returned Provider is nil.
//
// Users interested in having the daemon download the data directly from MaxMind
// using credentials should implement an alternate https case in the below
// handler. M-Lab doesn't need that case because we cache MaxMind's data to
// reduce load on their servers and to eliminate a runtime dependency on a third
// party service.
func FromURL(ctx context.Context, u *url.URL) (Provider, error) {
	switch u.Scheme {
	case "gs":
		// Validate the URL before creating a client, so a malformed URL
		// fails fast and does not leave an unused client behind.
		filename := strings.TrimPrefix(u.Path, "/")
		if len(filename) == 0 {
			return nil, errors.New("Bad GS url, no filename detected")
		}
		client, err := storage.NewClient(ctx)
		if err != nil {
			return nil, err
		}
		return &gcsProvider{
			client:   stiface.AdaptClient(client),
			bucket:   u.Host,
			filename: filename,
		}, nil
	case "file":
		// "file:relative/path" is parsed by net/url into Opaque, while
		// "file:///abs/path" ends up in Path; accept either form.
		filename := u.Path
		if filename == "" {
			filename = u.Opaque
		}
		return &fileProvider{filename: filename}, nil
	case "https":
		return &httpsProvider{
			u:       *u,
			timeout: time.Minute,
			client:  http.DefaultClient,
		}, nil
	default:
		return nil, ErrUnsupportedURLScheme
	}
}