-
Notifications
You must be signed in to change notification settings - Fork 15
/
http.go
281 lines (247 loc) · 8.01 KB
/
http.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
package oohelperd
//
// HTTP measurements
//
import (
"context"
"io"
"net"
"net/http"
"strings"
"sync"
"time"
"github.com/ooni/probe-engine/pkg/legacy/tracex"
"github.com/ooni/probe-engine/pkg/logx"
"github.com/ooni/probe-engine/pkg/measurexlite"
"github.com/ooni/probe-engine/pkg/model"
"github.com/ooni/probe-engine/pkg/netxlite"
"github.com/ooni/probe-engine/pkg/runtimex"
)
// TODO(bassosimone): we should refactor the TH to use step-by-step such that we
// can use an existing connection for the HTTP-measuring task
// ctrlHTTPResponse is the result of the HTTP check performed by
// the Web Connectivity test helper. It is an alias for
// model.THHTTPRequestResult and is the type of the values that
// httpDo posts on the httpConfig.Out channel.
type ctrlHTTPResponse = model.THHTTPRequestResult
// httpConfig configures the HTTP check performed by httpDo.
type httpConfig struct {
// Headers is OPTIONAL and contains the request headers we should set. Only
// the User-Agent, Accept, and Accept-Language headers (matched ignoring
// case) are copied into the outgoing request; httpDo ignores the others.
Headers map[string][]string
// Logger is the MANDATORY logger to use. It is used for the operation
// logger and is also passed to NewClient.
Logger model.Logger
// MaxAcceptableBody is MANDATORY and specifies the maximum acceptable body
// size: httpDo reads at most this many bytes of the response body.
MaxAcceptableBody int64
// NewClient is the MANDATORY factory to create a new client. httpDo calls
// it with Logger and closes the client's idle connections before returning.
NewClient func(model.Logger) model.HTTPClient
// Out is the MANDATORY channel where we'll post results.
Out chan ctrlHTTPResponse
// URL is the MANDATORY URL to measure (using a GET request).
URL string
// Wg is MANDATORY and allows synchronizing with parent: httpDo calls
// Wg.Done when it returns.
Wg *sync.WaitGroup
// searchForH3 is the OPTIONAL flag to decide whether to inspect Alt-Svc for
// HTTP/3 discovery. When false, httpDo leaves DiscoveredH3Endpoint empty.
searchForH3 bool
}
// httpDo performs the HTTP check described by config: it issues a GET request
// for config.URL (following redirects through the client created by
// config.NewClient), reads the response body up to config.MaxAcceptableBody
// bytes, and posts a ctrlHTTPResponse on config.Out. On return it calls
// config.Wg.Done. The whole check is bounded by a 15 seconds timeout.
func httpDo(ctx context.Context, config *httpConfig) {
	// log when the operation starts and when it ends
	opLogger := logx.NewOperationLogger(config.Logger, "GET %s", config.URL)

	// bound the overall amount of time we spend measuring
	const timeout = 15 * time.Second
	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	// let the parent know when we have finished running
	defer config.Wg.Done()

	// reportFailure posts the failure response on the output channel and then
	// stops the operation logger. Like the old test helper, we emit -1 for
	// both the body length and the status code.
	reportFailure := func(err error) {
		config.Out <- ctrlHTTPResponse{
			BodyLength: -1,
			Failure:    httpMapFailure(err),
			Title:      "",
			Headers:    map[string]string{},
			StatusCode: -1,
		}
		opLogger.Stop(err)
	}

	// build the HTTP request bound to the timeout-aware context
	req, err := http.NewRequestWithContext(ctx, "GET", config.URL, nil)
	if err != nil {
		reportFailure(err)
		return
	}

	// The original test helper failed with extra headers while here
	// we take a more liberal approach and just skip them.
	for name, values := range config.Headers {
		lowered := strings.ToLower(name)
		if lowered != "user-agent" && lowered != "accept" && lowered != "accept-language" {
			continue
		}
		for _, value := range values {
			req.Header.Add(name, value)
		}
	}

	// we use a client because we want to follow redirects
	client := config.NewClient(config.Logger)
	defer client.CloseIdleConnections()

	// fetch the webpage following redirects and publish how long it took
	begin := time.Now()
	resp, err := client.Do(req)
	metricHTTPTaskDurationSeconds.Observe(time.Since(begin).Seconds())
	if err != nil {
		reportFailure(err)
		return
	}

	// make sure we eventually close the body
	defer resp.Body.Close()

	// flatten the response headers keeping a single value per header
	respHeaders := make(map[string]string)
	for name := range resp.Header {
		respHeaders[name] = resp.Header.Get(name)
	}

	// read the body without exceeding the configured maximum size
	// TODO(bassosimone): do we need to compute whether the body was truncated?
	body, err := netxlite.ReadAllContext(
		ctx, &io.LimitedReader{R: resp.Body, N: config.MaxAcceptableBody})
	opLogger.Stop(err)

	// optionally check whether there's an HTTP3 endpoint
	var h3Endpoint string
	if config.searchForH3 {
		h3Endpoint = discoverH3Endpoint(resp, req)
	}

	// emit the final response, which includes the body-reading failure, if any
	config.Out <- ctrlHTTPResponse{
		BodyLength:           int64(len(body)),
		DiscoveredH3Endpoint: h3Endpoint,
		Failure:              httpMapFailure(err),
		StatusCode:           int64(resp.StatusCode),
		Headers:              respHeaders,
		Title:                measurexlite.WebGetTitle(string(body)),
	}
}
// discoverH3Endpoint discovers an HTTP/3 endpoint by inspecting the Alt-Svc header in the
// first request-response pair of the redirect chain ending with resp. The initReq argument
// is the request that started the chain and provides the host name to use when the Alt-Svc
// entry only specifies a port. The return value is a "host:port" endpoint or an empty
// string when no usable h3 endpoint is advertised.
//
// TODO(kelmenhorst) Known limitations:
//   - This will not work for http:// URLs: Many/some/? hosts do not advertise h3 via Alt-Svc
//     on a cleartext HTTP response. Thus, measuring http://cloudflare.com will not cause a
//     h3 follow-up, but https://cloudflare.com will.
//   - We only consider the Alt-Svc binding of the very first request-response pair.
//     However, by using parseAltSvc we can later change the code to consider any
//     request-response pair without too much refactoring.
func discoverH3Endpoint(resp *http.Response, initReq *http.Request) string {
	firstResp, found := getFirstResponseInRedirectChain(resp)
	if !found {
		return ""
	}
	h3Endpoint := parseAltSvc(firstResp)
	if h3Endpoint == "" {
		return ""
	}
	// Examples:
	//
	//	Alt-Svc: h2="alt.example.com:443", h2=":443"
	//	Alt-Svc: h3-25=":443"; ma=3600, h2=":443"; ma=3600
	//
	// So here we need to handle both `alt.example.com:443` and `:443` cases.
	host, port, err := net.SplitHostPort(h3Endpoint)
	if err != nil {
		return ""
	}
	if host == "" {
		// Use Hostname rather than Host: URL.Host may include a port (e.g.,
		// "example.com:8443") or IPv6 brackets (e.g., "[::1]"), and since
		// net.JoinHostPort brackets any host containing a colon, we would
		// otherwise produce "[example.com:8443]:443" or "[[::1]]:443".
		host = initReq.URL.Hostname()
	}
	return net.JoinHostPort(host, port)
}
// getFirstResponseInRedirectChain walks the redirect chain backwards, starting from
// the given response, and returns the response that has no predecessor along with
// true. It returns nil and false if the first response is not found within 11 steps.
// It asserts that each visited response has a non-nil Request.
func getFirstResponseInRedirectChain(resp *http.Response) (*http.Response, bool) {
	// The default std lib behavior is to stop redirecting after 10 consecutive
	// requests, so we defensively give up after inspecting 11 responses.
	const maxSteps = 11
	current := resp
	for step := 0; step < maxSteps; step++ {
		req := current.Request
		runtimex.Assert(req != nil, "expected resp.Request != nil")
		// a request with no originating response is the one that started the chain
		previous := req.Response
		if previous == nil {
			return current, true
		}
		current = previous
	}
	return nil, false
}
// parseAltSvc returns the alt-authority (e.g., ":443" or "alt.example.com:443")
// that the Alt-Svc header of resp advertises for the "h3" protocol. It returns
// an empty string when there is no such entry.
//
// Syntax:
//
//	Alt-Svc: clear
//	Alt-Svc: <protocol-id>=<alt-authority>; ma=<max-age>
//	Alt-Svc: <protocol-id>=<alt-authority>; ma=<max-age>; persist=1
//
// Multiple entries may be separated by comma.
//
// See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Alt-Svc
func parseAltSvc(resp *http.Response) string {
	// Alt-Svc is a list-based header, hence a server may spread the entries
	// across several header lines: inspect all of them, not just the first.
	for _, value := range resp.Header.Values("Alt-Svc") {
		for _, entry := range strings.Split(value, ",") {
			// discard the parameters (e.g., "; ma=3600") following the alt-value
			altValue, _, _ := strings.Cut(entry, ";")

			// split the alt-value into protocol-id and alt-authority, requiring the
			// protocol-id to be exactly "h3" so that we don't match other
			// identifiers that merely end with "h3"
			protocolID, altAuthority, found := strings.Cut(strings.TrimSpace(altValue), "=")
			if !found || strings.TrimSpace(protocolID) != "h3" {
				continue
			}

			// the alt-authority is a quoted string: strip blanks and quotes
			altAuthority = strings.TrimSpace(altAuthority)
			altAuthority = strings.TrimPrefix(altAuthority, "\"")
			altAuthority = strings.TrimSuffix(altAuthority, "\"")
			if altAuthority == "" {
				continue
			}
			return altAuthority
		}
	}
	return ""
}
// httpMapFailure attempts to map netxlite failures to the strings
// used by the original OONI test helper. It returns nil when err
// maps to no failure.
//
// See https://github.com/ooni/backend/blob/6ec4fda5b18/oonib/testhelpers/http_helpers.py#L361
func httpMapFailure(err error) *string {
	failure := newfailure(err)
	failedOperation := tracex.NewFailedOperation(err)
	if failure == nil {
		return nil
	}

	// any failure we do not explicitly recognize below is an unknown error
	mapped := "unknown_error"

	switch *failure {
	case netxlite.FailureDNSNXDOMAINError,
		netxlite.FailureDNSNoAnswer,
		netxlite.FailureDNSNonRecoverableFailure,
		netxlite.FailureDNSRefusedError,
		netxlite.FailureDNSServerMisbehaving,
		netxlite.FailureDNSTemporaryFailure:
		// Strangely the HTTP code uses the more broad
		// dns_lookup_error and does not check for
		// the NXDOMAIN-equivalent-error dns_name_error
		mapped = "dns_lookup_error"

	case netxlite.FailureGenericTimeoutError:
		// The old TH would return "dns_lookup_error" when there is a
		// timeout error during the DNS phase of HTTP. Any other timeout
		// is returned as is, because it already uses the same name.
		if failedOperation == nil || *failedOperation != netxlite.ResolveOperation {
			return failure
		}
		mapped = "dns_lookup_error"

	case netxlite.FailureConnectionRefused:
		mapped = "connection_refused_error"
	}

	return &mapped
}