-
Notifications
You must be signed in to change notification settings - Fork 46
/
analysiscore.go
486 lines (441 loc) · 17.1 KB
/
analysiscore.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
package webconnectivitylte
import (
"fmt"
"net"
"net/url"
"github.com/ooni/probe-cli/v3/internal/geoipx"
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/netxlite"
)
//
// Core analysis
//
// Bit flags describing which blocking methods the analysis detected.
//
// TestKeys.Blocking can only hold a single value; these flags, instead, can
// be OR-ed together so that several blocking methods are recorded at once.
const (
	// analysisFlagDNSBlocking is set when blocking happens at the DNS level.
	analysisFlagDNSBlocking = 1 << iota

	// analysisFlagTCPIPBlocking is set when blocking happens at the TCP/IP level.
	analysisFlagTCPIPBlocking = 1 << iota

	// analysisFlagTLSBlocking is set when we observed TLS issues.
	analysisFlagTLSBlocking = 1 << iota

	// analysisFlagHTTPBlocking is set when we observed an HTTP failure.
	analysisFlagHTTPBlocking = 1 << iota

	// analysisFlagHTTPDiff is set when we observed an HTTP diff.
	analysisFlagHTTPDiff = 1 << iota

	// analysisFlagSuccess is set when no blocking was detected.
	analysisFlagSuccess = 1 << iota
)
// analysisToplevel computes the toplevel verdict of the measurement. It is
// meant to run once all the network tasks have completed.
//
// The goal is to fill the fields the backend uses to score results:
//
//   - blocking (and x_blocking_flags), describing the blocking method
//     (or methods) we detected;
//
//   - accessible, describing whether we think the resource could
//     be accessed.
//
// Historically, Web Connectivity only exposed the scalar blocking value,
// which, together with accessible, led to these combinations:
//
//   - blocking=null, accessible=null: probe analysis error;
//
//   - blocking=false, accessible=true: no blocking detected;
//
//   - blocking="...", accessible=false: blocking detected.
//
// The flags are more granular than that, so accessible could in principle
// mean "accessible under some conditions". We deliberately do not do that,
// because deviating from the original semantics looks dangerous.
//
// By design, this function is the only place assigning tk.Blocking and
// tk.Accessible: the analysis passes it invokes only compute the flags, and
// the flags are then mapped as follows (the first matching rule wins):
//
//   - DNSBlocking bit set: blocking="dns", accessible=false;
//
//   - TCPIPBlocking bit set: blocking="tcp_ip", accessible=false;
//
//   - TLSBlocking or HTTPBlocking bit set: blocking="http-failure",
//     accessible=false;
//
//   - HTTPDiff bit set: blocking="http-diff", accessible=false;
//
//   - flags equal to analysisFlagSuccess: blocking=false, accessible=true;
//
//   - otherwise: blocking=null, accessible=null.
//
// As an improvement over Web Connectivity v0.4, before settling for the
// null, null outcome we try to recognize a few special subcases, so that
// the user gets more information about what happened.
func (tk *TestKeys) analysisToplevel(logger model.Logger) {
	// No locking here: we assume every task has already terminated
	// by the time this function runs.

	// Each of the following passes contributes to tk.BlockingFlags.
	tk.analysisDNSToplevel(logger, model.GeoIPASNLookupperFunc(geoipx.LookupASN))
	tk.analysisTCPIPToplevel(logger)
	tk.analysisTLSToplevel(logger)
	tk.analysisHTTPToplevel(logger)

	// From now on we map the flags to .Blocking and .Accessible. The
	// order of the checks below defines the priority of each verdict.

	if tk.BlockingFlags&analysisFlagDNSBlocking != 0 {
		tk.Blocking = "dns"
		tk.Accessible = false
		logger.Warnf(
			"ANOMALY: flags=%d, accessible=%+v, blocking=%+v",
			tk.BlockingFlags, tk.Accessible, tk.Blocking,
		)
		return
	}

	if tk.BlockingFlags&analysisFlagTCPIPBlocking != 0 {
		tk.Blocking = "tcp_ip"
		tk.Accessible = false
		logger.Warnf(
			"ANOMALY: flags=%d, accessible=%+v, blocking=%+v",
			tk.BlockingFlags, tk.Accessible, tk.Blocking,
		)
		return
	}

	// Legacy behavior: the spec does not know about TLS-based blocking, so
	// both TLS and HTTP blocking are reported as "http-failure".
	const failureMask = analysisFlagTLSBlocking | analysisFlagHTTPBlocking
	if tk.BlockingFlags&failureMask != 0 {
		tk.Blocking = "http-failure"
		tk.Accessible = false
		logger.Warnf("ANOMALY: flags=%d, accessible=%+v, blocking=%+v",
			tk.BlockingFlags, tk.Accessible, tk.Blocking,
		)
		return
	}

	if tk.BlockingFlags&analysisFlagHTTPDiff != 0 {
		tk.Blocking = "http-diff"
		tk.Accessible = false
		logger.Warnf(
			"ANOMALY: flags=%d, accessible=%+v, blocking=%+v",
			tk.BlockingFlags, tk.Accessible, tk.Blocking,
		)
		return
	}

	if tk.BlockingFlags == analysisFlagSuccess {
		tk.Blocking = false
		tk.Accessible = true
		logger.Infof(
			"ACCESSIBLE: flags=%d, accessible=%+v, blocking=%+v",
			tk.BlockingFlags, tk.Accessible, tk.Blocking,
		)
		return
	}

	// NullNull remediation
	//
	// Getting here means the measurement has failed. Still, a number of
	// edge cases can be explained by running dedicated detection algorithms.
	//
	// The cases of this switch are evaluated top to bottom and the relative
	// order of the algorithms matters: do not reorder them without careful
	// consideration, or the results may be unexpected.
	switch {
	case tk.analysisNullNullDetectTHDNSNXDOMAIN(logger):
		tk.Blocking = "dns"
		tk.Accessible = false
		logger.Warnf(
			"RESIDUAL_DNS_BLOCKING: flags=%d, accessible=%+v, blocking=%+v",
			tk.BlockingFlags, tk.Accessible, tk.Blocking,
		)

	case tk.analysisNullNullDetectNoAddrs(logger):
		tk.Blocking = false
		tk.Accessible = false
		logger.Infof(
			"WEBSITE_DOWN_DNS: flags=%d, accessible=%+v, blocking=%+v",
			tk.BlockingFlags, tk.Accessible, tk.Blocking,
		)

	case tk.analysisNullNullDetectAllConnectsFailed(logger):
		tk.Blocking = false
		tk.Accessible = false
		logger.Infof(
			"WEBSITE_DOWN_TCP: flags=%d, accessible=%+v, blocking=%+v",
			tk.BlockingFlags, tk.Accessible, tk.Blocking,
		)

	case tk.analysisNullNullDetectTLSMisconfigured(logger):
		tk.Blocking = false
		tk.Accessible = false
		logger.Infof(
			"WEBSITE_DOWN_TLS: flags=%d, accessible=%+v, blocking=%+v",
			tk.BlockingFlags, tk.Accessible, tk.Blocking,
		)

	case tk.analysisNullNullDetectSuccessfulHTTPS(logger):
		tk.Blocking = false
		tk.Accessible = true
		logger.Infof(
			"ACCESSIBLE_HTTPS: flags=%d, accessible=%+v, blocking=%+v",
			tk.BlockingFlags, tk.Accessible, tk.Blocking,
		)

	default:
		// none of the remediation algorithms could explain the failure
		tk.Blocking = nil
		tk.Accessible = nil
		logger.Warnf(
			"UNKNOWN: flags=%d, accessible=%+v, blocking=%+v",
			tk.BlockingFlags, tk.Accessible, tk.Blocking,
		)
	}
}
// Bit flags that the analysisNullNullDetect* functions OR into
// TestKeys.NullNullFlags to record which null, null subcase they detected.
const (
	// analysisFlagNullNullNoAddrs means that neither the probe nor the TH
	// could obtain any IP address from any resolver.
	analysisFlagNullNullNoAddrs = 1 << iota

	// analysisFlagNullNullAllConnectsFailed means that every connect
	// attempt failed, both in the probe and in the test helper.
	analysisFlagNullNullAllConnectsFailed = 1 << iota

	// analysisFlagNullNullTLSMisconfigured means that every TLS handshake
	// attempt failed, both in the probe and in the test helper.
	analysisFlagNullNullTLSMisconfigured = 1 << iota

	// analysisFlagNullNullSuccessfulHTTPS means that we had no TH data but
	// all the HTTP requests used HTTPS and none of them failed.
	analysisFlagNullNullSuccessfulHTTPS = 1 << iota

	// analysisFlagNullNullNXDOMAINWithCensorship means that local DNS
	// resolutions did not fail while the control failed with NXDOMAIN. In
	// such a case there is probably local DNS interception, where all the
	// cleartext queries receive the same bogus answers because of a rule
	// that was applied to a domain that has now expired.
	analysisFlagNullNullNXDOMAINWithCensorship = 1 << iota
)
// analysisNullNullDetectTHDNSNXDOMAIN is one of the algorithms we run when
// we would otherwise end up with .Blocking = nil and .Accessible = nil. It
// detects the case where the probe resolved some addresses using cleartext
// DNS while the TH reports that the domain is NXDOMAIN. When this happens
// we trust the TH's DoH-based observation more than the local results.
//
// On success, this function sets analysisFlagNullNullNXDOMAINWithCensorship
// inside tk.NullNullFlags and returns true. It returns false otherwise.
//
// See https://github.com/ooni/probe/issues/2308.
func (tk *TestKeys) analysisNullNullDetectTHDNSNXDOMAIN(logger model.Logger) bool {
	// without the control response there is nothing to compare with
	if tk.Control == nil {
		return false
	}

	// scan the probe's lookups: we need at least one cleartext success
	// and we cannot tolerate any hard cleartext failure
	sawCleartextSuccess := false
	for _, lookup := range tk.Queries {
		switch {
		case lookup.Engine == "doh":
			// DoH lookups are encrypted, hence censors cannot manipulate
			// them, so they're not relevant for this algorithm

		case lookup.Failure == nil:
			sawCleartextSuccess = true

		case *lookup.Failure == netxlite.FailureDNSNoAnswer:
			// `dns_no_answer` is tolerated because it may be that only the
			// A query is censored while the AAAA one yields `dns_no_answer`.
			//
			// See https://explorer.ooni.org/measurement/20220914T073558Z_webconnectivity_IT_30722_n1_wroXRsBGYx0x9h0q?input=http%3A%2F%2Fitsat.info
			// for a measurement where this occurred and fooled us into
			// concluding that the website was just down.

		default:
			// any other failure is a hard failure that stops the algorithm
			return false
		}
	}
	if !sawCleartextSuccess {
		return false
	}

	// we are in the corner case only when the TH failed with its own
	// string representation of the NXDOMAIN error
	thFailure := tk.Control.DNS.Failure
	if thFailure == nil || *thFailure != model.THDNSNameError {
		return false
	}
	logger.Info("DNS censorship: local DNS success with remote NXDOMAIN")
	tk.NullNullFlags |= analysisFlagNullNullNXDOMAINWithCensorship
	return true
}
// analysisNullNullDetectSuccessfulHTTPS is one of the algorithms we run when
// we would otherwise end up with .Blocking = nil and .Accessible = nil. It
// detects a successful chain of HTTPS transactions, regardless of whatever
// else may have gone wrong during the measurement.
//
// Every request in the chain must use HTTPS: a single HTTP request would
// break the ~reasonable assumption that our custom CA bundle is enough to
// protect us from MITM. Of course, by relying on this algorithm we are not
// in a good position to flag server-side blocking.
//
// Version 0.4 of the probe had a similar algorithm, which, however, ran
// before the other checks. Version 0.5, on the contrary, only runs it
// after the other heuristics have failed.
//
// On success, this function sets analysisFlagNullNullSuccessfulHTTPS inside
// tk.NullNullFlags and returns true. It returns false otherwise.
//
// See https://github.com/ooni/probe/issues/2307 for more info.
func (tk *TestKeys) analysisNullNullDetectSuccessfulHTTPS(logger model.Logger) bool {
	// an empty chain cannot be a successful chain
	if len(tk.Requests) <= 0 {
		return false
	}

	// The chain goes from the most recent request to the oldest one, which
	// is irrelevant here, since we must inspect each entry anyway.
	//
	// CAVEAT: we assume .Requests contains a single request chain, which
	// seems fine, since that is what Web Connectivity should be doing.
	for _, txn := range tk.Requests {
		parsed, err := url.Parse(txn.Request.URL)
		if err != nil || parsed.Scheme != "https" {
			// a parse error looks like a bug, while a non-HTTPS scheme
			// violates the requirement that the whole chain is HTTPS
			return false
		}
		if txn.Failure != nil {
			// every transaction must succeed
			return false
		}
		code := txn.Response.Code
		successOrRedirect := code == 200 || code == 301 || code == 302 ||
			code == 307 || code == 308
		if !successOrRedirect {
			// the response must be either a success or a redirect
			return false
		}
	}

	logger.Info("website likely accessible: seen successful chain of HTTPS transactions")
	tk.NullNullFlags |= analysisFlagNullNullSuccessfulHTTPS
	return true
}
// analysisNullNullDetectTLSMisconfigured is one of the algorithms we run when
// we would otherwise end up with .Blocking = nil and .Accessible = nil. It
// detects the case where TLS handshakes failed in the same way for both the
// probe and the TH, which hints at a misconfigured TLS stack on the server.
// The problem is discussed at https://github.com/ooni/probe/issues/2300.
//
// On success, this function sets analysisFlagNullNullTLSMisconfigured inside
// tk.NullNullFlags and returns true. It returns false otherwise.
func (tk *TestKeys) analysisNullNullDetectTLSMisconfigured(logger model.Logger) bool {
	// no TLS control data means we cannot claim to be in this case
	if tk.Control == nil || tk.Control.TLSHandshake == nil {
		return false
	}
	thHandshakes := tk.Control.TLSHandshake

	// both the probe and the TH must have performed some TLS handshakes
	if len(tk.TLSHandshakes) == 0 || len(thHandshakes) == 0 {
		return false
	}

	for _, handshake := range tk.TLSHandshakes {
		// The probe's handshake must have failed, the TH must have tried
		// the very same endpoint, and the TH's handshake must have failed too.
		thResult, seen := thHandshakes[handshake.Address]
		bothFailed := handshake.Failure != nil && seen && thResult.Failure != nil
		if !bothFailed {
			return false
		}

		// We also require the two failures to be the same. This is possible
		// for TLS because probe and TH share the same definition of failure,
		// as opposed to being constrained by the legacy implementation of
		// the test helper and by Twisted names.
		//
		// TODO(bassosimone): this is the obvious algorithm but maybe
		// it's a bit too strict and there is a more lax version of
		// the same algorithm that it's still acceptable?
		if *handshake.Failure != *thResult.Failure {
			return false
		}
	}

	logger.Info("website likely down: all TLS handshake attempts failed for both probe and TH")
	tk.NullNullFlags |= analysisFlagNullNullTLSMisconfigured
	return true
}
// analysisNullNullDetectAllConnectsFailed is one of the algorithms we run
// when we would otherwise end up with .Blocking = nil and .Accessible = nil.
// It detects the case where every TCP connect attempted by the probe failed
// and the TH also failed to connect to each of the same endpoints.
//
// See https://explorer.ooni.org/measurement/20220911T105037Z_webconnectivity_IT_30722_n1_ruzuQ219SmIO9SrT?input=https://doh.centraleu.pi-dns.com/dns-query?dns=q80BAAABAAAAAAAAA3d3dwdleGFtcGxlA2NvbQAAAQAB
// for an example measurement with this behavior.
//
// On success, this function sets analysisFlagNullNullAllConnectsFailed inside
// tk.NullNullFlags and returns true. It returns false otherwise.
//
// See https://github.com/ooni/probe/issues/2299 for the reference issue.
func (tk *TestKeys) analysisNullNullDetectAllConnectsFailed(logger model.Logger) bool {
	// no control data means we cannot claim to be in this case
	if tk.Control == nil {
		return false
	}
	thConnects := tk.Control.TCPConnect

	// both the probe and the TH must have attempted some connects
	if len(tk.TCPConnect) == 0 || len(thConnects) == 0 {
		return false
	}

	for _, attempt := range tk.TCPConnect {
		// every connect attempted by the probe must have failed
		if attempt.Status.Failure == nil {
			return false
		}

		// the TH must have tried the very same endpoint and failed as well
		endpoint := net.JoinHostPort(attempt.IP, fmt.Sprintf("%d", attempt.Port))
		if thAttempt, seen := thConnects[endpoint]; !seen || thAttempt.Failure == nil {
			return false
		}
	}

	logger.Info("website likely down: all TCP connect attempts failed for both probe and TH")
	tk.NullNullFlags |= analysisFlagNullNullAllConnectsFailed
	return true
}
// analysisNullNullDetectNoAddrs is one of the algorithms we run when we
// would otherwise end up with .Blocking = nil and .Accessible = nil. It
// detects the case where the domain is expired, so that no query returned
// any address, for both the probe and the TH.
//
// See https://github.com/ooni/probe/issues/2290 for further documentation
// about the issue we are addressing here.
//
// Checking specifically for NXDOMAIN would be tempting, but we know that
// doing so is problematic. In fact, the Android getaddrinfo resolver
// always returns EAI_NODATA on error, no matter what actually went wrong
// inside the Android DNS backend.
//
// See https://github.com/ooni/probe/issues/2029 for more information
// on Android's getaddrinfo behavior.
//
// On success, this function sets analysisFlagNullNullNoAddrs inside
// tk.NullNullFlags and returns true. It returns false otherwise.
func (tk *TestKeys) analysisNullNullDetectNoAddrs(logger model.Logger) bool {
	// no control data means we cannot claim to be in this case
	if tk.Control == nil {
		return false
	}

	// a single query with answers rules out the NoAddresses case
	for _, lookup := range tk.Queries {
		if len(lookup.Answers) > 0 {
			return false
		}
	}

	// likewise, we're not in the NoAddresses case if the probe attempted
	// any TCP connect or any TLS handshake
	probeUsedAddrs := len(tk.TCPConnect) > 0 || len(tk.TLSHandshakes) > 0

	// and the same holds if the TH resolved or used any address
	thUsedAddrs := len(tk.Control.DNS.Addrs) > 0 || len(tk.Control.TCPConnect) > 0

	if probeUsedAddrs || thUsedAddrs {
		return false
	}

	logger.Infof("website likely down: all DNS lookups failed for both probe and TH")
	tk.NullNullFlags |= analysisFlagNullNullNoAddrs
	return true
}