/
rule_multiphase.go
389 lines (368 loc) · 14.6 KB
/
rule_multiphase.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0
package corazawaf
import (
"strings"
"github.com/nguyentin2068/waf/types"
"github.com/nguyentin2068/waf/types/variables"
)
// inferredPhases is a bit set of rule phases packed into a single byte:
// bit n is set when phase n has been recorded (see set, has and hasOrMinor).
type inferredPhases byte
// has reports whether the bit corresponding to phase is set in p.
func (p *inferredPhases) has(phase types.RulePhase) bool {
	mask := inferredPhases(1) << phase
	return *p&mask != 0
}
// hasOrMinor reports whether the bit for phase, or for any earlier phase
// starting from phase 1, is set. Bit 0 is never inspected.
//
// E.g. with inferredPhases = 00000010 (bit 1 set),
// hasOrMinor called with phase 2 tests, in order:
//
//	00000010 & 00000010
//	00000010 & 00000100
//
// and returns true as soon as one of the tests is non-zero.
func (p *inferredPhases) hasOrMinor(phase types.RulePhase) bool {
	// mask starts on bit 1 and moves one bit to the left per inspected phase.
	mask := inferredPhases(1) << 1
	for i := 1; i <= int(phase); i++ {
		if *p&mask != 0 {
			return true
		}
		mask <<= 1
	}
	return false
}
// set turns on the bit corresponding to phase, leaving the other bits untouched.
func (p *inferredPhases) set(phase types.RulePhase) {
	*p = *p | inferredPhases(1)<<phase
}
// minPhase maps a rule variable to the earliest phase in which it may be
// populated. Variables that are not tied to a phase, and variables not listed
// here at all, yield types.PhaseUnknown.
//
// NOTE: variables.Args and variables.ArgsNames should ideally be evaluated
// both in phase 1 and 2, but rules can set state in the transaction, e.g. a
// counter, which prevents evaluating any variable multiple times.
func minPhase(v variables.RuleVariable) types.RulePhase {
	switch v {
	case variables.UniqueID,
		variables.ArgsCombinedSize, // original note: size changes between phase 1 and 2
		variables.QueryString,
		variables.RemoteAddr,
		variables.RemoteHost, // not implemented
		variables.RemotePort,
		variables.RequestBasename,
		variables.RequestFilename,
		variables.RequestLine,
		variables.RequestMethod,
		variables.RequestProtocol,
		variables.RequestURI,
		variables.RequestURIRaw,
		variables.ServerAddr, // configuration of the server itself
		variables.ServerName, // configuration of the server itself
		variables.ServerPort, // configuration of the server itself
		variables.RequestHeadersNames,
		variables.ArgsGet,
		variables.ArgsPath,
		variables.RequestCookies,
		variables.RequestHeaders,
		variables.Geo, // not populated by Coraza
		variables.RequestCookiesNames,
		variables.ArgsGetNames,
		variables.Env,
		variables.UrlencodedError:
		return types.PhaseRequestHeaders

	case variables.FilesCombinedSize,
		variables.FullRequestLength,  // not populated by Coraza
		variables.InboundDataError,   // not populated by Coraza
		variables.MultipartDataAfter, // not populated by Coraza (MultipartBoundaryWhitespace kept for compatibility)
		variables.ReqbodyError,
		variables.ReqbodyErrorMsg,
		variables.ReqbodyProcessorError,
		variables.ReqbodyProcessorErrorMsg,
		variables.RequestBody,
		variables.RequestBodyLength,
		variables.Args, // updated between headers and body
		variables.ArgsPost,
		variables.FilesSizes,
		variables.FilesNames,
		variables.FilesTmpContent, // not populated by Coraza
		variables.MultipartFilename,
		variables.MultipartName,
		variables.Files,
		variables.FilesTmpNames,
		variables.ArgsNames, // updated between headers and body
		variables.ArgsPostNames,
		variables.JSON,
		variables.RequestXML,
		variables.XML,
		variables.MultipartPartHeaders:
		return types.PhaseRequestBody

	case variables.ResponseContentType,
		variables.ResponseProtocol,
		variables.ResponseStatus,
		variables.StatusLine,
		variables.ResponseHeadersNames,
		variables.ResponseHeaders:
		return types.PhaseResponseHeaders

	case variables.OutboundDataError,
		variables.ResponseBody,
		variables.ResponseContentLength,
		variables.ResponseArgs,
		variables.ResponseXML:
		return types.PhaseResponseBody

	case variables.MatchedVar, // only for logging, not evaluation
		variables.MatchedVarName,   // only for logging, not evaluation
		variables.ReqbodyProcessor, // configuration of Coraza itself, shouldn't be used in phases
		variables.HighestSeverity,  // result of matching, not used in phases
		variables.Duration,         // would need to be defined for multiple phases to make sense
		variables.MatchedVarsNames, // result of execution
		variables.MatchedVars,      // result of execution
		variables.TX,
		variables.Rule: // shouldn't be used in phases
		return types.PhaseUnknown
	}
	// Any variable not listed above has no known minimum phase.
	return types.PhaseUnknown
}
// computeRuleChainMinPhase fills r.chainMinPhase for the head rule of a chain.
//
// For every chained rule the earliest phase among its variables is taken
// (variables whose minPhase is PhaseUnknown are ignored); chainMinPhase is
// then the latest of those per-rule values, i.e. the first phase in which
// every rule of the chain has something to match against.
//
// The function does nothing unless r is a parent rule (ParentID_ == 0) that
// has a chain and whose chainMinPhase is still PhaseUnknown.
//
// TODO(anuraaga): This is effectively lazily computing the min phase of a rule
// with chain the first time we evaluate the rule. Instead, we should do this at
// parse time, but this will require a large-ish refactoring of the parser,
// which adds parent rules to a rule group before preparing the child rules. In
// the meantime, only evaluating this once should allow performance to be fine.
func computeRuleChainMinPhase(r *Rule) {
	if r.ParentID_ != 0 || !r.HasChain || r.chainMinPhase != types.PhaseUnknown {
		return
	}

	for link := r.Chain; link != nil; link = link.Chain {
		// Earliest phase in which this single chained rule could match.
		linkMin := types.PhaseUnknown
		for _, rv := range link.variables {
			candidate := minPhase(rv.Variable)
			switch {
			case candidate == types.PhaseUnknown:
				// Not bound to any phase: does not constrain the chain.
			case linkMin == types.PhaseUnknown, candidate < linkMin:
				linkMin = candidate
			}
		}

		// Keep the latest per-rule minimum seen so far.
		if r.chainMinPhase == types.PhaseUnknown || linkMin > r.chainMinPhase {
			r.chainMinPhase = linkMin
		}
	}
}
// multiphaseSkipVariable reports whether variable must be skipped when rule r
// is evaluated in the given phase under multiphase evaluation.
//
// The decision is taken as follows:
//   - a rule with a chain is skipped entirely while phase is before
//     r.chainMinPhase, because the whole chain is not available yet;
//   - a chained (non-parent) rule is otherwise never skipped here;
//   - a variable whose minPhase is PhaseUnknown is never skipped;
//   - for a parent rule without chain, the variable is evaluated only in its
//     own min phase;
//   - for a parent rule with chain, a variable that was available before
//     chainMinPhase is evaluated once, in chainMinPhase, and skipped in later
//     phases. Variables whose min phase is chainMinPhase or later are evaluated
//     in every phase from chainMinPhase on, to give the chained rules a chance
//     to match.
func multiphaseSkipVariable(r *Rule, variable variables.RuleVariable, phase types.RulePhase) bool {
	// The variable may be available but the whole chain is not: wait.
	if r.HasChain && phase < r.chainMinPhase {
		return true
	}
	// Only parent rules get their variables pinned to a phase.
	if r.ParentID_ != 0 {
		return false
	}

	earliest := minPhase(variable)
	if earliest == types.PhaseUnknown {
		return false
	}

	if !r.HasChain {
		// No chain: evaluate in the variable's min phase and in no other.
		return earliest != phase
	}

	// The variable was available before the chain was; it is evaluated exactly
	// once, during chainMinPhase.
	return earliest < r.chainMinPhase && phase != r.chainMinPhase
}
// generateChainMatches expands matchedValues, which carries every matched
// variable together with its chain level, into all the complete chains that can
// be formed by picking one match per level. Each complete chain is appended to
// *matchedChainsResult.
//
// E.g. REQUEST_URI (chainLevel 0), REQUEST_URI (chainLevel 1), REQUEST_HEADERS (chainLevel 1),
// REQUEST_BODY (chainLevel 2), REQUEST_HEADERS (chainLevel 2) produce:
//
//	REQUEST_URI - REQUEST_URI - REQUEST_BODY
//	REQUEST_URI - REQUEST_URI - REQUEST_HEADERS
//	REQUEST_URI - REQUEST_HEADERS - REQUEST_BODY
//	REQUEST_URI - REQUEST_HEADERS - REQUEST_HEADERS
//
// currentDepth is the chain level handled by this call and
// buildingMatchedChain the prefix built by the shallower levels (nil at the top
// level). tx is only forwarded to the recursive calls.
func generateChainMatches(tx *Transaction, matchedValues []types.MatchData, currentDepth int, buildingMatchedChain []types.MatchData, matchedChainsResult *[][]types.MatchData) {
	lastLevel := matchedChainDepth(matchedValues)

	for _, md := range matchedValues {
		// Only the matches belonging to the level being expanded are relevant.
		if md.ChainLevel() != currentDepth {
			continue
		}

		// Work on a private copy of the prefix so sibling branches stay independent.
		extended := make([]types.MatchData, 0, len(buildingMatchedChain)+1)
		extended = append(extended, buildingMatchedChain...)
		extended = append(extended, md)

		if currentDepth == lastLevel {
			// Deepest level reached: the chain is complete.
			*matchedChainsResult = append(*matchedChainsResult, extended)
		} else {
			generateChainMatches(tx, matchedValues, currentDepth+1, extended, matchedChainsResult)
		}
	}
}
// isMultiphaseDoubleEvaluation reports whether the chain completed by mr has
// already matched for the same parent rule earlier in the transaction.
// It prevents the related actions from running more than once (e.g. it avoids
// incrementing the anomaly score twice). It is intended for chained matches,
// because their variables are evaluated multiple times and are not constrained
// to the min phase.
//
// mr is always appended to *collectiveMatchedValues first. When a double match
// is detected it is removed again, a debug message is logged and true is
// returned, so that the caller can skip the actions of the innermost rule.
func isMultiphaseDoubleEvaluation(tx *Transaction, phase types.RulePhase, r *Rule, collectiveMatchedValues *[]types.MatchData, mr types.MatchData) bool {
	*collectiveMatchedValues = append(*collectiveMatchedValues, mr)

	for _, previous := range tx.matchedRules {
		// Only earlier matches of the same parent rule, with the same chain
		// depth, can be a duplicate of the current one.
		if previous.Rule().ID() != r.ParentID_ {
			continue
		}
		if matchedChainDepth(previous.MatchedDatas()) != matchedChainDepth(*collectiveMatchedValues) {
			continue
		}

		// Expand both sides into full chains: the ones that already matched
		// and the ones formed by the values collected so far.
		var knownChains, candidateChains [][]types.MatchData
		generateChainMatches(tx, previous.MatchedDatas(), 0, nil, &knownChains)
		generateChainMatches(tx, *collectiveMatchedValues, 0, nil, &candidateChains)

		for _, candidate := range candidateChains {
			if !chainPartOf(candidate, knownChains) {
				// Not seen before; keep checking the remaining candidates.
				continue
			}

			// These chained values already matched: actions must not be
			// executed a second time against the same variables.
			var names strings.Builder
			for idx, md := range candidate {
				if idx > 0 {
					names.WriteString(" - ")
				}
				names.WriteString(md.Variable().Name())
			}

			// Fall back to the parent ID when the rule has no ID of its own.
			ruleID := r.ID_
			if ruleID == 0 {
				ruleID = r.ParentID_
			}
			tx.DebugLogger().Debug().
				Int("rule_id", ruleID).
				Int("phase", int(phase)).
				Str("matched chain", names.String()).
				Msg("Chain already matched, skipping actions enforcement")

			// Drop the value appended at the top of the function.
			collected := *collectiveMatchedValues
			*collectiveMatchedValues = collected[:len(collected)-1]
			return true
		}
	}
	return false
}
// chainPartOf reports whether newMatchedChain is already present among
// matchedChains.
//
// Two chains are considered the same when, position by position, every entry
// of newMatchedChain has the same Variable() and Value() as the entry of the
// already matched chain. A matched chain shorter than newMatchedChain can never
// be the same match and is skipped (it previously caused an index out of range
// panic).
//
// NOTE(review): only Variable() and Value() are compared; any other identifying
// attribute of a match is ignored — confirm this is intended.
func chainPartOf(newMatchedChain []types.MatchData, matchedChains [][]types.MatchData) bool {
	for _, matchedChain := range matchedChains {
		if len(matchedChain) < len(newMatchedChain) {
			// Not enough entries to compare against: treat as a different match
			// instead of indexing past the end of matchedChain.
			continue
		}

		differentMatch := false
		for n, newMatchedValue := range newMatchedChain {
			if newMatchedValue.Variable() != matchedChain[n].Variable() || newMatchedValue.Value() != matchedChain[n].Value() {
				differentMatch = true
				break
			}
		}
		if !differentMatch {
			// we found a chain already matched
			return true
		}
	}
	return false
}
// matchedChainDepth returns the depth of a matched chain, i.e. the highest
// chain level found among the matched values. It returns 0 when datas is empty
// or when no value has a positive chain level.
func matchedChainDepth(datas []types.MatchData) int {
	deepest := 0
	for _, md := range datas {
		if level := md.ChainLevel(); level > deepest {
			deepest = level
		}
	}
	return deepest
}