Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OCPBUGS-22739: Properly handle rewrite-target annotation #534

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions images/router/haproxy/conf/haproxy-config.template
Expand Up @@ -671,9 +671,9 @@ backend {{ genBackendNamePrefix $cfg.TLSTermination }}:{{ $cfgIdx }}
{{- with $pathRewriteTarget := firstMatch $pathRewriteTargetPattern (index $cfg.Annotations "haproxy.router.openshift.io/rewrite-target") }}
# Path rewrite target
{{- if eq $pathRewriteTarget "/" }}
http-request replace-path ^{{ $cfg.Path }}/?(.*)$ {{ $pathRewriteTarget }}\1
http-request replace-path ^{{ $cfg.Path }}/?(.*)$ '{{ processRewriteTarget $pathRewriteTarget }}'
{{- else }}
http-request replace-path ^{{ $cfg.Path }}(.*)$ {{ $pathRewriteTarget }}\1
http-request replace-path ^{{ $cfg.Path }}(.*)$ '{{ processRewriteTarget $pathRewriteTarget }}'
{{- end }}
{{- end }}{{/* rewrite target */}}

Expand Down
3 changes: 3 additions & 0 deletions pkg/router/template/template_helper.go
Expand Up @@ -19,6 +19,7 @@ import (
templateutil "github.com/openshift/router/pkg/router/template/util"
haproxyutil "github.com/openshift/router/pkg/router/template/util/haproxy"
"github.com/openshift/router/pkg/router/template/util/haproxytime"
"github.com/openshift/router/pkg/router/template/util/rewritetarget"
)

const (
Expand Down Expand Up @@ -411,4 +412,6 @@ var helperFunctions = template.FuncMap{

"clipHAProxyTimeoutValue": clipHAProxyTimeoutValue, //clips extrodinarily high timeout values to be below the maximum allowed timeout value
"parseIPList": parseIPList, //parses the list of IPs/CIDRs (IPv4/IPv6)

"processRewriteTarget": rewritetarget.SanitizeInput, //sanitizes `haproxy.router.openshift.io/rewrite-target` annotation
}
159 changes: 159 additions & 0 deletions pkg/router/template/util/rewritetarget/rewritetarget.go
@@ -0,0 +1,159 @@
package rewritetarget

import (
"regexp"
"strings"
)

var (
// oneOrMorePercentSigns matches one or more literal percent
// signs.
oneOrMorePercentSigns = regexp.MustCompile(`%+`)

// oneOrMoreBackslashes matches one or more literal
// backslashes.
oneOrMoreBackslashes = regexp.MustCompile(`\\+`)
)

type runeResult struct {
value string // The rune to append to the result.
escaped bool // Whether the next rune is escaped.
skip bool // Whether to skip appending this rune.
stop bool // Whether to stop processing further runes.
}

type processFunc func(char rune, escaped bool) runeResult

// processRunes iterates through the input string and calls processFn
// for each rune. It uses the result of processFn to determine whether
// to continue, skip the rune, or stop processing the input. It returns
// a processed string.
func processRunes(input string, processFn processFunc) string {
var sb strings.Builder

// Increase buffer capacity to improve performance by
// preallocating space for the string being built. The buffer
// is set to 125% of the input length to accommodate
// additional characters without the need for further
// allocation.
sb.Grow(len(input) + (len(input) / 4))

// escapeFlag tracks if the next rune is escaped.
escapeFlag := false

for _, char := range input {
result := processFn(char, escapeFlag)
escapeFlag = result.escaped
if result.stop {
break
}
if !result.skip {
sb.WriteString(result.value)
}
}

return sb.String()
}

// replacePercentSigns replaces each % to with %% if the current sequence
// contains an odd number %. Previously, a single % produced a syntax error,
// and users had to use %% to represent a literal %. To ensure compatibility
// while resolving this problem, we must continue to replace single % with
// double %%, but leave double %% as is, so that existing users who manually
// addressed the issue are not broken. Furthermore, even sequences, such as
// %%%% or %%%%%% were also valid and should be left untouched, while odd
// sequences such as %%% or %%%%% were invalid, and each % should be doubled
// to ensure every % is escaped.
func replacePercentSigns(val string) string {
return oneOrMorePercentSigns.ReplaceAllStringFunc(val, func(match string) string {
if len(match)%2 == 1 {
// For an odd count of %, replace each % with %%.
return strings.ReplaceAll(match, "%", "%%")
}
return match
})
}

// removeSingleBackslashes removes single backslashes \.
func removeSingleBackslashes(val string) string {
return oneOrMoreBackslashes.ReplaceAllStringFunc(val, func(match string) string {
if len(match) == 1 {
return oneOrMoreBackslashes.ReplaceAllString(match, "")
}
return match
})
}

// convertDoubleBackslashes converts double backslashes \\ to single
// backslashes \.
func convertDoubleBackslashes(val string) string {
return strings.ReplaceAll(val, `\\`, `\`)
}

// newProcessHashCreator creates a function to process '#' runes in a
// string. It returns a processFunc which will set
// encounteredCommentMarker to true and stop processing if it
// encounters a '#' that is not escaped.
func newProcessHashCreator(encounteredCommentMarker *bool) processFunc {
return func(char rune, escaped bool) runeResult {
if char == '#' && !escaped {
*encounteredCommentMarker = true
return runeResult{stop: true}
}
return runeResult{value: string(char), escaped: char == '\\' && !escaped}
}
}

// processDoubleQuotes is a processFunc that processes double quote
// characters. It skips the double quote if it's not escaped.
func processDoubleQuotes(char rune, escaped bool) runeResult {
if char == '"' && !escaped {
return runeResult{skip: true}
}
return runeResult{value: string(char), escaped: char == '\\' && !escaped}
}

// processSingleQuotes is a processFunc that processes single quote
// characters. It skips the single quote if it's not escaped. If it's
// escaped, it adds an escaped single quote for later conversion.
func processSingleQuotes(char rune, escaped bool) runeResult {
if char == '\'' && !escaped {
return runeResult{skip: true}
} else if char == '\'' && escaped {
// TRICKY: Because our later operation removes single \,
// we should double escape \\ it, so it will be converted
// to \ later.
return runeResult{value: `'\\''`}
}
return runeResult{value: string(char), escaped: char == '\\' && !escaped}
}

// SanitizeInput processes the `haproxy.router.openshift.io/rewrite-target`
// annotation value for API compatibility while properly handling values
// with spaces, backslashes, and other special characters. Because the
// annotation value was initially introduced without being enclosed in
// single quotes, certain characters were interpreted rather than being
// treated literally. We later added the single quotes wrapping to resolve
// OCPBUGS-22739. However, we must still maintain API compatibility after
// this change: the annotation values MUST be interpreted to the same values
// after updating to enclose the value in single quotes.
func SanitizeInput(val string) string {
var encounteredCommentMarker bool

val = processRunes(val, newProcessHashCreator(&encounteredCommentMarker))
val = processRunes(val, processDoubleQuotes)
val = processRunes(val, processSingleQuotes)
val = replacePercentSigns(val)
val = removeSingleBackslashes(val)
val = convertDoubleBackslashes(val)

if !encounteredCommentMarker {
// The literal `\1` is appended to annotations without
// comments to comply with HAProxy rewrite rule
// expectations, which necessitate a capture group
// reference (i.e., `\1`) for dynamic substitutions.
val += `\1`
}

return val
}
94 changes: 94 additions & 0 deletions pkg/router/template/util/rewritetarget/rewritetarget_test.go
@@ -0,0 +1,94 @@
package rewritetarget_test

import (
"github.com/openshift/router/pkg/router/template/util/rewritetarget"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SanitizeInput is in the same package, no need to import it.

Copy link
Contributor

@frobware frobware Mar 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not the same package. This is a deliberate decision. Placing tests in a separate package, package rewritetarget_test, enforces black-box testing principles. For me, key reasons to use black-box testing:

  • Encapsulation: It guarantees tests can only access exported package members, mirroring how external packages interact with rewritetarget. This approach reinforces the integrity of the package's encapsulation.

  • API Design: Black-box testing inherently leads to clearer and more stable APIs by focusing on the package's usability rather than its internals.

  • Resilience: Tests are less prone to break due to internal changes, as they're tied to the public API. This helps to reduce maintenance.

  • User Perspective: It provides insights into how the package is used, highlighting potential areas for improvement from the end-user’s standpoint.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, right, I was looking at the directories. So, what's the idea behind? To test without the access to any private variables? Like if it was the user of the package?

I see this pattern is not quite uniformly implemented in util package (e.g. haproxy).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Encapsulation: It guarantees tests can only access exported package members, mirroring how external packages interact with rewritetarget. This approach reinforces the integrity of the package's encapsulation.

API Design: Black-box testing inherently leads to clearer and more stable APIs by focusing on the package's usability rather than its internals.

Resilience: Tests are less prone to break due to internal changes, as they're tied to the public API. This helps to reduce maintenance.

User Perspective: It provides insights into how the package is used, highlighting potential areas for improvement from the end-user’s standpoint.

I see. Yes, I think it makes sense for SanitizeInput function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, Andy and I had this discussion in slack. It's an interesting perspective we should consider when writing unit tests.

"testing"
)

func Test_SanitizeInput(t *testing.T) {
testCases := []struct {
name string
input string
output string
}{
{
name: "single percent should be doubled",
input: `%foo`,
output: `%%foo\1`,
},
{
name: "double percent should be not changed",
input: `%%foo`,
output: `%%foo\1`,
},
{
name: "triple percent should be doubled entirely",
input: `%%%foo`,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So here we think that the user put "3 percent signs without escaping" and not "1 escaped percent sign plus one unescaped", right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly. This case is currently a syntax error, which means we don't have to maintain any sort of compatibility. with users using %%% today.

The most logical thing to do here is assume the user isn't escaping any of them and double them (to escape them).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another interpretation of %%% input could be as a combination of %% + % inputs. Therefore, by default, we assume the user's intent is to do the right escaping (like in case of %% input) and we do the minimum necessary to ensure the correct syntax(== %%%%).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes it's all coming back to me now. You are correct, we could assume mixed intention for escaped & unescaped, taking each % by itself. However, the doubling %%% to %%%%%% then gets interpreted back to %%% again, which is exactly what the author specified in the first place, keeping some consistency.

output: `%%%%%%foo\1`,
},
{
name: "quad percent should be not changed",
input: `%%%%foo`,
output: `%%%%foo\1`,
},
{
name: "single quotes should be removed, unless escaped",
input: `'foo'foo\'`,
output: `foofoo'\''\1`,
},
{
name: "single backslash should be dropped",
input: `\foo\`,
output: `foo\1`,
},
{
name: "double backslash should be single",
input: `\\foo\\`,
output: `\foo\\1`,
},
{
name: "triple backslash should be double",
input: `\\\foo\\\`,
output: `\\foo\\\1`,
},
{
name: "quad backslash should be double",
input: `\\\\foo\\\\`,
output: `\\foo\\\1`,
},
{
name: "comment in beginning should be remove everything",
input: `# foo # foo`,
output: ``,
},
{
name: "don't remove escaped comments",
input: `\# foo # foo`,
output: `# foo `,
},
{
name: "comment in middle should remove everything following",
input: `foo # foo`,
output: `foo `,
},
{
name: "double quotes should get removed, unless escaped",
input: `"foo"foo\"`,
output: `foofoo"\1`,
},
{
name: "combination",
input: `\\foo\"foo"\#foo\foo'foo\'#foo`,
output: `\foo"foo#foofoofoo'\''`,
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
got := rewritetarget.SanitizeInput(tc.input)
if got != tc.output {
t.Errorf("Failure: expected %s, got %s", tc.output, got)
}
})
}
}