diff --git a/Gopkg.lock b/Gopkg.lock index 3354686f6..b3e5335d6 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -191,14 +191,6 @@ revision = "a38c50148365edc8df43c1580c48fb2b3a1e9cd7" version = "v1.0.0" -[[projects]] - branch = "go1" - digest = "1:b8b842b1c221e57f599be4e31421eb804da1bf81538e2e9b8c3073938ad8e690" - name = "github.com/moovweb/rubex" - packages = ["."] - pruneopts = "" - revision = "b3d9ff6ad7d9b14f94a91c8271cd9ad9e77132e5" - [[projects]] branch = "master" digest = "1:563cb49ec2d3da93eb4a0f49ba5d0aca4d3edd09e668d23d0f6f71df73a7e534" @@ -219,6 +211,14 @@ revision = "1949ddbfd147afd4d964a9f00b24eb291e0e7c38" version = "v1.0.2" +[[projects]] + branch = "master" + digest = "1:867d96f5b8ccd0e2bb6ec0dfda653a14c249e7d29d92095f245e8fbcb88ea027" + name = "github.com/pbnjay/memory" + packages = ["."] + pruneopts = "" + revision = "974d429e7ae40c89e7dcd41cfcc22a0bfbe42510" + [[projects]] digest = "1:049b5bee78dfdc9628ee0e557219c41f683e5b06c5a5f20eaba0105ccc586689" name = "github.com/pelletier/go-buffruneio" @@ -672,7 +672,7 @@ source = "github.com/src-d/go-git" [[projects]] - digest = "1:f1dc8dd661130c9362c7e99d5b05450bebe60778bfc85dac991f05e8275d0459" + digest = "1:328cf8293a551847642352b4853f9b333e958fcfbf21fad4981866d5a8939c81" name = "gopkg.in/src-d/go-mysql-server.v0" packages = [ ".", @@ -691,7 +691,7 @@ "sql/plan", ] pruneopts = "NUT" - revision = "b32d2fdea095e2743d13f3ab4da5ae83aef55bc7" + revision = "0093a7562ad1cf31f179396dfa5be32893059dbb" [[projects]] digest = "1:f995136b53497081f1b3f29b99e78a597da6afb2bc6f22908382559a863df4ea" diff --git a/Gopkg.toml b/Gopkg.toml index d16b92f83..2e8c2f114 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -1,6 +1,6 @@ [[constraint]] name = "gopkg.in/src-d/go-mysql-server.v0" - revision = "b32d2fdea095e2743d13f3ab4da5ae83aef55bc7" + revision = "0093a7562ad1cf31f179396dfa5be32893059dbb" [[constraint]] name = "github.com/jessevdk/go-flags" diff --git a/docs/using-gitbase/functions.md b/docs/using-gitbase/functions.md index 
2a83d011a..8a041aa5d 100644 --- a/docs/using-gitbase/functions.md +++ b/docs/using-gitbase/functions.md @@ -95,4 +95,4 @@ Also, if you want to retrieve values from a non common property, you can pass it ## Standard functions -You can check standard functions in [`go-mysql-server` documentation](https://github.com/src-d/go-mysql-server/tree/b32d2fdea095e2743d13f3ab4da5ae83aef55bc7#custom-functions). +You can check standard functions in [`go-mysql-server` documentation](https://github.com/src-d/go-mysql-server/tree/0093a7562ad1cf31f179396dfa5be32893059dbb#custom-functions). diff --git a/docs/using-gitbase/indexes.md b/docs/using-gitbase/indexes.md index 1612d1df7..d2512a0af 100644 --- a/docs/using-gitbase/indexes.md +++ b/docs/using-gitbase/indexes.md @@ -26,4 +26,4 @@ and for the second query also two indexes will be used and the result will be a You can find some more examples in the [examples](./examples.md#create-an-index-for-columns-on-a-table) section. -See [go-mysql-server](https://github.com/src-d/go-mysql-server/tree/b32d2fdea095e2743d13f3ab4da5ae83aef55bc7#indexes) documentation for more details +See [go-mysql-server](https://github.com/src-d/go-mysql-server/tree/0093a7562ad1cf31f179396dfa5be32893059dbb#indexes) documentation for more details diff --git a/docs/using-gitbase/optimize-queries.md b/docs/using-gitbase/optimize-queries.md index decc8f8ef..ba00cbe30 100644 --- a/docs/using-gitbase/optimize-queries.md +++ b/docs/using-gitbase/optimize-queries.md @@ -5,6 +5,7 @@ Even though in each release performance improvements are included to make gitbas There are two ways to optimize a gitbase query: - Create an index for some parts. - Making sure the joined tables are squashed. +- Making sure joins that are not squashed are performed in memory. ## Assessing performance bottlenecks @@ -57,6 +58,24 @@ Some performance issues might not be obvious, but there are a few that really st - Joins not squashed. 
If you performed some joins between tables and instead of a `SquashedTable` node you see `Join` and `Table` nodes, it means the joins were not successfully squashed. There is a more detailed explanation about this in next sections of this document. - Indexes not used. If you can't see the indexes in your table nodes, it means somehow those indexes are not being used by the table. There is a more detailed explanation about this in next sections of this document. +- Joins that are not squashed and are not being executed in memory. There is a more detailed explanation about this in the next sections of this document. + +## In-memory joins + +There are two modes in which gitbase can execute an inner join: + +- Multipass: it fully iterates the right side of the join once for each row in the left side. This is really expensive, but avoids having to load one side fully in memory. +- In-memory: it loads the whole right side in memory and iterates the left side. Both sides are iterated exactly once, which makes the query much faster, but has the disadvantage of potentially requiring a lot of memory. + +The default mode is multipass, unless the right side fits in memory (there's a more elaborate explanation about this below). + +In-memory joins can be enabled at the user's request, either with the `EXPERIMENTAL_IN_MEMORY_JOIN=on` environment variable or by executing `SET inmemory_joins = 1`. The latter method only enables them for the current connection. + +Even if they are not globally enabled for all queries, there is an optimization that checks whether the join can be performed in memory and, if it can't, switches to multipass mode. +As long as the memory usage of the whole gitbase server is under 20% of all the physical memory available in the machine (not counting memory used by other processes), the join will be performed in memory. When this limit is exceeded, multipass mode will be used instead. 
+20% is just the default value; it can be changed by setting the `MAX_MEMORY_INNER_JOIN` environment variable to the maximum number of bytes the gitbase server can be using before switching to multipass mode. It can also be changed per session using `SET max_memory_joins=` followed by the maximum number of bytes. + +As a rule of thumb, the right side of an inner join should always be the smaller one: that way it has a better chance of being executed in memory, which makes the query faster. ## Indexes diff --git a/docs/using-gitbase/supported-clients.md b/docs/using-gitbase/supported-clients.md index 4a7e2a9cb..32cb982f1 100644 --- a/docs/using-gitbase/supported-clients.md +++ b/docs/using-gitbase/supported-clients.md @@ -1,3 +1,3 @@ ## Supported clients -To see the supported MySQL clients and examples about how to use them, take a look [here](https://github.com/src-d/go-mysql-server/blob/b32d2fdea095e2743d13f3ab4da5ae83aef55bc7/SUPPORTED_CLIENTS.md). +To see the supported MySQL clients and examples about how to use them, take a look [here](https://github.com/src-d/go-mysql-server/blob/0093a7562ad1cf31f179396dfa5be32893059dbb/SUPPORTED_CLIENTS.md). diff --git a/docs/using-gitbase/supported-syntax.md b/docs/using-gitbase/supported-syntax.md index 2bd604de9..7fca3715d 100644 --- a/docs/using-gitbase/supported-syntax.md +++ b/docs/using-gitbase/supported-syntax.md @@ -1,3 +1,3 @@ ## Supported syntax -To see the SQL subset currently supported take a look at [this list](https://github.com/src-d/go-mysql-server/blob/b32d2fdea095e2743d13f3ab4da5ae83aef55bc7/SUPPORTED.md) from [src-d/go-mysql-server](https://github.com/src-d/go-mysql-server). +To see the SQL subset currently supported take a look at [this list](https://github.com/src-d/go-mysql-server/blob/0093a7562ad1cf31f179396dfa5be32893059dbb/SUPPORTED.md) from [src-d/go-mysql-server](https://github.com/src-d/go-mysql-server). 
diff --git a/vendor/github.com/moovweb/rubex/AUTHOR b/vendor/github.com/moovweb/rubex/AUTHOR deleted file mode 100644 index c4b0ee40b..000000000 --- a/vendor/github.com/moovweb/rubex/AUTHOR +++ /dev/null @@ -1 +0,0 @@ -Zhigang Chen diff --git a/vendor/github.com/moovweb/rubex/LICENSE b/vendor/github.com/moovweb/rubex/LICENSE deleted file mode 100644 index b49ac84f6..000000000 --- a/vendor/github.com/moovweb/rubex/LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (C) 2011 by Zhigang Chen - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/vendor/github.com/moovweb/rubex/README b/vendor/github.com/moovweb/rubex/README deleted file mode 100644 index 32c9e7cd3..000000000 --- a/vendor/github.com/moovweb/rubex/README +++ /dev/null @@ -1,38 +0,0 @@ -Rubex - -by Zhigang Chen (zhigang.chen@moovweb.com or zhigangc@gmail.com) - -A simple regular expression library that supports Ruby's regex syntax. It implements all the public functions of Go's Regexp package, except LiteralPrefix. 
By the benchmark tests in Regexp, the library is 40% to 10X faster than Regexp on all but one test. Unlike Go's Regrexp, this library supports named capture groups and also allow "\\1" and "\\k" in replacement strings. - -The library calls the Oniguruma regex library (5.9.2, the latest release as of now) for regex pattern searching. All replacement code is done in Go. This library can be easily adapted to support the regex syntax used by other programming languages or tools, like Java, Perl, grep, and emacs. - -To install everything, just run - - make install - -To run tests, do - - make test - -Also it uses a build tool called "gb", available at "http://code.google.com/p/go-gb/". - -So, to build the project, just go to the root directory and type - - gb -bm - -To install the package - - gb -im - -To run tests - - gb -t - -To clean up - - gb -cm - -To run benchmarks - - cd lib; gotest -test.bench="." - diff --git a/vendor/github.com/moovweb/rubex/VERSION b/vendor/github.com/moovweb/rubex/VERSION deleted file mode 100644 index d3827e75a..000000000 --- a/vendor/github.com/moovweb/rubex/VERSION +++ /dev/null @@ -1 +0,0 @@ -1.0 diff --git a/vendor/github.com/moovweb/rubex/all_test.go b/vendor/github.com/moovweb/rubex/all_test.go deleted file mode 100644 index a6220a16f..000000000 --- a/vendor/github.com/moovweb/rubex/all_test.go +++ /dev/null @@ -1,655 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package rubex - -import ( - "errors" - "runtime" - "strings" - "testing" -) - -var good_re = []string{ - ``, - `.`, - `^.$`, - `a`, - `a*`, - `a+`, - `a?`, - `a|b`, - `a*|b*`, - `(a*|b)(c*|d)`, - `[a-z]`, - `[a-abc-c\-\]\[]`, - `[a-z]+`, - //`[]`, //this is not considered as good by ruby/javascript regex - `[abc]`, - `[^1234]`, - `[^\n]`, - `\!\\`, -} - -type stringError struct { - re string - err error -} - -var bad_re = []stringError{ - {`*`, errors.New("target of repeat operator is not specified")}, - {`+`, errors.New("target of repeat operator is not specified")}, - {`?`, errors.New("target of repeat operator is not specified")}, - {`(abc`, errors.New("end pattern with unmatched parenthesis")}, - {`abc)`, errors.New("unmatched close parenthesis")}, - {`x[a-z`, errors.New("premature end of char-class")}, - //{`abc]`, Err}, //this is not considered as bad by ruby/javascript regex; nor are the following commented out regex patterns - {`abc[`, errors.New("premature end of char-class")}, - {`[z-a]`, errors.New("empty range in char class")}, - {`abc\`, errors.New("end pattern at escape")}, - //{`a**`, Err}, - //{`a*+`, Err}, - //{`a??`, Err}, - //{`\x`, Err}, -} - -func runParallel(testFunc func(chan bool), concurrency int) { - runtime.GOMAXPROCS(4) - done := make(chan bool, concurrency) - for i := 0; i < concurrency; i++ { - go testFunc(done) - } - for i := 0; i < concurrency; i++ { - <-done - <-done - } - runtime.GOMAXPROCS(1) -} - -const numConcurrentRuns = 200 - -func compileTest(t *testing.T, expr string, error error) *Regexp { - re, err := Compile(expr) - if (error == nil && err != error) || (error != nil && err.Error() != error.Error()) { - t.Error("compiling `", expr, "`; unexpected error: ", err.Error()) - } - return re -} - -func TestGoodCompile(t *testing.T) { - testFunc := func(done chan bool) { - done <- false - for i := 0; i < len(good_re); i++ { - compileTest(t, good_re[i], nil) - } - done <- true - } - runParallel(testFunc, numConcurrentRuns) -} - 
-func TestBadCompile(t *testing.T) { - for i := 0; i < len(bad_re); i++ { - compileTest(t, bad_re[i].re, bad_re[i].err) - } -} - -func matchTest(t *testing.T, test *FindTest) { - re := compileTest(t, test.pat, nil) - if re == nil { - return - } - m := re.MatchString(test.text) - if m != (len(test.matches) > 0) { - t.Errorf("MatchString failure on %s: %t should be %t", test.pat, m, len(test.matches) > 0) - } - // now try bytes - m = re.Match([]byte(test.text)) - if m != (len(test.matches) > 0) { - t.Errorf("Match failure on %s: %t should be %t", test.pat, m, len(test.matches) > 0) - } -} - -func TestMatch(t *testing.T) { - for _, test := range findTests { - matchTest(t, &test) - } -} - -func matchFunctionTest(t *testing.T, test *FindTest) { - m, err := MatchString(test.pat, test.text) - if err == nil { - return - } - if m != (len(test.matches) > 0) { - t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0) - } -} - -func TestMatchFunction(t *testing.T) { - for _, test := range findTests { - matchFunctionTest(t, &test) - } -} - -type ReplaceTest struct { - pattern, replacement, input, output string -} - -var replaceTests = []ReplaceTest{ - // Test empty input and/or replacement, with pattern that matches the empty string. - {"", "", "", ""}, - {"", "x", "", "x"}, - {"", "", "abc", "abc"}, - {"", "x", "abc", "xaxbxcx"}, - - // Test empty input and/or replacement, with pattern that does not match the empty string. - {"b", "", "", ""}, - {"b", "x", "", ""}, - {"b", "", "abc", "ac"}, - {"b", "x", "abc", "axc"}, - {"y", "", "", ""}, - {"y", "x", "", ""}, - {"y", "", "abc", "abc"}, - {"y", "x", "abc", "abc"}, - - // Multibyte characters -- verify that we don't try to match in the middle - // of a character. - {"[a-c]*", "x", "\u65e5", "x\u65e5x"}, - {"[^\u65e5]", "x", "abc\u65e5def", "xxx\u65e5xxx"}, - - // Start and end of a string. 
- {"^[a-c]*", "x", "abcdabc", "xdabc"}, - {"[a-c]*$", "x", "abcdabc", "abcdxx"}, - {"^[a-c]*$", "x", "abcdabc", "abcdabc"}, - {"^[a-c]*", "x", "abc", "x"}, - {"[a-c]*$", "x", "abc", "xx"}, - {"^[a-c]*$", "x", "abc", "x"}, - {"^[a-c]*", "x", "dabce", "xdabce"}, - {"[a-c]*$", "x", "dabce", "dabcex"}, - {"^[a-c]*$", "x", "dabce", "dabce"}, - {"^[a-c]*", "x", "", "x"}, - {"[a-c]*$", "x", "", "x"}, - {"^[a-c]*$", "x", "", "x"}, - - {"^[a-c]+", "x", "abcdabc", "xdabc"}, - {"[a-c]+$", "x", "abcdabc", "abcdx"}, - {"^[a-c]+$", "x", "abcdabc", "abcdabc"}, - {"^[a-c]+", "x", "abc", "x"}, - {"[a-c]+$", "x", "abc", "x"}, - {"^[a-c]+$", "x", "abc", "x"}, - {"^[a-c]+", "x", "dabce", "dabce"}, - {"[a-c]+$", "x", "dabce", "dabce"}, - {"^[a-c]+$", "x", "dabce", "dabce"}, - {"^[a-c]+", "x", "", ""}, - {"[a-c]+$", "x", "", ""}, - {"^[a-c]+$", "x", "", ""}, - - // Other cases. - {"abc", "def", "abcdefg", "defdefg"}, - {"bc", "BC", "abcbcdcdedef", "aBCBCdcdedef"}, - {"abc", "", "abcdabc", "d"}, - {"x", "xXx", "xxxXxxx", "xXxxXxxXxXxXxxXxxXx"}, - {"abc", "d", "", ""}, - {"abc", "d", "abc", "d"}, - {".+", "x", "abc", "x"}, - {"[a-c]*", "x", "def", "xdxexfx"}, - {"[a-c]+", "x", "abcbcdcdedef", "xdxdedef"}, - {"[a-c]*", "x", "abcbcdcdedef", "xxdxxdxexdxexfx"}, - {"(foo)*bar(s)", "\\1", "bars", ""}, -} - -type ReplaceFuncTest struct { - pattern string - replacement func(string) string - input, output string -} - -var replaceFuncTests = []ReplaceFuncTest{ - {"[a-c]", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxayxbyxcydef"}, - {"[a-c]+", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxabcydef"}, - {"[a-c]*", func(s string) string { return "x" + s + "y" }, "defabcdef", "xydxyexyfxabcyxydxyexyfxy"}, -} - -func TestReplaceAll(t *testing.T) { - for _, tc := range replaceTests { - re, err := Compile(tc.pattern) - - if err != nil { - t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) - continue - } - - actual := re.ReplaceAllString(tc.input, 
tc.replacement) - - if actual != tc.output { - t.Errorf("%q.Replace(%q,%q) = %q; want %q", - tc.pattern, tc.input, tc.replacement, actual, tc.output) - } - - // now try bytes - - actual = string(re.ReplaceAll([]byte(tc.input), []byte(tc.replacement))) - if actual != tc.output { - t.Errorf("%q.Replace(%q,%q) = %q; want %q", - tc.pattern, tc.input, tc.replacement, actual, tc.output) - } - - } -} - -func TestReplaceAllFunc(t *testing.T) { - for _, tc := range replaceFuncTests { - re, err := Compile(tc.pattern) - if err != nil { - t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) - continue - } - actual := re.ReplaceAllStringFunc(tc.input, tc.replacement) - if actual != tc.output { - t.Errorf("%q.ReplaceFunc(%q,%q) = %q; want %q", - tc.pattern, tc.input, tc.replacement, actual, tc.output) - } - // now try bytes - actual = string(re.ReplaceAllFunc([]byte(tc.input), func(s []byte) []byte { return []byte(tc.replacement(string(s))) })) - if actual != tc.output { - t.Errorf("%q.ReplaceFunc(%q,%q) = %q; want %q", - tc.pattern, tc.input, tc.replacement, actual, tc.output) - } - } -} - -/* -* "hallo".gsub(/h(.*)llo/, "e") - */ -func TestGsub1(t *testing.T) { - input := "hallo" - pattern := "h(.*)llo" - expected := "e" - re, err := Compile(pattern) - if err != nil { - t.Errorf("Unexpected error compiling %q: %v", pattern, err) - return - } - actual := re.Gsub(input, "e") - if actual != expected { - t.Errorf("expected %q, actual %q\n", expected, actual) - } -} - -/* -* "hallo".gsub(/h(?.*)llo/, "\\k") - */ -func TestGsubNamedCapture1(t *testing.T) { - input := "hallo" - pattern := "h(?.*)llo" - expected := "a" - re, err := Compile(pattern) - if err != nil { - t.Errorf("Unexpected error compiling %q: %v", pattern, err) - return - } - actual := re.Gsub(input, "\\k") - if actual != expected { - t.Errorf("expected %q, actual %q\n", expected, actual) - } -} - -/* -* "hallo".gsub(/h(?.*)ll(?.*)/, "\\k\\k\\k") - */ -func TestGsubNamedCapture2(t *testing.T) { - input := 
"hallo" - pattern := "h(?.*)ll(?.*)" - expected := "aoa" - re, err := Compile(pattern) - if err != nil { - t.Errorf("Unexpected error compiling %q: %v", pattern, err) - return - } - actual := re.Gsub(input, "\\k\\k\\k") - if actual != expected { - t.Errorf("expected %q, actual %q\n", expected, actual) - } -} - -/* -* "hallo".gsub(/h(?.*)(l*)(?.*)/, "\\k\\k\\k\\1") - */ -func TestGsubNamedCapture3(t *testing.T) { - input := "hallo" - pattern := "h(?.*)(l*)(?.*)" - expected := "alloallo" - re, err := Compile(pattern) - if err != nil { - t.Errorf("Unexpected error compiling %q: %v", pattern, err) - return - } - actual := re.Gsub(input, "\\k\\k\\k\\1") - if actual != expected { - t.Errorf("expected %q, actual %q\n", expected, actual) - } -} - -/* -* "hallo".gsub(/h(?.*)(l*)(?.*)/, "\\k\\k\\k\\1") - */ -func TestGsubNamedCapture4(t *testing.T) { - input := "The lamb was sure to go." - pattern := "(?[^\\s\\.]+)(?\\s)" - expected := "They lamby wasy surey toy go." - re, err := Compile(pattern) - if err != nil { - t.Errorf("Unexpected error compiling %q: %v", pattern, err) - return - } - - actual := re.GsubFunc(input, func(_ string, captures map[string]string) string { - return captures["word"] + "y" + captures["white_space"] - }) - if actual != expected { - t.Errorf("expected %q, actual %q\n", expected, actual) - } - -} - -/* -* "hallo".gsub(/h(.*)llo/) { |match| -* "e" -* } - */ -func TestGsubFunc1(t *testing.T) { - input := "hallo" - pattern := "h(.*)llo" - expected := "e" - re, err := Compile(pattern) - if err != nil { - t.Errorf("Unexpected error compiling %q: %v", pattern, err) - return - } - actual := re.GsubFunc(input, func(match string, captures map[string]string) string { - return "e" - }) - if actual != expected { - t.Errorf("expected %q, actual %q\n", expected, actual) - } -} - -/* -* @env = {} -* "hallo".gsub(/h(.*)llo/) { |match| -* $~.captures.each_with_index do |arg, index| -* @env["#{index + 1}"] = arg -* "abcd".gsub(/(d)/) do -* env["1"] -* end -* end -* 
} - */ -func TestGsubFunc2(t *testing.T) { - input := "hallo" - pattern := "h(.*)llo" - expected := "abca" - env := make(map[string]string) - re, err := Compile(pattern) - if err != nil { - t.Errorf("Unexpected error compiling %q: %v", pattern, err) - return - } - actual := re.GsubFunc(input, func(_ string, captures map[string]string) string { - for name, capture := range captures { - env[name] = capture - } - re1 := MustCompile("(d)") - return re1.GsubFunc("abcd", func(_ string, captures2 map[string]string) string { - return env["1"] - }) - }) - if actual != expected { - t.Errorf("expected %q, actual %q\n", expected, actual) - } -} - -/* how to match $ as itself */ -func TestPattern1(t *testing.T) { - re := MustCompile(`b\$a`) - if !re.MatchString("b$a") { - t.Errorf("expect to match\n") - } - re = MustCompile("b\\$a") - if !re.MatchString("b$a") { - t.Errorf("expect to match 2\n") - } -} - -/* how to use $ as the end of line */ -func TestPattern2(t *testing.T) { - re := MustCompile("a$") - if !re.MatchString("a") { - t.Errorf("expect to match\n") - } - if re.MatchString("ab") { - t.Errorf("expect to mismatch\n") - } -} - -func TestCompileWithOption(t *testing.T) { - re := MustCompileWithOption("a$", ONIG_OPTION_IGNORECASE) - if !re.MatchString("A") { - t.Errorf("expect to match\n") - } - re = MustCompile("a$") - if re.MatchString("A") { - t.Errorf("expect to mismatch\n") - } - -} - -type MetaTest struct { - pattern, output, literal string - isLiteral bool -} - -var metaTests = []MetaTest{ - {``, ``, ``, true}, - {`foo`, `foo`, `foo`, true}, - {`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator - {`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators - {`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[{\]}\\\|,<\.>/\?~`, `!@#`, false}, -} - -func TestQuoteMeta(t *testing.T) { - for _, tc := range metaTests { - // Verify that QuoteMeta returns the expected string. 
- quoted := QuoteMeta(tc.pattern) - if quoted != tc.output { - t.Errorf("QuoteMeta(`%s`) = `%s`; want `%s`", - tc.pattern, quoted, tc.output) - continue - } - - // Verify that the quoted string is in fact treated as expected - // by Compile -- i.e. that it matches the original, unquoted string. - if tc.pattern != "" { - re, err := Compile(quoted) - if err != nil { - t.Errorf("Unexpected error compiling QuoteMeta(`%s`): %v", tc.pattern, err) - continue - } - src := "abc" + tc.pattern + "def" - repl := "xyz" - replaced := re.ReplaceAllString(src, repl) - expected := "abcxyzdef" - if replaced != expected { - t.Errorf("QuoteMeta(`%s`).Replace(`%s`,`%s`) = `%s`; want `%s`", - tc.pattern, src, repl, replaced, expected) - } - } - } -} - -/* - * LiteralPrefix is not supported by rubex - * -//LiteralPrefix -func TestLiteralPrefix(t *testing.T) { - for _, tc := range metaTests { - // Literal method needs to scan the pattern. - re := MustCompile(tc.pattern) - str, complete := re.LiteralPrefix() - if complete != tc.isLiteral { - t.Errorf("LiteralPrefix(`%s`) = %t; want %t", tc.pattern, complete, tc.isLiteral) - } - if str != tc.literal { - t.Errorf("LiteralPrefix(`%s`) = `%s`; want `%s`", tc.pattern, str, tc.literal) - } - } -} -*/ -type numSubexpCase struct { - input string - expected int -} - -var numSubexpCases = []numSubexpCase{ - {``, 0}, - {`.*`, 0}, - {`abba`, 0}, - {`ab(b)a`, 1}, - {`ab(.*)a`, 1}, - {`(.*)ab(.*)a`, 2}, - {`(.*)(ab)(.*)a`, 3}, - {`(.*)((a)b)(.*)a`, 4}, - {`(.*)(\(ab)(.*)a`, 3}, - {`(.*)(\(a\)b)(.*)a`, 3}, -} - -func TestNumSubexp(t *testing.T) { - for _, c := range numSubexpCases { - re := MustCompile(c.input) - n := re.NumSubexp() - if n != c.expected { - t.Errorf("NumSubexp for %q returned %d, expected %d", c.input, n, c.expected) - } - } -} - -func BenchmarkLiteral(b *testing.B) { - x := strings.Repeat("x", 50) + "y" - b.StopTimer() - re := MustCompile("y") - b.StartTimer() - for i := 0; i < b.N; i++ { - if !re.MatchString(x) { - println("no match!") 
- break - } - } -} - -func BenchmarkNotLiteral(b *testing.B) { - x := strings.Repeat("x", 50) + "y" - b.StopTimer() - re := MustCompile(".y") - b.StartTimer() - for i := 0; i < b.N; i++ { - if !re.MatchString(x) { - println("no match!") - break - } - } -} - -func BenchmarkMatchClass(b *testing.B) { - b.StopTimer() - x := strings.Repeat("xxxx", 20) + "w" - re := MustCompile("[abcdw]") - b.StartTimer() - for i := 0; i < b.N; i++ { - if !re.MatchString(x) { - println("no match!") - break - } - } -} - -func BenchmarkMatchClass_InRange(b *testing.B) { - b.StopTimer() - // 'b' is between 'a' and 'c', so the charclass - // range checking is no help here. - x := strings.Repeat("bbbb", 20) + "c" - re := MustCompile("[ac]") - b.StartTimer() - for i := 0; i < b.N; i++ { - if !re.MatchString(x) { - println("no match!") - break - } - } -} - -func BenchmarkReplaceAll(b *testing.B) { - x := "abcdefghijklmnopqrstuvwxyz" - b.StopTimer() - re := MustCompile("[cjrw]") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.ReplaceAllString(x, "") - } -} - -func BenchmarkFindAllStringSubmatchIndex(b *testing.B) { - x := "abcdefghijklmnopqrstuvwxyz" - b.StopTimer() - re := MustCompile("[cjrw]") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.FindAllStringSubmatchIndex(x, 0) - } -} - -func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) { - b.StopTimer() - x := []byte("abcdefghijklmnopqrstuvwxyz") - re := MustCompile("^zbc(d|e)") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.Match(x) - } -} - -func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) { - b.StopTimer() - x := []byte("abcdefghijklmnopqrstuvwxyz") - for i := 0; i < 15; i++ { - x = append(x, x...) 
- } - re := MustCompile("^zbc(d|e)") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.Match(x) - } -} - -func BenchmarkAnchoredShortMatch(b *testing.B) { - b.StopTimer() - x := []byte("abcdefghijklmnopqrstuvwxyz") - re := MustCompile("^.bc(d|e)") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.Match(x) - } -} - -func BenchmarkAnchoredLongMatch(b *testing.B) { - b.StopTimer() - x := []byte("abcdefghijklmnopqrstuvwxyz") - for i := 0; i < 15; i++ { - x = append(x, x...) - } - re := MustCompile("^.bc(d|e)") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.Match(x) - } -} diff --git a/vendor/github.com/moovweb/rubex/bstorm/reg.go b/vendor/github.com/moovweb/rubex/bstorm/reg.go deleted file mode 100644 index 0b2cec94c..000000000 --- a/vendor/github.com/moovweb/rubex/bstorm/reg.go +++ /dev/null @@ -1,135 +0,0 @@ -// Comparing the speeds of the golang native regex library and rubex. -// The numbers show a dramatic difference, with rubex being nearly 400 -// times slower than the native go libraries. Unfortunately for us, -// the native go libraries have a different regex behavior than rubex, -// so we'll have to hack at it a bit to fit our needs if we decide to use it. -// (which we should, I mean, come on, 400 times faster? That's mad wins.) 
- -package main - -import re "rubex" -import "time" -import "regexp" -import "runtime" -import "os" -import "strconv" -import "sync" - -var mu sync.Mutex -var count = 0 -var re1 []Matcher -var re2 []Matcher -const NUM = 100 -const NNN = 1000 -const CCC = 100000 -var STR = "abcdabc" - -type Matcher interface { - MatchString(string) bool -} - -type Task struct { - str string - m Matcher - t time.Time -} - -var TaskChann chan *Task - -func init() { - re1 = make([]Matcher, NUM) - re2 = make([]Matcher, NUM) - for i := 0; i < NUM; i ++ { - re1[i] = regexp.MustCompile("[a-c]*$") - re2[i] = re.MustCompile("[a-c]*$") - } - TaskChann = make(chan *Task, 100) - for i := 0; i < 10; i ++ { - STR += STR - } - println("len:", len(STR)) -} - -func render_pages(name string, marray []Matcher, num_routines, num_renders int) { - for i := 0; i < num_routines; i++ { - m := marray[i] - go func () { - runtime.LockOSThread() - for j := 0; j < num_renders; j++ { - var totalDuration int64 = 0 - for i := 0; i < NNN; i++ { - t := time.Now() - mu.Lock() - if count > CCC { - mu.Unlock() - return - } - count += 1 - m.MatchString(STR) - mu.Unlock() - totalDuration += time.Since(t).Nanoseconds() - } - println(name + "-average: ", totalDuration/int64(1000*NNN), "us") - } - }() - } -} - -func render_pages2(name string, marray []Matcher, num_routines, num_renders int) { - go func() { - for i := 0; i < CCC; i ++ { - t := &Task{str: STR, m: marray[0], t: time.Now()} - TaskChann <- t - } - }() - for i := 0; i < num_routines; i++ { - m := marray[i] - go func () { - runtime.LockOSThread() - for j := 0; j < num_renders; j++ { - var totalDuration int64 = 0 - for i := 0; i < NNN; i++ { - task := <-TaskChann - m.MatchString(task.str) - totalDuration += time.Since(task.t).Nanoseconds() - } - println(name + "-average: ", totalDuration/int64(1000*NNN), "us") - } - }() - } -} - - - -func main() { - cpu, _ := strconv.Atoi(os.Args[1]) - lib := os.Args[2] - method := os.Args[3] - println("using CPUs:", cpu) - 
runtime.GOMAXPROCS(cpu) - num_routines := 6 - num_renders := 20 - - if method == "chan" { - if lib == "rubex" { - render_pages2("rubex", re2, num_routines, num_renders) - } else { - render_pages2("regexp", re1, num_routines, num_renders) - } - } else { - if lib == "rubex" { - render_pages("rubex", re2, num_routines, num_renders) - } else { - render_pages("regexp", re1, num_routines, num_renders) - } - - } - d, _ := time.ParseDuration("5s") - for i := 0; i < 100; i ++ { - println("goroutine:", runtime.NumGoroutine()) - time.Sleep(d) - - } - println ("Done") -} - diff --git a/vendor/github.com/moovweb/rubex/chelper.c b/vendor/github.com/moovweb/rubex/chelper.c deleted file mode 100644 index c11721648..000000000 --- a/vendor/github.com/moovweb/rubex/chelper.c +++ /dev/null @@ -1,184 +0,0 @@ -#include -#include -#include -#ifdef BENCHMARK_CHELP -#include -#endif -#include "chelper.h" - -int NewOnigRegex( char *pattern, int pattern_length, int option, - OnigRegex *regex, OnigRegion **region, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer) { - int ret = ONIG_NORMAL; - int error_msg_len = 0; - - OnigUChar *pattern_start = (OnigUChar *) pattern; - OnigUChar *pattern_end = (OnigUChar *) (pattern + pattern_length); - - *error_info = (OnigErrorInfo *) malloc(sizeof(OnigErrorInfo)); - memset(*error_info, 0, sizeof(OnigErrorInfo)); - - *encoding = (void*)ONIG_ENCODING_UTF8; - - *error_buffer = (char*) malloc(ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char)); - - memset(*error_buffer, 0, ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char)); - - *region = onig_region_new(); - - ret = onig_new(regex, pattern_start, pattern_end, (OnigOptionType)(option), *encoding, OnigDefaultSyntax, *error_info); - - if (ret != ONIG_NORMAL) { - error_msg_len = onig_error_code_to_str((unsigned char*)(*error_buffer), ret, *error_info); - if (error_msg_len >= ONIG_MAX_ERROR_MESSAGE_LEN) { - error_msg_len = ONIG_MAX_ERROR_MESSAGE_LEN - 1; - } - (*error_buffer)[error_msg_len] = '\0'; - } - 
return ret; -} - -int SearchOnigRegex( void *str, int str_length, int offset, int option, - OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures) { - int ret = ONIG_MISMATCH; - int error_msg_len = 0; -#ifdef BENCHMARK_CHELP - struct timeval tim1, tim2; - long t; -#endif - - OnigUChar *str_start = (OnigUChar *) str; - OnigUChar *str_end = (OnigUChar *) (str_start + str_length); - OnigUChar *search_start = (OnigUChar *)(str_start + offset); - OnigUChar *search_end = str_end; - -#ifdef BENCHMARK_CHELP - gettimeofday(&tim1, NULL); -#endif - - ret = onig_search(regex, str_start, str_end, search_start, search_end, region, option); - if (ret < 0 && error_buffer != NULL) { - error_msg_len = onig_error_code_to_str((unsigned char*)(error_buffer), ret, error_info); - if (error_msg_len >= ONIG_MAX_ERROR_MESSAGE_LEN) { - error_msg_len = ONIG_MAX_ERROR_MESSAGE_LEN - 1; - } - error_buffer[error_msg_len] = '\0'; - } - else if (captures != NULL) { - int i; - int count = 0; - for (i = 0; i < region->num_regs; i++) { - captures[2*count] = region->beg[i]; - captures[2*count+1] = region->end[i]; - count ++; - } - *numCaptures = count; - } - -#ifdef BENCHMARK_CHELP - gettimeofday(&tim2, NULL); - t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec; - printf("%ld microseconds elapsed\n", t); -#endif - return ret; -} - -int MatchOnigRegex(void *str, int str_length, int offset, int option, - OnigRegex regex, OnigRegion *region) { - int ret = ONIG_MISMATCH; - int error_msg_len = 0; -#ifdef BENCHMARK_CHELP - struct timeval tim1, tim2; - long t; -#endif - - OnigUChar *str_start = (OnigUChar *) str; - OnigUChar *str_end = (OnigUChar *) (str_start + str_length); - OnigUChar *search_start = (OnigUChar *)(str_start + offset); - -#ifdef BENCHMARK_CHELP - gettimeofday(&tim1, NULL); -#endif - ret = onig_match(regex, str_start, str_end, search_start, region, option); -#ifdef BENCHMARK_CHELP - gettimeofday(&tim2, NULL); 
- t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec; - printf("%ld microseconds elapsed\n", t); -#endif - return ret; -} - -int LookupOnigCaptureByName(char *name, int name_length, - OnigRegex regex, OnigRegion *region) { - int ret = ONIGERR_UNDEFINED_NAME_REFERENCE; -#ifdef BENCHMARK_CHELP - struct timeval tim1, tim2; - long t; -#endif - OnigUChar *name_start = (OnigUChar *) name; - OnigUChar *name_end = (OnigUChar *) (name_start + name_length); -#ifdef BENCHMARK_CHELP - gettimeofday(&tim1, NULL); -#endif - ret = onig_name_to_backref_number(regex, name_start, name_end, region); -#ifdef BENCHMARK_CHELP - gettimeofday(&tim2, NULL); - t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec; - printf("%ld microseconds elapsed\n", t); -#endif - return ret; -} - -typedef struct { - char *nameBuffer; - int bufferOffset; - int bufferSize; - int *numbers; - int numIndex; -} group_info_t; - -int name_callback(const UChar* name, const UChar* name_end, - int ngroup_num, int* group_nums, - regex_t* reg, void* arg) -{ - int nameLen, offset, newOffset; - group_info_t *groupInfo; - - groupInfo = (group_info_t*) arg; - offset = groupInfo->bufferOffset; - nameLen = name_end - name; - newOffset = offset + nameLen; - - //if there are already names, add a ";" - if (offset > 0) { - newOffset += 1; - } - - if (newOffset <= groupInfo->bufferSize) { - if (offset > 0) { - groupInfo->nameBuffer[offset] = ';'; - offset += 1; - } - strncpy(&groupInfo->nameBuffer[offset], name, nameLen); - } - groupInfo->bufferOffset = newOffset; - if (ngroup_num > 0) { - groupInfo->numbers[groupInfo->numIndex] = group_nums[ngroup_num-1]; - } else { - groupInfo->numbers[groupInfo->numIndex] = -1; - } - groupInfo->numIndex += 1; - return 0; /* 0: continue */ -} - -int GetCaptureNames(OnigRegex reg, void *buffer, int bufferSize, int* groupNumbers) { - int ret; - group_info_t groupInfo; - groupInfo.nameBuffer = (char*)buffer; - groupInfo.bufferOffset = 0; - 
groupInfo.bufferSize = bufferSize; - groupInfo.numbers = groupNumbers; - groupInfo.numIndex = 0; - onig_foreach_name(reg, name_callback, (void* )&groupInfo); - return groupInfo.bufferOffset; -} - diff --git a/vendor/github.com/moovweb/rubex/chelper.h b/vendor/github.com/moovweb/rubex/chelper.h deleted file mode 100644 index 7926fc23c..000000000 --- a/vendor/github.com/moovweb/rubex/chelper.h +++ /dev/null @@ -1,14 +0,0 @@ -#include - -extern int NewOnigRegex( char *pattern, int pattern_length, int option, - OnigRegex *regex, OnigRegion **region, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer); - -extern int SearchOnigRegex( void *str, int str_length, int offset, int option, - OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures); - -extern int MatchOnigRegex( void *str, int str_length, int offset, int option, - OnigRegex regex, OnigRegion *region); - -extern int LookupOnigCaptureByName(char *name, int name_length, OnigRegex regex, OnigRegion *region); - -extern int GetCaptureNames(OnigRegex regex, void *buffer, int bufferSize, int* groupNumbers); diff --git a/vendor/github.com/moovweb/rubex/constants.go b/vendor/github.com/moovweb/rubex/constants.go deleted file mode 100644 index afd8da86e..000000000 --- a/vendor/github.com/moovweb/rubex/constants.go +++ /dev/null @@ -1,27 +0,0 @@ -package rubex - -const ( - ONIG_OPTION_DEFAULT = ONIG_OPTION_NONE - /* options */ - ONIG_OPTION_NONE = 0 - ONIG_OPTION_IGNORECASE = 1 - ONIG_OPTION_EXTEND = (ONIG_OPTION_IGNORECASE << 1) - ONIG_OPTION_MULTILINE = (ONIG_OPTION_EXTEND << 1) - ONIG_OPTION_SINGLELINE = (ONIG_OPTION_MULTILINE << 1) - ONIG_OPTION_FIND_LONGEST = (ONIG_OPTION_SINGLELINE << 1) - ONIG_OPTION_FIND_NOT_EMPTY = (ONIG_OPTION_FIND_LONGEST << 1) - ONIG_OPTION_NEGATE_SINGLELINE = (ONIG_OPTION_FIND_NOT_EMPTY << 1) - ONIG_OPTION_DONT_CAPTURE_GROUP = (ONIG_OPTION_NEGATE_SINGLELINE << 1) - ONIG_OPTION_CAPTURE_GROUP = 
(ONIG_OPTION_DONT_CAPTURE_GROUP << 1) - /* options (search time) */ - ONIG_OPTION_NOTBOL = (ONIG_OPTION_CAPTURE_GROUP << 1) - ONIG_OPTION_NOTEOL = (ONIG_OPTION_NOTBOL << 1) - ONIG_OPTION_POSIX_REGION = (ONIG_OPTION_NOTEOL << 1) - ONIG_OPTION_MAXBIT = ONIG_OPTION_POSIX_REGION /* limit */ - - ONIG_NORMAL = 0 - ONIG_MISMATCH = -1 - - ONIG_MISMATCH_STR = "mismatch" - ONIGERR_UNDEFINED_NAME_REFERENCE = -217 -) diff --git a/vendor/github.com/moovweb/rubex/find_test.go b/vendor/github.com/moovweb/rubex/find_test.go deleted file mode 100644 index b2986e8f3..000000000 --- a/vendor/github.com/moovweb/rubex/find_test.go +++ /dev/null @@ -1,497 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package rubex - -import ( - "fmt" - "strings" - "testing" -) - -// For each pattern/text pair, what is the expected output of each function? -// We can derive the textual results from the indexed results, the non-submatch -// results from the submatched results, the single results from the 'all' results, -// and the byte results from the string results. Therefore the table includes -// only the FindAllStringSubmatchIndex result. 
-type FindTest struct { - pat string - text string - matches [][]int -} - -func (t FindTest) String() string { - return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text) -} - -var findTests = []FindTest{ - {``, ``, build(1, 0, 0)}, - {`^abcdefg`, "abcdefg", build(1, 0, 7)}, - {`a+`, "baaab", build(1, 1, 4)}, - {"abcd..", "abcdef", build(1, 0, 6)}, - {`a`, "a", build(1, 0, 1)}, - {`x`, "y", nil}, - {`b`, "abc", build(1, 1, 2)}, - {`.`, "a", build(1, 0, 1)}, - {`.*`, "abcdef", build(2, 0, 6, 6, 6)}, - {`^`, "abcde", build(1, 0, 0)}, - {`$`, "abcde", build(1, 5, 5)}, - {`^abcd$`, "abcd", build(1, 0, 4)}, - {`^bcd'`, "abcdef", nil}, - {`^abcd$`, "abcde", nil}, - {`a+`, "baaab", build(1, 1, 4)}, - {`a*`, "baaab", build(4, 0, 0, 1, 4, 4, 4, 5, 5)}, - {`[a-z]+`, "abcd", build(1, 0, 4)}, - {`[^a-z]+`, "ab1234cd", build(1, 2, 6)}, - {`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)}, - {`[^\n]+`, "abcd\n", build(1, 0, 4)}, - {`[日本語]+`, "日本語日本語", build(1, 0, 18)}, - {`日本語+`, "日本語", build(1, 0, 9)}, - {`a*`, "日本語", build(4, 0, 0, 3, 3, 6, 6, 9, 9)}, - {`日本語+`, "日本語語語語", build(1, 0, 18)}, - {`()`, "", build(1, 0, 0, 0, 0)}, - {`(a)`, "a", build(1, 0, 1, 0, 1)}, - {`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)}, - {`(.*)`, "", build(1, 0, 0, 0, 0)}, - {`(.*)`, "abcd", build(2, 0, 4, 0, 4, 4, 4, 4, 4)}, - {`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)}, - {`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)}, - {`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)}, - {`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)}, - {"\a\b\f\n\r\t\v", "\a\b\f\n\r\t\v", build(1, 0, 7)}, - {`[\a\b\f\n\r\t\v]+`, "\a\b\f\n\r\t\v", build(1, 0, 7)}, - - //{`a*(|(b))c*`, "aacc", build(2, 0, 4, 4, 4)}, - {`(.*).*`, "ab", build(2, 0, 2, 0, 2, 2, 2, 2, 2)}, - {`[.]`, ".", build(1, 0, 1)}, - {`/$`, "/abc/", build(1, 4, 5)}, - {`/$`, "/abc", nil}, - - // multiple matches - {`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)}, - {`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)}, - 
{`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)}, - {`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)}, - {`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)}, - - // fixed bugs - {`ab$`, "cab", build(1, 1, 3)}, - {`axxb$`, "axxcb", nil}, - {`data`, "daXY data", build(1, 5, 9)}, - {`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)}, - {`zx+`, "zzx", build(1, 1, 3)}, - - // can backslash-escape any punctuation - {`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`, - `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)}, - {`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`, - `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)}, - {"\\`", "`", build(1, 0, 1)}, - {"[\\`]+", "`", build(1, 0, 1)}, - - // long set of matches (longer than startSize) - { - ".", - "qwertyuiopasdfghjklzxcvbnm1234567890", - build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, - 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, - 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, - 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36), - }, -} - -// build is a helper to construct a [][]int by extracting n sequences from x. -// This represents n matches with len(x)/n submatches each. -func build(n int, x ...int) [][]int { - ret := make([][]int, n) - runLength := len(x) / n - j := 0 - for i := range ret { - ret[i] = make([]int, runLength) - copy(ret[i], x[j:]) - j += runLength - if j > len(x) { - panic("invalid build entry") - } - } - return ret -} - -// First the simple cases. 
- -func TestFind(t *testing.T) { - for _, test := range findTests { - re := MustCompile(test.pat) - if re.String() != test.pat { - t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat) - } - result := re.Find([]byte(test.text)) - switch { - case len(test.matches) == 0 && len(result) == 0: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case test.matches != nil && result != nil: - expect := test.text[test.matches[0][0]:test.matches[0][1]] - if expect != string(result) { - t.Errorf("expected %q got %q: %s", expect, result, test) - } - } - } -} - -func TestFindString(t *testing.T) { - for _, test := range findTests { - result := MustCompile(test.pat).FindString(test.text) - switch { - case len(test.matches) == 0 && len(result) == 0: - // ok - case test.matches == nil && result != "": - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == "": - // Tricky because an empty result has two meanings: no match or empty match. 
- if test.matches[0][0] != test.matches[0][1] { - t.Errorf("expected match; got none: %s", test) - } - case test.matches != nil && result != "": - expect := test.text[test.matches[0][0]:test.matches[0][1]] - if expect != result { - t.Errorf("expected %q got %q: %s", expect, result, test) - } - } - } -} - -func testFindIndex(test *FindTest, result []int, t *testing.T) { - switch { - case len(test.matches) == 0 && len(result) == 0: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case test.matches != nil && result != nil: - expect := test.matches[0] - if expect[0] != result[0] || expect[1] != result[1] { - t.Errorf("expected %v got %v: %s", expect, result, test) - } - } -} - -func TestFindIndex(t *testing.T) { - for _, test := range findTests { - testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t) - } -} - -func TestFindStringIndex(t *testing.T) { - for _, test := range findTests { - testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t) - } -} - -func TestFindStringContentType(t *testing.T) { - pattern := `text/(.*);\s*charset\s*=\s*(.*)` - regex := MustCompile(pattern) - - data1 := "text/html; charset=utf8" - data2 := "text/;charset=iso-8859-1" - data3 := "image/png" - matches := regex.FindStringSubmatch(data1) - if matches[1] != "html" || matches[2] != "utf8" { - t.Errorf("does not match content-type 1") - } - matches = regex.FindStringSubmatch(data2) - if matches[1] != "" || matches[2] != "iso-8859-1" { - println(matches[1]) - println(matches[2]) - t.Errorf("does not match content-type 2") - } - matches = regex.FindStringSubmatch(data3) - if len(matches) != 0 { - t.Errorf("does not match content-type 3") - } -} - - -func TestFindReaderIndex(t *testing.T) { - for _, test := range findTests { - testFindIndex(&test, 
MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t) - } -} - -// Now come the simple All cases. - -func TestFindAll(t *testing.T) { - for _, test := range findTests { - result := MustCompile(test.pat).FindAll([]byte(test.text), -1) - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case test.matches != nil && result != nil: - if len(test.matches) != len(result) { - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) - continue - } - for k, e := range test.matches { - expect := test.text[e[0]:e[1]] - if expect != string(result[k]) { - t.Errorf("match %d: expected %q got %q: %s", k, expect, result[k], test) - } - } - } - } -} - -func TestFindAllString(t *testing.T) { - for _, test := range findTests { - result := MustCompile(test.pat).FindAllString(test.text, -1) - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case test.matches != nil && result != nil: - if len(test.matches) != len(result) { - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) - continue - } - for k, e := range test.matches { - expect := test.text[e[0]:e[1]] - if expect != result[k] { - t.Errorf("expected %q got %q: %s", expect, result, test) - } - } - } - } -} - -func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) { - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case test.matches != nil && 
result != nil: - if len(test.matches) != len(result) { - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) - return - } - for k, e := range test.matches { - if e[0] != result[k][0] || e[1] != result[k][1] { - t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test) - } - } - } -} - -func TestFindAllIndex(t *testing.T) { - for _, test := range findTests { - testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t) - } -} - -func TestFindAllStringIndex(t *testing.T) { - for _, test := range findTests { - testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t) - } -} - -// Now come the Submatch cases. - -func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) { - if len(submatches) != len(result)*2 { - t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) - return - } - for k := 0; k < len(submatches); k += 2 { - if submatches[k] == -1 { - if result[k/2] != nil { - t.Errorf("match %d: expected nil got %q: %s", n, result, test) - } - continue - } - expect := test.text[submatches[k]:submatches[k+1]] - if expect != string(result[k/2]) { - t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test) - return - } - } -} - -func TestFindSubmatch(t *testing.T) { - for _, test := range findTests { - result := MustCompile(test.pat).FindSubmatch([]byte(test.text)) - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case test.matches != nil && result != nil: - testSubmatchBytes(&test, 0, test.matches[0], result, t) - } - } -} - -func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) { - if len(submatches) != len(result)*2 { - t.Errorf("match %d: 
expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) - return - } - for k := 0; k < len(submatches); k += 2 { - if submatches[k] == -1 { - if result[k/2] != "" { - t.Errorf("match %d: expected nil got %q: %s", n, result, test) - } - continue - } - expect := test.text[submatches[k]:submatches[k+1]] - if expect != result[k/2] { - t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test) - return - } - } -} - -func TestFindStringSubmatch(t *testing.T) { - for _, test := range findTests { - result := MustCompile(test.pat).FindStringSubmatch(test.text) - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case test.matches != nil && result != nil: - testSubmatchString(&test, 0, test.matches[0], result, t) - } - } -} - -func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) { - if len(expect) != len(result) { - t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test) - return - } - for k, e := range expect { - if e != result[k] { - t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test) - } - } -} - -func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) { - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case test.matches != nil && result != nil: - testSubmatchIndices(test, 0, test.matches[0], result, t) - } -} - -func TestFindSubmatchIndex(t *testing.T) { - for _, test := range findTests { - testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t) - } -} - -func 
TestFindStringSubmatchIndex(t *testing.T) { - for _, test := range findTests { - testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t) - } -} - -func TestFindReaderSubmatchIndex(t *testing.T) { - for _, test := range findTests { - testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t) - } -} - -// Now come the monster AllSubmatch cases. - -func TestFindAllSubmatch(t *testing.T) { - for _, test := range findTests { - result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1) - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case len(test.matches) != len(result): - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) - case test.matches != nil && result != nil: - for k, match := range test.matches { - testSubmatchBytes(&test, k, match, result[k], t) - } - } - } -} - -func TestFindAllStringSubmatch(t *testing.T) { - for _, test := range findTests { - result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1) - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case len(test.matches) != len(result): - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) - case test.matches != nil && result != nil: - for k, match := range test.matches { - testSubmatchString(&test, k, match, result[k], t) - } - } - } -} - -func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) { - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - 
t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case len(test.matches) != len(result): - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) - case test.matches != nil && result != nil: - for k, match := range test.matches { - testSubmatchIndices(test, k, match, result[k], t) - } - } -} - -func TestFindAllSubmatchIndex(t *testing.T) { - for _, test := range findTests { - testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t) - } -} - -func TestFindAllStringSubmatchIndex(t *testing.T) { - for _, test := range findTests { - testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t) - } -} diff --git a/vendor/github.com/moovweb/rubex/quotemeta.go b/vendor/github.com/moovweb/rubex/quotemeta.go deleted file mode 100644 index 80be2bc74..000000000 --- a/vendor/github.com/moovweb/rubex/quotemeta.go +++ /dev/null @@ -1,36 +0,0 @@ -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package regexp implements a simple regular expression library. - -// QuoteMeta func is copied here to avoid linking the entire Regexp library. - -package rubex - -func special(c int) bool { - for _, r := range `\.+*?()|[]^$` { - if c == int(r) { - return true - } - } - return false -} - -// QuoteMeta returns a string that quotes all regular expression metacharacters -// inside the argument text; the returned string is a regular expression matching -// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`. -func QuoteMeta(s string) string { - b := make([]byte, 2*len(s)) - - // A byte loop is correct because all metacharacters are ASCII. 
- j := 0 - for i := 0; i < len(s); i++ { - if special(int(s[i])) { - b[j] = '\\' - j++ - } - b[j] = s[i] - j++ - } - return string(b[0:j]) -} diff --git a/vendor/github.com/moovweb/rubex/regex.go b/vendor/github.com/moovweb/rubex/regex.go deleted file mode 100644 index 9bfc0a06a..000000000 --- a/vendor/github.com/moovweb/rubex/regex.go +++ /dev/null @@ -1,650 +0,0 @@ -package rubex - -/* -#cgo CFLAGS: -I/usr/local/include -#cgo LDFLAGS: -L/usr/local/lib -lonig -#include -#include -#include "chelper.h" -*/ -import "C" - -import ( - "bytes" - "errors" - "fmt" - "io" - "log" - "runtime" - "strconv" - "sync" - "unicode/utf8" - "unsafe" -) - -type strRange []int - -const numMatchStartSize = 4 -const numReadBufferStartSize = 256 - -var mutex sync.Mutex - -type MatchData struct { - count int - indexes [][]int32 -} - -type NamedGroupInfo map[string]int - -type Regexp struct { - pattern string - regex C.OnigRegex - region *C.OnigRegion - encoding C.OnigEncoding - errorInfo *C.OnigErrorInfo - errorBuf *C.char - matchData *MatchData - namedGroupInfo NamedGroupInfo -} - -func NewRegexp(pattern string, option int) (re *Regexp, err error) { - re = &Regexp{pattern: pattern} - patternCharPtr := C.CString(pattern) - defer C.free(unsafe.Pointer(patternCharPtr)) - - mutex.Lock() - defer mutex.Unlock() - error_code := C.NewOnigRegex(patternCharPtr, C.int(len(pattern)), C.int(option), &re.regex, &re.region, &re.encoding, &re.errorInfo, &re.errorBuf) - if error_code != C.ONIG_NORMAL { - err = errors.New(C.GoString(re.errorBuf)) - } else { - err = nil - numCapturesInPattern := int(C.onig_number_of_captures(re.regex)) + 1 - re.matchData = &MatchData{} - re.matchData.indexes = make([][]int32, numMatchStartSize) - for i := 0; i < numMatchStartSize; i++ { - re.matchData.indexes[i] = make([]int32, numCapturesInPattern*2) - } - re.namedGroupInfo = re.getNamedGroupInfo() - runtime.SetFinalizer(re, (*Regexp).Free) - } - return re, err -} - -func Compile(str string) (*Regexp, error) { - return 
NewRegexp(str, ONIG_OPTION_DEFAULT) -} - -func MustCompile(str string) *Regexp { - regexp, error := NewRegexp(str, ONIG_OPTION_DEFAULT) - if error != nil { - panic("regexp: compiling " + str + ": " + error.Error()) - } - return regexp -} - -func CompileWithOption(str string, option int) (*Regexp, error) { - return NewRegexp(str, option) -} - -func MustCompileWithOption(str string, option int) *Regexp { - regexp, error := NewRegexp(str, option) - if error != nil { - panic("regexp: compiling " + str + ": " + error.Error()) - } - return regexp -} - -func (re *Regexp) Free() { - mutex.Lock() - if re.regex != nil { - C.onig_free(re.regex) - re.regex = nil - } - if re.region != nil { - C.onig_region_free(re.region, 1) - re.region = nil - } - mutex.Unlock() - if re.errorInfo != nil { - C.free(unsafe.Pointer(re.errorInfo)) - re.errorInfo = nil - } - if re.errorBuf != nil { - C.free(unsafe.Pointer(re.errorBuf)) - re.errorBuf = nil - } -} - -func (re *Regexp) getNamedGroupInfo() (namedGroupInfo NamedGroupInfo) { - numNamedGroups := int(C.onig_number_of_names(re.regex)) - //when any named capture exisits, there is no numbered capture even if there are unnamed captures - if numNamedGroups > 0 { - namedGroupInfo = make(map[string]int) - //try to get the names - bufferSize := len(re.pattern) * 2 - nameBuffer := make([]byte, bufferSize) - groupNumbers := make([]int32, numNamedGroups) - bufferPtr := unsafe.Pointer(&nameBuffer[0]) - numbersPtr := unsafe.Pointer(&groupNumbers[0]) - length := int(C.GetCaptureNames(re.regex, bufferPtr, (C.int)(bufferSize), (*C.int)(numbersPtr))) - if length > 0 { - namesAsBytes := bytes.Split(nameBuffer[:length], ([]byte)(";")) - if len(namesAsBytes) != numNamedGroups { - log.Fatalf("the number of named groups (%d) does not match the number names found (%d)\n", numNamedGroups, len(namesAsBytes)) - } - for i, nameAsBytes := range namesAsBytes { - name := string(nameAsBytes) - namedGroupInfo[name] = int(groupNumbers[i]) - } - } else { - 
log.Fatalf("could not get the capture group names from %q", re.String()) - } - } - return -} - -func (re *Regexp) groupNameToId(name string) (id int) { - if re.namedGroupInfo == nil { - id = ONIGERR_UNDEFINED_NAME_REFERENCE - } else { - id = re.namedGroupInfo[name] - } - return -} - -func (re *Regexp) processMatch(numCaptures int) (match []int32) { - if numCaptures <= 0 { - panic("cannot have 0 captures when processing a match") - } - matchData := re.matchData - return matchData.indexes[matchData.count][:numCaptures*2] -} - -func (re *Regexp) ClearMatchData() { - matchData := re.matchData - matchData.count = 0 -} - -func (re *Regexp) find(b []byte, n int, offset int) (match []int) { - if n == 0 { - b = []byte{0} - } - ptr := unsafe.Pointer(&b[0]) - matchData := re.matchData - capturesPtr := unsafe.Pointer(&(matchData.indexes[matchData.count][0])) - numCaptures := int32(0) - numCapturesPtr := unsafe.Pointer(&numCaptures) - pos := int(C.SearchOnigRegex((ptr), C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), re.regex, re.region, re.errorInfo, (*C.char)(nil), (*C.int)(capturesPtr), (*C.int)(numCapturesPtr))) - if pos >= 0 { - if numCaptures <= 0 { - panic("cannot have 0 captures when processing a match") - } - match2 := matchData.indexes[matchData.count][:numCaptures*2] - match = make([]int, len(match2)) - for i := range match2 { - match[i] = int(match2[i]) - } - numCapturesInPattern := int32(C.onig_number_of_captures(re.regex)) + 1 - if numCapturesInPattern != numCaptures { - log.Fatalf("expected %d captures but got %d\n", numCapturesInPattern, numCaptures) - } - } - return -} - -func getCapture(b []byte, beg int, end int) []byte { - if beg < 0 || end < 0 { - return nil - } - return b[beg:end] -} - -func (re *Regexp) match(b []byte, n int, offset int) bool { - re.ClearMatchData() - if n == 0 { - b = []byte{0} - } - ptr := unsafe.Pointer(&b[0]) - pos := int(C.SearchOnigRegex((ptr), C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), re.regex, re.region, 
re.errorInfo, (*C.char)(nil), (*C.int)(nil), (*C.int)(nil))) - return pos >= 0 -} - -func (re *Regexp) findAll(b []byte, n int) (matches [][]int) { - re.ClearMatchData() - - if n < 0 { - n = len(b) - } - matchData := re.matchData - offset := 0 - for offset <= n { - if matchData.count >= len(matchData.indexes) { - length := len(matchData.indexes[0]) - matchData.indexes = append(matchData.indexes, make([]int32, length)) - } - if match := re.find(b, n, offset); len(match) > 0 { - matchData.count += 1 - //move offset to the ending index of the current match and prepare to find the next non-overlapping match - offset = match[1] - //if match[0] == match[1], it means the current match does not advance the search. we need to exit the loop to avoid getting stuck here. - if match[0] == match[1] { - if offset < n && offset >= 0 { - //there are more bytes, so move offset by a word - _, width := utf8.DecodeRune(b[offset:]) - offset += width - } else { - //search is over, exit loop - break - } - } - } else { - break - } - } - matches2 := matchData.indexes[:matchData.count] - matches = make([][]int, len(matches2)) - for i, v := range matches2 { - matches[i] = make([]int, len(v)) - for j, v2 := range v { - matches[i][j] = int(v2) - } - } - return -} - -func (re *Regexp) FindIndex(b []byte) []int { - re.ClearMatchData() - match := re.find(b, len(b), 0) - if len(match) == 0 { - return nil - } - return match[:2] -} - -func (re *Regexp) Find(b []byte) []byte { - loc := re.FindIndex(b) - if loc == nil { - return nil - } - return getCapture(b, loc[0], loc[1]) -} - -func (re *Regexp) FindString(s string) string { - b := []byte(s) - mb := re.Find(b) - if mb == nil { - return "" - } - return string(mb) -} - -func (re *Regexp) FindStringIndex(s string) []int { - b := []byte(s) - return re.FindIndex(b) -} - -func (re *Regexp) FindAllIndex(b []byte, n int) [][]int { - matches := re.findAll(b, n) - if len(matches) == 0 { - return nil - } - return matches -} - -func (re *Regexp) FindAll(b 
[]byte, n int) [][]byte { - matches := re.FindAllIndex(b, n) - if matches == nil { - return nil - } - matchBytes := make([][]byte, 0, len(matches)) - for _, match := range matches { - matchBytes = append(matchBytes, getCapture(b, match[0], match[1])) - } - return matchBytes -} - -func (re *Regexp) FindAllString(s string, n int) []string { - b := []byte(s) - matches := re.FindAllIndex(b, n) - if matches == nil { - return nil - } - matchStrings := make([]string, 0, len(matches)) - for _, match := range matches { - m := getCapture(b, match[0], match[1]) - if m == nil { - matchStrings = append(matchStrings, "") - } else { - matchStrings = append(matchStrings, string(m)) - } - } - return matchStrings - -} - -func (re *Regexp) FindAllStringIndex(s string, n int) [][]int { - b := []byte(s) - return re.FindAllIndex(b, n) -} - -func (re *Regexp) findSubmatchIndex(b []byte) (match []int) { - re.ClearMatchData() - match = re.find(b, len(b), 0) - return -} - -func (re *Regexp) FindSubmatchIndex(b []byte) []int { - match := re.findSubmatchIndex(b) - if len(match) == 0 { - return nil - } - return match -} - -func (re *Regexp) FindSubmatch(b []byte) [][]byte { - match := re.findSubmatchIndex(b) - if match == nil { - return nil - } - length := len(match) / 2 - if length == 0 { - return nil - } - results := make([][]byte, 0, length) - for i := 0; i < length; i++ { - results = append(results, getCapture(b, match[2*i], match[2*i+1])) - } - return results -} - -func (re *Regexp) FindStringSubmatch(s string) []string { - b := []byte(s) - match := re.findSubmatchIndex(b) - if match == nil { - return nil - } - length := len(match) / 2 - if length == 0 { - return nil - } - - results := make([]string, 0, length) - for i := 0; i < length; i++ { - cap := getCapture(b, match[2*i], match[2*i+1]) - if cap == nil { - results = append(results, "") - } else { - results = append(results, string(cap)) - } - } - return results -} - -func (re *Regexp) FindStringSubmatchIndex(s string) []int { - b := 
[]byte(s) - return re.FindSubmatchIndex(b) -} - -func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int { - matches := re.findAll(b, n) - if len(matches) == 0 { - return nil - } - return matches -} - -func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte { - matches := re.findAll(b, n) - if len(matches) == 0 { - return nil - } - allCapturedBytes := make([][][]byte, 0, len(matches)) - for _, match := range matches { - length := len(match) / 2 - capturedBytes := make([][]byte, 0, length) - for i := 0; i < length; i++ { - capturedBytes = append(capturedBytes, getCapture(b, match[2*i], match[2*i+1])) - } - allCapturedBytes = append(allCapturedBytes, capturedBytes) - } - - return allCapturedBytes -} - -func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string { - b := []byte(s) - matches := re.findAll(b, n) - if len(matches) == 0 { - return nil - } - allCapturedStrings := make([][]string, 0, len(matches)) - for _, match := range matches { - length := len(match) / 2 - capturedStrings := make([]string, 0, length) - for i := 0; i < length; i++ { - cap := getCapture(b, match[2*i], match[2*i+1]) - if cap == nil { - capturedStrings = append(capturedStrings, "") - } else { - capturedStrings = append(capturedStrings, string(cap)) - } - } - allCapturedStrings = append(allCapturedStrings, capturedStrings) - } - return allCapturedStrings -} - -func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int { - b := []byte(s) - return re.FindAllSubmatchIndex(b, n) -} - -func (re *Regexp) Match(b []byte) bool { - return re.match(b, len(b), 0) -} - -func (re *Regexp) MatchString(s string) bool { - b := []byte(s) - return re.Match(b) -} - -func (re *Regexp) NumSubexp() int { - return (int)(C.onig_number_of_captures(re.regex)) -} - -func (re *Regexp) getNamedCapture(name []byte, capturedBytes [][]byte) []byte { - nameStr := string(name) - capNum := re.groupNameToId(nameStr) - if capNum < 0 || capNum >= len(capturedBytes) { - panic(fmt.Sprintf("capture 
group name (%q) has error\n", nameStr)) - } - return capturedBytes[capNum] -} - -func (re *Regexp) getNumberedCapture(num int, capturedBytes [][]byte) []byte { - //when named capture groups exist, numbered capture groups returns "" - if re.namedGroupInfo == nil && num <= (len(capturedBytes)-1) && num >= 0 { - return capturedBytes[num] - } - return ([]byte)("") -} - -func fillCapturedValues(repl []byte, _ []byte, capturedBytes map[string][]byte) []byte { - replLen := len(repl) - newRepl := make([]byte, 0, replLen*3) - inEscapeMode := false - inGroupNameMode := false - groupName := make([]byte, 0, replLen) - for index := 0; index < replLen; index += 1 { - ch := repl[index] - if inGroupNameMode && ch == byte('<') { - } else if inGroupNameMode && ch == byte('>') { - inGroupNameMode = false - groupNameStr := string(groupName) - capBytes := capturedBytes[groupNameStr] - newRepl = append(newRepl, capBytes...) - groupName = groupName[:0] //reset the name - } else if inGroupNameMode { - groupName = append(groupName, ch) - } else if inEscapeMode && ch <= byte('9') && byte('1') <= ch { - capNumStr := string(ch) - capBytes := capturedBytes[capNumStr] - newRepl = append(newRepl, capBytes...) 
- } else if inEscapeMode && ch == byte('k') && (index+1) < replLen && repl[index+1] == byte('<') { - inGroupNameMode = true - inEscapeMode = false - index += 1 //bypass the next char '<' - } else if inEscapeMode { - newRepl = append(newRepl, '\\') - newRepl = append(newRepl, ch) - } else if ch != '\\' { - newRepl = append(newRepl, ch) - } - if ch == byte('\\') || inEscapeMode { - inEscapeMode = !inEscapeMode - } - } - return newRepl -} - -func (re *Regexp) replaceAll(src, repl []byte, replFunc func([]byte, []byte, map[string][]byte) []byte) []byte { - srcLen := len(src) - matches := re.findAll(src, srcLen) - if len(matches) == 0 { - return src - } - dest := make([]byte, 0, srcLen) - for i, match := range matches { - length := len(match) / 2 - capturedBytes := make(map[string][]byte) - if re.namedGroupInfo == nil { - for j := 0; j < length; j++ { - capturedBytes[strconv.Itoa(j)] = getCapture(src, match[2*j], match[2*j+1]) - } - } else { - for name, j := range re.namedGroupInfo { - capturedBytes[name] = getCapture(src, match[2*j], match[2*j+1]) - } - } - matchBytes := getCapture(src, match[0], match[1]) - newRepl := replFunc(repl, matchBytes, capturedBytes) - prevEnd := 0 - if i > 0 { - prevMatch := matches[i-1][:2] - prevEnd = prevMatch[1] - } - if match[0] > prevEnd && prevEnd >= 0 && match[0] <= srcLen { - dest = append(dest, src[prevEnd:match[0]]...) - } - dest = append(dest, newRepl...) - } - lastEnd := matches[len(matches)-1][1] - if lastEnd < srcLen && lastEnd >= 0 { - dest = append(dest, src[lastEnd:]...) 
- } - return dest -} - -func (re *Regexp) ReplaceAll(src, repl []byte) []byte { - return re.replaceAll(src, repl, fillCapturedValues) -} - -func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { - return re.replaceAll(src, []byte(""), func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte { - return repl(matchBytes) - }) -} - -func (re *Regexp) ReplaceAllString(src, repl string) string { - return string(re.ReplaceAll([]byte(src), []byte(repl))) -} - -func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string { - srcB := []byte(src) - destB := re.replaceAll(srcB, []byte(""), func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte { - return []byte(repl(string(matchBytes))) - }) - return string(destB) -} - -func (re *Regexp) String() string { - return re.pattern -} - -func grow_buffer(b []byte, offset int, n int) []byte { - if offset+n > cap(b) { - buf := make([]byte, 2*cap(b)+n) - copy(buf, b[:offset]) - return buf - } - return b -} - -func fromReader(r io.RuneReader) []byte { - b := make([]byte, numReadBufferStartSize) - offset := 0 - var err error = nil - for err == nil { - rune, runeWidth, err := r.ReadRune() - if err == nil { - b = grow_buffer(b, offset, runeWidth) - writeWidth := utf8.EncodeRune(b[offset:], rune) - if runeWidth != writeWidth { - panic("reading rune width not equal to the written rune width") - } - offset += writeWidth - } else { - break - } - } - return b[:offset] -} - -func (re *Regexp) FindReaderIndex(r io.RuneReader) []int { - b := fromReader(r) - return re.FindIndex(b) -} - -func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int { - b := fromReader(r) - return re.FindSubmatchIndex(b) -} - -func (re *Regexp) MatchReader(r io.RuneReader) bool { - b := fromReader(r) - return re.Match(b) -} - -func (re *Regexp) LiteralPrefix() (prefix string, complete bool) { - //no easy way to implement this - return "", false -} - -func MatchString(pattern string, s string) (matched 
bool, error error) { - re, err := Compile(pattern) - if err != nil { - return false, err - } - return re.MatchString(s), nil -} - -func (re *Regexp) Gsub(src, repl string) string { - srcBytes := ([]byte)(src) - replBytes := ([]byte)(repl) - replaced := re.replaceAll(srcBytes, replBytes, fillCapturedValues) - return string(replaced) -} - -func (re *Regexp) GsubFunc(src string, replFunc func(string, map[string]string) string) string { - srcBytes := ([]byte)(src) - replaced := re.replaceAll(srcBytes, nil, func(_ []byte, matchBytes []byte, capturedBytes map[string][]byte) []byte { - capturedStrings := make(map[string]string) - for name, capBytes := range capturedBytes { - capturedStrings[name] = string(capBytes) - } - matchString := string(matchBytes) - return ([]byte)(replFunc(matchString, capturedStrings)) - }) - return string(replaced) -} diff --git a/vendor/github.com/pbnjay/memory/LICENSE b/vendor/github.com/pbnjay/memory/LICENSE new file mode 100644 index 000000000..63ca4a6d2 --- /dev/null +++ b/vendor/github.com/pbnjay/memory/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2017, Jeremy Jay +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/pbnjay/memory/README.md b/vendor/github.com/pbnjay/memory/README.md new file mode 100644 index 000000000..07534906b --- /dev/null +++ b/vendor/github.com/pbnjay/memory/README.md @@ -0,0 +1,40 @@ +# memory + +Package `memory` provides a single method reporting total physical system memory +accessible to the kernel. It does not account for memory used by other processes. + +This package has no external dependency beside the standard library. + +Documentation: +[![GoDoc](https://godoc.org/github.com/pbnjay/memory?status.svg)](https://godoc.org/github.com/pbnjay/memory) + +This is useful for dynamic code to minimize thrashing and other contention, similar to the stdlib `runtime.NumCPU` +See some history of the proposal at https://github.com/golang/go/issues/21816 + + +## Example + +```go +fmt.Printf("Total system memory: %d\n", memory.TotalMemory()) +``` + + +## Testing + +Tested/working on: + - macOS 10.12.6 (16G29) + - Windows 10 1511 (10586.1045) + - Linux RHEL (3.10.0-327.3.1.el7.x86_64) + - Raspberry Pi 3 (ARMv8) on Raspbian, ODROID-C1+ (ARMv7) on Ubuntu, C.H.I.P + (ARMv7). 
+ +Tested on virtual machines: + - Windows 7 SP1 386 + - Debian stretch 386 + - NetBSD 7.1 amd64 + 386 + - OpenBSD 6.1 amd64 + 386 + - FreeBSD 11.1 amd64 + 386 + - DragonFly BSD 4.8.1 amd64 + +If you have access to untested systems (notably arm) please +test and file bugs if necessary. diff --git a/vendor/github.com/pbnjay/memory/doc.go b/vendor/github.com/pbnjay/memory/doc.go new file mode 100644 index 000000000..f1944bf8f --- /dev/null +++ b/vendor/github.com/pbnjay/memory/doc.go @@ -0,0 +1,14 @@ +// Package memory provides a single method reporting total system memory +// accessible to the kernel. +package memory + +// TotalMemory returns the total accessible system memory in bytes. +// +// The total accessible memory is installed physical memory size minus reserved +// areas for the kernel and hardware, if such reservations are reported by +// the operating system. +// +// If accessible memory size could not be determined, then 0 is returned. +func TotalMemory() uint64 { + return sysTotalMemory() +} diff --git a/vendor/github.com/pbnjay/memory/example_test.go b/vendor/github.com/pbnjay/memory/example_test.go new file mode 100644 index 000000000..a24a16bcc --- /dev/null +++ b/vendor/github.com/pbnjay/memory/example_test.go @@ -0,0 +1,11 @@ +package memory_test + +import ( + "fmt" + + "github.com/pbnjay/memory" +) + +func ExampleTotalMemory() { + fmt.Printf("Total system memory: %d\n", memory.TotalMemory()) +} diff --git a/vendor/github.com/pbnjay/memory/memory_bsd.go b/vendor/github.com/pbnjay/memory/memory_bsd.go new file mode 100644 index 000000000..22eb93bd0 --- /dev/null +++ b/vendor/github.com/pbnjay/memory/memory_bsd.go @@ -0,0 +1,11 @@ +// +build freebsd openbsd dragonfly netbsd + +package memory + +func sysTotalMemory() uint64 { + s, err := sysctlUint64("hw.physmem") + if err != nil { + return 0 + } + return s +} diff --git a/vendor/github.com/pbnjay/memory/memory_darwin.go b/vendor/github.com/pbnjay/memory/memory_darwin.go new file mode 100644 index 
000000000..55096952f --- /dev/null +++ b/vendor/github.com/pbnjay/memory/memory_darwin.go @@ -0,0 +1,11 @@ +// +build darwin + +package memory + +func sysTotalMemory() uint64 { + s, err := sysctlUint64("hw.memsize") + if err != nil { + return 0 + } + return s +} diff --git a/vendor/github.com/pbnjay/memory/memory_linux.go b/vendor/github.com/pbnjay/memory/memory_linux.go new file mode 100644 index 000000000..bcc6017a9 --- /dev/null +++ b/vendor/github.com/pbnjay/memory/memory_linux.go @@ -0,0 +1,17 @@ +// +build linux + +package memory + +import "syscall" + +func sysTotalMemory() uint64 { + in := &syscall.Sysinfo_t{} + err := syscall.Sysinfo(in) + if err != nil { + return 0 + } + // If this is a 32-bit system, then these fields are + // uint32 instead of uint64. + // So we always convert to uint64 to match signature. + return uint64(in.Totalram) * uint64(in.Unit) +} diff --git a/vendor/github.com/pbnjay/memory/memory_test.go b/vendor/github.com/pbnjay/memory/memory_test.go new file mode 100644 index 000000000..9c910e6cd --- /dev/null +++ b/vendor/github.com/pbnjay/memory/memory_test.go @@ -0,0 +1,9 @@ +package memory + +import "testing" + +func TestNonZero(t *testing.T) { + if TotalMemory() == 0 { + t.Fatal("TotalMemory returned 0") + } +} diff --git a/vendor/github.com/pbnjay/memory/memory_windows.go b/vendor/github.com/pbnjay/memory/memory_windows.go new file mode 100644 index 000000000..6d77edbb4 --- /dev/null +++ b/vendor/github.com/pbnjay/memory/memory_windows.go @@ -0,0 +1,38 @@ +// +build windows + +package memory + +import ( + "syscall" + "unsafe" +) + +// omitting a few fields for brevity... 
+// https://msdn.microsoft.com/en-us/library/windows/desktop/aa366589(v=vs.85).aspx +type memStatusEx struct { + dwLength uint32 + dwMemoryLoad uint32 + ullTotalPhys uint64 + unused [6]uint64 +} + +func sysTotalMemory() uint64 { + kernel32, err := syscall.LoadDLL("kernel32.dll") + if err != nil { + return 0 + } + // GetPhysicallyInstalledSystemMemory is simpler, but broken on + // older versions of windows (and uses this under the hood anyway). + globalMemoryStatusEx, err := kernel32.FindProc("GlobalMemoryStatusEx") + if err != nil { + return 0 + } + msx := &memStatusEx{ + dwLength: 64, + } + r, _, _ := globalMemoryStatusEx.Call(uintptr(unsafe.Pointer(msx))) + if r == 0 { + return 0 + } + return msx.ullTotalPhys +} diff --git a/vendor/github.com/pbnjay/memory/memsysctl.go b/vendor/github.com/pbnjay/memory/memsysctl.go new file mode 100644 index 000000000..438d9eff8 --- /dev/null +++ b/vendor/github.com/pbnjay/memory/memsysctl.go @@ -0,0 +1,21 @@ +// +build darwin freebsd openbsd dragonfly netbsd + +package memory + +import ( + "syscall" + "unsafe" +) + +func sysctlUint64(name string) (uint64, error) { + s, err := syscall.Sysctl(name) + if err != nil { + return 0, err + } + // hack because the string conversion above drops a \0 + b := []byte(s) + if len(b) < 8 { + b = append(b, 0) + } + return *(*uint64)(unsafe.Pointer(&b[0])), nil +} diff --git a/vendor/github.com/pbnjay/memory/stub.go b/vendor/github.com/pbnjay/memory/stub.go new file mode 100644 index 000000000..790dca642 --- /dev/null +++ b/vendor/github.com/pbnjay/memory/stub.go @@ -0,0 +1,7 @@ +// +build !linux,!darwin,!windows,!freebsd,!dragonfly,!netbsd,!openbsd + +package memory + +func sysTotalMemory() uint64 { + return 0 +} diff --git a/vendor/gopkg.in/src-d/go-mysql-server.v0/sql/plan/innerjoin.go b/vendor/gopkg.in/src-d/go-mysql-server.v0/sql/plan/innerjoin.go index 5645ade09..1ca2696c6 100644 --- a/vendor/gopkg.in/src-d/go-mysql-server.v0/sql/plan/innerjoin.go +++ 
b/vendor/gopkg.in/src-d/go-mysql-server.v0/sql/plan/innerjoin.go @@ -4,15 +4,52 @@ import ( "io" "os" "reflect" + "runtime" + "strconv" + "strings" opentracing "github.com/opentracing/opentracing-go" + "github.com/pbnjay/memory" + "github.com/sirupsen/logrus" "gopkg.in/src-d/go-mysql-server.v0/sql" ) -const experimentalInMemoryJoinKey = "EXPERIMENTAL_IN_MEMORY_JOIN" -const inMemoryJoinSessionVar = "inmemory_joins" +const ( + experimentalInMemoryJoinKey = "EXPERIMENTAL_IN_MEMORY_JOIN" + maxMemoryJoinKey = "MAX_MEMORY_INNER_JOIN" + inMemoryJoinSessionVar = "inmemory_joins" + memoryThresholdSessionVar = "max_memory_joins" +) + +var ( + useInMemoryJoins = shouldUseMemoryJoinsByEnv() + // One fifth of the total physical memory available on the OS (ignoring the + // memory used by other processes). + defaultMemoryThreshold = memory.TotalMemory() / 5 + // Maximum amount of memory the gitbase server can have in use before + // considering all inner joins should be done using multipass mode. + maxMemoryJoin = loadMemoryThreshold() +) + +func shouldUseMemoryJoinsByEnv() bool { + v := strings.TrimSpace(strings.ToLower(os.Getenv(experimentalInMemoryJoinKey))) + return v == "on" || v == "1" +} + +func loadMemoryThreshold() uint64 { + v, ok := os.LookupEnv(maxMemoryJoinKey) + if !ok { + return defaultMemoryThreshold + } -var useInMemoryJoins = os.Getenv(experimentalInMemoryJoinKey) != "" + n, err := strconv.ParseUint(v, 10, 64) + if err != nil { + logrus.Warnf("invalid value %q given to %s environment variable", v, maxMemoryJoinKey) + return defaultMemoryThreshold + } + + return n +} // InnerJoin is an inner join between two tables. 
type InnerJoin struct { @@ -73,27 +110,17 @@ func (j *InnerJoin) RowIter(ctx *sql.Context) (sql.RowIter, error) { inMemorySession = true } - var iter sql.RowIter + var mode = unknownMode if useInMemoryJoins || inMemorySession { - r, err := j.Right.RowIter(ctx) - if err != nil { - span.Finish() - return nil, err - } + mode = memoryMode + } - iter = &innerJoinMemoryIter{ - l: l, - r: r, - ctx: ctx, - cond: j.Cond, - } - } else { - iter = &innerJoinIter{ - l: l, - rp: j.Right, - ctx: ctx, - cond: j.Cond, - } + iter := &innerJoinIter{ + l: l, + rp: j.Right, + ctx: ctx, + cond: j.Cond, + mode: mode, } return sql.NewSpanIter(span, iter), nil @@ -156,6 +183,25 @@ func (j *InnerJoin) TransformExpressions(f sql.TransformExprFunc) (sql.Node, err return NewInnerJoin(j.Left, j.Right, cond), nil } +// innerJoinMode defines the mode in which an inner join will be performed. +type innerJoinMode byte + +const ( + // unknownMode is the default mode. It will start iterating without really + // knowing in which mode it will end up computing the inner join. If it + // iterates the right side fully one time and so far it fits in memory, + // then it will switch to memory mode. Otherwise, if at some point during + // this first iteration it finds that it does not fit in memory, will + // switch to multipass mode. + unknownMode innerJoinMode = iota + // memoryMode computes all the inner join directly in memory iterating each + // side of the join exactly once. + memoryMode + // multipassMode computes the inner join by iterating the left side once, + // and the right side one time for each row in the left side. 
+ multipassMode +) + type innerJoinIter struct { l sql.RowIter rp rowIterProvider @@ -164,118 +210,140 @@ type innerJoinIter struct { cond sql.Expression leftRow sql.Row -} -func (i *innerJoinIter) Next() (sql.Row, error) { - for { - if i.leftRow == nil { - r, err := i.l.Next() - if err != nil { - return nil, err - } + // used to compute in-memory + mode innerJoinMode + right []sql.Row + pos int +} - i.leftRow = r +func (i *innerJoinIter) loadLeft() error { + if i.leftRow == nil { + r, err := i.l.Next() + if err != nil { + return err } - if i.r == nil { - iter, err := i.rp.RowIter(i.ctx) - if err != nil { - return nil, err - } + i.leftRow = r + } - i.r = iter - } + return nil +} - rightRow, err := i.r.Next() - if err == io.EOF { - i.r = nil - i.leftRow = nil - continue +func (i *innerJoinIter) loadRightInMemory() error { + iter, err := i.rp.RowIter(i.ctx) + if err != nil { + return err + } + + i.right, err = sql.RowIterToRows(iter) + if err != nil { + return err + } + + if len(i.right) == 0 { + return io.EOF + } + + return nil +} + +func (i *innerJoinIter) fitsInMemory() bool { + var maxMemory uint64 + _, v := i.ctx.Session.Get(memoryThresholdSessionVar) + if n, ok := v.(int64); ok { + maxMemory = uint64(n) + } else { + maxMemory = maxMemoryJoin + } + + if maxMemory <= 0 { + return true + } + + var ms runtime.MemStats + runtime.ReadMemStats(&ms) + + return (ms.HeapInuse + ms.StackInuse) < maxMemory +} + +func (i *innerJoinIter) loadRight() (row sql.Row, skip bool, err error) { + if i.mode == memoryMode { + if len(i.right) == 0 { + if err := i.loadRightInMemory(); err != nil { + return nil, false, err + } } - if err != nil { - return nil, err + if i.pos >= len(i.right) { + i.leftRow = nil + i.pos = 0 + return nil, true, nil } - var row = make(sql.Row, len(i.leftRow)+len(rightRow)) - copy(row, i.leftRow) - copy(row[len(i.leftRow):], rightRow) + row := i.right[i.pos] + i.pos++ + return row, false, nil + } - v, err := i.cond.Eval(i.ctx, row) + if i.r == nil { + iter, 
err := i.rp.RowIter(i.ctx) if err != nil { - return nil, err + return nil, false, err } - if v == true { - return row, nil - } + i.r = iter } -} -func (i *innerJoinIter) Close() error { - if err := i.l.Close(); err != nil { - if i.r != nil { - _ = i.r.Close() + rightRow, err := i.r.Next() + if err != nil { + if err == io.EOF { + i.r = nil + i.leftRow = nil + + // If we got to this point and the mode is still unknown it means + // the right side fits in memory, so the mode changes to memory + // inner join. + if i.mode == unknownMode { + i.mode = memoryMode + } + + return nil, true, nil } - return err + return nil, false, err } - if i.r != nil { - return i.r.Close() + if i.mode == unknownMode { + if !i.fitsInMemory() { + i.right = nil + i.mode = multipassMode + } else { + i.right = append(i.right, rightRow) + } } - return nil -} - -type innerJoinMemoryIter struct { - l sql.RowIter - r sql.RowIter - ctx *sql.Context - cond sql.Expression - pos int - leftRow sql.Row - right []sql.Row + return rightRow, false, err } -func (i *innerJoinMemoryIter) Next() (sql.Row, error) { +func (i *innerJoinIter) Next() (sql.Row, error) { for { - if i.leftRow == nil { - r, err := i.l.Next() - if err != nil { - return nil, err - } - - i.leftRow = r + if err := i.loadLeft(); err != nil { + return nil, err } - if i.r != nil { - for { - row, err := i.r.Next() - if err != nil { - if err == io.EOF { - break - } - return nil, err - } - - i.right = append(i.right, row) - } - i.r = nil + rightRow, skip, err := i.loadRight() + if err != nil { + return nil, err } - if i.pos >= len(i.right) { - i.pos = 0 - i.leftRow = nil + if skip { continue } - rightRow := i.right[i.pos] var row = make(sql.Row, len(i.leftRow)+len(rightRow)) copy(row, i.leftRow) copy(row[len(i.leftRow):], rightRow) - i.pos++ - v, err := i.cond.Eval(i.ctx, row) if err != nil { return nil, err @@ -287,7 +355,7 @@ func (i *innerJoinMemoryIter) Next() (sql.Row, error) { } } -func (i *innerJoinMemoryIter) Close() error { +func (i 
*innerJoinIter) Close() error { if err := i.l.Close(); err != nil { if i.r != nil { _ = i.r.Close() @@ -299,5 +367,7 @@ func (i *innerJoinMemoryIter) Close() error { return i.r.Close() } + i.right = nil + return nil }