Skip to content

Commit

Permalink
speed up phpgrep with concurrent workers
Browse files Browse the repository at this point in the history
Use 8 workers by default.
The number of workers is controlled by the -workers=N param.

Fixes #21

Signed-off-by: Iskander Sharipov <quasilyte@gmail.com>
  • Loading branch information
quasilyte committed Aug 14, 2019
1 parent bb8e691 commit 17f68a7
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 38 deletions.
6 changes: 5 additions & 1 deletion cmd/phpgrep/main.go
Expand Up @@ -21,6 +21,8 @@ type arguments struct {
target string
pattern string
filters []string

workers int
}

func main() {
Expand Down Expand Up @@ -54,7 +56,7 @@ func main() {
}
}

if len(p.matches) == 0 {
if p.matches == 0 {
os.Exit(exitNotMatched)
} else {
os.Exit(exitMatched)
Expand Down Expand Up @@ -98,6 +100,8 @@ Supported command-line flags:
`multiline mode: print matches without escaping newlines to \n`)
flag.BoolVar(&args.abs, "abs", false,
`print absolute filenames in the output`)
flag.IntVar(&args.workers, "workers", 8,
`set the number of concurrent workers`)

flag.Parse()

Expand Down
97 changes: 60 additions & 37 deletions cmd/phpgrep/program.go
Expand Up @@ -2,11 +2,11 @@ package main

import (
"fmt"
"io/ioutil"
"log"
"os"
"path/filepath"
"strings"
"sync"

"github.com/quasilyte/phpgrep"
)
Expand All @@ -20,13 +20,19 @@ type match struct {
type program struct {
args arguments

m *phpgrep.Matcher
workers []*worker
filters []phpgrep.Filter

matches []match
matches int
}

func (p *program) validateFlags() error {
if p.args.workers < 1 {
return fmt.Errorf("workers value can't be less than 1")
}
if p.args.workers > 512 {
// Users won't notice.
p.args.workers = 512
}
if p.args.target == "" {
return fmt.Errorf("target can't be empty")
}
Expand Down Expand Up @@ -54,25 +60,15 @@ func (p *program) compilePattern() error {
if err != nil {
return err
}
p.m = m
return nil
}

func (p *program) grepFile(filename string) error {
data, err := ioutil.ReadFile(filename)
if err != nil {
return fmt.Errorf("read target: %v", err)
p.workers = make([]*worker, p.args.workers)
for i := range p.workers {
p.workers[i] = &worker{
id: i,
m: m.Clone(),
}
}

p.m.Find(data, func(m *phpgrep.MatchData) bool {
p.matches = append(p.matches, match{
text: string(data[m.PosFrom:m.PosTo]),
filename: filename,
line: m.LineFrom,
})
return true
})

return nil
}

Expand All @@ -92,7 +88,30 @@ func (p *program) executePattern() error {
return false
}

return filepath.Walk(p.args.target, func(path string, info os.FileInfo, err error) error {
filenameQueue := make(chan string)

var wg sync.WaitGroup
wg.Add(len(p.workers))
defer func() {
close(filenameQueue)
wg.Wait()
}()
for _, w := range p.workers {
go func(w *worker) {
defer wg.Done()

for filename := range filenameQueue {
if p.args.verbose {
log.Printf("debug: worker#%d greps %q file", w.id, filename)
}
if err := w.grepFile(filename); err != nil {
log.Printf("error: execute pattern: %s: %v", filename, err)
}
}
}(w)
}

err := filepath.Walk(p.args.target, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
Expand All @@ -104,29 +123,33 @@ func (p *program) executePattern() error {
return nil
}

if p.args.verbose {
log.Printf("debug: grep %q file", path)
}
return p.grepFile(path)
filenameQueue <- path
return nil
})

return err
}

func (p *program) printResults() error {
// TODO(quasilyte): add JSON output format?
for _, m := range p.matches {
text := m.text
if !p.args.multiline {
text = strings.Replace(text, "\n", `\n`, -1)
}
filename := m.filename
if p.args.abs {
abs, err := filepath.Abs(filename)
if err != nil {
return fmt.Errorf("abs(%q): %v", m.filename, err)
for _, w := range p.workers {
for _, m := range w.matches {
p.matches++

text := m.text
if !p.args.multiline {
text = strings.Replace(text, "\n", `\n`, -1)
}
filename := m.filename
if p.args.abs {
abs, err := filepath.Abs(filename)
if err != nil {
return fmt.Errorf("abs(%q): %v", m.filename, err)
}
filename = abs
}
filename = abs
fmt.Printf("%s:%d: %s\n", filename, m.line, text)
}
fmt.Printf("%s:%d: %s\n", filename, m.line, text)
}

return nil
Expand Down
32 changes: 32 additions & 0 deletions cmd/phpgrep/worker.go
@@ -0,0 +1,32 @@
package main

import (
"fmt"
"io/ioutil"

"github.com/quasilyte/phpgrep"
)

// worker is one unit of concurrent grepping: it holds its own matcher and
// accumulates the matches it finds in its own slice.
type worker struct {
// id is the worker's number; it is printed in the verbose debug log.
id int
// m is the matcher that grepFile runs over file contents.
m *phpgrep.Matcher
// matches collects every match recorded by grepFile.
matches []match
}

// grepFile reads the whole file named by filename and runs the worker's
// matcher over its contents, appending one match record (matched text,
// filename, starting line) to w.matches for every reported hit.
//
// A read failure is returned with a "read target" prefix; in every other
// case the result is nil.
func (w *worker) grepFile(filename string) error {
	src, readErr := ioutil.ReadFile(filename)
	if readErr != nil {
		return fmt.Errorf("read target: %v", readErr)
	}

	// collect records a single hit reported by the matcher.
	collect := func(found *phpgrep.MatchData) bool {
		hit := match{
			filename: filename,
			line:     found.LineFrom,
			text:     string(src[found.PosFrom:found.PosTo]),
		}
		w.matches = append(w.matches, hit)
		// NOTE(review): true presumably tells Find to keep searching —
		// confirm against the phpgrep.Matcher.Find contract.
		return true
	}
	w.m.Find(src, collect)

	return nil
}
5 changes: 5 additions & 0 deletions phpgrep.go
Expand Up @@ -49,6 +49,11 @@ type MatchData struct {
PosTo int
}

// Clone returns a new Matcher whose m field is copied from the receiver.
//
// NOTE(review): whether the result is a fully independent (deep) copy
// depends on the type of the m field, which is declared elsewhere — confirm
// before relying on clones sharing no state.
func (m *Matcher) Clone() *Matcher {
	clone := new(Matcher)
	clone.m = m.m
	return clone
}

// Match reports whether given PHP code matches the bound pattern.
//
// For malformed inputs (like code with syntax errors), returns false.
Expand Down

0 comments on commit 17f68a7

Please sign in to comment.