diff --git a/README.md b/README.md
index a32ccc6..07f945d 100644
--- a/README.md
+++ b/README.md
@@ -2,12 +2,13 @@
 ![Build Status](https://github.com/nscuro/fdnssearch/workflows/Continuous%20Integration/badge.svg?branch=master)
 
-**Disclaimer**: You can do most of what *fdnssearch* does with [`bash`, `curl`, `pigz`, `jq` and GNU `parallel`](https://github.com/rapid7/sonar/wiki/Forward-DNS).
-This is nothing revolutionary, I made this because I prefer simple commands over wonky shell scripts.
+*Swiftly search [FDNS](https://github.com/rapid7/sonar/wiki/Forward-DNS) datasets from Rapid7 Open Data*
+
+**Disclaimer**: You can do most of what *fdnssearch* does with [`bash`, `curl`, `pigz`, `jq` and GNU `parallel`](https://github.com/rapid7/sonar/wiki/Analyzing-Datasets). This is nothing revolutionary.
 
 ## Installation
 
-`GO111MODULE=on go get -v github.com/nscuro/fdnssearch/cmd/fdnssearch`
+`GO111MODULE=on go get -v github.com/nscuro/fdnssearch/...`
 
 Alternatively, clone this repo and run `make install`. Make sure `$GOPATH/bin` is in your `$PATH`.
diff --git a/go.mod b/go.mod
index 990211f..aa26b8a 100644
--- a/go.mod
+++ b/go.mod
@@ -14,6 +14,7 @@ require (
 	github.com/spf13/cobra v1.0.0
 	github.com/spf13/pflag v1.0.5 // indirect
 	github.com/stretchr/testify v1.6.1
+	github.com/valyala/fastjson v1.5.3
 	golang.org/x/net v0.0.0-20200707034311-ab3426394381 // indirect
 	gopkg.in/ini.v1 v1.57.0 // indirect
 )
diff --git a/go.sum b/go.sum
index 92d20eb..c9f4fff 100644
--- a/go.sum
+++ b/go.sum
@@ -120,6 +120,8 @@ github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd
 github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
 github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
+github.com/valyala/fastjson v1.5.3 h1:z4Z1Bll4WaXo+FXJoiCdW8ss7sKY2d/jYfE2ZzoT284=
+github.com/valyala/fastjson v1.5.3/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY=
 github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
 github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
 go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
diff --git a/internal/search/search.go b/internal/search/search.go
index 1b3233e..9c217b1 100644
--- a/internal/search/search.go
+++ b/internal/search/search.go
@@ -3,7 +3,6 @@ package search
 import (
 	"bufio"
 	"context"
-	"encoding/json"
 	"fmt"
 	"io"
 	"strings"
@@ -11,16 +10,18 @@ import (
 
 	"github.com/nscuro/fdnssearch/internal/dataset"
 	"github.com/panjf2000/ants"
+	"github.com/valyala/fastjson"
 )
 
 type searchWorkerContext struct {
-	chunk       string
-	domains     *[]string
-	exclusions  *[]string
-	types       *[]string
-	resultsChan chan<- dataset.Entry
-	errorsChan  chan<- error
-	waitGroup   *sync.WaitGroup
+	chunk          string
+	domains        *[]string
+	exclusions     *[]string
+	types          *[]string
+	jsonParserPool *fastjson.ParserPool
+	resultsChan    chan<- dataset.Entry
+	errorsChan     chan<- error
+	waitGroup      *sync.WaitGroup
 }
 
 func searchWorker(workerCtx interface{}) {
@@ -32,69 +33,94 @@ func searchWorker(workerCtx interface{}) {
 	}
 	defer ctx.waitGroup.Done()
 
+	entry, err := filter(ctx.chunk, ctx.types, ctx.domains, ctx.exclusions, ctx.jsonParserPool)
+	if err != nil {
+		ctx.errorsChan <- err
+		return
+	} else if entry == nil {
+		return
+	}
+
+	ctx.resultsChan <- *entry
+}
+
+func filter(chunk string, types *[]string, domains *[]string, exclusions *[]string, jsonParserPool *fastjson.ParserPool) (*dataset.Entry, error) {
 	// prevent the necessity to decode entries that definitely
-	// do not match the given search criteria. decoding json appears
-	// to be drastically more computationally expensive than this.
+	// do not match the given search criteria. decoding json is
+	// drastically more computationally expensive than this simple
+	// loop.
 	possibleMatch := false
-	for _, domain := range *ctx.domains {
-		if strings.Contains(ctx.chunk, domain) {
+	for _, domain := range *domains {
+		if strings.Contains(chunk, domain) {
 			possibleMatch = true
 			break
 		}
 	}
 	if !possibleMatch {
-		return
+		return nil, nil
 	}
 
-	var entry dataset.Entry
-	if err := json.Unmarshal([]byte(ctx.chunk), &entry); err != nil {
-		ctx.errorsChan <- fmt.Errorf("failed to decode entry: %w", err)
-		return
+	jsonParser := jsonParserPool.Get()
+	parsedEntry, err := jsonParser.Parse(chunk)
+	if err != nil {
+		jsonParserPool.Put(jsonParser)
+		return nil, fmt.Errorf("failed to parse entry: %w", err)
 	}
 
+	// parse everything we need in advance so jsonParser can
+	// be put back into the pool as fast as possible
+	entryName := string(parsedEntry.GetStringBytes("name"))
+	entryValue := string(parsedEntry.GetStringBytes("value"))
+	entryType := string(parsedEntry.GetStringBytes("type"))
+	jsonParserPool.Put(jsonParser)
+
 	// filter by type
-	if len(*ctx.types) > 0 {
+	if len(*types) > 0 {
 		found := false
-		for _, ttype := range *ctx.types {
-			if entry.Type == ttype {
+		for _, ttype := range *types {
+			if entryType == ttype {
 				found = true
 				break
 			}
 		}
 		if !found {
-			return
+			return nil, nil
 		}
 	}
 
 	// filter by domain
-	if len(*ctx.domains) > 0 {
+	if len(*domains) > 0 {
 		found := false
-		for _, domain := range *ctx.domains {
-			if entry.Name == domain || strings.HasSuffix(entry.Name, "."+domain) {
+		for _, domain := range *domains {
+			if entryName == domain || strings.HasSuffix(entryName, "."+domain) {
 				found = true
 				break
 			}
 		}
 		if !found {
-			return
+			return nil, nil
 		}
 	}
 
 	// filter by exclusion
-	if len(*ctx.exclusions) > 0 {
+	if len(*exclusions) > 0 {
 		found := false
-		for _, exclusion := range *ctx.exclusions {
-			if entry.Name == exclusion || strings.HasSuffix(entry.Name, "."+exclusion) {
+		for _, exclusion := range *exclusions {
+			if entryName == exclusion || strings.HasSuffix(entryName, "."+exclusion) {
 				found = true
 				break
 			}
 		}
 		if found {
-			return
+			return nil, nil
 		}
 	}
 
-	ctx.resultsChan <- entry
+	return &dataset.Entry{
+		Name:  entryName,
+		Type:  entryType,
+		Value: entryValue,
+	}, nil
 }
 
 type Options struct {
@@ -105,7 +131,8 @@ type Options struct {
 }
 
 type Searcher struct {
-	workerCount int
+	workerCount    int
+	jsonParserPool fastjson.ParserPool
 }
 
 func NewSearcher(workerCount int) *Searcher {
@@ -135,6 +162,10 @@ func (s Searcher) Search(ctx context.Context, options Options) (<-chan dataset.E
 	// wait group for search workers
 	waitGroup := sync.WaitGroup{}
 
+	// pool for fastjson.Parser to encourage reusing
+	// of instances without causing race conditions
+	jsonParserPool := fastjson.ParserPool{}
+
 	scanner := bufio.NewScanner(options.DatasetReader)
 scanLoop:
 	for scanner.Scan() {
@@ -147,13 +178,14 @@ func (s Searcher) Search(ctx context.Context, options Options) (<-chan dataset.E
 		waitGroup.Add(1)
 
 		err = workerPool.Invoke(searchWorkerContext{
-			chunk:       scanner.Text(),
-			domains:     &options.Domains,
-			exclusions:  &options.Exclusions,
-			types:       &options.Types,
-			resultsChan: resultsChan,
-			errorsChan:  errorsChan,
-			waitGroup:   &waitGroup,
+			chunk:          scanner.Text(),
+			domains:        &options.Domains,
+			exclusions:     &options.Exclusions,
+			types:          &options.Types,
+			jsonParserPool: &jsonParserPool,
+			resultsChan:    resultsChan,
+			errorsChan:     errorsChan,
+			waitGroup:      &waitGroup,
 		})
 		if err != nil {
 			errorsChan <- fmt.Errorf("failed to submit chunk to worker pool: %w", err)
diff --git a/internal/search/search_test.go b/internal/search/search_test.go
index cb46866..d4395ae 100644
--- a/internal/search/search_test.go
+++ b/internal/search/search_test.go
@@ -6,6 +6,7 @@ import (
 
 	"github.com/nscuro/fdnssearch/internal/dataset"
 	"github.com/stretchr/testify/assert"
+	"github.com/valyala/fastjson"
 )
 
 func TestSearchWorker(t *testing.T) {
@@ -18,13 +19,14 @@
 	waitGroup.Add(1)
 
 	go searchWorker(searchWorkerContext{
-		chunk:       "{\"name\":\"acme.example.com\",\"value\":\"1.1.1.1\",\"type\":\"a\"}",
-		domains:     &[]string{"example.com"},
-		exclusions:  &[]string{},
-		types:       &[]string{"a"},
-		resultsChan: resultsChan,
-		errorsChan:  errorsChan,
-		waitGroup:   &waitGroup,
+		chunk:          "{\"name\":\"acme.example.com\",\"value\":\"1.1.1.1\",\"type\":\"a\"}",
+		domains:        &[]string{"example.com"},
+		exclusions:     &[]string{},
+		types:          &[]string{"a"},
+		jsonParserPool: &fastjson.ParserPool{},
+		resultsChan:    resultsChan,
+		errorsChan:     errorsChan,
+		waitGroup:      &waitGroup,
 	})
 
 	waitGroup.Wait()
@@ -48,13 +50,14 @@
 	waitGroup.Add(1)
 
 	go searchWorker(searchWorkerContext{
-		chunk:       "{\"name\":\"acme.example.com\",\"value\":\"1.1.1.1\",\"type\":\"a\"}",
-		domains:     &[]string{"example.de"},
-		exclusions:  &[]string{},
-		types:       &[]string{},
-		resultsChan: resultsChan,
-		errorsChan:  errorsChan,
-		waitGroup:   &waitGroup,
+		chunk:          "{\"name\":\"acme.example.com\",\"value\":\"1.1.1.1\",\"type\":\"a\"}",
+		domains:        &[]string{"example.de"},
+		exclusions:     &[]string{},
+		types:          &[]string{},
+		jsonParserPool: &fastjson.ParserPool{},
+		resultsChan:    resultsChan,
+		errorsChan:     errorsChan,
+		waitGroup:      &waitGroup,
 	})
 
 	waitGroup.Wait()
@@ -73,13 +76,14 @@
 	waitGroup.Add(1)
 
 	go searchWorker(searchWorkerContext{
-		chunk:       "{\"name\":\"acme.example.com\",\"value\":\"1.1.1.1\",\"type\":\"a\"}",
-		domains:     &[]string{"example.com"},
-		exclusions:  &[]string{},
-		types:       &[]string{"aaaa"},
-		resultsChan: resultsChan,
-		errorsChan:  errorsChan,
-		waitGroup:   &waitGroup,
+		chunk:          "{\"name\":\"acme.example.com\",\"value\":\"1.1.1.1\",\"type\":\"a\"}",
+		domains:        &[]string{"example.com"},
+		exclusions:     &[]string{},
+		types:          &[]string{"aaaa"},
+		jsonParserPool: &fastjson.ParserPool{},
+		resultsChan:    resultsChan,
+		errorsChan:     errorsChan,
+		waitGroup:      &waitGroup,
 	})
 
 	waitGroup.Wait()
@@ -98,13 +102,14 @@
 	waitGroup.Add(1)
 
 	go searchWorker(searchWorkerContext{
-		chunk:       "{\"name\":\"acme.example.com\",\"value\":\"1.1.1.1\",\"type\":\"a\"}",
-		domains:     &[]string{"example.com"},
-		exclusions:  &[]string{"acme.example.com"},
-		types:       &[]string{},
-		resultsChan: resultsChan,
-		errorsChan:  errorsChan,
-		waitGroup:   &waitGroup,
+		chunk:          "{\"name\":\"acme.example.com\",\"value\":\"1.1.1.1\",\"type\":\"a\"}",
+		domains:        &[]string{"example.com"},
+		exclusions:     &[]string{"acme.example.com"},
+		types:          &[]string{},
+		jsonParserPool: &fastjson.ParserPool{},
+		resultsChan:    resultsChan,
+		errorsChan:     errorsChan,
+		waitGroup:      &waitGroup,
 	})
 
 	waitGroup.Wait()
@@ -123,13 +128,14 @@
 	waitGroup.Add(1)
 
 	go searchWorker(searchWorkerContext{
-		chunk:       "invalidJsonThatContainsexample.com",
-		domains:     &[]string{"example.com"},
-		exclusions:  &[]string{},
-		types:       &[]string{},
-		resultsChan: resultsChan,
-		errorsChan:  errorsChan,
-		waitGroup:   &waitGroup,
+		chunk:          "invalidJsonThatContainsexample.com",
+		domains:        &[]string{"example.com"},
+		exclusions:     &[]string{},
+		types:          &[]string{},
+		jsonParserPool: &fastjson.ParserPool{},
+		resultsChan:    resultsChan,
+		errorsChan:     errorsChan,
+		waitGroup:      &waitGroup,
 	})
 
 	waitGroup.Wait()
@@ -137,3 +143,58 @@
 	assert.Len(t, resultsChan, 0)
 	assert.Len(t, errorsChan, 1)
 }
+
+func BenchmarkFilterForPositiveMatch(b *testing.B) {
+	domains := []string{"example.com"}
+	exclusions := []string{}
+	types := []string{"a"}
+	jsonParserPool := fastjson.ParserPool{}
+
+	for i := 0; i < b.N; i++ {
+		filter(`{"name":"acme.example.com","value":"1.1.1.1","type":"a"}`, &types, &domains, &exclusions, &jsonParserPool)
+	}
+}
+
+func BenchmarkFilterForDefinitiveMismatch(b *testing.B) {
+	domains := []string{"somethingelse.com"}
+	exclusions := []string{}
+	types := []string{}
+	jsonParserPool := fastjson.ParserPool{}
+
+	for i := 0; i < b.N; i++ {
+		filter(`{"name":"acme.example.com","value":"1.1.1.1","type":"a"}`, &types, &domains, &exclusions, &jsonParserPool)
+	}
+}
+
+func BenchmarkFilterForDomainMismatch(b *testing.B) {
+	domains := []string{"example.com"}
+	exclusions := []string{}
+	types := []string{"cname"}
+	jsonParserPool := fastjson.ParserPool{}
+
+	for i := 0; i < b.N; i++ {
+		filter(`{"name":"acme.com","value":"other.example.com","type":"cname"}`, &types, &domains, &exclusions, &jsonParserPool)
+	}
+}
+
+func BenchmarkFilterForTypeMismatch(b *testing.B) {
+	domains := []string{"example.com"}
+	exclusions := []string{}
+	types := []string{"cname"}
+	jsonParserPool := fastjson.ParserPool{}
+
+	for i := 0; i < b.N; i++ {
+		filter(`{"name":"acme.example.com","value":"1.1.1.1","type":"a"}`, &types, &domains, &exclusions, &jsonParserPool)
+	}
+}
+
+func BenchmarkFilterForExclusionMatch(b *testing.B) {
+	domains := []string{"example.com"}
+	exclusions := []string{"acme.example.com"}
+	types := []string{"a"}
+	jsonParserPool := fastjson.ParserPool{}
+
+	for i := 0; i < b.N; i++ {
+		filter(`{"name":"acme.example.com","value":"1.1.1.1","type":"a"}`, &types, &domains, &exclusions, &jsonParserPool)
+	}
+}
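
Note on the `fastjson` usage introduced above: the patch swaps `encoding/json` unmarshalling for a shared `fastjson.ParserPool`, because a single `fastjson.Parser` reuses its internal buffers across calls but must not be used from concurrent goroutines. The following is a minimal standalone sketch of that Get/Parse/Put pattern, illustrative only and not part of the patch:

```go
package main

import (
	"fmt"

	"github.com/valyala/fastjson"
)

func main() {
	// One pool shared by all workers; each goroutine borrows a parser,
	// extracts what it needs, and returns it so its buffers can be
	// reused by the next caller.
	var pool fastjson.ParserPool

	parser := pool.Get()
	value, err := parser.Parse(`{"name":"acme.example.com","value":"1.1.1.1","type":"a"}`)
	if err != nil {
		pool.Put(parser)
		panic(err)
	}

	// Copy the fields out before returning the parser: the parsed
	// Value is only valid until the parser is reused.
	name := string(value.GetStringBytes("name"))
	recordType := string(value.GetStringBytes("type"))
	pool.Put(parser)

	fmt.Println(name, recordType) // acme.example.com a
}
```

The `filter` function in the patch follows the same discipline: it copies `name`, `value`, and `type` out of the parsed value and puts the parser back into the pool before doing any further filtering.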