Skip to content
This repository has been archived by the owner on Dec 4, 2022. It is now read-only.

Commit

Permalink
use fastjson for json decoding; add basic benchmarks
Browse files Browse the repository at this point in the history
Based on benchmarks of the old and the new approach, filtering is now up to 4x faster than before.
  • Loading branch information
nscuro committed Jul 25, 2020
1 parent 1758feb commit 6fdd393
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 76 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@

![Build Status](https://github.com/nscuro/fdnssearch/workflows/Continuous%20Integration/badge.svg?branch=master)

**Disclaimer**: You can do most of what *fdnssearch* does with [`bash`, `curl`, `pigz`, `jq` and GNU `parallel`](https://github.com/rapid7/sonar/wiki/Forward-DNS).
This is nothing revolutionary, I made this because I prefer simple commands over wonky shell scripts.
*Swiftly search [FDNS](https://github.com/rapid7/sonar/wiki/Forward-DNS) datasets from Rapid7 Open Data*

**Disclaimer**: You can do most of what *fdnssearch* does with [`bash`, `curl`, `pigz`, `jq` and GNU `parallel`](https://github.com/rapid7/sonar/wiki/Analyzing-Datasets). This is nothing revolutionary.

## Installation

`GO111MODULE=on go get -v github.com/nscuro/fdnssearch/cmd/fdnssearch`
`GO111MODULE=on go get -v github.com/nscuro/fdnssearch/...`

Alternatively, clone this repo and run `make install`. Make sure `$GOPATH/bin` is in your `$PATH`.

Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ require (
github.com/spf13/cobra v1.0.0
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/testify v1.6.1
github.com/valyala/fastjson v1.5.3
golang.org/x/net v0.0.0-20200707034311-ab3426394381 // indirect
gopkg.in/ini.v1 v1.57.0 // indirect
)
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
github.com/valyala/fastjson v1.5.3 h1:z4Z1Bll4WaXo+FXJoiCdW8ss7sKY2d/jYfE2ZzoT284=
github.com/valyala/fastjson v1.5.3/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
Expand Down
108 changes: 70 additions & 38 deletions internal/search/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,25 @@ package search
import (
"bufio"
"context"
"encoding/json"
"fmt"
"io"
"strings"
"sync"

"github.com/nscuro/fdnssearch/internal/dataset"
"github.com/panjf2000/ants"
"github.com/valyala/fastjson"
)

type searchWorkerContext struct {
chunk string
domains *[]string
exclusions *[]string
types *[]string
resultsChan chan<- dataset.Entry
errorsChan chan<- error
waitGroup *sync.WaitGroup
chunk string
domains *[]string
exclusions *[]string
types *[]string
jsonParserPool *fastjson.ParserPool
resultsChan chan<- dataset.Entry
errorsChan chan<- error
waitGroup *sync.WaitGroup
}

func searchWorker(workerCtx interface{}) {
Expand All @@ -32,69 +33,94 @@ func searchWorker(workerCtx interface{}) {
}
defer ctx.waitGroup.Done()

entry, err := filter(ctx.chunk, ctx.types, ctx.domains, ctx.exclusions, ctx.jsonParserPool)
if err != nil {
ctx.errorsChan <- err
return
} else if entry == nil {
return
}

ctx.resultsChan <- *entry
}

func filter(chunk string, types *[]string, domains *[]string, exclusions *[]string, jsonParserPool *fastjson.ParserPool) (*dataset.Entry, error) {
// prevent the necessity to decode entries that definitely
// do not match the given search criteria. decoding json appears
// to be drastically more computationally expensive than this.
// do not match the given search criteria. decoding json is
// drastically more computationally expensive than this simple
// loop.
possibleMatch := false
for _, domain := range *ctx.domains {
if strings.Contains(ctx.chunk, domain) {
for _, domain := range *domains {
if strings.Contains(chunk, domain) {
possibleMatch = true
break
}
}
if !possibleMatch {
return
return nil, nil
}

var entry dataset.Entry
if err := json.Unmarshal([]byte(ctx.chunk), &entry); err != nil {
ctx.errorsChan <- fmt.Errorf("failed to decode entry: %w", err)
return
jsonParser := jsonParserPool.Get()
parsedEntry, err := jsonParser.Parse(chunk)
if err != nil {
jsonParserPool.Put(jsonParser)
return nil, fmt.Errorf("failed to parse entry: %w", err)
}

// parse everything we need in advance so jsonParser can
// be put back into the pool as fast as possible
entryName := string(parsedEntry.GetStringBytes("name"))
entryValue := string(parsedEntry.GetStringBytes("value"))
entryType := string(parsedEntry.GetStringBytes("type"))
jsonParserPool.Put(jsonParser)

// filter by type
if len(*ctx.types) > 0 {
if len(*types) > 0 {
found := false
for _, ttype := range *ctx.types {
if entry.Type == ttype {
for _, ttype := range *types {
if entryType == ttype {
found = true
break
}
}
if !found {
return
return nil, nil
}
}

// filter by domain
if len(*ctx.domains) > 0 {
if len(*domains) > 0 {
found := false
for _, domain := range *ctx.domains {
if entry.Name == domain || strings.HasSuffix(entry.Name, "."+domain) {
for _, domain := range *domains {
if entryName == domain || strings.HasSuffix(entryName, "."+domain) {
found = true
break
}
}
if !found {
return
return nil, nil
}
}

// filter by exclusion
if len(*ctx.exclusions) > 0 {
if len(*exclusions) > 0 {
found := false
for _, exclusion := range *ctx.exclusions {
if entry.Name == exclusion || strings.HasSuffix(entry.Name, "."+exclusion) {
for _, exclusion := range *exclusions {
if entryName == exclusion || strings.HasSuffix(entryName, "."+exclusion) {
found = true
break
}
}
if found {
return
return nil, nil
}
}

ctx.resultsChan <- entry
return &dataset.Entry{
Name: entryName,
Type: entryType,
Value: entryValue,
}, nil
}

type Options struct {
Expand All @@ -105,7 +131,8 @@ type Options struct {
}

type Searcher struct {
workerCount int
workerCount int
jsonParserPool fastjson.ParserPool
}

func NewSearcher(workerCount int) *Searcher {
Expand Down Expand Up @@ -135,6 +162,10 @@ func (s Searcher) Search(ctx context.Context, options Options) (<-chan dataset.E
// wait group for search workers
waitGroup := sync.WaitGroup{}

// pool for fastjson.Parser to encourage reusing
// of instances without causing race conditions
jsonParserPool := fastjson.ParserPool{}

scanner := bufio.NewScanner(options.DatasetReader)
scanLoop:
for scanner.Scan() {
Expand All @@ -147,13 +178,14 @@ func (s Searcher) Search(ctx context.Context, options Options) (<-chan dataset.E

waitGroup.Add(1)
err = workerPool.Invoke(searchWorkerContext{
chunk: scanner.Text(),
domains: &options.Domains,
exclusions: &options.Exclusions,
types: &options.Types,
resultsChan: resultsChan,
errorsChan: errorsChan,
waitGroup: &waitGroup,
chunk: scanner.Text(),
domains: &options.Domains,
exclusions: &options.Exclusions,
types: &options.Types,
jsonParserPool: &jsonParserPool,
resultsChan: resultsChan,
errorsChan: errorsChan,
waitGroup: &waitGroup,
})
if err != nil {
errorsChan <- fmt.Errorf("failed to submit chunk to worker pool: %w", err)
Expand Down
Loading

0 comments on commit 6fdd393

Please sign in to comment.