Add max file size support #20

Merged 7 commits on May 30, 2023
Changes from 2 commits
1 change: 1 addition & 0 deletions cmd/alterx/main.go
@@ -19,6 +19,7 @@ func main() {
Payloads: cliOpts.Payloads,
Limit: cliOpts.Limit,
Enrich: cliOpts.Enrich, // enrich payloads
MaxSize: cliOpts.MaxSize,
}

if cliOpts.PermutationConfig != "" {
15 changes: 11 additions & 4 deletions dedupe.go
@@ -1,6 +1,10 @@
package alterx

import "github.com/projectdiscovery/alterx/internal/dedupe"
import (
"github.com/projectdiscovery/alterx/internal/dedupe"
"github.com/syndtr/goleveldb/leveldb/filter"
"github.com/syndtr/goleveldb/leveldb/opt"
)

// MaxInMemoryDedupeSize (default : 100 MB)
var MaxInMemoryDedupeSize = 100 * 1024 * 1024
@@ -47,15 +51,18 @@ func (d *Dedupe) GetResults() <-chan string {

// NewDedupe returns a dedupe instance which removes all duplicates
// Note: If byteLen is not correct/specified, alterx may consume a lot of memory
func NewDedupe(ch <-chan string, byteLen int) *Dedupe {
func NewDedupe(ch <-chan string, byteLen, maxkeyLenInBytes int) *Dedupe {
d := &Dedupe{
receive: ch,
}
if byteLen <= MaxInMemoryDedupeSize {
d.backend = dedupe.NewMapBackend()
} else {
// gologger print a info message here
d.backend = dedupe.NewLevelDBBackend()
leveldbOpts := &opt.Options{
// BloomFilter reduces disk reads and improves key lookup performance
Filter: filter.NewBloomFilter(maxkeyLenInBytes * 8),
}
d.backend = dedupe.NewLevelDBBackend(leveldbOpts)
}
return d
}
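
With the new signature, the caller passes both the expected data volume (estimated permutation count multiplied by the maximum key length) and the key length itself; volumes above the 100 MB in-memory threshold fall back to the disk-backed LevelDB store, which is now opened with a bloom filter of maxkeyLenInBytes * 8 bits per key to cut disk reads during duplicate lookups. The sketch below illustrates that selection logic; chooseBackend and the sample numbers are illustrative only and not part of alterx.

```go
package main

import (
	"fmt"

	"github.com/syndtr/goleveldb/leveldb/filter"
	"github.com/syndtr/goleveldb/leveldb/opt"
)

// maxInMemoryDedupeSize mirrors alterx's default 100 MB threshold.
const maxInMemoryDedupeSize = 100 * 1024 * 1024

// chooseBackend is an illustrative helper (not part of alterx) showing the
// decision NewDedupe makes: estimatedBytes is the estimated permutation
// count multiplied by the maximum key length in bytes.
func chooseBackend(estimatedBytes, maxKeyLenInBytes int) string {
	if estimatedBytes <= maxInMemoryDedupeSize {
		return "in-memory map backend"
	}
	// Above the threshold, dedupe spills to LevelDB; the bloom filter is
	// sized at 8 bits per key byte, as in this PR, to reduce disk reads.
	opts := &opt.Options{Filter: filter.NewBloomFilter(maxKeyLenInBytes * 8)}
	_ = opts // would be passed to dedupe.NewLevelDBBackend(opts)
	return "disk-backed LevelDB backend with bloom filter"
}

func main() {
	// e.g. 10M estimated permutations with keys up to 40 bytes ≈ 400 MB > 100 MB
	fmt.Println(chooseBackend(10_000_000*40, 40))
}
```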
11 changes: 7 additions & 4 deletions internal/dedupe/leveldb.go
@@ -3,32 +3,34 @@ package dedupe
import (
"os"
"reflect"
"runtime/debug"
"unsafe"

"github.com/projectdiscovery/gologger"
"github.com/syndtr/goleveldb/leveldb"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/opt"
)

type LevelDBBackend struct {
storage *leveldb.DB
tempdir string
}

func NewLevelDBBackend() *LevelDBBackend {
func NewLevelDBBackend(leveldbOpts *opt.Options) *LevelDBBackend {
l := &LevelDBBackend{}
dbPath, err := os.MkdirTemp("", "nuclei-report-*")
dbPath, err := os.MkdirTemp("", "alterx-dedupe-*")
if err != nil {
gologger.Fatal().Msgf("failed to create temp dir for alterx dedupe got: %v", err)
}
l.tempdir = dbPath
l.storage, err = leveldb.OpenFile(dbPath, nil)
l.storage, err = leveldb.OpenFile(dbPath, leveldbOpts)
if err != nil {
if !errors.IsCorrupted(err) {
gologger.Fatal().Msgf("goleveldb: failed to open db got %v", err)
}
// If the metadata is corrupted, try to recover
l.storage, err = leveldb.RecoverFile(dbPath, nil)
l.storage, err = leveldb.RecoverFile(dbPath, leveldbOpts)
if err != nil {
gologger.Fatal().Msgf("goleveldb: corrupted db found, recovery failed got %v", err)
}
@@ -53,6 +55,7 @@ func (l *LevelDBBackend) Cleanup() {
if err := os.RemoveAll(l.tempdir); err != nil {
gologger.Error().Msgf("leveldb: cleanup got %v", err)
}
debug.FreeOSMemory()
}

// unsafeToBytes converts a string to byte slice and does it with
7 changes: 7 additions & 0 deletions internal/runner/runner.go
@@ -3,6 +3,7 @@ package runner
import (
"fmt"
"io"
"math"
"os"
"strings"

@@ -26,6 +27,7 @@ type Options struct {
Silent bool
Enrich bool
Limit int
MaxSize int
// internal/unexported fields
wordlists goflags.RuntimeMap
}
@@ -44,6 +46,7 @@ func ParseFlags() *Options {
flagSet.CreateGroup("output", "Output",
flagSet.BoolVarP(&opts.Estimate, "estimate", "es", false, "estimate permutation count without generating payloads"),
flagSet.StringVarP(&opts.Output, "output", "o", "", "output file to write altered subdomain list"),
flagSet.IntVar(&opts.MaxSize, "max-size", math.MaxInt, "Max export data size (altered subdomain list will be truncated)"),
flagSet.BoolVarP(&opts.Verbose, "verbose", "v", false, "display verbose output"),
flagSet.BoolVar(&opts.Silent, "silent", false, "display results only"),
flagSet.CallbackVar(printVersion, "version", "display alterx version"),
@@ -71,6 +74,10 @@ func ParseFlags() *Options {
}
}

if opts.MaxSize < 0 {
gologger.Fatal().Msgf("max-size cannot be negative")
}

if opts.Silent {
gologger.DefaultLogger.SetMaxLevel(levels.LevelSilent)
} else if opts.Verbose {
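
Defaulting -max-size to math.MaxInt makes an unset flag behave as "no limit", while negative values are rejected before any work starts. A simplified sketch of that behaviour using the standard library flag package (not the goflags API alterx actually uses) follows.

```go
package main

import (
	"flag"
	"fmt"
	"math"
	"os"
)

func main() {
	// math.MaxInt as the default means "no size limit" unless -max-size is set.
	maxSize := flag.Int("max-size", math.MaxInt, "max export data size in bytes")
	flag.Parse()

	if *maxSize < 0 {
		fmt.Fprintln(os.Stderr, "max-size cannot be negative")
		os.Exit(1)
	}
	fmt.Printf("output will be truncated after %d bytes\n", *maxSize)
}
```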
31 changes: 27 additions & 4 deletions mutator.go
@@ -37,6 +37,8 @@ type Options struct {
// Enrich when true alterx extracts extra possible words from input
// and adds them to default payloads word,number
Enrich bool
// MaxSize limits output data size
MaxSize int
}

// Mutator
@@ -93,14 +95,22 @@ func New(opts *Options) (*Mutator, error) {
// Execute calculates all permutations using input wordlist and patterns
// and writes them to a string channel
func (m *Mutator) Execute(ctx context.Context) <-chan string {
results := make(chan string, len(m.Options.Patterns))
var maxBytes int
var d *Dedupe
if DedupeResults {
count := m.EstimateCount()
maxBytes = count * m.maxkeyLenInBytes
d = NewDedupe(results, maxBytes, m.maxkeyLenInBytes)
}

results := make(chan string, len(m.Options.Patterns))
go func() {
defer func() {
close(results)
if d != nil && d.backend != nil {
d.backend.Cleanup()
}
}()
now := time.Now()
for _, v := range m.Inputs {
varMap := getSampleMap(v.GetMap(), m.Options.Payloads)
@@ -119,12 +129,10 @@ }
}
}
m.timeTaken = time.Since(now)
close(results)
}()

if DedupeResults {
// drain results
d := NewDedupe(results, maxBytes)
d.Drain()
return d.GetResults()
}
@@ -138,6 +146,7 @@ func (m *Mutator) ExecuteWithWriter(Writer io.Writer) error {
}
resChan := m.Execute(context.TODO())
m.payloadCount = 0
maxFileSize := m.Options.MaxSize
for {
value, ok := <-resChan
if !ok {
@@ -148,11 +157,25 @@ func (m *Mutator) ExecuteWithWriter(Writer io.Writer) error {
gologger.Info().Msgf("Generated %v permutations in %v", m.payloadCount, m.Time())
return nil
}
_, err := Writer.Write([]byte(value + "\n"))
outputData := []byte(value + "\n")
if len(outputData) > maxFileSize {
// truncate output data if it exceeds maxFileSize
outputData = outputData[:maxFileSize]
maxFileSize = 0
}

n, err := Writer.Write(outputData)
m.payloadCount++
if err != nil {
return err
}
// update maxFileSize limit after each write
maxFileSize -= n
if maxFileSize <= 0 {
gologger.Info().Msgf("MaxSize limit reached, truncating output")
gologger.Info().Msgf("Generated %v permutations in %v", m.payloadCount, m.Time())
return nil
}
}
}
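
The write loop keeps a running byte budget: each write shrinks the remaining allowance, and the final chunk is trimmed so the emitted data never exceeds MaxSize. Below is a self-contained sketch of the same bookkeeping against a bytes.Buffer; writeWithBudget and the sample hostnames are illustrative, not alterx API.

```go
package main

import (
	"bytes"
	"fmt"
)

// writeWithBudget mirrors the truncation loop in ExecuteWithWriter: lines are
// written until maxSize bytes have been emitted, and the final line is cut
// short so the output never exceeds the budget.
func writeWithBudget(w *bytes.Buffer, lines []string, maxSize int) int {
	written := 0
	for _, line := range lines {
		out := []byte(line + "\n")
		if len(out) > maxSize {
			// truncate the last chunk so the total stays within maxSize
			out = out[:maxSize]
			maxSize = 0
		}
		n, _ := w.Write(out) // bytes.Buffer writes never fail
		written += n
		// update the remaining budget after each write
		maxSize -= n
		if maxSize <= 0 {
			break
		}
	}
	return written
}

func main() {
	var buf bytes.Buffer
	total := writeWithBudget(&buf, []string{"api.example.com", "dev.example.com", "staging.example.com"}, 20)
	fmt.Printf("wrote %d bytes:\n%s", total, buf.String())
}
```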
