diff --git a/pkg/protocols/http/httputils/chain.go b/pkg/protocols/http/httputils/chain.go deleted file mode 100644 index 6ff9097922..0000000000 --- a/pkg/protocols/http/httputils/chain.go +++ /dev/null @@ -1,169 +0,0 @@ -package httputils - -import ( - "bytes" - "fmt" - "net/http" - "sync" - - protoUtil "github.com/projectdiscovery/nuclei/v3/pkg/protocols/utils" -) - -// use buffer pool for storing response body -// and reuse it for each request -var bufPool = sync.Pool{ - New: func() any { - // The Pool's New function should generally only return pointer - // types, since a pointer can be put into the return interface - // value without an allocation: - return new(bytes.Buffer) - }, -} - -// getBuffer returns a buffer from the pool -func getBuffer() *bytes.Buffer { - return bufPool.Get().(*bytes.Buffer) -} - -// putBuffer returns a buffer to the pool -func putBuffer(buf *bytes.Buffer) { - buf.Reset() - bufPool.Put(buf) -} - -// Performance Notes: -// do not use http.Response once we create ResponseChain from it -// as this reuses buffers and saves allocations and also drains response -// body automatically. -// In required cases it can be used but should never be used for anything -// related to response body. -// Bytes.Buffer returned by getters should not be used and are only meant for convinience -// purposes like .String() or .Bytes() calls. -// Remember to call Close() on ResponseChain once you are done with it. - -// ResponseChain is a response chain for a http request -// on every call to previous it returns the previous response -// if it was redirected. -type ResponseChain struct { - headers *bytes.Buffer - body *bytes.Buffer - fullResponse *bytes.Buffer - resp *http.Response - reloaded bool // if response was reloaded to its previous redirect -} - -// NewResponseChain creates a new response chain for a http request -// with a maximum body size. 
(if -1 stick to default 4MB) -func NewResponseChain(resp *http.Response, maxBody int64) *ResponseChain { - if _, ok := resp.Body.(protoUtil.LimitResponseBody); !ok { - resp.Body = protoUtil.NewLimitResponseBodyWithSize(resp.Body, maxBody) - } - return &ResponseChain{ - headers: getBuffer(), - body: getBuffer(), - fullResponse: getBuffer(), - resp: resp, - } -} - -// Response returns the current response in the chain -func (r *ResponseChain) Headers() *bytes.Buffer { - return r.headers -} - -// Body returns the current response body in the chain -func (r *ResponseChain) Body() *bytes.Buffer { - return r.body -} - -// FullResponse returns the current response in the chain -func (r *ResponseChain) FullResponse() *bytes.Buffer { - return r.fullResponse -} - -// previous updates response pointer to previous response -// if it was redirected and returns true else false -func (r *ResponseChain) Previous() bool { - if r.resp != nil && r.resp.Request != nil && r.resp.Request.Response != nil { - r.resp = r.resp.Request.Response - r.reloaded = true - return true - } - return false -} - -// Fill buffers -func (r *ResponseChain) Fill() error { - r.reset() - if r.resp == nil { - return fmt.Errorf("response is nil") - } - - // load headers - err := DumpResponseIntoBuffer(r.resp, false, r.headers) - if err != nil { - return fmt.Errorf("error dumping response headers: %s", err) - } - - if r.resp.StatusCode != http.StatusSwitchingProtocols && !r.reloaded { - // Note about reloaded: - // this is a known behaviour existing from earlier version - // when redirect is followed and operators are executed on all redirect chain - // body of those requests is not available since its already been redirected - // This is not a issue since redirect happens with empty body according to RFC - // but this may be required sometimes - // Solution: Manual redirect using dynamic matchers or hijack redirected responses - // at transport level at replace with bytes buffer and then use it - - // load 
body - err = readNNormalizeRespBody(r, r.body) - if err != nil { - return fmt.Errorf("error reading response body: %s", err) - } - - // response body should not be used anymore - // drain and close - DrainResponseBody(r.resp) - } - - // join headers and body - r.fullResponse.Write(r.headers.Bytes()) - r.fullResponse.Write(r.body.Bytes()) - return nil -} - -// Close the response chain and releases the buffers. -func (r *ResponseChain) Close() { - putBuffer(r.headers) - putBuffer(r.body) - putBuffer(r.fullResponse) - r.headers = nil - r.body = nil - r.fullResponse = nil -} - -// Has returns true if the response chain has a response -func (r *ResponseChain) Has() bool { - return r.resp != nil -} - -// Request is request of current response -func (r *ResponseChain) Request() *http.Request { - if r.resp == nil { - return nil - } - return r.resp.Request -} - -// Response is response of current response -func (r *ResponseChain) Response() *http.Response { - return r.resp -} - -// reset without releasing the buffers -// useful for redirect chain -func (r *ResponseChain) reset() { - r.headers.Reset() - r.body.Reset() - r.fullResponse.Reset() -} diff --git a/pkg/protocols/http/httputils/internal.go b/pkg/protocols/http/httputils/internal.go deleted file mode 100644 index 98f261328e..0000000000 --- a/pkg/protocols/http/httputils/internal.go +++ /dev/null @@ -1,47 +0,0 @@ -package httputils - -import ( - "bytes" - "errors" - "io" - "net/http" - "strings" -) - -// implementations copied from stdlib - -// errNoBody is a sentinel error value used by failureToReadBody so we -// can detect that the lack of body was intentional. -var errNoBody = errors.New("sentinel error value") - -// failureToReadBody is an io.ReadCloser that just returns errNoBody on -// Read. It's swapped in when we don't actually want to consume -// the body, but need a non-nil one, and want to distinguish the -// error from reading the dummy body. 
-type failureToReadBody struct{} - -func (failureToReadBody) Read([]byte) (int, error) { return 0, errNoBody } -func (failureToReadBody) Close() error { return nil } - -// emptyBody is an instance of empty reader. -var emptyBody = io.NopCloser(strings.NewReader("")) - -// drainBody reads all of b to memory and then returns two equivalent -// ReadClosers yielding the same bytes. -// -// It returns an error if the initial slurp of all bytes fails. It does not attempt -// to make the returned ReadClosers have identical error-matching behavior. -func drainBody(b io.ReadCloser) (r1, r2 io.ReadCloser, err error) { - if b == nil || b == http.NoBody { - // No copying needed. Preserve the magic sentinel meaning of NoBody. - return http.NoBody, http.NoBody, nil - } - var buf bytes.Buffer - if _, err = buf.ReadFrom(b); err != nil { - return nil, b, err - } - if err = b.Close(); err != nil { - return nil, b, err - } - return io.NopCloser(&buf), io.NopCloser(bytes.NewReader(buf.Bytes())), nil -} diff --git a/pkg/protocols/http/httputils/normalization.go b/pkg/protocols/http/httputils/normalization.go deleted file mode 100644 index 86af712404..0000000000 --- a/pkg/protocols/http/httputils/normalization.go +++ /dev/null @@ -1,88 +0,0 @@ -package httputils - -import ( - "bytes" - "compress/gzip" - "compress/zlib" - "fmt" - "io" - "net/http" - "strings" - - "github.com/pkg/errors" - "golang.org/x/text/encoding/simplifiedchinese" - "golang.org/x/text/transform" - - stringsutil "github.com/projectdiscovery/utils/strings" -) - -// readNNormalizeRespBody performs normalization on the http response object. -// and fills body buffer with actual response body. 
-func readNNormalizeRespBody(rc *ResponseChain, body *bytes.Buffer) (err error) { - response := rc.resp - if response == nil { - return fmt.Errorf("something went wrong response is nil") - } - // net/http doesn't automatically decompress the response body if an - // encoding has been specified by the user in the request so in case we have to - // manually do it. - - origBody := rc.resp.Body - if origBody == nil { - // skip normalization if body is nil - return nil - } - // wrap with decode if applicable - wrapped, err := wrapDecodeReader(response) - if err != nil { - wrapped = origBody - } - // read response body to buffer - _, err = body.ReadFrom(wrapped) - if err != nil { - if strings.Contains(err.Error(), "gzip: invalid header") { - // its invalid gzip but we will still use it from original body - _, err = body.ReadFrom(origBody) - if err != nil { - return errors.Wrap(err, "could not read response body after gzip error") - } - } - if stringsutil.ContainsAny(err.Error(), "unexpected EOF", "read: connection reset by peer", "user canceled") { - // keep partial body and continue (skip error) (add meta header in response for debugging) - if response.Header == nil { - response.Header = make(http.Header) - } - response.Header.Set("x-nuclei-ignore-error", err.Error()) - return nil - } - return errors.Wrap(err, "could not read response body") - } - return nil -} - -// wrapDecodeReader wraps a decompression reader around the response body if it's compressed -// using gzip or deflate. 
-func wrapDecodeReader(resp *http.Response) (rc io.ReadCloser, err error) { - switch resp.Header.Get("Content-Encoding") { - case "gzip": - rc, err = gzip.NewReader(resp.Body) - case "deflate": - rc, err = zlib.NewReader(resp.Body) - default: - rc = resp.Body - } - if err != nil { - return nil, err - } - // handle GBK encoding - if isContentTypeGbk(resp.Header.Get("Content-Type")) { - rc = io.NopCloser(transform.NewReader(rc, simplifiedchinese.GBK.NewDecoder())) - } - return rc, nil -} - -// isContentTypeGbk checks if the content-type header is gbk -func isContentTypeGbk(contentType string) bool { - contentType = strings.ToLower(contentType) - return stringsutil.ContainsAny(contentType, "gbk", "gb2312", "gb18030") -} diff --git a/pkg/protocols/http/httputils/response.go b/pkg/protocols/http/httputils/response.go deleted file mode 100644 index 5803e1271d..0000000000 --- a/pkg/protocols/http/httputils/response.go +++ /dev/null @@ -1,52 +0,0 @@ -package httputils - -import ( - "bytes" - "fmt" - "io" - "net/http" - - protocolutil "github.com/projectdiscovery/nuclei/v3/pkg/protocols/utils" -) - -// DumpResponseIntoBuffer dumps a http response without allocating a new buffer -// for the response body. -func DumpResponseIntoBuffer(resp *http.Response, body bool, buff *bytes.Buffer) (err error) { - if resp == nil { - return fmt.Errorf("response is nil") - } - save := resp.Body - savecl := resp.ContentLength - - if !body { - // For content length of zero. Make sure the body is an empty - // reader, instead of returning error through failureToReadBody{}. 
- if resp.ContentLength == 0 { - resp.Body = emptyBody - } else { - resp.Body = failureToReadBody{} - } - } else if resp.Body == nil { - resp.Body = emptyBody - } else { - save, resp.Body, err = drainBody(resp.Body) - if err != nil { - return err - } - } - err = resp.Write(buff) - if err == errNoBody { - err = nil - } - resp.Body = save - resp.ContentLength = savecl - return -} - -// DrainResponseBody drains the response body and closes it. -func DrainResponseBody(resp *http.Response) { - defer resp.Body.Close() - // don't reuse connection and just close if body length is more than 2 * MaxBodyRead - // to avoid DOS - _, _ = io.CopyN(io.Discard, resp.Body, 2*protocolutil.MaxBodyRead) -} diff --git a/pkg/protocols/http/request.go b/pkg/protocols/http/request.go index bcbd00670c..9bacd6d92e 100644 --- a/pkg/protocols/http/request.go +++ b/pkg/protocols/http/request.go @@ -34,11 +34,11 @@ import ( "github.com/projectdiscovery/nuclei/v3/pkg/protocols/http/httputils" "github.com/projectdiscovery/nuclei/v3/pkg/protocols/http/signer" "github.com/projectdiscovery/nuclei/v3/pkg/protocols/http/signerpool" - protocolutil "github.com/projectdiscovery/nuclei/v3/pkg/protocols/utils" templateTypes "github.com/projectdiscovery/nuclei/v3/pkg/templates/types" "github.com/projectdiscovery/nuclei/v3/pkg/types" "github.com/projectdiscovery/rawhttp" convUtil "github.com/projectdiscovery/utils/conversion" + httpUtils "github.com/projectdiscovery/utils/http" "github.com/projectdiscovery/utils/reader" sliceutil "github.com/projectdiscovery/utils/slice" stringsutil "github.com/projectdiscovery/utils/strings" @@ -49,6 +49,10 @@ const ( defaultMaxWorkers = 150 ) +var ( + MaxBodyRead = int64(1 << 22) // 4MB using shift operator +) + // Type returns the type of the protocol request func (request *Request) Type() templateTypes.ProtocolType { return templateTypes.HTTPProtocol @@ -753,18 +757,7 @@ func (request *Request) executeRequest(input *contextargs.Context, generatedRequ } } } - // define max 
body read limit - maxBodylimit := -1 // stick to default 4MB - if request.MaxSize > 0 { - maxBodylimit = request.MaxSize - } else if request.options.Options.ResponseReadSize != 0 { - maxBodylimit = request.options.Options.ResponseReadSize - } - // global wrap response body reader - if resp != nil && resp.Body != nil { - resp.Body = protocolutil.NewLimitResponseBodyWithSize(resp.Body, int64(maxBodylimit)) - } if err != nil { // rawhttp doesn't support draining response bodies. if resp != nil && resp.Body != nil && generatedRequest.rawRequest == nil && !generatedRequest.original.Pipeline { @@ -811,9 +804,18 @@ func (request *Request) executeRequest(input *contextargs.Context, generatedRequ duration := time.Since(timeStart) + // define max body read limit + maxBodylimit := MaxBodyRead // default 4MB + if request.MaxSize > 0 { + maxBodylimit = int64(request.MaxSize) + } + if request.options.Options.ResponseReadSize != 0 { + maxBodylimit = int64(request.options.Options.ResponseReadSize) + } + // respChain is http response chain that reads response body // efficiently by reusing buffers and does all decoding and optimizations - respChain := httputils.NewResponseChain(resp, int64(maxBodylimit)) + respChain := httpUtils.NewResponseChain(resp, maxBodylimit) defer respChain.Close() // reuse buffers // we only intend to log/save the final redirected response diff --git a/pkg/protocols/utils/reader.go b/pkg/protocols/utils/reader.go deleted file mode 100644 index e6ed52530b..0000000000 --- a/pkg/protocols/utils/reader.go +++ /dev/null @@ -1,43 +0,0 @@ -package utils - -import ( - "io" -) - -var ( - MaxBodyRead = int64(1 << 22) // 4MB using shift operator -) - -var _ io.ReadCloser = &LimitResponseBody{} - -type LimitResponseBody struct { - io.Reader - io.Closer -} - -// NewLimitResponseBody wraps response body with a limit reader. -// thus only allowing MaxBodyRead bytes to be read. 
i.e 4MB -func NewLimitResponseBody(body io.ReadCloser) io.ReadCloser { - return NewLimitResponseBodyWithSize(body, MaxBodyRead) -} - -// NewLimitResponseBody wraps response body with a limit reader. -// thus only allowing MaxBodyRead bytes to be read. i.e 4MB -func NewLimitResponseBodyWithSize(body io.ReadCloser, size int64) io.ReadCloser { - if body == nil { - return nil - } - if size == -1 { - // stick to default 4MB - size = MaxBodyRead - } - return &LimitResponseBody{ - Reader: io.LimitReader(body, size), - Closer: body, - } -} - -// LimitBodyRead limits the body read to MaxBodyRead bytes. -func LimitBodyRead(r io.Reader) ([]byte, error) { - return io.ReadAll(io.LimitReader(r, MaxBodyRead)) -}