Skip to content

Commit

Permalink
Merge pull request #339 from projectdiscovery/http-resp-chain
Browse files Browse the repository at this point in the history
add ResponseChain & deprecate GetChain
  • Loading branch information
tarunKoyalwar committed Feb 20, 2024
2 parents 05d6a16 + 1bd72bb commit 788663f
Show file tree
Hide file tree
Showing 7 changed files with 358 additions and 10 deletions.
3 changes: 2 additions & 1 deletion http/chain.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
)

// ChainItem request=>response
// Deprecated: use ResponseChain instead which is more efficient and lazy
type ChainItem struct {
Request []byte
Response []byte
Expand All @@ -15,6 +16,7 @@ type ChainItem struct {
}

// GetChain if redirects
// Deprecated: use ResponseChain instead which is more efficient and lazy
func GetChain(r *http.Response) (chain []ChainItem, err error) {
lastresp := r
for lastresp != nil {
Expand All @@ -40,6 +42,5 @@ func GetChain(r *http.Response) (chain []ChainItem, err error) {
for i, j := 0, len(chain)-1; i < j; i, j = i+1, j-1 {
chain[i], chain[j] = chain[j], chain[i]
}

return
}
1 change: 0 additions & 1 deletion http/chain_test.go

This file was deleted.

8 changes: 0 additions & 8 deletions http/httputil.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,3 @@ func DumpResponseHeadersAndRaw(resp *http.Response) (headers, fullresp []byte, e
resp.Body = io.NopCloser(bytes.NewReader(buf1.Bytes()))
return
}

// DrainResponseBody drains and closes the response body
func DrainResponseBody(resp *http.Response) {
if resp.Body != http.NoBody && resp.Body != nil {
_, _ = io.Copy(io.Discard, resp.Body)
resp.Body.Close()
}
}
47 changes: 47 additions & 0 deletions http/internal.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package httputil

import (
"bytes"
"errors"
"io"
"net/http"
"strings"
)

// implementations copied from stdlib

// errNoBody is a sentinel error value used by failureToReadBody so we
// can detect that the lack of body was intentional.
var errNoBody = errors.New("sentinel error value")

// failureToReadBody is an io.ReadCloser that just returns errNoBody on
// Read. It's swapped in when we don't actually want to consume
// the body, but need a non-nil one, and want to distinguish the
// error from reading the dummy body.
type failureToReadBody struct{}

func (failureToReadBody) Read([]byte) (int, error) { return 0, errNoBody }
func (failureToReadBody) Close() error { return nil }

// emptyBody is an instance of empty reader.
var emptyBody = io.NopCloser(strings.NewReader(""))

// drainBody reads all of b to memory and then returns two equivalent
// ReadClosers yielding the same bytes.
//
// It returns an error if the initial slurp of all bytes fails. It does not attempt
// to make the returned ReadClosers have identical error-matching behavior.
func drainBody(b io.ReadCloser) (r1, r2 io.ReadCloser, err error) {
if b == nil || b == http.NoBody {
// No copying needed. Preserve the magic sentinel meaning of NoBody.
return http.NoBody, http.NoBody, nil
}
var buf bytes.Buffer
if _, err = buf.ReadFrom(b); err != nil {
return nil, b, err
}
if err = b.Close(); err != nil {
return nil, b, err
}
return io.NopCloser(&buf), io.NopCloser(bytes.NewReader(buf.Bytes())), nil
}
88 changes: 88 additions & 0 deletions http/normalization.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package httputil

import (
"bytes"
"compress/gzip"
"compress/zlib"
"fmt"
"io"
"net/http"
"strings"

"github.com/pkg/errors"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/transform"

stringsutil "github.com/projectdiscovery/utils/strings"
)

// readNNormalizeRespBody performs normalization on the http response object.
// and fills body buffer with actual response body.
func readNNormalizeRespBody(rc *ResponseChain, body *bytes.Buffer) (err error) {
response := rc.resp
if response == nil {
return fmt.Errorf("something went wrong response is nil")
}
// net/http doesn't automatically decompress the response body if an
// encoding has been specified by the user in the request so in case we have to
// manually do it.

origBody := rc.resp.Body
if origBody == nil {
// skip normalization if body is nil
return nil
}
// wrap with decode if applicable
wrapped, err := wrapDecodeReader(response)
if err != nil {
wrapped = origBody
}
// read response body to buffer
_, err = body.ReadFrom(wrapped)
if err != nil {
if strings.Contains(err.Error(), "gzip: invalid header") {
// its invalid gzip but we will still use it from original body
_, err = body.ReadFrom(origBody)
if err != nil {
return errors.Wrap(err, "could not read response body after gzip error")
}
}
if stringsutil.ContainsAny(err.Error(), "unexpected EOF", "read: connection reset by peer", "user canceled") {
// keep partial body and continue (skip error) (add meta header in response for debugging)
if response.Header == nil {
response.Header = make(http.Header)
}
response.Header.Set("x-nuclei-ignore-error", err.Error())
return nil
}
return errors.Wrap(err, "could not read response body")
}
return nil
}

// wrapDecodeReader wraps a decompression reader around the response body if it's compressed
// using gzip or deflate.
func wrapDecodeReader(resp *http.Response) (rc io.ReadCloser, err error) {
switch resp.Header.Get("Content-Encoding") {
case "gzip":
rc, err = gzip.NewReader(resp.Body)
case "deflate":
rc, err = zlib.NewReader(resp.Body)
default:
rc = resp.Body
}
if err != nil {
return nil, err
}
// handle GBK encoding
if isContentTypeGbk(resp.Header.Get("Content-Type")) {
rc = io.NopCloser(transform.NewReader(rc, simplifiedchinese.GBK.NewDecoder()))
}
return rc, nil
}

// isContentTypeGbk checks if the content-type header is gbk
func isContentTypeGbk(contentType string) bool {
contentType = strings.ToLower(contentType)
return stringsutil.ContainsAny(contentType, "gbk", "gb2312", "gb18030")
}
167 changes: 167 additions & 0 deletions http/respChain.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
package httputil

import (
"bytes"
"fmt"
"net/http"
"sync"
)

// use buffer pool for storing response body
// and reuse it for each request
var bufPool = sync.Pool{
New: func() any {
// The Pool's New function should generally only return pointer
// types, since a pointer can be put into the return interface
// value without an allocation:
return new(bytes.Buffer)
},
}

// getBuffer returns a buffer from the pool
func getBuffer() *bytes.Buffer {
return bufPool.Get().(*bytes.Buffer)
}

// putBuffer returns a buffer to the pool
func putBuffer(buf *bytes.Buffer) {
buf.Reset()
bufPool.Put(buf)
}

// Performance Notes:
// do not use http.Response once we create ResponseChain from it
// as this reuses buffers and saves allocations and also drains response
// body automatically.
// In required cases it can be used but should never be used for anything
// related to response body.
// Bytes.Buffer returned by getters should not be used and are only meant for convinience
// purposes like .String() or .Bytes() calls.
// Remember to call Close() on ResponseChain once you are done with it.

// ResponseChain is a response chain for a http request
// on every call to previous it returns the previous response
// if it was redirected.
type ResponseChain struct {
headers *bytes.Buffer
body *bytes.Buffer
fullResponse *bytes.Buffer
resp *http.Response
reloaded bool // if response was reloaded to its previous redirect
}

// NewResponseChain creates a new response chain for a http request
// with a maximum body size. (if -1 stick to default 4MB)
func NewResponseChain(resp *http.Response, maxBody int64) *ResponseChain {
if maxBody > 0 && resp.Body != nil {
resp.Body = http.MaxBytesReader(nil, resp.Body, maxBody)
}
return &ResponseChain{
headers: getBuffer(),
body: getBuffer(),
fullResponse: getBuffer(),
resp: resp,
}
}

// Response returns the current response in the chain
func (r *ResponseChain) Headers() *bytes.Buffer {
return r.headers
}

// Body returns the current response body in the chain
func (r *ResponseChain) Body() *bytes.Buffer {
return r.body
}

// FullResponse returns the current response in the chain
func (r *ResponseChain) FullResponse() *bytes.Buffer {
return r.fullResponse
}

// previous updates response pointer to previous response
// if it was redirected and returns true else false
func (r *ResponseChain) Previous() bool {
if r.resp != nil && r.resp.Request != nil && r.resp.Request.Response != nil {
r.resp = r.resp.Request.Response
r.reloaded = true
return true
}
return false
}

// Fill buffers
func (r *ResponseChain) Fill() error {
r.reset()
if r.resp == nil {
return fmt.Errorf("response is nil")
}

// load headers
err := DumpResponseIntoBuffer(r.resp, false, r.headers)
if err != nil {
return fmt.Errorf("error dumping response headers: %s", err)
}

if r.resp.StatusCode != http.StatusSwitchingProtocols && !r.reloaded {
// Note about reloaded:
// this is a known behaviour existing from earlier version
// when redirect is followed and operators are executed on all redirect chain
// body of those requests is not available since its already been redirected
// This is not a issue since redirect happens with empty body according to RFC
// but this may be required sometimes
// Solution: Manual redirect using dynamic matchers or hijack redirected responses
// at transport level at replace with bytes buffer and then use it

// load body
err = readNNormalizeRespBody(r, r.body)
if err != nil {
return fmt.Errorf("error reading response body: %s", err)
}

// response body should not be used anymore
// drain and close
DrainResponseBody(r.resp)
}

// join headers and body
r.fullResponse.Write(r.headers.Bytes())
r.fullResponse.Write(r.body.Bytes())
return nil
}

// Close the response chain and releases the buffers.
func (r *ResponseChain) Close() {
putBuffer(r.headers)
putBuffer(r.body)
putBuffer(r.fullResponse)
r.headers = nil
r.body = nil
r.fullResponse = nil
}

// Has returns true if the response chain has a response
func (r *ResponseChain) Has() bool {
return r.resp != nil
}

// Request is request of current response
func (r *ResponseChain) Request() *http.Request {
if r.resp == nil {
return nil
}
return r.resp.Request
}

// Response is response of current response
func (r *ResponseChain) Response() *http.Response {
return r.resp
}

// reset without releasing the buffers
// useful for redirect chain
func (r *ResponseChain) reset() {
r.headers.Reset()
r.body.Reset()
r.fullResponse.Reset()
}
54 changes: 54 additions & 0 deletions http/response.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package httputil

import (
"bytes"
"fmt"
"io"
"net/http"
)

var (
MaxBodyRead = int64(4 << 20) // 4MB
)

// DumpResponseIntoBuffer dumps a http response without allocating a new buffer
// for the response body.
func DumpResponseIntoBuffer(resp *http.Response, body bool, buff *bytes.Buffer) (err error) {
if resp == nil {
return fmt.Errorf("response is nil")
}
save := resp.Body
savecl := resp.ContentLength

if !body {
// For content length of zero. Make sure the body is an empty
// reader, instead of returning error through failureToReadBody{}.
if resp.ContentLength == 0 {
resp.Body = emptyBody
} else {
resp.Body = failureToReadBody{}
}
} else if resp.Body == nil {
resp.Body = emptyBody
} else {
save, resp.Body, err = drainBody(resp.Body)
if err != nil {
return err
}
}
err = resp.Write(buff)
if err == errNoBody {
err = nil
}
resp.Body = save
resp.ContentLength = savecl
return
}

// DrainResponseBody drains the response body and closes it.
func DrainResponseBody(resp *http.Response) {
defer resp.Body.Close()
// don't reuse connection and just close if body length is more than 2 * MaxBodyRead
// to avoid DOS
_, _ = io.CopyN(io.Discard, resp.Body, 2*MaxBodyRead)
}

0 comments on commit 788663f

Please sign in to comment.