Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: added enhancements to favicon + made API public #1774

Merged
merged 9 commits into from
Jun 24, 2024
8 changes: 4 additions & 4 deletions common/httpx/cdn.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@ import (
)

// CdnCheck verifies if the given ip is part of Cdn/WAF ranges
func (h *HTTPX) CdnCheck(ip string) (bool, string, error) {
func (h *HTTPX) CdnCheck(ip string) (bool, string, string, error) {
if h.cdn == nil {
return false, "", fmt.Errorf("cdn client not configured")
return false, "", "", fmt.Errorf("cdn client not configured")
}

// the goal is to check if ip is part of cdn/waf to decide if target should be scanned or not
// since the 'cloud' item type does not fit the logic here, we consider the target as not part of cdn/waf
matched, value, itemType, err := h.cdn.Check(net.ParseIP((ip)))
if itemType == "cloud" {
return false, "", err
return false, value, itemType, err
}
return matched, value, err
return matched, value, itemType, err
}
9 changes: 7 additions & 2 deletions common/httpx/httpx.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,13 @@ func New(options *Options) (*HTTPX, error) {

httpx.htmlPolicy = bluemonday.NewPolicy()
httpx.CustomHeaders = httpx.Options.CustomHeaders
if options.CdnCheck != "false" || options.ExcludeCdn {
httpx.cdn = cdncheck.New()

if options.CDNCheckClient != nil {
httpx.cdn = options.CDNCheckClient
} else {
if options.CdnCheck != "false" || options.ExcludeCdn {
httpx.cdn = cdncheck.New()
}
}

return httpx, nil
Expand Down
2 changes: 2 additions & 0 deletions common/httpx/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"strings"
"time"

"github.com/projectdiscovery/cdncheck"
"github.com/projectdiscovery/networkpolicy"
)

Expand Down Expand Up @@ -46,6 +47,7 @@ type Options struct {
SniName string
TlsImpersonate bool
NetworkPolicy *networkpolicy.NetworkPolicy
CDNCheckClient *cdncheck.Client
Protocol Proto
}

Expand Down
5 changes: 3 additions & 2 deletions runner/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,9 @@ type Options struct {
Protocol string

// Optional pre-created objects to reduce allocations
Wappalyzer *wappalyzer.Wappalyze
Networkpolicy *networkpolicy.NetworkPolicy
Wappalyzer *wappalyzer.Wappalyze
Networkpolicy *networkpolicy.NetworkPolicy
CDNCheckClient *cdncheck.Client
}

// ParseOptions parses the command line options for application
Expand Down
92 changes: 63 additions & 29 deletions runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ type Runner struct {
httpApiEndpoint *Server
}

func (r *Runner) HTTPX() *httpx.HTTPX {
return r.hp
}

// picked by trial and error, but it seems to be close to the value used in https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html#c1992
var hammingDistanceThreshold int = 22

Expand Down Expand Up @@ -133,6 +137,7 @@ func New(options *Options) (*Runner, error) {
return nil, err
}
httpxOptions.NetworkPolicy = np
httpxOptions.CDNCheckClient = options.CDNCheckClient

// Enables automatically tlsgrab if tlsprobe is requested
httpxOptions.TLSGrab = options.TLSGrab || options.TLSProbe
Expand Down Expand Up @@ -1895,7 +1900,7 @@ retry:
builder.WriteString(fmt.Sprintf(" [%s]", cnames[0]))
}

isCDN, cdnName, err := hp.CdnCheck(ip)
isCDN, cdnName, cdnType, err := hp.CdnCheck(ip)
if scanopts.OutputCDN == "true" && isCDN && err == nil {
builder.WriteString(fmt.Sprintf(" [%s]", cdnName))
}
Expand Down Expand Up @@ -1943,10 +1948,11 @@ retry:
builder.WriteRune(']')
}

var faviconMMH3, faviconPath string
var faviconMMH3, faviconPath, faviconURL string
var faviconData []byte
if scanopts.Favicon {
var err error
faviconMMH3, faviconPath, err = r.handleFaviconHash(hp, req, resp)
faviconMMH3, faviconPath, faviconData, faviconURL, err = r.HandleFaviconHash(hp, req, resp.Data, true)
if err == nil {
builder.WriteString(" [")
if !scanopts.OutputWithNoColor {
Expand Down Expand Up @@ -2188,11 +2194,13 @@ retry:
CNAMEs: cnames,
CDN: isCDN,
CDNName: cdnName,
CDNType: cdnType,
ResponseTime: resp.Duration.String(),
Technologies: technologies,
FinalURL: finalURL,
FavIconMMH3: faviconMMH3,
FaviconPath: faviconPath,
FaviconURL: faviconURL,
Hashes: hashesMap,
Extracts: extractResult,
Jarm: jarmhash,
Expand All @@ -2210,6 +2218,7 @@ retry:
Resolvers: resolvers,
RequestRaw: requestDump,
Response: resp,
FaviconData: faviconData,
}
if resp.BodyDomains != nil {
result.Fqdns = resp.BodyDomains.Fqdns
Expand Down Expand Up @@ -2248,48 +2257,69 @@ func calculatePerceptionHash(screenshotBytes []byte) (uint64, error) {
return pHash.GetHash(), nil
}

func (r *Runner) handleFaviconHash(hp *httpx.HTTPX, req *retryablehttp.Request, currentResp *httpx.Response) (string, string, error) {
func (r *Runner) HandleFaviconHash(hp *httpx.HTTPX, req *retryablehttp.Request, currentResp []byte, defaultProbe bool) (string, string, []byte, string, error) {
// Check if current URI is ending with .ico => use current body without additional requests
if path.Ext(req.URL.Path) == ".ico" {
hash, err := r.calculateFaviconHashWithRaw(currentResp.Data)
return hash, req.URL.Path, err
hash, err := r.calculateFaviconHashWithRaw(currentResp)
return hash, req.URL.Path, currentResp, "", err
}

// search in the response of the requested path for <link> element and rel shortcut/mask/apple-touch icon
// link with .ico extension (which will be prioritized if available)
// if not, any of link from other icons can be requested
potentialURLs, err := extractPotentialFavIconsURLs(currentResp)
if err != nil {
return "", "", err
return "", "", nil, "", err
}

faviconPath := "/favicon.ico"
// pick the first - we want only one request
if len(potentialURLs) > 0 {
URL, err := r.parseURL(potentialURLs[0])
clone := req.Clone(context.Background())

var faviconHash, faviconPath, faviconURL string
var faviconData []byte
errCount := 0
if len(potentialURLs) == 0 && defaultProbe {
potentialURLs = append(potentialURLs, "/favicon.ico")
}
// We only want up to two favicon requests: if the
// first one fails, we will try the second one
for _, potentialURL := range potentialURLs {
if errCount == 2 {
break
}
URL, err := r.parseURL(potentialURL)
if err != nil {
return "", "", err
continue
}
if URL.IsAbs() {
req.SetURL(URL)
req.Host = URL.Host
faviconPath = ""
clone.SetURL(URL)
clone.Host = URL.Host
potentialURL = ""
} else {
faviconPath = URL.String()
potentialURL = URL.String()
}
}
if faviconPath != "" {
err = req.URL.MergePath(faviconPath, false)

if potentialURL != "" {
err = clone.MergePath(potentialURL, false)
if err != nil {
continue
}
}
resp, err := hp.Do(clone, httpx.UnsafeOptions{})
if err != nil {
return "", "", errorutil.NewWithTag("favicon", "failed to add %v to url got %v", faviconPath, err)
errCount++
continue
}
hash, err := r.calculateFaviconHashWithRaw(resp.Data)
if err != nil {
continue
}
faviconURL = clone.URL.String()
faviconPath = potentialURL
faviconHash = hash
faviconData = resp.Data
break
}
resp, err := hp.Do(req, httpx.UnsafeOptions{})
if err != nil {
return "", "", errors.Wrap(err, "could not fetch favicon")
}
hash, err := r.calculateFaviconHashWithRaw(resp.Data)
return hash, req.URL.Path, err
return faviconHash, faviconPath, faviconData, faviconURL, nil
}

func (r *Runner) calculateFaviconHashWithRaw(data []byte) (string, error) {
Expand All @@ -2300,9 +2330,9 @@ func (r *Runner) calculateFaviconHashWithRaw(data []byte) (string, error) {
return fmt.Sprintf("%d", hashNum), nil
}

func extractPotentialFavIconsURLs(resp *httpx.Response) ([]string, error) {
func extractPotentialFavIconsURLs(resp []byte) ([]string, error) {
var potentialURLs []string
document, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Data))
document, err := goquery.NewDocumentFromReader(bytes.NewReader(resp))
if err != nil {
return nil, err
}
Expand All @@ -2314,6 +2344,10 @@ func extractPotentialFavIconsURLs(resp *httpx.Response) ([]string, error) {
potentialURLs = append(potentialURLs, href)
}
})
// Sort and prefer icon with .ico extension
sort.Slice(potentialURLs, func(i, j int) bool {
return !strings.HasSuffix(potentialURLs[i], ".ico")
})
return potentialURLs, nil
}

Expand Down Expand Up @@ -2412,7 +2446,7 @@ func (r *Runner) skipCDNPort(host string, port string) bool {
// pick the first ip as target
hostIP := dnsData.A[0]

isCdnIP, _, err := r.hp.CdnCheck(hostIP)
isCdnIP, _, _, err := r.hp.CdnCheck(hostIP)
if err != nil {
return false
}
Expand Down
3 changes: 3 additions & 0 deletions runner/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ type Result struct {
Hashes map[string]interface{} `json:"hash,omitempty" csv:"hash"`
ExtractRegex []string `json:"extract_regex,omitempty" csv:"extract_regex"`
CDNName string `json:"cdn_name,omitempty" csv:"cdn_name"`
CDNType string `json:"cdn_type,omitempty" csv:"cdn_type"`
SNI string `json:"sni,omitempty" csv:"sni"`
Port string `json:"port,omitempty" csv:"port"`
Raw string `json:"-" csv:"-"`
Expand All @@ -59,6 +60,7 @@ type Result struct {
Path string `json:"path,omitempty" csv:"path"`
FavIconMMH3 string `json:"favicon,omitempty" csv:"favicon"`
FaviconPath string `json:"favicon_path,omitempty" csv:"favicon_path"`
FaviconURL string `json:"favicon_url,omitempty" csv:"favicon_url"`
FinalURL string `json:"final_url,omitempty" csv:"final_url"`
ResponseHeaders map[string]interface{} `json:"header,omitempty" csv:"header"`
RawHeaders string `json:"raw_header,omitempty" csv:"raw_header"`
Expand Down Expand Up @@ -96,6 +98,7 @@ type Result struct {
TechnologyDetails map[string]wappalyzer.AppInfo `json:"-" csv:"-"`
RequestRaw []byte `json:"-" csv:"-"`
Response *httpx.Response `json:"-" csv:"-"`
FaviconData []byte `json:"-" csv:"-"`
}

// function to get dsl variables from result struct
Expand Down
Loading