Skip to content

Commit

Permalink
Merge pull request #900 from projectdiscovery/xpath-extractors
Browse files Browse the repository at this point in the history
Added xpath extractors for http requests
  • Loading branch information
ehsandeep committed Aug 9, 2021
2 parents 004472a + 9b7c13f commit af6a720
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 2 deletions.
3 changes: 2 additions & 1 deletion v2/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ go 1.15
require (
github.com/Knetic/govaluate v3.0.0+incompatible
github.com/andygrunwald/go-jira v1.13.0
github.com/antchfx/htmlquery v1.2.3
github.com/apex/log v1.9.0
github.com/blang/semver v3.5.1+incompatible
github.com/c4milo/unpackit v0.1.0 // indirect
Expand Down Expand Up @@ -54,7 +55,7 @@ require (
go.uber.org/atomic v1.7.0
go.uber.org/multierr v1.6.0
go.uber.org/ratelimit v0.2.0
golang.org/x/net v0.0.0-20210521195947-fe42d452be8f
golang.org/x/net v0.0.0-20210614182718-04defd469f4e
golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99
golang.org/x/time v0.0.0-20201208040808-7e3f01d25324 // indirect
google.golang.org/appengine v1.6.7 // indirect
Expand Down
9 changes: 8 additions & 1 deletion v2/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129 h1:MzBOUgng9or
github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129/go.mod h1:rFgpPQZYZ8vdbc+48xibu8ALc3yeyd64IhHS+PU6Yyg=
github.com/andygrunwald/go-jira v1.13.0 h1:vvIImGgX32bHfoiyUwkNo+/YrPnRczNarvhLOncP6dE=
github.com/andygrunwald/go-jira v1.13.0/go.mod h1:jYi4kFDbRPZTJdJOVJO4mpMMIwdB+rcZwSO58DzPd2I=
github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M=
github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
github.com/antchfx/xpath v1.1.6 h1:6sVh6hB5T6phw1pFpHRQ+C4bd8sNI+O58flqtg7h0R0=
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/apex/log v1.9.0 h1:FHtw/xuaM8AgmvDDTI9fiwoAL25Sq2cxojnZICUU8l0=
github.com/apex/log v1.9.0/go.mod h1:m82fZlWIuiWzWP04XCTXmnX0xRkYYbCdYn8jbJeLBEA=
github.com/apex/logs v1.0.0/go.mod h1:XzxuLZ5myVHDy9SAmYpamKKRNApGj54PfYLcFrXqDwo=
Expand Down Expand Up @@ -106,6 +110,7 @@ github.com/go-rod/rod v0.91.1/go.mod h1:/W4lcZiCALPD603MnJGIvhtywP3R6yRB9EDfFfsH
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
Expand Down Expand Up @@ -433,6 +438,7 @@ golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
Expand All @@ -442,8 +448,9 @@ golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81R
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210521195947-fe42d452be8f h1:Si4U+UcgJzya9kpiEUJKQvjr512OLli+gL4poHrz93U=
golang.org/x/net v0.0.0-20210521195947-fe42d452be8f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20181106182150-f42d05182288/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
Expand Down
33 changes: 33 additions & 0 deletions v2/pkg/operators/extractors/extract.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
package extractors

import (
"strings"

"encoding/json"

"github.com/antchfx/htmlquery"

"github.com/projectdiscovery/nuclei/v2/pkg/types"
)

Expand Down Expand Up @@ -45,6 +49,35 @@ func (e *Extractor) ExtractKval(data map[string]interface{}) map[string]struct{}
return results
}

// ExtractHTML extracts items from text using XPath selectors
func (e *Extractor) ExtractHTML(corpus string) map[string]struct{} {
results := make(map[string]struct{})

doc, err := htmlquery.Parse(strings.NewReader(corpus))
if err != nil {
return results
}
for _, k := range e.XPath {
nodes, err := htmlquery.QueryAll(doc, k)
if err != nil {
continue
}
for _, node := range nodes {
var value string

if e.Attribute != "" {
value = htmlquery.SelectAttr(node, e.Attribute)
} else {
value = htmlquery.InnerText(node)
}
if _, ok := results[value]; !ok {
results[value] = struct{}{}
}
}
}
return results
}

// ExtractJSON extracts text from a corpus using JQ queries and returns it
func (e *Extractor) ExtractJSON(corpus string) map[string]struct{} {
results := make(map[string]struct{})
Expand Down
7 changes: 7 additions & 0 deletions v2/pkg/operators/extractors/extractors.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ type Extractor struct {
// KVal are the kval to be present in the response headers/cookies
KVal []string `yaml:"kval,omitempty"`

// XPath are the Xpath selectors for the extractor
XPath []string `yaml:"xpath"`
// Attribute is an optional attribute to extract from response XPath
Attribute string `yaml:"attribute"`
// JSON are the json pattern required to be present in the response
JSON []string `yaml:"json"`
// jsonCompiled is the compiled variant
Expand All @@ -46,6 +50,8 @@ const (
RegexExtractor ExtractorType = iota + 1
// KValExtractor extracts responses with key:value
KValExtractor
// XPathExtractor extracts responses with Xpath selectors
XPathExtractor
// JSONExtractor extracts responses with json
JSONExtractor
)
Expand All @@ -54,6 +60,7 @@ const (
var ExtractorTypes = map[string]ExtractorType{
"regex": RegexExtractor,
"kval": KValExtractor,
"xpath": XPathExtractor,
"json": JSONExtractor,
}

Expand Down
2 changes: 2 additions & 0 deletions v2/pkg/protocols/http/operators.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ func (r *Request) Extract(data map[string]interface{}, extractor *extractors.Ext
return extractor.ExtractRegex(item)
case extractors.KValExtractor:
return extractor.ExtractKval(data)
case extractors.XPathExtractor:
return extractor.ExtractHTML(item)
case extractors.JSONExtractor:
return extractor.ExtractJSON(item)
}
Expand Down

0 comments on commit af6a720

Please sign in to comment.