Skip to content

Commit

Permalink
Merge branch 'Stage2Sec-master'
Browse files Browse the repository at this point in the history
  • Loading branch information
rverton committed Nov 29, 2020
2 parents 19890c3 + 5e3c0f1 commit 3d7e35e
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 33 deletions.
9 changes: 6 additions & 3 deletions cmd/webanalyze/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ var (
hosts string
crawlCount int
searchSubdomain bool
silent bool
silent bool
redirect bool
)

func init() {
Expand All @@ -38,6 +39,7 @@ func init() {
flag.IntVar(&crawlCount, "crawl", 0, "links to follow from the root page (default 0)")
flag.BoolVar(&searchSubdomain, "search", true, "searches all urls with same base domain (i.e. example.com and sub.example.com)")
flag.BoolVar(&silent, "silent", false, "avoid printing header (default false)")
flag.BoolVar(&redirect, "redirect", false, "follow http redirects (default false)")
}

func main() {
Expand Down Expand Up @@ -111,12 +113,12 @@ func main() {
go func() {

for host := range hosts {
job := webanalyze.NewOnlineJob(host, "", nil, crawlCount, searchSubdomain)
job := webanalyze.NewOnlineJob(host, "", nil, crawlCount, searchSubdomain, redirect)
result, links := wa.Process(job)

if searchSubdomain {
for _, v := range links {
crawlJob := webanalyze.NewOnlineJob(v, "", nil, 0, false)
crawlJob := webanalyze.NewOnlineJob(v, "", nil, 0, false, redirect)
result, _ := wa.Process(crawlJob)
output(result, wa, outWriter)
}
Expand Down Expand Up @@ -200,6 +202,7 @@ func printHeader() {
printOption("apps", apps)
printOption("crawl count", crawlCount)
printOption("search subdomains", searchSubdomain)
printOption("follow redirects", redirect)
fmt.Printf("\n")
}

Expand Down
5 changes: 4 additions & 1 deletion jobdesc.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ type Job struct {
Crawl int
SearchSubdomain bool
forceNotDownload bool
followRedirect bool
}

// NewOfflineJob constructs a job out of the constituents of a
Expand All @@ -35,20 +36,22 @@ func NewOfflineJob(url, body string, headers map[string][]string) *Job {
Crawl: 0,
SearchSubdomain: false,
forceNotDownload: true,
followRedirect: false,
}
}

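// NewOnlineJob constructs a job that may either have a URL only,
// or a URL, Body and Headers. If it contains at least a URL and Body,
// then webanalyzer will not re-download the data, but if a Body is
// absent then downloading will be attempted. The redirect argument is
// stored on the job as followRedirect; when it is set, a Location
// response header that passes link resolution is added to the links
// returned from processing.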
func NewOnlineJob(url, body string, headers map[string][]string, crawlCount int, searchSubdomain bool) *Job {
func NewOnlineJob(url, body string, headers map[string][]string, crawlCount int, searchSubdomain bool, redirect bool) *Job {
return &Job{
URL: url,
Body: []byte(body),
Headers: headers,
Crawl: crawlCount,
SearchSubdomain: searchSubdomain,
forceNotDownload: false,
followRedirect: redirect,
}
}
75 changes: 46 additions & 29 deletions webanalyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,45 +152,51 @@ func sameUrl(u1, u2 *url.URL) bool {
u1.RequestURI() == u2.RequestURI()
}

func parseLinks(doc *goquery.Document, base *url.URL, searchSubdomain bool) []string {
var links []string
func resolveLink( base *url.URL, val string, searchSubdomain bool ) string {
u, err := url.Parse(val)
if err != nil {
return ""
}

doc.Find("a").Each(func(i int, s *goquery.Selection) {
val, ok := s.Attr("href")
if !ok {
return
}
urlResolved := base.ResolveReference(u)

u, err := url.Parse(val)
if err != nil {
return
}
if !searchSubdomain && urlResolved.Hostname() != base.Hostname() {
return ""
}

urlResolved := base.ResolveReference(u)
if searchSubdomain && !isSubdomain(base, u) {
return ""
}

if !searchSubdomain && urlResolved.Hostname() != base.Hostname() {
return
}
if urlResolved.RequestURI() == "" {
urlResolved.Path = "/"
}

if searchSubdomain && !isSubdomain(base, u) {
return
}
if sameUrl(base, urlResolved) {
return ""
}

if urlResolved.RequestURI() == "" {
urlResolved.Path = "/"
}
// only allow http/https
if urlResolved.Scheme != "http" && urlResolved.Scheme != "https" {
return ""
}

if sameUrl(base, urlResolved) {
return
}
return urlResolved.String()
}

func parseLinks(doc *goquery.Document, base *url.URL, searchSubdomain bool) []string {
var links []string

// only allow http/https
if urlResolved.Scheme != "http" && urlResolved.Scheme != "https" {
doc.Find("a").Each(func(i int, s *goquery.Selection) {
val, ok := s.Attr("href")
if !ok {
return
}

links = append(links, urlResolved.String())

u := resolveLink(base, val, searchSubdomain)
if u != "" {
links = append(links, u)
}
})

return unique(links)
Expand Down Expand Up @@ -219,14 +225,25 @@ func (wa *WebAnalyzer) process(job *Job, appDefs *AppsDefinition) ([]Match, []st
} else {
resp, err := fetchHost(job.URL, wa.client)
if err != nil {
return nil, links, fmt.Errorf("Failed to retrieve: %v", err)
return nil, links, fmt.Errorf("Failed to retrieve: %w", err)
}

defer resp.Body.Close()

body, err = ioutil.ReadAll(resp.Body)
if err == nil {
headers = resp.Header
if job.followRedirect {
for k, v := range resp.Header {
if k == "Location" {
base, _ := url.Parse(job.URL)
u := resolveLink(base, v[0], job.SearchSubdomain)
if u != "" {
links = append(links, v[0])
}
}
}
}
cookies = resp.Cookies()
}
}
Expand Down

0 comments on commit 3d7e35e

Please sign in to comment.