Commit

Optionally follow redirects (default: true)
PaulWhitingS2 committed Sep 30, 2020
1 parent 6a5cd81 commit 36acb80
Showing 3 changed files with 16 additions and 3 deletions.
cmd/webanalyze/main.go: 5 additions & 2 deletions

@@ -26,6 +26,7 @@ var (
 	crawlCount int
 	searchSubdomain bool
 	silent bool
+	redirect bool
 )

 func init() {
@@ -38,6 +39,7 @@ func init() {
 	flag.IntVar(&crawlCount, "crawl", 0, "links to follow from the root page (default 0)")
 	flag.BoolVar(&searchSubdomain, "search", true, "searches all urls with same base domain (i.e. example.com and sub.example.com)")
 	flag.BoolVar(&silent, "silent", false, "avoid printing header (default false)")
+	flag.BoolVar(&redirect, "redirect", true, "follow http redirects (default true)")
 }

 func main() {
@@ -111,12 +113,12 @@ func main() {
 	go func() {

 		for host := range hosts {
-			job := webanalyze.NewOnlineJob(host, "", nil, crawlCount, searchSubdomain)
+			job := webanalyze.NewOnlineJob(host, "", nil, crawlCount, searchSubdomain, redirect)
 			result, links := wa.Process(job)

 			if searchSubdomain {
 				for _, v := range links {
-					crawlJob := webanalyze.NewOnlineJob(v, "", nil, 0, false)
+					crawlJob := webanalyze.NewOnlineJob(v, "", nil, 0, false, redirect)
 					result, _ := wa.Process(crawlJob)
 					output(result, wa, outWriter)
 				}
@@ -200,6 +202,7 @@ func printHeader() {
 	printOption("apps", apps)
 	printOption("crawl count", crawlCount)
 	printOption("search subdomains", searchSubdomain)
+	printOption("follow redirects", redirect)
 	fmt.Printf("\n")
 }

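The new -redirect option is an ordinary boolean flag that defaults to true, so redirect handling stays on unless a user passes -redirect=false. A minimal, stdlib-only sketch of that behaviour (not the project's own code):

```go
package main

import (
	"flag"
	"fmt"
)

func main() {
	// Mirrors the new flag in cmd/webanalyze/main.go: defaults to true,
	// disabled with -redirect=false on the command line.
	redirect := flag.Bool("redirect", true, "follow http redirects (default true)")
	flag.Parse()

	// In main.go this value is threaded into webanalyze.NewOnlineJob
	// as the new trailing argument.
	fmt.Println("follow redirects:", *redirect)
}
```
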
jobdesc.go: 4 additions & 1 deletion

@@ -20,6 +20,7 @@ type Job struct {
 	Crawl int
 	SearchSubdomain bool
 	forceNotDownload bool
+	followRedirect bool
 }

 // NewOfflineJob constructs a job out of the constituents of a
@@ -35,20 +36,22 @@ func NewOfflineJob(url, body string, headers map[string][]string) *Job {
 		Crawl: 0,
 		SearchSubdomain: false,
 		forceNotDownload: true,
+		followRedirect: false,
 	}
 }

 // NewOnlineJob constructs a job that may either have a URL only,
 // or a URL, Body and Headers. If it contains at least a URL and Body,
 // then webanalyzer will not re-download the data, but if a Body is
 // absent then downloading will be attempted.
-func NewOnlineJob(url, body string, headers map[string][]string, crawlCount int, searchSubdomain bool) *Job {
+func NewOnlineJob(url, body string, headers map[string][]string, crawlCount int, searchSubdomain bool, redirect bool) *Job {
 	return &Job{
 		URL: url,
 		Body: []byte(body),
 		Headers: headers,
 		Crawl: crawlCount,
 		SearchSubdomain: searchSubdomain,
 		forceNotDownload: false,
+		followRedirect: redirect,
 	}
 }

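Because NewOnlineJob gains a sixth parameter, existing callers have to pass an explicit redirect value. A rough usage sketch, assuming the upstream import path github.com/rverton/webanalyze (adjust for a fork):

```go
package main

import (
	"fmt"

	"github.com/rverton/webanalyze"
)

func main() {
	// Old form (no longer compiles after this commit):
	//   webanalyze.NewOnlineJob("https://example.com", "", nil, 0, false)
	// New form: the trailing bool controls whether redirect targets are
	// collected as additional links.
	job := webanalyze.NewOnlineJob("https://example.com", "", nil, 0, false, true)
	fmt.Println("analyzing:", job.URL)
}
```

Offline jobs built with NewOfflineJob keep followRedirect set to false, as the diff above shows.
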
webanalyze.go: 7 additions & 0 deletions

@@ -218,6 +218,13 @@ func (wa *WebAnalyzer) process(job *Job, appDefs *AppsDefinition) ([]Match, []st
 		body, err = ioutil.ReadAll(resp.Body)
 		if err == nil {
 			headers = resp.Header
+			if job.followRedirect {
+				for k, v := range resp.Header {
+					if k == "Location" {
+						links = append(links, v[0])
+					}
+				}
+			}
 			cookies = resp.Cookies()
 		}
 	}

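Within this commit the HTTP client's redirect policy is untouched; the new block only surfaces the response's Location header as an extra link to analyze when followRedirect is set. A stdlib-only sketch of equivalent extraction logic, using a hypothetical helper that is not part of webanalyze; resp.Header.Get("Location") matches the k == "Location" comparison because net/http canonicalizes header names:

```go
package main

import (
	"fmt"
	"net/http"
)

// redirectTarget is an illustrative helper mirroring the new block in
// process(): it returns the redirect target, if any, so the caller can
// append it to the list of links to analyze.
func redirectTarget(resp *http.Response) (string, bool) {
	// Equivalent to looping over resp.Header and checking k == "Location":
	// Get canonicalizes the key before the lookup.
	loc := resp.Header.Get("Location")
	return loc, loc != ""
}

func main() {
	resp := &http.Response{Header: http.Header{"Location": {"https://example.com/moved"}}}

	var links []string
	if target, ok := redirectTarget(resp); ok {
		links = append(links, target)
	}
	fmt.Println(links)
}
```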
