Skip to content

Commit

Permalink
Merge pull request #100 from lavafroth/refactor
Browse files Browse the repository at this point in the history
Refactor to DRY (Don't Repeat Yourself) up code
  • Loading branch information
lc committed Jul 17, 2023
2 parents e75ad3d + 6ad0592 commit 6ce890f
Show file tree
Hide file tree
Showing 12 changed files with 77 additions and 122 deletions.
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
module github.com/lc/gau/v2

go 1.17
go 1.20

require (
github.com/bobesa/go-domain-util v0.0.0-20190911083921-4033b5f7dd89
github.com/deckarep/golang-set/v2 v2.3.0
github.com/json-iterator/go v1.1.12
github.com/lynxsecurity/pflag v1.1.3
github.com/lynxsecurity/viper v1.10.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ github.com/bobesa/go-domain-util v0.0.0-20190911083921-4033b5f7dd89/go.mod h1:/0
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/deckarep/golang-set/v2 v2.3.0 h1:qs18EKUfHm2X9fA50Mr/M5hccg2tNnVqsiBImnyDs0g=
github.com/deckarep/golang-set/v2 v2.3.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4=
github.com/fsnotify/fsnotify v1.5.1 h1:mZcQUHVQUQWoPXXtuf9yuEXKudkV2sx1E06UadKWpgI=
github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU=
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
Expand Down
3 changes: 1 addition & 2 deletions pkg/httpclient/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,9 @@ func MakeRequest(c *fasthttp.Client, url string, maxRetries uint, timeout uint,
req.SetRequestURI(url)
respBody, err = doReq(c, req, timeout)
if err == nil {
goto done
break
}
}
done:
if err != nil {
return nil, err
}
Expand Down
60 changes: 19 additions & 41 deletions pkg/output/output.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package output

import (
mapset "github.com/deckarep/golang-set/v2"
jsoniter "github.com/json-iterator/go"
"github.com/valyala/bytebufferpool"
"io"
Expand All @@ -13,40 +14,24 @@ type JSONResult struct {
Url string `json:"url"`
}

func WriteURLs(writer io.Writer, results <-chan string, blacklistMap map[string]struct{}, RemoveParameters bool) error {
lastURL := make(map[string]struct{})
func WriteURLs(writer io.Writer, results <-chan string, blacklistMap mapset.Set[string], RemoveParameters bool) error {
lastURL := mapset.NewThreadUnsafeSet[string]()
for result := range results {
buf := bytebufferpool.Get()
if len(blacklistMap) != 0 {
u, err := url.Parse(result)
if err != nil {
continue
}
base := strings.Split(path.Base(u.Path), ".")
ext := base[len(base)-1]
if ext != "" {
_, ok := blacklistMap[strings.ToLower(ext)]
if ok {
continue
}
}
u, err := url.Parse(result)
if err != nil {
continue
}
if RemoveParameters {
u, err := url.Parse(result)
if err != nil {
continue
}
if _, ok := lastURL[u.Host+u.Path]; ok {
continue
} else {
lastURL[u.Host+u.Path] = struct{}{} ;
}

if blacklistMap.Contains(strings.ToLower(path.Ext(u.Path))) {
continue
}
if RemoveParameters && !lastURL.Add(u.Host+u.Path) {
continue
}

buf.B = append(buf.B, []byte(result)...)
buf.B = append(buf.B, "\n"...)
_, err := writer.Write(buf.B)
_, err = writer.Write(buf.B)
if err != nil {
return err
}
Expand All @@ -55,23 +40,16 @@ func WriteURLs(writer io.Writer, results <-chan string, blacklistMap map[string]
return nil
}

func WriteURLsJSON(writer io.Writer, results <-chan string, blacklistMap map[string]struct{}, RemoveParameters bool) {
func WriteURLsJSON(writer io.Writer, results <-chan string, blacklistMap mapset.Set[string], RemoveParameters bool) {
var jr JSONResult
enc := jsoniter.NewEncoder(writer)
for result := range results {
if len(blacklistMap) != 0 {
u, err := url.Parse(result)
if err != nil {
continue
}
base := strings.Split(path.Base(u.Path), ".")
ext := base[len(base)-1]
if ext != "" {
_, ok := blacklistMap[strings.ToLower(ext)]
if ok {
continue
}
}
u, err := url.Parse(result)
if err != nil {
continue
}
if blacklistMap.Contains(strings.ToLower(path.Ext(u.Path))) {
continue
}
jr.Url = result
if err := enc.Encode(jr); err != nil {
Expand Down
27 changes: 9 additions & 18 deletions pkg/providers/commoncrawl/commoncrawl.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ type Client struct {
}

func New(c *providers.Config, filters providers.Filters) (*Client, error) {
client := &Client{config: c, filters: filters}
// Fetch the list of available CommonCrawl Api URLs.
resp, err := httpclient.MakeRequest(c.Client, "http://index.commoncrawl.org/collinfo.json", c.MaxRetries, c.Timeout)
if err != nil {
Expand All @@ -45,8 +44,7 @@ func New(c *providers.Config, filters providers.Filters) (*Client, error) {
return nil, errors.New("failed to grab latest commoncrawl index")
}

client.apiURL = r[0].API
return client, nil
return &Client{config: c, filters: filters, apiURL: r[0].API}, nil
}

func (c *Client) Name() string {
Expand All @@ -62,9 +60,7 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
}
// 0 pages means no results
if p.Pages == 0 {
if c.config.Verbose {
logrus.WithFields(logrus.Fields{"provider": Name}).Infof("no results for %s", domain)
}
logrus.WithFields(logrus.Fields{"provider": Name}).Infof("no results for %s", domain)
return nil
}

Expand All @@ -74,9 +70,7 @@ paginate:
case <-ctx.Done():
break paginate
default:
if c.config.Verbose {
logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain)
}
logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain)
apiURL := c.formatURL(domain, page)
resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout)
if err != nil {
Expand Down Expand Up @@ -111,18 +105,15 @@ func (c *Client) formatURL(domain string, page uint) string {
}

// Fetch the number of pages.
func (c *Client) getPagination(domain string) (paginationResult, error) {
func (c *Client) getPagination(domain string) (r paginationResult, err error) {
url := fmt.Sprintf("%s&showNumPages=true", c.formatURL(domain, 0))
var resp []byte

resp, err := httpclient.MakeRequest(c.config.Client, url, c.config.MaxRetries, c.config.Timeout)
resp, err = httpclient.MakeRequest(c.config.Client, url, c.config.MaxRetries, c.config.Timeout)
if err != nil {
return paginationResult{}, err
}

var r paginationResult
if err = jsoniter.Unmarshal(resp, &r); err != nil {
return r, err
return
}

return r, nil
err = jsoniter.Unmarshal(resp, &r)
return
}
28 changes: 12 additions & 16 deletions pkg/providers/otx/otx.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,12 @@ func (c *Client) Name() string {

func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
paginate:
for page := 1; ; page++ {
for page := uint(1); ; page++ {
select {
case <-ctx.Done():
break paginate
default:
if c.config.Verbose {
logrus.WithFields(logrus.Fields{"provider": Name, "page": page - 1}).Infof("fetching %s", domain)
}
logrus.WithFields(logrus.Fields{"provider": Name, "page": page - 1}).Infof("fetching %s", domain)
apiURL := c.formatURL(domain, page)
resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout)
if err != nil {
Expand All @@ -77,20 +75,18 @@ paginate:
return nil
}

func (c *Client) formatURL(domain string, page int) string {
func (c *Client) formatURL(domain string, page uint) string {
category := "hostname"
if !domainutil.HasSubdomain(domain) {
return fmt.Sprintf(_BaseURL+"api/v1/indicators/domain/%s/url_list?limit=100&page=%d",
domain, page,
)
} else if domainutil.HasSubdomain(domain) && c.config.IncludeSubdomains {
return fmt.Sprintf(_BaseURL+"api/v1/indicators/domain/%s/url_list?limit=100&page=%d",
domainutil.Domain(domain), page,
)
} else {
return fmt.Sprintf(_BaseURL+"api/v1/indicators/hostname/%s/url_list?limit=100&page=%d",
domain, page,
)
category = "domain"
}
if domainutil.HasSubdomain(domain) && c.config.IncludeSubdomains {
domain = domainutil.Domain(domain)
category = "domain"
}

return fmt.Sprintf("%sapi/v1/indicators/%s/%s/url_list?limit=100&page=%d", _BaseURL, category, domain, page)

}

var _BaseURL = "https://otx.alienvault.com/"
Expand Down
4 changes: 2 additions & 2 deletions pkg/providers/providers.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package providers

import (
"context"
mapset "github.com/deckarep/golang-set/v2"
"github.com/valyala/fasthttp"
)

Expand All @@ -21,13 +22,12 @@ type URLScan struct {
type Config struct {
Threads uint
Timeout uint
Verbose bool
MaxRetries uint
IncludeSubdomains bool
RemoveParameters bool
Client *fasthttp.Client
Providers []string
Blacklist map[string]struct{}
Blacklist mapset.Set[string]
Output string
JSON bool
URLScan URLScan
Expand Down
11 changes: 4 additions & 7 deletions pkg/providers/urlscan/types.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package urlscan

import (
"reflect"
"strings"
)

Expand Down Expand Up @@ -29,12 +28,10 @@ type archivedPage struct {

func parseSort(sort []interface{}) string {
var sortParam []string
for i := 0; i < len(sort); i++ {
t := reflect.TypeOf(sort[i])
v := reflect.ValueOf(sort[i])
switch t.Kind() {
case reflect.String:
sortParam = append(sortParam, v.String())
for _, t := range sort {
switch t.(type) {
case string:
sortParam = append(sortParam, t.(string))
}
}
return strings.Join(sortParam, ",")
Expand Down
16 changes: 5 additions & 11 deletions pkg/providers/urlscan/urlscan.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,13 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
header.Value = c.config.URLScan.APIKey
}

page := 0
paginate:
for {
for page := uint(0); ; page++ {
select {
case <-ctx.Done():
break paginate
default:
if c.config.Verbose {
logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain)
}
logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain)
apiURL := c.formatURL(domain, searchAfter)
resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout, header)
if err != nil {
Expand All @@ -64,9 +61,7 @@ paginate:
}
// rate limited
if result.Status == 429 {
if c.config.Verbose {
logrus.WithField("provider", "urlscan").Warnf("urlscan responded with 429")
}
logrus.WithField("provider", "urlscan").Warnf("urlscan responded with 429, probably being rate limited")
break paginate
}

Expand All @@ -89,18 +84,17 @@ paginate:
if !result.HasMore {
break paginate
}
page++
}
}
return nil
}

func (c *Client) formatURL(domain string, after string) string {
if after != "" {
return fmt.Sprintf(_BaseURL+"api/v1/search/?q=domain:%s&size=100", domain) + "&search_after=" + after
after = "&search_after=" + after
}

return fmt.Sprintf(_BaseURL+"api/v1/search/?q=domain:%s&size=100", domain)
return fmt.Sprintf(_BaseURL+"api/v1/search/?q=domain:%s&size=100", domain) + after
}

func setBaseURL(baseURL string) {
Expand Down
20 changes: 7 additions & 13 deletions pkg/providers/wayback/wayback.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,8 @@ type Client struct {
config *providers.Config
}

func New(c *providers.Config, filters providers.Filters) *Client {
return &Client{
filters: filters,
config: c,
}
func New(config *providers.Config, filters providers.Filters) *Client {
return &Client{filters, config}
}

func (c *Client) Name() string {
Expand All @@ -43,14 +40,13 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
if err != nil {
return fmt.Errorf("failed to fetch wayback pagination: %s", err)
}

for page := uint(0); page < pages; page++ {
select {
case <-ctx.Done():
return nil
default:
if c.config.Verbose {
logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain)
}
logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain)
apiURL := c.formatURL(domain, page)
// make HTTP request
resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout)
Expand All @@ -70,11 +66,9 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
}

// output results
for i, entry := range result {
// Skip first result by default
if i != 0 {
results <- entry[0]
}
// Slicing as [1:] to skip first result by default
for _, entry := range result[1:] {
results <- entry[0]
}
}
}
Expand Down

0 comments on commit 6ce890f

Please sign in to comment.