diff --git a/cmd/goscholar/main.go b/cmd/goscholar/main.go index c8a66db..ac83eac 100644 --- a/cmd/goscholar/main.go +++ b/cmd/goscholar/main.go @@ -16,9 +16,11 @@ func main() { Usage: goscholar search [--keywords=] [--author=] [--title=] - [--after=<year>] [--before=<year>] [--num=<num>] [--start=<start>] - goscholar find <cluster-id> + [--after=<year>] [--before=<year>] [--num=<num>] [--start=<start>] + [--user-agent=<user-agent>] + goscholar find <cluster-id> [--user-agent=<user-agent>] goscholar cite <cluster-id> [--after=<year>] [--before=<year>] [--num=<num>] [--start=<start>] + [--user-agent=<user-agent>] goscholar -h | --help goscholar --version @@ -109,6 +111,9 @@ func parseArgs(args map[string]interface{}) (q *goscholar.Query) { if args["--start"] != nil { start = args["--start"].(string) } + if args["--user-agent"] != nil { + goscholar.USER_AGENT = args["--user-agent"].(string) + } if num == "" { num = "10" // as default diff --git a/fetch.go b/fetch.go index ecfd170..4f7a981 100644 --- a/fetch.go +++ b/fetch.go @@ -4,6 +4,7 @@ import ( "errors" "github.com/PuerkitoBio/goquery" log "github.com/Sirupsen/logrus" + "net/http" "strings" ) @@ -11,7 +12,24 @@ import ( func Fetch(url string) (doc *goquery.Document, err error) { log.WithFields(log.Fields{"url": url}).Info("Fetch sends request") - doc, err = goquery.NewDocument(url) + // set request + req, err := http.NewRequest("GET", url, nil) + if err != nil { + log.WithFields(log.Fields{"url": url, "err": err}).Error("Failed to generate new request") + return nil, err + } + req.Header.Set("User-Agent", USER_AGENT) + + // send request and get response + client := http.DefaultClient + res, err := client.Do(req) + if err != nil { + log.WithFields(log.Fields{"url": url, "err": err}).Error("Failed to get response") + return nil, err + } + + // generate new Document + doc, err = goquery.NewDocumentFromResponse(res) log.WithFields(log.Fields{"doc.url": doc.Url}).Info("goquery.Document is generated") if err != nil { log.WithFields(log.Fields{"url": url, "err": err}).Error("Generating goquery.Documentation failed") diff --git a/property.go b/property.go index fa6360c..dc16634 100644 --- a/property.go +++ b/property.go @@ -18,3 +18,7 @@ const ( article_sidebar_selector = ".gs_md_wp > a" sidebar_text_selector = ".gs_ggsS" ) + +var ( + USER_AGENT = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)" +)