Skip to content
This repository has been archived by the owner on May 30, 2021. It is now read-only.

Commit

Permalink
implement fetch and its test #72
Browse files Browse the repository at this point in the history
  • Loading branch information
sotetsuk committed May 7, 2016
1 parent 3376845 commit 7daeeb9
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 3 deletions.
29 changes: 27 additions & 2 deletions fetch.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,33 @@ package goscholar

import (
"github.com/PuerkitoBio/goquery"
log "github.com/Sirupsen/logrus"
"strings"
"errors"
)

func Fetch(url string) (doc *goquery.Document, err error) {
return nil, nil
// Fetech requests the page at url and parses it into a goquery.Document.
//
// It returns a nil document and a non-nil error when:
//   - goquery fails to generate the document (the underlying error is returned),
//   - the text of the first <h1> contains "robot" or "sorry" (see #61), or
//   - the document's URL contains "sorry" (see #55).
//
// Otherwise it returns the parsed document and a nil error.
func Fetech(url string) (doc *goquery.Document, err error) {
	log.WithFields(log.Fields{"url": url}).Info("Fetch sends request")

	doc, err = goquery.NewDocument(url)
	if err != nil {
		log.WithFields(log.Fields{"url": url, "err": err}).Error("Generating goquery.Documentation failed")
		return nil, err
	}
	// Log the document URL only after the error check: on failure doc is nil
	// and reading doc.Url would panic instead of surfacing the error.
	log.WithFields(log.Fields{"doc.url": doc.Url}).Info("goquery.Document is generated")

	// 1. check the "Please show you're not a robot" page. See #61
	// 2. check the "We're sorry..."
	if s := doc.Find("h1").First().Text(); strings.Contains(s, "robot") || strings.Contains(s, "sorry") {
		log.WithFields(log.Fields{"h1": s, "doc.Url": doc.Url}).Error("Robot check occurs")
		return nil, errors.New("Robot check occurs")
	}

	// check the "To continue, please type the characters below:". See #55
	if strings.Contains(doc.Url.String(), "sorry") {
		log.WithFields(log.Fields{"doc.Url": doc.Url}).Error("Request is rejected from Google")
		return nil, errors.New("Request is rejected from Google")
	}

	return doc, nil
}
16 changes: 15 additions & 1 deletion fetch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,18 @@ import (
"testing"
)

func TestFetch(t *testing.T) {}
// TestFetch fetches a fixed Google Scholar cluster page through Fetech and
// checks that the title of the first article matches the expected text.
// The test is skipped, not failed, when Fetech returns an error.
func TestFetch(t *testing.T) {
	want := "Deep learning"

	page, fetchErr := Fetech("https://scholar.google.co.jp/scholar?hl=en&cluster=5362332738201102290&num=1")
	if fetchErr != nil {
		t.Skip(fetchErr)
	}

	// Narrow down to the first article entry, then read its title link text.
	firstArticle := page.Find(WHOLE_ARTICLE_SELECTOR).First()
	got := firstArticle.Find(ARTICLE_TITLE_SELECTOR).Text()

	if got == want {
		return
	}
	t.Error(TestErr{expected: want, actual: got})
}
1 change: 1 addition & 0 deletions property.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ const (

// selector
const (
WHOLE_ARTICLE_SELECTOR = ".gs_r"
ARTICLE_TITLE_SELECTOR = "h3.gs_rt > a"
ARTICLE_HEADER_SELECTOR = ".gs_a"
ARTICLE_FOOTER_SELECTOR = ".gs_fl"
Expand Down

0 comments on commit 7daeeb9

Please sign in to comment.