Skip to content

Commit

Permalink
fixed details paginator
Browse files Browse the repository at this point in the history
  • Loading branch information
sleeperok committed Mar 23, 2019
1 parent 9969a54 commit c720c93
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 5 deletions.
12 changes: 10 additions & 2 deletions scrape/scrape.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import (

func (f *Field) extract(content *goquery.Selection, results *map[string]interface{}, baseURL string) error {
for _, attr := range f.Attrs {
values := []interface{}{}
values := []string{}
var err error
content.Find(f.CSSSelector).Each(func(index int, s *goquery.Selection) {
switch strings.ToLower(attr) {
Expand Down Expand Up @@ -424,13 +424,21 @@ func (task *Task) paginate(ctx context.Context, in <-chan flow, nextPageSelector
viper.GetInt("MAX_PAGES") > 0 && currentPageNum < viper.GetInt("MAX_PAGES")-1 {
// TODO clone request to use same settings
currentPageNum++
// Resolve the URL of the next page. The "paginator_href" entry may hold
// either a single string or a []string; for a slice the first element is
// used. Any other type (or a missing key) leaves paginatorLink empty.
paginatorLink := ""
switch href := paginator["paginator_href"].(type) {
case string:
	paginatorLink = href
case []string:
	// Guard the index so an empty slice cannot cause an
	// index-out-of-range panic.
	if len(href) > 0 {
		paginatorLink = href[0]
	}
}
logger.Info(paginatorLink)
fetcherChannel <- flow{fmt.Sprintf("%s-%d", data.key, currentPageNum), data.url,
fetch.Request{
Actions: "",
FormData: "",
Method: "",
Type: "",
URL: paginator["paginator_href"].(string),
URL: paginatorLink,
UserToken: "",
},
}
Expand Down
90 changes: 88 additions & 2 deletions scrape/scrape_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ var (
// randFalse = false
delayFetch time.Duration
//paginateResults bool
personsPayload, detailsPayload, JSONPayload, CSVPayload, XMLPayload, deepExtractPayload Payload
update = flag.Bool("update", false, "update result files")
personsPayload, detailsPayload, JSONPayload, CSVPayload, XMLPayload, deepExtractPayload, pathDetailsPaginator Payload
update = flag.Bool("update", false, "update result files")
)

func init() {
Expand Down Expand Up @@ -266,6 +266,58 @@ func init() {
Format: "json",
IsPath: true,
}
// pathDetailsPaginator is the payload passed to task.Parse by
// TestPathPaginator. The outer payload requests the country page and has no
// paginator of its own; its single field carries a nested Details payload
// that does set a Paginator selector, so pagination is exercised on the
// details level.
pathDetailsPaginator = Payload{
Name: "details paginator",
Request: fetch.Request{
URL: "http://testserver:12345/country/United%20States",
UserToken: "",
Type: "",
Actions: "",
},
Fields: []Field{
Field{
Name: "selector1",
CSSSelector: ".list-group-item a",
// Nested payload describing what to extract from each details page.
Details: Payload{
Name: "selector1details",
Request: fetch.Request{
URL: "http://testserver:12345/country/United%20States/city/San%20Jose",
UserToken: "",
Type: "",
Actions: "",
},
Fields: []Field{
Field{
Name: "selector1",
CSSSelector: ".badge-primary",
Attrs: []string{"text"},
Filters: []Filter{
Filter{Name: "trim"},
},
},
Field{
Name: "selector2",
CSSSelector: "#cards a",
Attrs: []string{"href", "text"},
Filters: []Filter{
Filter{Name: "trim"},
},
},
},
// CSS selector for the next-page link of the details pages.
Paginator: ".active~ .page-item+ .page-item .page-link",
Format: "",
IsPath: false,
},
// NOTE(review): "path" presumably marks this field's links as the
// entry points for the Details payload — confirm against Field.extract.
Attrs: []string{"path"},
Filters: []Filter{
Filter{Name: "trim"},
},
},
},
// The outer (listing) level is not paginated.
Paginator: "",
Format: "json",
IsPath: true,
}
}

func TestNewTask(t *testing.T) {
Expand Down Expand Up @@ -730,6 +782,40 @@ func TestFilters(t *testing.T) {
assert.Equal(t, "1;2;3;4;", res)
}

// TestPathPaginator parses the pathDetailsPaginator payload end to end: it
// starts a fetch server, runs task.Parse, reads the result file named in the
// JSON response under "Output file", and expects that file to contain 25
// records.
//
// Every step whose result is needed by the next one fails the test
// immediately, so a broken step is reported as itself rather than as a
// later panic or a misleading secondary failure.
func TestPathPaginator(t *testing.T) {
	os.RemoveAll("./results")
	fetchServerAddr := viper.GetString("DFK_FETCH")
	fetchServerCfg := fetch.Config{
		Host: fetchServerAddr,
	}
	fetchServer := fetch.Start(fetchServerCfg)
	defer fetchServer.Stop()

	task := NewTask()
	task.storage.DeleteAll()
	// Deferred so storage and result files are cleaned up even when the
	// test aborts early; t.Fatalf still runs deferred functions.
	defer func() {
		task.storage.DeleteAll()
		os.RemoveAll("./results")
	}()

	r, err := task.Parse(pathDetailsPaginator)
	if err != nil {
		t.Fatalf("parsing payload: %v", err)
	}

	buf := new(bytes.Buffer)
	if _, err := buf.ReadFrom(r); err != nil {
		t.Fatalf("reading parse response: %v", err)
	}
	summary := make(map[string]interface{})
	if err := json.Unmarshal(buf.Bytes(), &summary); err != nil {
		t.Fatalf("decoding parse response %q: %v", buf.String(), err)
	}
	// Checked assertion: a missing or non-string entry must fail the test,
	// not panic it.
	resultFile, ok := summary["Output file"].(string)
	if !ok {
		t.Fatalf("parse response has no string \"Output file\" entry: %q", buf.String())
	}

	actualText, err := ioutil.ReadFile(filepath.Join("./", resultFile))
	if err != nil {
		t.Fatalf("reading result file: %v", err)
	}

	var actualJSON []map[string]string
	if err := json.Unmarshal(actualText, &actualJSON); err != nil {
		t.Fatalf("decoding result file %q: %v", resultFile, err)
	}

	assert.Equal(t, 25, len(actualJSON))
}

func TestPathParse(t *testing.T) {
viper.Set("MAX_PAGES", 10)
os.RemoveAll("./results")
Expand Down
1 change: 0 additions & 1 deletion test-docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,4 @@ services:
- "12345:12345"
depends_on:
- chrome
restart: always

0 comments on commit c720c93

Please sign in to comment.