Skip to content

Commit

Permalink
added stats
Browse files Browse the repository at this point in the history
  • Loading branch information
marcobeierer committed Jul 2, 2019
1 parent 17d8ef1 commit 10e12de
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 12 deletions.
4 changes: 4 additions & 0 deletions TODO.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# TODO

- add verbose flag to optionally print progress during creation
- the body of each response is JSON with info about the number of checked pages
28 changes: 16 additions & 12 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,14 @@ func main() {
}

for {
if body, contentType, limitReached, ok := doRequest(url, token); ok {
if limitReached {
log.Println("the URL limit was reached and the sitemap is probably not complete")
}
if body, contentType, stats, limitReached, ok := doRequest(url, token); ok {
if contentType == "application/xml" {
if stats != "" {
log.Println(stats)
}
if limitReached {
log.Println("WARNING: the URL limit was reached and the sitemap probably is not complete")
}
fmt.Println(body)
return
}
Expand All @@ -63,15 +66,15 @@ func readToken(tokenPath string) (string, bool) {
return fmt.Sprintf("%s", bytes), true
}

// return body, contentType, limitReached, bool if successful
func doRequest(url, token string) (string, string, bool, bool) {
// returns body, contentType, stats (as unparsed JSON), limitReached, and a bool indicating success
func doRequest(url, token string) (string, string, string, bool, bool) {
urlBase64 := base64.URLEncoding.EncodeToString([]byte(url))

// TODO make max_fetchers and reference_count_threshold configurable as params
req, err := http.NewRequest("GET", "https://api.marcobeierer.com/sitemap/v2/"+urlBase64+"?pdfs=1&origin_system=cli&max_fetchers=3&reference_count_threshold=5", nil)
if err != nil {
log.Println(err)
return "", "", false, false
return "", "", "", false, false
}

if token != "" {
Expand All @@ -82,23 +85,24 @@ func doRequest(url, token string) (string, string, bool, bool) {
resp, err := http.DefaultClient.Do(req)
if err != nil {
log.Println(err)
return "", "", false, false
return "", "", "", false, false
}
defer resp.Body.Close()

limitReached := resp.Header.Get("X-Limit-Reached") == "1"
contentType := resp.Header.Get("content-type")
stats := resp.Header.Get("X-Stats")
limitReached := resp.Header.Get("X-Limit-Reached") == "1"

if resp.StatusCode != http.StatusOK {
log.Printf("got status code %d, expected 200\n", resp.StatusCode)
return "", contentType, limitReached, false
return "", contentType, stats, limitReached, false
}

bytes, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Println(err)
return "", contentType, limitReached, false
return "", contentType, stats, limitReached, false
}

return string(bytes), contentType, limitReached, true
return string(bytes), contentType, stats, limitReached, true
}

0 comments on commit 10e12de

Please sign in to comment.