Skip to content
This repository has been archived by the owner on Apr 9, 2022. It is now read-only.

Commit

Permalink
add better support for content length data, by actually reading and discarding asset body bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
lrstanley committed Jan 23, 2017
1 parent 2601b53 commit 4da76ba
Showing 1 changed file with 11 additions and 0 deletions.
11 changes: 11 additions & 0 deletions scraper/scraper.go
Expand Up @@ -7,6 +7,7 @@ package scraper
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
Expand Down Expand Up @@ -115,6 +116,12 @@ func (c *Crawler) fetchResource(rsrc *Resource) {
}

if resp.Body != nil {
// we don't care about the body, but we want to know how large it is.
// count the bytes but discard them.
if resp.ContentLength < 1 {
resp.ContentLength, _ = io.Copy(ioutil.Discard, resp.Body)
}

resp.Body.Close() // ensure the body stream is closed
}

Expand Down Expand Up @@ -192,6 +199,10 @@ func (c *Crawler) Fetch(res *FetchResult) {
res.Response.Body = string(bbytes[:])
}

if res.Response.ContentLength < 1 {
res.Response.ContentLength = int64(len(buf))
}

c.Log.Printf("fetched %s in %dms with status %d", res.Response.URL.String(), res.Time.Milli, res.Response.Code)

resourceTime := utils.NewTimer()
Expand Down

0 comments on commit 4da76ba

Please sign in to comment.