Merge pull request #67 from laanwj/2016_10_images_prevent_unicode_conversion

Fix EXIF parsing: avoid converting images to unicode strings
s-rah committed Oct 2, 2016
2 parents a25b786 + 67108f5 commit c735333
Showing 4 changed files with 13 additions and 3 deletions.
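The core of the fix is to keep fetched JPEG bodies as raw bytes instead of routing them through the text-oriented snapshot path. As a supplementary illustration (not part of the commit), the Go snippet below shows the failure mode the title refers to: once binary data is pushed through rune-oriented text processing, every byte sequence that is not valid UTF-8 is replaced with U+FFFD, which destroys the JPEG/EXIF markers the parser looks for. The exact corruption path in the old code is not visible in this diff; this only demonstrates the general hazard.

package main

import "fmt"

func main() {
	raw := []byte{0xFF, 0xD8, 0xFF, 0xE1} // first bytes of a JPEG with an EXIF (APP1) segment
	var rebuilt []byte
	for _, r := range string(raw) { // rune-wise processing, as a text pipeline might do
		rebuilt = append(rebuilt, []byte(string(r))...)
	}
	fmt.Printf("original: % x\n", raw)     // ff d8 ff e1
	fmt.Printf("rebuilt:  % x\n", rebuilt) // ef bf bd ef bf bd ef bf bd ef bf bd
}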
deanonymization/check_exif.go: 2 additions & 1 deletion
@@ -1,6 +1,7 @@
 package deanonymization

 import (
+	"bytes"
 	"github.com/s-rah/onionscan/config"
 	"github.com/s-rah/onionscan/report"
 	"github.com/xiam/exif"
@@ -16,7 +17,7 @@ func CheckExif(osreport *report.OnionScanReport, anonreport *report.AnonymityRep

 	if crawlRecord.Page.Status == 200 && strings.Contains(crawlRecord.Page.Headers.Get("Content-Type"), "image/jpeg") {
 		reader := exif.New()
-		_, err := io.Copy(reader, strings.NewReader(string(crawlRecord.Page.Snapshot)))
+		_, err := io.Copy(reader, bytes.NewReader(crawlRecord.Page.Raw))

 		// exif.FoundExifInData is a signal that the EXIF parser has all it needs,
 		// it doesn't need to be given the whole image.
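For context, here is a minimal standalone sketch (not part of the commit) of how this code path drives the streaming EXIF parser. exif.New, io.Copy into the parser, and the exif.FoundExifInData sentinel all appear in the hunk above; the Tags field and the file-reading main function are assumptions about the github.com/xiam/exif API, made for illustration only.

package main

import (
	"bytes"
	"fmt"
	"io"
	"os"

	"github.com/xiam/exif"
)

func printExifTags(raw []byte) {
	parser := exif.New()
	_, err := io.Copy(parser, bytes.NewReader(raw))
	// exif.FoundExifInData is a success signal: the parser has already seen a
	// complete EXIF block and does not need the rest of the image.
	if err != nil && err != exif.FoundExifInData {
		fmt.Println("no usable EXIF data:", err)
		return
	}
	for name, value := range parser.Tags { // Tags map[string]string (assumed)
		fmt.Printf("%s = %s\n", name, value)
	}
}

func main() {
	raw, err := os.ReadFile("photo.jpg") // hypothetical input file
	if err != nil {
		fmt.Println(err)
		return
	}
	printExifTags(raw)
}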
model/page.go: 1 addition & 0 deletions
@@ -14,6 +14,7 @@ type Page struct {
 	Links []Element
 	Scripts []Element
 	Snapshot string
+	Raw []byte
 	Hash string
 }

spider/onionspider.go: 2 additions & 2 deletions
@@ -169,8 +169,8 @@ func (os *OnionSpider) GetPage(uri string, base *url.URL, osc *config.OnionScanC
 	if strings.Contains(response.Header.Get("Content-Type"), "text/html") {
 		page = ParsePage(response.Body, base, snapshot)
 	} else if strings.Contains(response.Header.Get("Content-Type"), "image/jpeg") {
-		page = SnapshotResource(response.Body)
-		osc.LogInfo(fmt.Sprintf("Fetched %d byte image", len(page.Snapshot)))
+		page = SnapshotBinaryResource(response.Body)
+		osc.LogInfo(fmt.Sprintf("Fetched %d byte image", len(page.Raw)))
 	} else if snapshot {
 		page = SnapshotResource(response.Body)
 		osc.LogInfo(fmt.Sprintf("Grabbed %d byte document", len(page.Snapshot)))
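In short, JPEG bodies now land in the new Raw field via SnapshotBinaryResource, while HTML and other textual resources keep using the Snapshot string. Below is a hedged sketch of that split as a free-standing helper, assuming it sits in the spider package next to the functions in this diff and that the model package lives at github.com/s-rah/onionscan/model (consistent with the other imports shown); the helper name is hypothetical.

package spider

import (
	"io"
	"strings"

	"github.com/s-rah/onionscan/model"
)

// snapshotByContentType (hypothetical helper) makes the split explicit: JPEG
// bodies fill the new Raw field, everything else keeps the textual Snapshot.
func snapshotByContentType(contentType string, body io.Reader) model.Page {
	if strings.Contains(contentType, "image/jpeg") {
		return SnapshotBinaryResource(body)
	}
	return SnapshotResource(body)
}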
spider/pageparser.go: 8 additions & 0 deletions
@@ -26,6 +26,14 @@ func SnapshotResource(response io.Reader) model.Page {
 	return page
 }

+func SnapshotBinaryResource(response io.Reader) model.Page {
+	page := model.Page{}
+	buf := make([]byte, 1024*512) // Read Max 0.5 MB
+	n, _ := io.ReadFull(response, buf)
+	page.Raw = buf[0:n]
+	return page
+}
+
 func ParsePage(response io.Reader, base *url.URL, snapshot bool) model.Page {

 	page := model.Page{}
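One detail worth noting about SnapshotBinaryResource: io.ReadFull returns io.ErrUnexpectedEOF when the body is shorter than the 512 KB buffer, but n still reports how many bytes were read, so ignoring the error and slicing buf[0:n] keeps whatever was fetched, capped at 512 KB. A small standalone demonstration:

package main

import (
	"fmt"
	"io"
	"strings"
)

func main() {
	buf := make([]byte, 1024*512) // same 0.5 MB cap as SnapshotBinaryResource
	n, err := io.ReadFull(strings.NewReader("tiny body"), buf)
	fmt.Println(n, err)        // 9 unexpected EOF: a short read is expected here
	fmt.Println(len(buf[0:n])) // 9: the slice keeps exactly the bytes that arrived
}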
