forked from s-rah/onionscan
/
email_scan.go
25 lines (21 loc) · 882 Bytes
/
email_scan.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
package deanonymization
import (
"github.com/mhatta/onionscan/config"
"github.com/mhatta/onionscan/report"
"regexp"
"strings"
)
// EmailScan extracts anything which resembles an email address from the
// current crawl.
//
// For every crawl ID in osreport.Crawls it loads the crawl record from
// osc.Database. When the page's Content-Type header contains "text/html", the
// page snapshot is searched for email-like strings. Each match is appended to
// anonreport.EmailAddresses and passed to osc.Database.InsertRelationship
// together with osreport.HiddenService and the labels "snapshot" and
// "email-address". Matches are not de-duplicated.
//
// Crawl records that cannot be loaded are skipped.
func EmailScan(osreport *report.OnionScanReport, anonreport *report.AnonymityReport, osc *config.OnionScanConfig) {
	// Local part and domain must each start and end with a word character;
	// the final label (TLD) is 2 to 16 word characters long.
	mailRegexp := regexp.MustCompile(`(\w[-._\w]*\w@\w[-._\w]*\w\.\w{2,16})`)

	for _, id := range osreport.Crawls {
		crawlRecord, err := osc.Database.GetCrawlRecord(id)
		if err != nil {
			// Without a successfully loaded record there is no trustworthy
			// header or snapshot to inspect, so move on to the next crawl.
			continue
		}

		// Only HTML pages are scanned; other content types are ignored.
		if !strings.Contains(crawlRecord.Page.Headers.Get("Content-Type"), "text/html") {
			continue
		}

		for _, email := range mailRegexp.FindAllString(crawlRecord.Page.Snapshot, -1) {
			anonreport.EmailAddresses = append(anonreport.EmailAddresses, email)
			osc.Database.InsertRelationship(osreport.HiddenService, "snapshot", "email-address", email)
		}
	}
}