Change ExtractLinks to use LinkWorker
nylar committed Jan 1, 2015
1 parent 27159b6 commit bb0d777
Showing 2 changed files with 15 additions and 17 deletions.
12 changes: 3 additions & 9 deletions parser.go
@@ -54,22 +54,16 @@ func ExtractAuthor(doc *goquery.Document) string {
 // ExtractLinks all anchors (with href attributes) from a document and return a list
 // of the anchors. Should return an error but goquery.NewDocumentFromReader that
 // subsequently calls html.Parse doesn't like returning errors for bad markup.
-func ExtractLinks(doc *goquery.Document) []string {
-    links := []string{}
-    linkTracker := make(map[string]bool)
-
+func ExtractLinks(doc *goquery.Document, lw *LinkWorker) {
     doc.Find("a").Each(func(i int, s *goquery.Selection) {
         // Only interested in anchors that have a href attribute.
         link, href := s.Attr("href")
         if href {
-            if _, ok := linkTracker[link]; !ok {
-                links = append(links, link)
-                linkTracker[link] = true
-            }
+            lw.Push(Link(link))
         }
     })
 
-    return links
+    return
 }
 
 // ExtractText extracts all p tags from a page.
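The LinkWorker type itself is not part of this commit, so everything below is a hedged reconstruction: the names Link, NewLinkWorker, Push, and Len come from the calls in the diff, while the fields and deduplication logic are assumptions inferred from the fact that the old linkTracker map was removed yet TestParser_ExtractLinks_NoDuplicates still expects 3 unique links. A minimal sketch of what such a worker might look like:

// Link is assumed here to be a simple string-based type for URLs.
type Link string

// LinkWorker collects links and drops duplicates; a hypothetical
// reconstruction, not the actual implementation from this repository.
type LinkWorker struct {
    links []Link
    seen  map[Link]bool
}

// NewLinkWorker returns an empty worker ready to receive links.
func NewLinkWorker() *LinkWorker {
    return &LinkWorker{seen: make(map[Link]bool)}
}

// Push records a link unless it has already been seen.
func (lw *LinkWorker) Push(l Link) {
    if lw.seen[l] {
        return
    }
    lw.seen[l] = true
    lw.links = append(lw.links, l)
}

// Len reports how many unique links have been collected.
func (lw *LinkWorker) Len() int {
    return len(lw.links)
}

Moving deduplication behind Push keeps ExtractLinks a plain traversal and lets a single worker accumulate unique links across many documents.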
20 changes: 12 additions & 8 deletions parser_test.go
@@ -186,10 +186,11 @@ func TestParser_ExtractAuthorPrecedence(t *testing.T) {
 
 func TestParser_ExtractLinks_Empty(t *testing.T) {
     doc := NewDocument("")
+    lw := NewLinkWorker()
 
-    links := ExtractLinks(doc)
+    ExtractLinks(doc, lw)
 
-    assert.Equal(t, len(links), 0)
+    assert.Equal(t, lw.Len(), 0)
 }
 
 func TestParser_ExtractLinks_Valid(t *testing.T) {
@@ -201,20 +202,22 @@ func TestParser_ExtractLinks_Valid(t *testing.T) {
     </p>`
 
     doc := NewDocument(htmlSoup)
+    lw := NewLinkWorker()
 
-    links := ExtractLinks(doc)
+    ExtractLinks(doc, lw)
 
-    assert.Equal(t, len(links), 2)
+    assert.Equal(t, lw.Len(), 2)
 }
 
 func TestParser_ExtractLinks_Invalid(t *testing.T) {
     // This should return an error but html.Parse doesn't seem to care.
     invalidHTML := `<html><body><aef<eqf>>>qq></body></ht>`
 
     doc := NewDocument(invalidHTML)
-    links := ExtractLinks(doc)
+    lw := NewLinkWorker()
+    ExtractLinks(doc, lw)
 
-    assert.Equal(t, len(links), 0)
+    assert.Equal(t, lw.Len(), 0)
 }
 
 func TestParser_ExtractLinks_NoDuplicates(t *testing.T) {
@@ -227,10 +230,11 @@ func TestParser_ExtractLinks_NoDuplicates(t *testing.T) {
     </p>`
 
     doc := NewDocument(htmlWithDupes)
+    lw := NewLinkWorker()
 
-    links := ExtractLinks(doc)
+    ExtractLinks(doc, lw)
 
-    assert.Equal(t, len(links), 3)
+    assert.Equal(t, lw.Len(), 3)
 }
 
 func TestParser_ExtractTextEmpty(t *testing.T) {
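For orientation, here is a hedged sketch of how the new signature might be used outside the tests. goquery.NewDocumentFromReader is the real goquery constructor (the one the ExtractLinks comment refers to); the helper name collectLinks and the fetching step are illustrative only, and the LinkWorker sketch above is assumed.

import (
    "net/http"

    "github.com/PuerkitoBio/goquery"
)

// collectLinks is an illustrative helper (not part of the repository): it
// fetches a page, parses it with goquery, and funnels every anchor href into
// a LinkWorker through the new ExtractLinks signature.
func collectLinks(url string) (int, error) {
    resp, err := http.Get(url)
    if err != nil {
        return 0, err
    }
    defer resp.Body.Close()

    // NewDocumentFromReader wraps html.Parse, which tolerates bad markup,
    // so this error is rarely non-nil (as the ExtractLinks comment notes).
    doc, err := goquery.NewDocumentFromReader(resp.Body)
    if err != nil {
        return 0, err
    }

    lw := NewLinkWorker()
    ExtractLinks(doc, lw)
    return lw.Len(), nil
}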
