-
Notifications
You must be signed in to change notification settings - Fork 1
/
scraperB.go
105 lines (91 loc) · 2.4 KB
/
scraperB.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
package scraper
import (
"errors"
"fmt"
"net/http"
"net/url"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// scraperB carries the settings used by the ListScrape and contactScrape
// methods defined in this file.
type scraperB struct {
	// sourceAddress is the URL of the listing page fetched by ListScrape.
	// sourcePhoneAddress is handed to contactScrape as its phoneUrl argument.
	sourceAddress, sourcePhoneAddress string

	// interval is how long ListScrape sleeps before requesting the listing
	// page; it is also forwarded to contactScrape for each contact page.
	interval time.Duration
}
// ListScrape downloads the listing page at c.sourceAddress, takes the first
// link inside every div under "div.main > div.items" whose class attribute is
// exactly "item", and scrapes each linked page with contactScrape.
//
// It sleeps for c.interval before requesting the listing page. An error is
// returned when the listing page cannot be fetched, answers with a status
// other than 200, or cannot be parsed. Failures on an individual item are
// printed to stdout and that item is skipped, so the returned slice (non-nil,
// possibly empty) holds only the contacts that were scraped successfully.
func (c *scraperB) ListScrape() ([]Contact, error) {
	fmt.Printf("Scraping list: %s\n", c.sourceAddress)
	time.Sleep(c.interval)

	res, err := http.Get(c.sourceAddress)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("status code error: %d %s", res.StatusCode, res.Status)
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return nil, err
	}

	// The base URL is the same for every item, so parse it once up front.
	base, err := url.Parse(c.sourceAddress)
	if err != nil {
		return nil, err
	}

	result := []Contact{}
	items := doc.Find("div.main").ChildrenFiltered("div.items").ChildrenFiltered("div.item")
	items.Each(func(_ int, item *goquery.Selection) {
		// "div.item" also matches elements carrying additional classes; only
		// elements whose class attribute is exactly "item" are processed.
		if class, ok := item.Attr("class"); !ok || class != "item" {
			return
		}
		href, ok := item.Find("a").Attr("href")
		if !ok {
			return
		}

		// Resolve the href against the listing URL instead of overwriting
		// its Path: this handles relative and absolute links, keeps a query
		// string in the href intact, and does not leak the listing page's
		// own query or fragment into the contact URL.
		contactURL, err := base.Parse(href)
		if err != nil {
			fmt.Println(err)
			return
		}

		scraped, err := c.contactScrape(contactURL.String(), c.sourcePhoneAddress, c.interval)
		if err != nil {
			fmt.Println(err)
			return
		}
		result = append(result, scraped)
		fmt.Println(scraped.GetNumbers())
	})
	return result, nil
}
// contactScrape downloads a single contact page and extracts its details.
//
// It sleeps for interval before issuing the request. All values are looked up
// inside "div.right": the title is the text of "h1", the numbers are the
// trimmed text of "div.phone0mt div.phone02 a.phoneMT", and the description
// is the trimmed text of "div.text". The page address is stored as the
// contact's website. phoneUrl is accepted but not used by this method.
//
// An error is returned when the request fails, the response status is not
// 200, the document cannot be parsed, or the page has no "div.right" element.
func (c *scraperB) contactScrape(url string, phoneUrl string, interval time.Duration) (Contact, error) {
	fmt.Printf("Scraping contact: %s\n", url)
	time.Sleep(interval)

	res, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("status code error: %d %s", res.StatusCode, res.Status)
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return nil, err
	}

	// Find never returns nil; a missing container shows up as an empty
	// selection, so test its length rather than comparing against nil.
	element := doc.Find("div.right")
	if element.Length() == 0 {
		return nil, errors.New("Unable to find contact information")
	}

	t := element.Find("h1").Text()
	nums := strings.TrimSpace(element.Find("div.phone0mt div.phone02 a.phoneMT").Text())
	d := strings.TrimSpace(element.Find("div.text").Text())
	return &contact{
		numbers:     nums,
		website:     url,
		title:       t,
		description: d,
	}, nil
}