-
Notifications
You must be signed in to change notification settings - Fork 0
/
city.go
47 lines (41 loc) · 1.18 KB
/
city.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
package main
import (
"fmt"
"regexp"
"gopkg.in/mgo.v2"
"gopkg.in/mgo.v2/bson"
"github.com/PuerkitoBio/goquery"
"github.com/pestkam/scraper"
)
// CityInfo is the record ParseCity stores for one city found on the listing
// page. The fields carry no bson tags, so mgo's bson package stores them under
// their lowercased names (for example "latincityname", which ParseCity uses as
// the upsert key).
type CityInfo struct {
	// CityName is the text of the city's link as shown on the page.
	CityName string

	// LatinCityName is the trailing run of word characters of the city's
	// link; it identifies the city's document in the collection.
	LatinCityName string

	// URL is the city's link with "/rubrics" appended.
	URL string
}
// latinCityNameRe matches the trailing run of word characters in a city link.
// It is compiled once at package scope instead of on every ParseCity call.
var latinCityNameRe = regexp.MustCompile(`\w+$`)

// ParseCity reads the city listing page from resp.Body and upserts one
// CityInfo per listed city into the "city" collection of the "2Gis" database.
//
// For every "li.world__listItem" element it takes the "a.world__listItemName"
// anchor: the anchor text becomes CityName, the trailing word characters of
// its href become LatinCityName, and the href with "/rubrics" appended becomes
// URL. A href that starts with "/" is first prefixed with "http://2gis.ru".
// Documents are matched on "latincityname", so parsing the same city again
// updates its existing document.
//
// The function returns nothing; failures are printed to standard output. If
// the page cannot be parsed, nothing is stored. An entry whose anchor has no
// href, or whose href yields no Latin name, is skipped: storing it would put
// every such entry under the same empty key, each overwriting the last.
func ParseCity(resp scraper.Response, session *mgo.Session) {
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		fmt.Println(err)
		return
	}

	cities := session.DB("2Gis").C("city")
	doc.Find("li.world__listItem").Each(func(_ int, item *goquery.Selection) {
		anchor := item.Find("a.world__listItemName")
		name := anchor.Text()

		link, ok := anchor.Attr("href")
		if !ok {
			fmt.Printf("skipping city %q: link has no href\n", name)
			return
		}

		latinName := latinCityNameRe.FindString(link)
		if latinName == "" {
			fmt.Printf("skipping city %q: no latin name in link %q\n", name, link)
			return
		}

		// A non-empty latinName implies a non-empty link, so link[0] is safe.
		if link[0] == '/' {
			link = "http://2gis.ru" + link
		}

		city := CityInfo{
			CityName:      name,
			LatinCityName: latinName,
			URL:           link + "/rubrics",
		}

		selector := bson.M{"latincityname": city.LatinCityName}
		if _, err := cities.Upsert(selector, bson.M{"$set": city}); err != nil {
			fmt.Printf("upserting city %q: %v\n", city.LatinCityName, err)
		}
	})
}