Skip to content
Permalink
Browse files

add meta combine

  • Loading branch information
denpaneko committed Mar 7, 2020
1 parent 37877b6 commit c87db67bb13346bc5b4b29823af88a57cdcc4d43
Showing with 90 additions and 49 deletions.
  1. +1 −1 scrape_test.go
  2. +32 −22 selector/selector.go
  3. +34 −13 selector/site.go
  4. +23 −13 sites/mgs.go
@@ -12,7 +12,7 @@ func TestScrape(t *testing.T) {
//meta := Scrape(sites.Fc2Club("437689"))
//meta := Scrape(sites.Carib("030720-001"))
//meta := Scrape(sites.CaribPr("022820_003"))
meta := Scrape(sites.Mgs("261ARA-418"))
meta := Scrape(sites.Mgs("300MIUM-544"))

fmt.Println(meta.Json())
}
@@ -6,18 +6,18 @@ import (
)

type CssSelector struct {
Id Item
Title Item
Actor Item
Poster Item
Series Item
Producer Item
Release Item
Duration Item
Sample Item
Images Item
Label Item
Genre Item
Id *Item
Title *Item
Actor *Item
Poster *Item
Series *Item
Producer *Item
Release *Item
Duration *Item
Sample *Item
Images *Item
Label *Item
Genre *Item
}

type Item struct {
@@ -27,22 +27,22 @@ type Item struct {
preset string
}

func Selector(selector string) Item {
return Item{selector: selector, attribute: "", replacer: strings.NewReplacer("", ""), preset: ""}
// Selector returns a pointer to a new Item whose selector field is set to the
// given string. The attribute and preset fields are left empty, and the
// replacer is built from a single empty old/new pair.
func Selector(selector string) *Item {
	item := Item{
		selector: selector,
		replacer: strings.NewReplacer("", ""),
	}
	return &item
}

func Preset(preset string) Item {
return Item{selector: "", attribute: "", replacer: strings.NewReplacer("", ""), preset: preset}
// Preset returns a pointer to a new Item whose preset field is set to the
// given string. The selector and attribute fields are left empty, and the
// replacer is built from a single empty old/new pair.
func Preset(preset string) *Item {
	item := Item{
		preset:   preset,
		replacer: strings.NewReplacer("", ""),
	}
	return &item
}

func (selector Item) Replace(oldNew ...string) Item {
func (selector Item) Replace(oldNew ...string) *Item {
selector.replacer = strings.NewReplacer(oldNew...)
return selector
return &selector
}

func (selector Item) Attribute(attr string) Item {
func (selector Item) Attribute(attr string) *Item {
selector.attribute = attr
return selector
return &selector
}

func (selector Item) Text(doc *goquery.Document) string {
@@ -61,16 +61,26 @@ func (selector Item) Texts(doc *goquery.Document) []string {
return texts
}

func (selector Item) Value(doc *goquery.Document) string {
// Value resolves this item to a single string for the given document.
//
// A nil receiver yields "", and a non-empty preset is returned as-is without
// querying the document. Otherwise the first element matching the item's
// selector is handed to textOrAttr and its result is returned.
func (selector *Item) Value(doc *goquery.Document) string {
	switch {
	case selector == nil:
		// Nil items are allowed so callers can leave fields unset.
		return ""
	case len(selector.preset) > 0:
		return selector.preset
	}

	first := doc.Find(selector.selector).First()
	return selector.textOrAttr(first)
}

func (selector Item) Values(doc *goquery.Document) []string {
func (selector *Item) Values(doc *goquery.Document) []string {
var texts []string

if selector == nil {
return texts
}

doc.Find(selector.selector).Each(func(i int, selection *goquery.Selection) {
texts = append(texts, selector.textOrAttr(selection))
})
@@ -23,6 +23,7 @@ type Site struct {
Charset string
Cookies []http.Cookie
CssSelector
Next *Site
}

func (site Site) Meta() Meta {
@@ -34,23 +35,44 @@ func (site Site) Meta() Meta {
log.Fatal(err)
}

var next = Meta{}
if site.Next != nil {
next = site.Next.Meta()
}

// extract meta data from web page
meta.Title = site.Title.Value(doc)
meta.Actor = site.Actor.Value(doc)
meta.Poster = site.Poster.Value(doc)
meta.Producer = site.Producer.Value(doc)
meta.Sample = site.Sample.Value(doc)
meta.Series = site.Series.Value(doc)
meta.Release = site.Release.Value(doc)
meta.Duration = site.Duration.Value(doc)
meta.Id = site.Id.Value(doc)
meta.Label = site.Label.Value(doc)
meta.Genre = site.Genre.Values(doc)
meta.Images = site.Images.Values(doc)
meta.Title = combine(site.Title.Value(doc), next.Title)
meta.Actor = combine(site.Actor.Value(doc), next.Actor)
meta.Poster = combine(site.Poster.Value(doc), next.Poster)
meta.Producer = combine(site.Producer.Value(doc), next.Producer)
meta.Sample = combine(site.Sample.Value(doc), next.Sample)
meta.Series = combine(site.Series.Value(doc), next.Series)
meta.Release = combine(site.Release.Value(doc), next.Release)
meta.Duration = combine(site.Duration.Value(doc), next.Duration)
meta.Id = combine(site.Id.Value(doc), next.Id)
meta.Label = combine(site.Label.Value(doc), next.Label)
meta.Genre = combineArray(site.Genre.Values(doc), next.Genre)
meta.Images = combineArray(site.Images.Values(doc), next.Images)
meta.Url = site.Url
return meta
}

// combine returns first when it is non-empty and falls back to second
// otherwise. If both are empty the result is the empty string.
func combine(first string, second string) string {
	if len(first) > 0 {
		return first
	}
	return second
}

// combineArray returns first when it holds at least one element and falls
// back to second otherwise. A nil first and an empty first are treated the
// same way. The chosen slice is returned as-is, not copied.
func combineArray(first []string, second []string) []string {
	if len(first) > 0 {
		return first
	}
	return second
}

func (site Site) Body() io.ReadCloser {
resp, err := site.get()

@@ -86,7 +108,6 @@ func (site Site) get() (*http.Response, error) {
req.Header.Set("User-Agent", site.UserAgent)

for _, cookie := range site.Cookies {
log.Println(cookie)
req.AddCookie(&cookie)
}

@@ -7,24 +7,34 @@ import (
)

func Mgs(id string) Site {
return Site{

mobile := Site{
Url: fmt.Sprintf("https://sp.mgstage.com/product/product_detail/SP-%s/", id),
UserAgent: MobileUserAgent,
Cookies: []http.Cookie{{Name: "adc", Value: "1"}},

CssSelector: CssSelector{
Title: Selector(".sample-image-wrap.h1 > img").Attribute("alt"),
Actor: Selector("a.actor"),
Poster: Selector(".sample-image-wrap.h1").Attribute("href"),
Producer: Selector("#detail > div > dl > dd:nth-child(6) > a"),
Sample: Selector("#sample-movie").Attribute("src"),
Series: Selector("a.series"),
Release: Selector("#detail > div > dl > dd:nth-child(12)"),
Duration: Selector("#detail > div > dl > dd:nth-child(14)"),
Id: Selector("#detail > div > dl > dd:nth-child(16)").Replace("SP-", ""),
Label: Selector("null"),
Genre: Selector("#detail > div > dl > dd > a"),
Images: Selector("a[class^=\"sample-image-wrap sample\"]").Attribute("href"),
Title: Selector(".sample-image-wrap.h1 > img").Attribute("alt"),
Actor: Selector("a.actor"),
Poster: Selector(".sample-image-wrap.h1").Attribute("href"),
Sample: Selector("#sample-movie").Attribute("src"),
Series: Selector("a.series"),
Label: Selector("null"),
Images: Selector("a[class^=\"sample-image-wrap sample\"]").Attribute("href"),
},
}

return Site{
Url: fmt.Sprintf("https://www.mgstage.com/product/product_detail/%s/", id),
UserAgent: UserAgent,
Cookies: mobile.Cookies,
CssSelector: CssSelector{
Producer: Selector("div.detail_left > div > table:nth-child(3) > tbody > tr:nth-child(2) > td > a"),
Release: Selector("div.detail_left > div > table:nth-child(3) > tbody > tr:nth-child(5) > td"),
Duration: Selector("div.detail_left > div > table:nth-child(3) > tbody > tr:nth-child(3) > td"),
Id: Selector("div.detail_left > div > table:nth-child(3) > tbody > tr:nth-child(4) > td"),
Genre: Selector("div.detail_left > div > table:nth-child(3) > tbody > tr:nth-child(9) > td > a"),
},
Next: &mobile,
}
}

0 comments on commit c87db67

Please sign in to comment.
You can’t perform that action at this time.