-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.go
125 lines (100 loc) · 2.95 KB
/
scraper.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
package main
import (
"fmt"
"net/http"
"strconv"
"github.com/PuerkitoBio/goquery"
"github.com/gocolly/colly"
"github.com/labstack/echo/v4"
)
// CastItem is one entry of the cast list that GetMovieData collects from a
// title page.
type CastItem struct {
	// Actor is the text of the actor-name element inside the cast entry.
	Actor string `json:"actor"`
	// Roles holds the text of each child of the entry's <ul>. Note that the
	// JSON key is the singular "role".
	Roles []string `json:"role"`
}
// MovieData is the JSON payload GetMovieData returns for a single title.
// Every field is filled from the scraped page; a field stays at its zero
// value when its selector matches nothing.
type MovieData struct {
	// Title is the text of the matched heading span.
	Title string `json:"title"`
	// Rate is the rating text parsed as a float64; it is 0 when the text
	// does not parse. GetMovieData treats a zero Rate as "not found".
	Rate float64 `json:"rate"`
	// PosterImage is the src attribute of the poster <img>.
	PosterImage string `json:"posterImage"`
	// Duration is the text of the last <li> in the matched metadata list.
	Duration string `json:"duration"`
	// Year is the text of the first <li> in the matched metadata list.
	Year string `json:"year"`
	// Genres has one entry per matched genre chip.
	Genres []string `json:"genres"`
	// Rank is the matched rank text parsed as an int; it is 0 when the text
	// does not parse.
	Rank int `json:"rank"`
	// Directors holds the link texts of the first credits row. Note that the
	// JSON key is the singular "director".
	Directors []string `json:"director"`
	// Writers holds the link texts of the second credits row.
	Writers []string `json:"writers"`
	// Cast has one CastItem per matched cast entry.
	Cast []CastItem `json:"cast"`
}
// GetMovieData scrapes the IMDb title page named by the "slug" route
// parameter and writes the extracted fields to the response as JSON.
//
// Responses:
//   - 200 with a MovieData body when both a title and a non-zero rating were
//     extracted.
//   - 404 "not found" when IMDb answers 404, or when the page yields no title
//     or no parsable rating.
//   - 500 when the page could not be fetched for any other reason.
//
// NOTE(review): the CSS selectors use class names such as ".jqlHBQ" that look
// build-generated; presumably they break whenever IMDb redeploys its
// front end — confirm and consider more stable selectors.
func GetMovieData(h echo.Context) error {
	slug := h.Param("slug")
	url := fmt.Sprintf("https://www.imdb.com/title/%v/", slug)
	response := MovieData{}

	c := colly.NewCollector()
	// Set a browser-like User-Agent header for the request.
	c.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"

	c.OnHTML(".jqlHBQ h1 span", func(e *colly.HTMLElement) {
		response.Title = e.Text
	})

	c.OnHTML(".cMEQkK", func(e *colly.HTMLElement) {
		rate, err := strconv.ParseFloat(e.Text, 64)
		if err != nil {
			// ParseFloat may return ±Inf on range errors; normalise to 0 so
			// the "no rating" check below stays reliable.
			rate = 0
		}
		response.Rate = rate
	})

	c.OnHTML(".dEqUUl div.ipc-poster img.ipc-image", func(e *colly.HTMLElement) {
		response.PosterImage = e.Attr("src")
	})

	c.OnHTML(".jqlHBQ ul li:first-child", func(e *colly.HTMLElement) {
		response.Year = e.Text
	})

	c.OnHTML(".jqlHBQ ul li:last-child", func(e *colly.HTMLElement) {
		response.Duration = e.Text
	})

	c.OnHTML(".ktjuZl .ipc-chip-list__scroller a span", func(e *colly.HTMLElement) {
		response.Genres = append(response.Genres, e.Text)
	})

	c.OnHTML(".eWQwwe .fTREEx", func(e *colly.HTMLElement) {
		rank, err := strconv.Atoi(e.Text)
		if err != nil {
			rank = 0
		}
		response.Rank = rank
	})

	c.OnHTML(".bHYmJY>li:first-child>div>ul>li>a", func(e *colly.HTMLElement) {
		response.Directors = append(response.Directors, e.Text)
	})

	c.OnHTML(".bHYmJY>li:nth-child(2)>div>ul>li>a", func(e *colly.HTMLElement) {
		response.Writers = append(response.Writers, e.Text)
	})

	c.OnHTML(".hNfYaW .gWwKlt", func(e *colly.HTMLElement) {
		person := CastItem{
			Actor: e.DOM.Find(".gCQkeh").Text(),
			// Non-nil so an actor without roles encodes as [] rather than null.
			Roles: make([]string, 0),
		}
		e.DOM.Find("ul").Children().Each(func(_ int, s *goquery.Selection) {
			person.Roles = append(person.Roles, s.Text())
		})
		response.Cast = append(response.Cast, person)
	})

	// Every callback has to be registered before Visit: the collector is
	// synchronous by default, so all callbacks have already run by the time
	// Visit returns. Record the upstream status so a missing title can still
	// be reported as 404 instead of a generic failure.
	var upstreamStatus int
	c.OnError(func(r *colly.Response, _ error) {
		if r != nil {
			upstreamStatus = r.StatusCode
		}
	})

	if err := c.Visit(url); err != nil {
		if upstreamStatus == http.StatusNotFound {
			return h.JSON(http.StatusNotFound, "not found")
		}
		h.Logger().Errorf("scraping %q: %v", url, err)
		return h.JSON(http.StatusInternalServerError, "this one's on us")
	}

	// A page without a title or a parsable rating is treated as not found.
	if response.Title == "" || response.Rate == 0 {
		return h.JSON(http.StatusNotFound, "not found")
	}
	return h.JSON(http.StatusOK, response)
}