-
-
Notifications
You must be signed in to change notification settings - Fork 26
/
linkprev.go
138 lines (115 loc) · 3.73 KB
/
linkprev.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
package linkprev
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"net/url"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/imroc/req/v3"
"github.com/nekomeowww/insights-bot/pkg/opengraph"
"github.com/samber/lo"
)
// Sentinel errors wrapped into the errors returned by Client.Preview.
// Match them with errors.Is.
var (
	// ErrNetworkError marks a failure to perform the HTTP request itself.
	ErrNetworkError = errors.New("network error")
	// ErrRequestFailed marks a response whose status is not a success state.
	ErrRequestFailed = errors.New("request failed")
)
// Client fetches web pages and extracts link preview metadata from them.
// Create one with NewClient.
type Client struct {
	// reqClient is the underlying HTTP client every request is issued from.
	reqClient *req.Client
}
// NewClient returns a Client whose requests carry a desktop browser
// User-Agent by default.
func NewClient() *Client {
	const defaultUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54"

	httpClient := req.C().SetUserAgent(defaultUserAgent)

	return &Client{reqClient: httpClient}
}
// Debug turns on EnableDumpAll on the underlying req client and returns the
// same Client so the call can be chained, e.g. NewClient().Debug().
func (c *Client) Debug() *Client {
	c.reqClient.EnableDumpAll()

	return c
}
// Preview fetches urlStr and extracts link preview metadata from the returned
// HTML document.
//
// The HTTP request is bound to ctx. Errors produced while fetching are
// returned unchanged from the request step (they wrap ErrNetworkError or
// ErrRequestFailed). A failure to parse the body wraps the underlying parser
// error so callers can inspect it with errors.Is / errors.As.
//
// On any error the returned Meta is the zero value. A zero Meta with a nil
// error is also possible: newMetaFrom yields it when the page has neither a
// <title> nor an OpenGraph title.
func (c *Client) Preview(ctx context.Context, urlStr string) (Meta, error) {
	r := c.newRequest(ctx, urlStr)

	body, err := c.request(r, urlStr)
	if err != nil {
		return Meta{}, err
	}

	doc, err := goquery.NewDocumentFromReader(body)
	if err != nil {
		// %w (not %v) keeps the parser error in the chain; the message text
		// is unchanged.
		return Meta{}, fmt.Errorf("failed to parse response body with goquery: %w", err)
	}

	return newMetaFrom(doc), nil
}
// newRequest builds a request from the underlying client with per-request
// dumping enabled and ctx attached, then lets alterRequestForTwitter adjust
// it when urlStr points at a Twitter host.
func (c *Client) newRequest(ctx context.Context, urlStr string) *req.Request {
	r := c.reqClient.R().EnableDump().SetContext(ctx)

	// alterRequestForTwitter hands back the very request it was given, so
	// returning its result is the same as returning r.
	return c.alterRequestForTwitter(r, urlStr)
}
// alterRequestForTwitter overrides the User-Agent of request when urlStr
// points at Twitter (twitter.com, vxtwitter.com or fxtwitter.com) and returns
// the same request. For any other host, or when urlStr cannot be parsed, the
// request is returned untouched.
//
// We need to ask the Twitter server to generate an SSR-rendered HTML page for
// us in order to get the metadata. Learn more at:
//  1. https://stackoverflow.com/a/64332370/19954520
//  2. https://stackoverflow.com/a/64164115/19954520
//
// Other alternative User-Agents for Twitter:
//  1. Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
//  2. Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)
//  3. facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)
//  4. Mozilla/5.0 (compatible; Discordbot/2.0; +https://discordapp.com)
func (c *Client) alterRequestForTwitter(request *req.Request, urlStr string) *req.Request {
	parsedURL, err := url.Parse(urlStr)
	if err != nil {
		return request
	}

	// Compare on the bare, lower-cased host name: URL.Host still carries an
	// explicit ":port" and keeps the caller's letter case, so
	// "twitter.com:443" or "Twitter.com" would otherwise not be recognised.
	host := strings.ToLower(parsedURL.Hostname())
	if !lo.Contains([]string{"twitter.com", "vxtwitter.com", "fxtwitter.com"}, host) {
		return request
	}

	return request.SetHeader("User-Agent", "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)")
}
// request performs a GET of urlStr with r and returns the whole response body
// buffered in memory.
//
// Errors:
//   - the request could not be performed: the error wraps ErrNetworkError;
//   - the response is not in a success state: the error wraps
//     ErrRequestFailed and includes the status code and the request dump;
//   - the body could not be read: the error wraps the underlying read error.
func (c *Client) request(r *req.Request, urlStr string) (io.Reader, error) {
	resp, err := r.Get(urlStr)
	if err != nil {
		return nil, fmt.Errorf("failed to get a preview of url %s, %w: %v", urlStr, ErrNetworkError, err)
	}
	// Register the close before any early return so the body is also closed
	// on the non-success path below.
	defer resp.Body.Close()

	if !resp.IsSuccessState() {
		return nil, fmt.Errorf("failed to get url %s, %w, status code: %d, dump: %s", urlStr, ErrRequestFailed, resp.StatusCode, resp.Dump())
	}

	var buf bytes.Buffer
	if _, err = io.Copy(&buf, resp.Body); err != nil {
		// %w keeps the read error (e.g. a context cancellation) inspectable.
		return nil, fmt.Errorf("failed to read response body: %w", err)
	}

	return &buf, nil
}
// Meta is the link preview metadata extracted from an HTML document's <head>.
type Meta struct {
	// Title is the trimmed text of the <title> element.
	Title string
	// Description is the content of the "description" meta tag.
	Description string
	// Favicon is the href of the document's icon link, as written in the page.
	Favicon string
	// Author is the content of the "author" meta tag.
	Author string
	// Keywords holds the values taken from the "keywords" meta tags.
	Keywords []string
	// OpenGraph is the OpenGraph metadata parsed from the same document.
	OpenGraph opengraph.OpenGraph
}
// newMetaFrom extracts preview metadata from the <head> of doc.
//
// Title, Description, Favicon and Author are whitespace-trimmed; a missing
// tag or attribute yields an empty string. Keywords are split on commas
// (the keywords meta content is a comma-separated list), trimmed, and empty
// entries are dropped, so the slice is nil when the page declares none.
//
// When the document has neither a <title> nor an OpenGraph title, the zero
// Meta is returned and every other extracted field is discarded.
func newMetaFrom(doc *goquery.Document) Meta {
	var keywords []string
	doc.Find("head > meta[name='keywords']").Each(func(_ int, s *goquery.Selection) {
		for _, keyword := range strings.Split(s.AttrOr("content", ""), ",") {
			if keyword = strings.TrimSpace(keyword); keyword != "" {
				keywords = append(keywords, keyword)
			}
		}
	})

	meta := Meta{
		Title:       strings.TrimSpace(doc.Find("head > title").Text()),
		Description: strings.TrimSpace(doc.Find("head > meta[name='description']").AttrOr("content", "")),
		// rel is a space-separated token list; "~=" matches both rel="icon"
		// and the legacy rel="shortcut icon", but not "apple-touch-icon".
		Favicon:   strings.TrimSpace(doc.Find("head > link[rel~='icon']").AttrOr("href", "")),
		Author:    strings.TrimSpace(doc.Find("head > meta[name='author']").AttrOr("content", "")),
		Keywords:  keywords,
		OpenGraph: opengraph.NewOpenGraphMetadataFromDocument(doc),
	}

	// Without any title there is nothing meaningful to preview.
	if meta.Title == "" && meta.OpenGraph.Title == "" {
		return Meta{}
	}

	return meta
}