Skip to content

Commit

Permalink
Removed cascadia dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
Rohaq authored and qdm12 committed Aug 15, 2022
1 parent 80ebc4e commit 8b1018f
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 20 deletions.
1 change: 0 additions & 1 deletion go.mod
Expand Up @@ -3,7 +3,6 @@ module github.com/qdm12/gluetun
go 1.17

require (
github.com/andybalholm/cascadia v1.3.1
github.com/breml/rootcerts v0.2.6
github.com/fatih/color v1.13.0
github.com/golang/mock v1.6.0
Expand Down
2 changes: 0 additions & 2 deletions go.sum
@@ -1,8 +1,6 @@
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7/go.mod h1:6zEj6s6u/ghQa61ZWa/C2Aw3RkjiTBOix7dkqa1VLIs=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/asaskevich/govalidator v0.0.0-20180720115003-f9ffefc3facf/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY=
Expand Down
43 changes: 26 additions & 17 deletions internal/provider/slickvpn/updater/website.go
Expand Up @@ -4,15 +4,15 @@ import (
"context"
"errors"
"fmt"
"io"
"net/http"
"regexp"
"strings"

"github.com/andybalholm/cascadia"
htmlutils "github.com/qdm12/gluetun/internal/updater/html"
"golang.org/x/net/html"
)

var ErrHTTPStatusCode = errors.New("HTTP status code is not OK")

func fetchAndParseWebsite(ctx context.Context, client *http.Client) (
hostToData map[string]serverData, err error) {
const url = "https://www.slickvpn.com/locations/"
Expand All @@ -26,7 +26,17 @@ func fetchAndParseWebsite(ctx context.Context, client *http.Client) (
return nil, fmt.Errorf("do HTTP request: %w", err)
}

hostToData, err = parseHTML(response.Body)
if response.StatusCode != http.StatusOK {
return nil, fmt.Errorf("%w: %d %s", ErrHTTPStatusCode, response.StatusCode, response.Status)
}

rootNode, err := html.Parse(response.Body)
if err != nil {
_ = response.Body.Close()
return nil, fmt.Errorf("parsing HTML code: %w", err)
}

hostToData, err = parseHTML(rootNode)
if err != nil {
_ = response.Body.Close()
return nil, fmt.Errorf("parsing HTML: %w", err)
Expand All @@ -47,18 +57,18 @@ type serverData struct {
city string
}

var (
locationTableSelector = cascadia.MustCompile(`table#location-table > tbody > tr`) //nolint:gochecknoglobals
ovpnHrefSelector = cascadia.MustCompile(`a[href$='.ovpn']`) //nolint:gochecknoglobals
)
func parseHTML(rootNode *html.Node) (hostToData map[string]serverData, err error) {
locationTableNode := htmlutils.GetFirstNodeByID(rootNode, "location-table")
if locationTableNode == nil {
return nil, fmt.Errorf("unable to find html node with matching id")
}

func parseHTML(body io.Reader) (hostToData map[string]serverData, err error) {
root, err := html.Parse(body)
if err != nil {
return nil, fmt.Errorf("parsing response body: %w", err)
tBodyNode := htmlutils.GetFirstNodeByType(locationTableNode, "tbody")
if tBodyNode == nil {
return nil, fmt.Errorf("unable to find tbody tag inside location table")
}

rowNodes := cascadia.QueryAll(root, locationTableSelector)
rowNodes := htmlutils.GetNodesByType(tBodyNode, "tr")
hostToData = make(map[string]serverData, len(rowNodes))

for _, rowNode := range rowNodes {
Expand All @@ -74,20 +84,19 @@ func parseHTML(body io.Reader) (hostToData map[string]serverData, err error) {
for cellNode := rowNode.FirstChild; cellNode != nil; cellNode = cellNode.NextSibling {
switch columnIndex {
case columnIndexContinent:
// TODO Translate continent to region
data.region = cellNode.FirstChild.Data
case columnIndexCountry:
data.country = cellNode.FirstChild.Data
case columnIndexCity:
data.city = cellNode.FirstChild.Data
case columnIndexConfig:
linkNodes := cascadia.QueryAll(cellNode, ovpnHrefSelector)
linkNodes := htmlutils.GetNodesByType(cellNode, "a")
for _, linkNode := range linkNodes {
if !strings.EqualFold(linkNode.FirstChild.Data, "openvpn") {
if htmlutils.GetText(linkNode) != "OpenVPN" {
continue
}

data.ovpnURL, err = getAttributeValue(linkNode, "href")
data.ovpnURL, err = htmlutils.GetAttr(linkNode, "href")
if err != nil {
return nil, fmt.Errorf("get attribute value: %w", err)
}
Expand Down
98 changes: 98 additions & 0 deletions internal/updater/html/parsing.go
@@ -0,0 +1,98 @@
package htmlutils

import (
"container/list"
"errors"
"fmt"

"golang.org/x/net/html"
)

var (
	// ErrAttrNotFound is returned by GetAttr when the node has no
	// attribute whose key equals the requested key.
	ErrAttrNotFound = errors.New("matching attribute not found")
)

// GetText returns the Data field of the first child of n.
// For an element whose first child is a text node, such as
// <a>OpenVPN</a>, this is the text content of that child.
// It returns the empty string when n is nil or has no children,
// instead of panicking on a nil pointer dereference.
func GetText(n *html.Node) string {
	if n == nil || n.FirstChild == nil {
		return ""
	}
	return n.FirstChild.Data
}

// GetAttr looks up an attribute of n by its key and returns the value
// of the first attribute whose Key equals key. If no attribute matches,
// it returns the empty string and ErrAttrNotFound.
func GetAttr(n *html.Node, key string) (string, error) {
	for i := range n.Attr {
		if n.Attr[i].Key != key {
			continue
		}
		return n.Attr[i].Val, nil
	}

	return "", ErrAttrNotFound
}

// CheckAttrMatch reports whether n has an attribute with key attrKey
// whose value is exactly checkValue. A missing attribute yields false.
func CheckAttrMatch(n *html.Node, attrKey string, checkValue string) bool {
	value, err := GetAttr(n, attrKey)
	if err != nil {
		return false
	}
	return value == checkValue
}

// CheckID reports whether the "id" attribute of n is exactly idValue.
// A node without an "id" attribute yields false.
func CheckID(n *html.Node, idValue string) bool {
	id, err := GetAttr(n, "id")
	if err != nil {
		return false
	}
	return id == idValue
}

// CheckNodeType reports whether n is an element node whose tag name
// (its Data field) equals tagType.
func CheckNodeType(n *html.Node, tagType string) bool {
	if n.Type != html.ElementNode {
		return false
	}
	return n.Data == tagType
}

// GetFirstNodeByID searches the tree rooted at n (n included) with bfs
// and returns the first node for which CheckID(node, idValue) is true.
// It returns nil if no node matches.
func GetFirstNodeByID(n *html.Node, idValue string) *html.Node {
	hasID := func(candidate *html.Node) bool {
		return CheckID(candidate, idValue)
	}
	return bfs(n, hasID)
}

// GetFirstNodeByType searches the tree rooted at n (n included) with bfs
// and returns the first node for which CheckNodeType(node, nodeType) is
// true. It returns nil if no node matches.
func GetFirstNodeByType(n *html.Node, nodeType string) *html.Node {
	hasType := func(candidate *html.Node) bool {
		return CheckNodeType(candidate, nodeType)
	}
	return bfs(n, hasType)
}

// GetNodesByType returns the direct children of n that satisfy
// CheckNodeType(child, nodeType), in document order. Only the immediate
// children are inspected; deeper descendants are not visited. The
// returned slice is empty but non-nil when nothing matches.
func GetNodesByType(n *html.Node, nodeType string) []*html.Node {
	matches := make([]*html.Node, 0)
	child := n.FirstChild
	for child != nil {
		if CheckNodeType(child, nodeType) {
			matches = append(matches, child)
		}
		child = child.NextSibling
	}
	return matches
}

// bfs performs a breadth-first search over the tree rooted at rootNode,
// visiting rootNode first and then nodes level by level in sibling order.
// It returns the first node for which match returns true, or nil if no
// node matches. Nil nodes are skipped and a node is never tested twice.
func bfs(rootNode *html.Node,
	match func(node *html.Node) bool) (node *html.Node) {
	seen := map[*html.Node]struct{}{}
	pending := list.New()
	pending.PushBack(rootNode)

	for front := pending.Front(); front != nil; front = pending.Front() {
		current, isNode := pending.Remove(front).(*html.Node)
		if !isNode {
			// Only *html.Node values are ever pushed, so this is a programming error.
			panic(fmt.Sprintf("linked list has bad type %T", front.Value))
		}

		if current == nil {
			continue
		}

		if _, already := seen[current]; already {
			continue
		}
		seen[current] = struct{}{}

		if match(current) {
			return current
		}

		// Enqueue the children so they are examined after the current level.
		child := current.FirstChild
		for child != nil {
			pending.PushBack(child)
			child = child.NextSibling
		}
	}

	return nil
}

0 comments on commit 8b1018f

Please sign in to comment.