-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 8ea969e
Showing
22 changed files
with
2,053 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Auto detect text files and perform LF normalization | ||
* text=auto |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
version: 2 | ||
updates: | ||
- package-ecosystem: "gomod" | ||
directory: "/" | ||
schedule: | ||
interval: "daily" | ||
|
||
- package-ecosystem: "github-actions" | ||
directory: "/" | ||
schedule: | ||
interval: "daily" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
name: Coverage | ||
|
||
on: | ||
workflow_run: | ||
workflows: [ Test ] | ||
branches: [ main ] | ||
types: | ||
- completed | ||
|
||
jobs: | ||
coverage: | ||
if: ${{ github.event.workflow_run.conclusion == 'success' }} | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: Checkout Code | ||
uses: actions/checkout@v3 | ||
|
||
- name: Setup Go | ||
uses: actions/setup-go@v4 | ||
with: | ||
go-version: stable | ||
|
||
- name: Send Coverage | ||
env: | ||
COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||
run: | | ||
go test -race -covermode atomic -coverprofile=covprofile ./... | ||
go install github.com/mattn/goveralls@latest | ||
goveralls -coverprofile=covprofile -service=github |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
name: Dependabot | ||
|
||
on: | ||
pull_request_target: | ||
|
||
jobs: | ||
test: | ||
if: ${{ github.actor == 'dependabot[bot]' }} | ||
runs-on: ${{ matrix.os }} | ||
strategy: | ||
matrix: | ||
os: [ windows-latest, ubuntu-latest, macos-latest ] | ||
steps: | ||
- name: Checkout Code | ||
uses: actions/checkout@v3 | ||
with: | ||
ref: ${{ github.event.pull_request.head.sha }} | ||
|
||
- name: Setup Go | ||
uses: actions/setup-go@v4 | ||
with: | ||
go-version: stable | ||
|
||
- name: Test Code | ||
run: go test -v -race ./... | ||
|
||
merge: | ||
if: ${{ github.actor == 'dependabot[bot]' }} | ||
runs-on: ubuntu-latest | ||
needs: test | ||
permissions: | ||
pull-requests: write | ||
contents: write | ||
steps: | ||
- uses: actions/checkout@v3 | ||
- uses: nick-invision/retry@v2 | ||
with: | ||
timeout_minutes: 60 | ||
max_attempts: 5 | ||
retry_wait_seconds: 60 | ||
retry_on: error | ||
command: gh pr merge --auto --squash ${{ github.event.pull_request.html_url }} | ||
env: | ||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
name: Test | ||
|
||
on: | ||
push: | ||
branches: [ main ] | ||
pull_request: | ||
branches: [ main ] | ||
|
||
jobs: | ||
test: | ||
runs-on: ${{ matrix.os }} | ||
strategy: | ||
matrix: | ||
os: [ windows-latest, ubuntu-latest, macos-latest ] | ||
steps: | ||
- name: Checkout Code | ||
uses: actions/checkout@v3 | ||
|
||
- name: Setup Go | ||
uses: actions/setup-go@v4 | ||
with: | ||
go-version: stable | ||
|
||
- name: Test Code | ||
run: go test -v -race ./... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2023 sunshineplan | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,228 @@ | ||
# node | ||
|
||
[![GoDev](https://img.shields.io/static/v1?label=godev&message=reference&color=00add8)][godev] | ||
[![Go](https://github.com/sunshineplan/node/workflows/Test/badge.svg)][actions] | ||
[![CoverageStatus](https://coveralls.io/repos/github/sunshineplan/node/badge.svg?branch=main&service=github)][coveralls] | ||
[![GoReportCard](https://goreportcard.com/badge/github.com/sunshineplan/node)][goreportcard] | ||
|
||
[godev]: https://pkg.go.dev/github.com/sunshineplan/node "GoDev" | ||
[actions]: https://github.com/sunshineplan/node/actions "GitHub Actions Page" | ||
[coveralls]: https://coveralls.io/github/sunshineplan/node?branch=main "Coverage Status" | ||
[goreportcard]: https://goreportcard.com/report/github.com/sunshineplan/node "Go Report Card" | ||
|
||
Node is a Go package for parsing HTML and XML documents, inspired by the popular Python library Beautiful Soup. Node provides APIs for extracting data from HTML and XML documents by traversing the parse tree and accessing elements and attributes. | ||
|
||
## Installation | ||
|
||
To install Node, use the `go get` command: | ||
```bash | ||
go get -u github.com/sunshineplan/node | ||
``` | ||
|
||
## Usage | ||
|
||
```go | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
"strings" | ||
|
||
"github.com/sunshineplan/node" | ||
) | ||
|
||
func main() { | ||
// Parse an HTML document from a string | ||
doc, err := node.ParseHTML("<html><head><title>Page Title</title></head><body><p>Hello, World!</p></body></html>") | ||
if err != nil { | ||
fmt.Println(err) | ||
return | ||
} | ||
|
||
// Find the page title | ||
title := doc.Find(node.Descendant, node.Title) | ||
fmt.Println(title.GetText()) | ||
|
||
// Find the first paragraph | ||
p := doc.Find(node.Descendant, node.P) | ||
fmt.Println(p.GetText()) | ||
|
||
// Find all paragraphs | ||
paragraphs := doc.FindAll(node.Descendant, node.P) | ||
for _, p := range paragraphs { | ||
fmt.Println(p.GetText()) | ||
} | ||
} | ||
``` | ||
|
||
## API | ||
|
||
```go | ||
// Node is an interface representing an HTML node. | ||
type Node interface { | ||
HtmlNode | ||
|
||
// String returns a TextNode if the node has only one child whose type is text, otherwise returns nil. | ||
String() TextNode | ||
|
||
// Strings returns all of the text nodes inside this node. | ||
Strings() []TextNode | ||
|
||
// StrippedStrings returns a list of strings generated by Strings, where strings consisting entirely of | ||
// whitespace are ignored, and whitespace at the beginning and end of strings is removed. | ||
StrippedStrings() []string | ||
|
||
// GetText concatenates all of the text node's content. | ||
GetText() string | ||
} | ||
|
||
// TextNode is an interface representing a text node. | ||
type TextNode interface { | ||
HtmlNode | ||
|
||
// String returns content for text node. | ||
String() string | ||
} | ||
|
||
// HtmlNode is an interface representing an HTML node. | ||
type HtmlNode interface { | ||
// Raw returns the original *html.Node. | ||
Raw() *html.Node | ||
// ToNode converts HtmlNode to Node. | ||
ToNode() Node | ||
// ToTextNode converts HtmlNode to TextNode. | ||
// It will panic if the node type is not text node. | ||
ToTextNode() TextNode | ||
|
||
// Type returns a NodeType. | ||
Type() html.NodeType | ||
// Data returns tag name for element node or content for text node. | ||
Data() string | ||
// Attrs returns an Attributes interface for element node. | ||
Attrs() Attributes | ||
// HasAttr reports whether the node has the given attribute. | ||
HasAttr(string) bool | ||
// HTML renders the node's parse tree as HTML code. | ||
HTML() string | ||
// Readable renders unescaped HTML code. | ||
Readable() string | ||
|
||
// Parent returns the parent of this node. | ||
Parent() Node | ||
// FirstChild returns the first child of this node. | ||
FirstChild() Node | ||
// LastChild returns the last child of this node. | ||
LastChild() Node | ||
// PrevSibling returns the previous node that is on the same level of the parse tree. | ||
PrevSibling() Node | ||
// NextSibling returns the next node that is on the same level of the parse tree. | ||
NextSibling() Node | ||
// PrevNode returns the node that was parsed immediately before this node. | ||
PrevNode() Node | ||
// NextNode returns the node that was parsed immediately after this node. | ||
NextNode() Node | ||
|
||
// Parents iterates over all of this node's parents recursively. | ||
Parents() []Node | ||
// Children returns all of this node's direct children. | ||
Children() []Node | ||
// Descendants iterates over all of this node's children recursively. | ||
Descendants() []Node | ||
// PrevSiblings returns all of this node's previous nodes that are on the same level of the parse tree. | ||
PrevSiblings() []Node | ||
// NextSiblings returns all of this node's next nodes that are on the same level of the parse tree. | ||
NextSiblings() []Node | ||
// PrevNodes returns all of the nodes that were parsed before this node. | ||
PrevNodes() []Node | ||
// NextNodes returns all of the nodes that were parsed after this node. | ||
NextNodes() []Node | ||
|
||
// Finder includes a set of find methods. | ||
Finder | ||
} | ||
|
||
// Attributes is an interface that describes a node's attributes with | ||
// methods for getting and iterating over key-value pairs. | ||
type Attributes interface { | ||
// Range calls the provided function for each key-value pair in the Attributes; | ||
// iteration stops if the function returns false for any pair. | ||
Range(func(key, value string) bool) | ||
|
||
// Get returns the value associated with the specified key and | ||
// a boolean indicating whether the key exists in the Attributes. | ||
Get(key string) (value string, exists bool) | ||
} | ||
|
||
// Finder represents a set of methods for finding nodes. | ||
type Finder interface { | ||
// Find searches for a single node in the parse tree based on the specified find method and filters. | ||
Find(FindMethod, TagFilter, ...Filter) Node | ||
|
||
// FindN searches for up to n nodes in the parse tree based on the specified find method and filters. | ||
FindN(FindMethod, int, TagFilter, ...Filter) []Node | ||
|
||
// FindAll searches for all nodes in the parse tree based on the specified find method and filters. | ||
FindAll(FindMethod, TagFilter, ...Filter) []Node | ||
|
||
// FindString searches for a single text node in the parse tree based on the specified find method and filters. | ||
FindString(FindMethod, StringFilter) TextNode | ||
|
||
// FindStringN searches for up to n text nodes in the parse tree based on the specified find method and filters. | ||
FindStringN(FindMethod, int, StringFilter) []TextNode | ||
|
||
// FindAllString searches for all text nodes in the parse tree based on the specified find method and filters. | ||
FindAllString(FindMethod, StringFilter) []TextNode | ||
} | ||
|
||
// FindMethod represents the method used to search for nodes in the parse tree. | ||
type FindMethod int | ||
|
||
const ( | ||
// Descendant represents a search for nodes that are descendants of the current node. | ||
Descendant FindMethod = iota | ||
|
||
// NoRecursive represents a search for nodes that are direct children of the current node. | ||
NoRecursive | ||
|
||
// Parent represents a search for the parent node of the current node. | ||
Parent | ||
|
||
// PrevSibling represents a search for the previous sibling node of the current node. | ||
PrevSibling | ||
|
||
// NextSibling represents a search for the next sibling node of the current node. | ||
NextSibling | ||
|
||
// Previous represents a search for the previous node in the parse tree. | ||
Previous | ||
|
||
// Next represents a search for the next node in the parse tree. | ||
Next | ||
) | ||
|
||
// TagFilter represents an interface that can be used to filter nodes based on an element's tag. | ||
type TagFilter interface { | ||
Ignore() bool | ||
IsMatch(node Node) bool | ||
} | ||
|
||
// Filter is an interface that describes a filter that can be used to select nodes. | ||
type Filter interface { | ||
// IsAttribute returns true if the filter represents an attribute filter. | ||
IsAttribute() bool | ||
|
||
// IsMatch returns true if the filter matches the given node. | ||
IsMatch(node Node) bool | ||
} | ||
|
||
// StringFilter interface extends the Filter interface and defines | ||
// a method for checking if the filter represents a string filter. | ||
type StringFilter interface { | ||
Filter | ||
IsString() bool | ||
} | ||
``` | ||
|
||
## License | ||
|
||
[The MIT License (MIT)](https://raw.githubusercontent.com/sunshineplan/node/main/LICENSE) |
Oops, something went wrong.