Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
sunshineplan committed May 2, 2023
0 parents commit 8ea969e
Show file tree
Hide file tree
Showing 22 changed files with 2,053 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text=auto
11 changes: 11 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
version: 2
updates:
- package-ecosystem: "gomod"
directory: "/"
schedule:
interval: "daily"

- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
29 changes: 29 additions & 0 deletions .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
name: Coverage

on:
workflow_run:
workflows: [ Test ]
branches: [ main ]
types:
- completed

jobs:
coverage:
if: ${{ github.event.workflow_run.conclusion == 'success' }}
runs-on: ubuntu-latest
steps:
- name: Checkout Code
uses: actions/checkout@v3

- name: Setup Go
uses: actions/setup-go@v4
with:
go-version: stable

- name: Send Coverage
env:
COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
go test -race -covermode atomic -coverprofile=covprofile ./...
go install github.com/mattn/goveralls@latest
goveralls -coverprofile=covprofile -service=github
44 changes: 44 additions & 0 deletions .github/workflows/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
name: Dependabot

on:
pull_request_target:

jobs:
test:
if: ${{ github.actor == 'dependabot[bot]' }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ windows-latest, ubuntu-latest, macos-latest ]
steps:
- name: Checkout Code
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.sha }}

- name: Setup Go
uses: actions/setup-go@v4
with:
go-version: stable

- name: Test Code
run: go test -v -race ./...

merge:
if: ${{ github.actor == 'dependabot[bot]' }}
runs-on: ubuntu-latest
needs: test
permissions:
pull-requests: write
contents: write
steps:
- uses: actions/checkout@v3
- uses: nick-invision/retry@v2
with:
timeout_minutes: 60
max_attempts: 5
retry_wait_seconds: 60
retry_on: error
command: gh pr merge --auto --squash ${{ github.event.pull_request.html_url }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
25 changes: 25 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
name: Test

on:
push:
branches: [ main ]
pull_request:
branches: [ main ]

jobs:
test:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ windows-latest, ubuntu-latest, macos-latest ]
steps:
- name: Checkout Code
uses: actions/checkout@v3

- name: Setup Go
uses: actions/setup-go@v4
with:
go-version: stable

- name: Test Code
run: go test -v -race ./...
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 sunshineplan

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
228 changes: 228 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
# node

[![GoDev](https://img.shields.io/static/v1?label=godev&message=reference&color=00add8)][godev]
[![Go](https://github.com/sunshineplan/node/workflows/Test/badge.svg)][actions]
[![CoverageStatus](https://coveralls.io/repos/github/sunshineplan/node/badge.svg?branch=main&service=github)][coveralls]
[![GoReportCard](https://goreportcard.com/badge/github.com/sunshineplan/node)][goreportcard]

[godev]: https://pkg.go.dev/github.com/sunshineplan/node "GoDev"
[actions]: https://github.com/sunshineplan/node/actions "GitHub Actions Page"
[coveralls]: https://coveralls.io/github/sunshineplan/node?branch=main "Coverage Status"
[goreportcard]: https://goreportcard.com/report/github.com/sunshineplan/node "Go Report Card"

Node is a Go package for parsing HTML and XML documents, inspired by the popular Python library Beautiful Soup. Node provides APIs for extracting data from HTML and XML documents by traversing the parse tree and accessing elements and attributes.

## Installation

To install Node, use the `go get` command:
```bash
go get -u github.com/sunshineplan/node
```

## Usage

```go
package main

import (
"fmt"

"github.com/sunshineplan/node"
)

func main() {
// Parse an HTML document from a string
doc, err := node.ParseHTML("<html><head><title>Page Title</title></head><body><p>Hello, World!</p></body></html>")
if err != nil {
fmt.Println(err)
return
}

// Find the page title
title := doc.Find(node.Descendant, node.Title)
fmt.Println(title.GetText())

// Find the first paragraph
p := doc.Find(node.Descendant, node.P)
fmt.Println(p.GetText())

// Find all paragraphs
paragraphs := doc.FindAll(node.Descendant, node.P)
for _, p := range paragraphs {
fmt.Println(p.GetText())
}
}
```

## API

```go
// Node is an interface representing an HTML node.
type Node interface {
HtmlNode

// String returns a TextNode if the node has only one child whose type is text, otherwise returns nil.
String() TextNode

// Strings returns all of the text nodes inside this node.
Strings() []TextNode

// StrippedStrings returns a list of strings generated by Strings, where strings consisting entirely of
// whitespace are ignored, and whitespace at the beginning and end of each string is removed.
StrippedStrings() []string

// GetText concatenates the content of all of the text nodes.
GetText() string
}

// TextNode is an interface representing a text node.
type TextNode interface {
HtmlNode

// String returns content for text node.
String() string
}

// HtmlNode is an interface representing an HTML node.
type HtmlNode interface {
// Raw returns the original *html.Node.
Raw() *html.Node
// ToNode converts HtmlNode to Node.
ToNode() Node
// ToTextNode converts HtmlNode to TextNode.
// It will panic if the node type is not text node.
ToTextNode() TextNode

// Type returns a NodeType.
Type() html.NodeType
// Data returns tag name for element node or content for text node.
Data() string
// Attrs returns an Attributes interface for element node.
Attrs() Attributes
// HasAttr returns whether the node has the given attribute.
HasAttr(string) bool
// HTML renders the node's parse tree as HTML code.
HTML() string
// Readable renders unescaped HTML code.
Readable() string

// Parent returns the parent of this node.
Parent() Node
// FirstChild returns the first child of this node.
FirstChild() Node
// LastChild returns the last child of this node.
LastChild() Node
// PrevSibling returns the previous node that is on the same level of the parse tree.
PrevSibling() Node
// NextSibling returns the next node that is on the same level of the parse tree.
NextSibling() Node
// PrevNode returns the node that was parsed immediately before this node.
PrevNode() Node
// NextNode returns the node that was parsed immediately after this node.
NextNode() Node

// Parents returns all of this node's parents recursively.
Parents() []Node
// Children returns all of this node's direct children.
Children() []Node
// Descendants returns all of this node's children recursively.
Descendants() []Node
// PrevSiblings returns all of this node's previous nodes that are on the same level of the parse tree.
PrevSiblings() []Node
// NextSiblings returns all of this node's next nodes that are on the same level of the parse tree.
NextSiblings() []Node
// PrevNodes returns all of the nodes that were parsed before this node.
PrevNodes() []Node
// NextNodes returns all of the nodes that were parsed after this node.
NextNodes() []Node

// Finder includes a set of find methods.
Finder
}

// Attributes is an interface that describes a node's attributes with
// methods for getting and iterating over key-value pairs.
type Attributes interface {
// Range calls the provided function for each key-value pair in the Attributes.
// Iteration stops if the function returns false for any pair.
Range(func(key, value string) bool)

// Get returns the value associated with the specified key and
// a boolean indicating whether the key exists in the Attributes.
Get(key string) (value string, exists bool)
}

// Finder represents a set of methods for finding nodes.
type Finder interface {
// Find searches for a single node in the parse tree based on the specified find method and filters.
Find(FindMethod, TagFilter, ...Filter) Node

// FindN searches for up to n nodes in the parse tree based on the specified find method and filters.
FindN(FindMethod, int, TagFilter, ...Filter) []Node

// FindAll searches for all nodes in the parse tree based on the specified find method and filters.
FindAll(FindMethod, TagFilter, ...Filter) []Node

// FindString searches for a single text node in the parse tree based on the specified find method and filter.
FindString(FindMethod, StringFilter) TextNode

// FindStringN searches for up to n text nodes in the parse tree based on the specified find method and filter.
FindStringN(FindMethod, int, StringFilter) []TextNode

// FindAllString searches for all text nodes in the parse tree based on the specified find method and filter.
FindAllString(FindMethod, StringFilter) []TextNode
}

// FindMethod represents the method used to search for nodes in the parse tree.
type FindMethod int

const (
// Descendant represents a search for nodes that are descendants of the current node.
Descendant FindMethod = iota

// NoRecursive represents a search for nodes that are direct children of the current node.
NoRecursive

// Parent represents a search for the parent node of the current node.
Parent

// PrevSibling represents a search for the previous sibling node of the current node.
PrevSibling

// NextSibling represents a search for the next sibling node of the current node.
NextSibling

// Previous represents a search for the previous node in the parse tree.
Previous

// Next represents a search for the next node in the parse tree.
Next
)

// TagFilter represents an interface that can be used to filter nodes based on the element's tag.
type TagFilter interface {
Ignore() bool
IsMatch(node Node) bool
}

// Filter is an interface that describes a filter that can be used to select nodes.
type Filter interface {
// IsAttribute returns true if the filter represents an attribute filter.
IsAttribute() bool

// IsMatch returns true if the filter matches the given node.
IsMatch(node Node) bool
}

// StringFilter interface extends the Filter interface and defines
// a method for checking if the filter represents a string filter.
type StringFilter interface {
Filter
IsString() bool
}
```

## License

[The MIT License (MIT)](https://raw.githubusercontent.com/sunshineplan/node/main/LICENSE)
Loading

0 comments on commit 8ea969e

Please sign in to comment.