From b677a11c917d877c26b2fc2be261c5c2a17a42a5 Mon Sep 17 00:00:00 2001 From: dinosaursrarr Date: Thu, 3 Aug 2023 13:57:42 +0100 Subject: [PATCH] Add bsoup module Responding to request on forum: https://discuss.tidbyt.com/t/beautiful-soup-python-supported/5328/2 I can imagine you saying no, because this provides similar functionality to the existing html module. But I agree with the poster that this is a better API and I would prefer to be able to use it. --- docs/modules.md | 1 + go.mod | 1 + go.sum | 1 + runtime/applet.go | 4 ++++ runtime/applet_test.go | 3 +++ 5 files changed, 10 insertions(+) diff --git a/docs/modules.md b/docs/modules.md index a7d05446fd..9aad04657d 100644 --- a/docs/modules.md +++ b/docs/modules.md @@ -28,6 +28,7 @@ individual modules, please refer to the Starlib documentation. | Module | Description | | --- | --- | +| [`bsoup.star`](https://github.com/qri-io/starlib/blob/master/bsoup) | Beautiful Soup-like functions for HTML | | [`compress/gzip.star`](https://github.com/qri-io/starlib/blob/master/compress/gzip) | gzip decompressing | | [`compress/zipfile.star`](https://github.com/qri-io/starlib/blob/master/zipfile) | zip decompressing | | [`encoding/base64.star`](https://github.com/qri-io/starlib/tree/master/encoding/base64) | Base 64 encoding and decoding | diff --git a/go.mod b/go.mod index 485b35137f..38925cfdb2 100644 --- a/go.mod +++ b/go.mod @@ -60,6 +60,7 @@ require ( github.com/cloudflare/circl v1.3.7 // indirect github.com/cyphar/filepath-securejoin v0.2.4 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/dustmop/soup v1.1.2-0.20190516214245-38228baa104e // indirect github.com/emirpasic/gods v1.18.1 // indirect github.com/gabriel-vasile/mimetype v1.4.2 // indirect github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect diff --git a/go.sum b/go.sum index af16291738..0bc7726ec4 100644 --- a/go.sum +++ b/go.sum @@ -73,6 +73,7 @@ github.com/dgrijalva/jwt-go 
v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/dustmop/soup v1.1.2-0.20190516214245-38228baa104e h1:44fmjqDtdCiUNlSjJVp+w1AOs6na3Y6Ai0aIeseFjkI= github.com/dustmop/soup v1.1.2-0.20190516214245-38228baa104e/go.mod h1:CgNC6SGbT+Xb8wGGvzilttZL1mc5sQ/5KkcxsZttMIk= github.com/elazarl/goproxy v0.0.0-20230808193330-2592e75ae04a h1:mATvB/9r/3gvcejNsXKSkQ6lcIaNec2nyfOdlTBR2lU= github.com/elazarl/goproxy v0.0.0-20230808193330-2592e75ae04a/go.mod h1:Ro8st/ElPeALwNFlcTpWmkr6IoMFfkjXAvTHpevnDsM= diff --git a/runtime/applet.go b/runtime/applet.go index c97995d07a..45f98e74e6 100644 --- a/runtime/applet.go +++ b/runtime/applet.go @@ -13,6 +13,7 @@ import ( starlibgzip "github.com/qri-io/starlib/compress/gzip" starlibbase64 "github.com/qri-io/starlib/encoding/base64" + starlibbsoup "github.com/qri-io/starlib/bsoup" starlibcsv "github.com/qri-io/starlib/encoding/csv" starlibhash "github.com/qri-io/starlib/hash" starlibhtml "github.com/qri-io/starlib/html" @@ -510,6 +511,9 @@ func (a *Applet) loadModule(thread *starlark.Thread, module string) (starlark.St case "xpath.star": return xpath.LoadXPathModule() + case "bsoup.star": + return starlibbsoup.LoadModule() + case "compress/gzip.star": return starlark.StringDict{ starlibgzip.Module.Name: starlibgzip.Module, diff --git a/runtime/applet_test.go b/runtime/applet_test.go index 6ba07d8af6..bcb121f99a 100644 --- a/runtime/applet_test.go +++ b/runtime/applet_test.go @@ -221,6 +221,7 @@ func TestModuleLoading(t *testing.T) { // Our basic set of modules can be imported src := ` load("render.star", "render") +load("bsoup.star", "bsoup") load("encoding/base64.star", "base64") load("encoding/json.star", "json") load("http.star", 
"http") @@ -245,6 +246,8 @@ def main(): fail("re broken") if time.parse_duration("10s").seconds != 10: fail("time broken") + if bsoup.parseHtml("<h1>foo</h1>").find("h1").get_text() != "foo": + fail("bsoup broken") return render.Root(child=render.Box()) ` app, err := NewApplet("test.star", []byte(src))