Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,11 @@ Example use:

The `--entry` value can be a file path or URL pointing to a catalog, collection, or item. The stats output is a JSON object with top-level properties for catalog, collection, and item stats.

The structure of the output conforms with the schema of the [STAC Stats extension](https://github.com/stac-extensions/stats), so the results can be added to a STAC entrypoint to provide stats on child catalogs, collections, and items. When generating output to be added to a catalog or collection, you don't want to include counts for the entrypoint itself in the reported statistics. The `--exclude-entry` flag is used to report statistics on resources linked from the entry but not on the entry itself.
The structure of the output conforms to the schema of the [STAC Stats extension](https://github.com/stac-extensions/stats), so the results can be added to a STAC entrypoint to provide stats on child catalogs, collections, and items. The `stac stats` command can write out a copy of the provided entrypoint with statistics added.

To generate statistics for the STAC Stats extension, run the following:
To write out a version of a catalog or collection that includes metadata for the STAC Stats extension, run the following:

stac stats --entry path/to/catalog.json --exclude-entry

Paste the resulting top-level `stats:*` prefixed properties into your `catalog.json` and add the extension identifier to your catalog's `stac_extensions` property as described by the [STAC Stats extension](https://github.com/stac-extensions/stats).
stac stats --entry path/to/catalog.json --output path/to/catalog-with-stats.json

## Library Use

Expand Down
3 changes: 0 additions & 3 deletions cmd/stac/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ const (
// version flags
flagVerbose = "verbose"

// stats flags
flagExcludeEntry = "exclude-entry"

// common flags
flagLogLevel = "log-level"
flagEntry = "entry"
Expand Down
82 changes: 64 additions & 18 deletions cmd/stac/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,21 @@ import (
"encoding/json"
"fmt"
"os"
"strings"
"sync"
"time"

"github.com/google/go-github/v45/github"
"github.com/planetlabs/go-stac/crawler"
"github.com/schollz/progressbar/v3"
"github.com/urfave/cli/v2"
)

// Coordinates of the GitHub repository that hosts the STAC Stats extension.
// The stats command passes them to the GitHub client to look up the latest
// release tag and uses them to build the extension schema URL root
// (https://<owner>.github.io/<name>/).
const (
// statsRepoOwner is the owner of the extension repository on GitHub.
statsRepoOwner = "stac-extensions"
// statsRepoName is the name of the extension repository on GitHub.
statsRepoName = "stats"
)

type Stats struct {
Catalogs *ResourceStats `json:"stats:catalogs,omitempty"`
Collections *ResourceStats `json:"stats:collections,omitempty"`
Expand All @@ -33,22 +40,35 @@ var statsCommand = &cli.Command{
Flags: []cli.Flag{
&cli.StringFlag{
Name: flagEntry,
Usage: "Path to STAC resource (catalog, collection, or item) to crawl",
Usage: "Path or URL to STAC resource (catalog, collection, or item) to crawl",
EnvVars: []string{toEnvVar(flagEntry)},
},
&cli.BoolFlag{
Name: flagExcludeEntry,
Usage: "Do not count the entry itself",
Value: false,
EnvVars: []string{toEnvVar(flagExcludeEntry)},
},
&cli.BoolFlag{
Name: flagNoRecursion,
Usage: "Visit a single resource",
EnvVars: []string{toEnvVar(flagNoRecursion)},
&cli.StringFlag{
Name: flagOutput,
Usage: "Path to write a version of the entry resource with statistics added (if not provided, stats will be written to stdout)",
EnvVars: []string{toEnvVar(flagOutput)},
},
},
Action: func(ctx *cli.Context) error {
rewriteWithStats := false

outputPath := ctx.String(flagOutput)
var entryResource crawler.Resource
var extensionReleaseTag string

if outputPath != "" {
client := github.NewClient(nil)
release, _, releaseErr := client.Repositories.GetLatestRelease(ctx.Context, statsRepoOwner, statsRepoName)
if releaseErr != nil {
return fmt.Errorf("failed to get latest release information for https://github.com/%s/%s: %w", statsRepoOwner, statsRepoName, releaseErr)
}
extensionReleaseTag = release.GetTagName()
if extensionReleaseTag == "" {
return fmt.Errorf("latest release for https://github.com/%s/%s has no version identifier", statsRepoOwner, statsRepoName)
}
rewriteWithStats = true
}

entryPath := ctx.String(flagEntry)
if entryPath == "" {
return fmt.Errorf("missing --%s", flagEntry)
Expand All @@ -70,11 +90,11 @@ var statsCommand = &cli.Command{
progressbar.OptionClearOnFinish(),
)

skip := ctx.Bool(flagExcludeEntry)
noRecursion := ctx.Bool(flagNoRecursion)
skip := rewriteWithStats

visitor := func(resource crawler.Resource, info *crawler.ResourceInfo) error {
if skip {
entryResource = resource
skip = false
return nil
}
Expand Down Expand Up @@ -148,10 +168,6 @@ var statsCommand = &cli.Command{
_ = bar.Add(1)
bar.Describe(fmt.Sprintf("catalogs: %d; collections: %d; items: %d", catalogs, collections, items))

if noRecursion {
return crawler.ErrStopRecursion
}

return nil
}

Expand All @@ -161,6 +177,36 @@ var statsCommand = &cli.Command{
}

_ = bar.Finish()
return json.NewEncoder(os.Stdout).Encode(stats)

if !rewriteWithStats {
return json.NewEncoder(os.Stdout).Encode(stats)
}

extensionSchemaRoot := fmt.Sprintf("https://%s.github.io/%s/", statsRepoOwner, statsRepoName)
extensions := []string{fmt.Sprintf("%s%s/schema.json", extensionSchemaRoot, extensionReleaseTag)}

for _, extension := range entryResource.Extensions() {
if strings.HasPrefix(extension, extensionSchemaRoot) {
continue
}
extensions = append(extensions, extension)
}

entryResource["stac_extensions"] = extensions
if stats.Catalogs != nil {
entryResource["stats:catalogs"] = stats.Catalogs
}
if stats.Collections != nil {
entryResource["stats:collections"] = stats.Collections
}
if stats.Items != nil {
entryResource["stats:items"] = stats.Items
}

data, jsonErr := json.MarshalIndent(orderedMap(entryResource), "", " ")
if jsonErr != nil {
return fmt.Errorf("failed to encode resource as JSON: %w", jsonErr)
}
return os.WriteFile(outputPath, data, 0644)
},
}
4 changes: 3 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ require (
github.com/dlclark/regexp2 v1.7.0
github.com/go-logr/logr v1.2.3
github.com/go-logr/zapr v1.2.3
github.com/google/go-github/v45 v45.2.0
github.com/hashicorp/go-retryablehttp v0.7.1
github.com/santhosh-tekuri/jsonschema/v5 v5.0.0
github.com/schollz/progressbar/v3 v3.9.0
github.com/schollz/progressbar/v3 v3.8.7
github.com/stretchr/testify v1.8.0
github.com/tschaub/retry v1.0.0
github.com/urfave/cli/v2 v2.11.1
Expand All @@ -19,6 +20,7 @@ require (
require (
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/google/go-querystring v1.1.0 // indirect
github.com/hashicorp/go-cleanhttp v0.5.1 // indirect
github.com/mattn/go-runewidth v0.0.13 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
Expand Down
9 changes: 9 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/zapr v1.2.3 h1:a9vnzlIBPQBBkeaR9IuMUfmVOrQlkoC4YfPoFkX3T7A=
github.com/go-logr/zapr v1.2.3/go.mod h1:eIauM6P8qSvTw5o2ez6UEAfGjQKrxQTl5EoK+Qa2oG4=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg=
github.com/google/go-github/v45 v45.2.0 h1:5oRLszbrkvxDDqBCNj2hjDZMKmvexaZ1xw/FCD+K3FI=
github.com/google/go-github/v45 v45.2.0/go.mod h1:FObaZJEDSTa/WGCzZ2Z3eoCDXWJKMenWWTrd8jrta28=
github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8=
github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU=
github.com/hashicorp/go-cleanhttp v0.5.1 h1:dH3aiDG9Jvb5r5+bYHsikaOUIpcM0xvgMXVoDkXMzJM=
github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI=
Expand All @@ -38,6 +44,8 @@ github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/schollz/progressbar/v3 v3.8.7 h1:rtje4lnXVD1Dy/RtPpGd2ijLCmQ7Su3G2ia8dJcRKIo=
github.com/schollz/progressbar/v3 v3.8.7/go.mod h1:W5IEwbJecncFGBvuEh4A7HT1nZZ6WNIL2i3qbnI0WKY=
github.com/schollz/progressbar/v3 v3.9.0 h1:k9SRNQ8KZyibz1UZOaKxnkUE3iGtmGSDt1YY9KlCYQk=
github.com/schollz/progressbar/v3 v3.9.0/go.mod h1:W5IEwbJecncFGBvuEh4A7HT1nZZ6WNIL2i3qbnI0WKY=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
Expand Down Expand Up @@ -107,6 +115,7 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn
golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
Expand Down