From def542cd1016a94c08f29b9d5bbbe51971c723b8 Mon Sep 17 00:00:00 2001 From: Tristan Cartledge Date: Mon, 13 Oct 2025 06:58:58 +0000 Subject: [PATCH 1/8] feat: add sanitize command to remove extensions and clean OpenAPI specs --- marshaller/coremodel.go | 23 +- marshaller/unmarshaller.go | 14 + mise-tasks/test-cli | 22 +- openapi/cmd/README.md | 161 +++++++ openapi/cmd/root.go | 1 + openapi/cmd/sanitize.go | 195 ++++++++ openapi/sanitize.go | 419 ++++++++++++++++++ openapi/sanitize_test.go | 322 ++++++++++++++ .../testdata/sanitize/sanitize_expected.yaml | 53 +++ openapi/testdata/sanitize/sanitize_input.yaml | 100 +++++ .../sanitize_keep_components_expected.yaml | 72 +++ .../sanitize_keep_extensions_expected.yaml | 24 + .../sanitize_keep_extensions_input.yaml | 29 ++ .../sanitize_multi_pattern_expected.yaml | 25 ++ .../sanitize_multi_pattern_input.yaml | 31 ++ .../sanitize_no_extensions_input.yaml | 28 ++ .../sanitize/sanitize_pattern_config.yaml | 2 + .../sanitize/sanitize_pattern_expected.yaml | 32 ++ .../sanitize/sanitize_pattern_input.yaml | 37 ++ 19 files changed, 1585 insertions(+), 5 deletions(-) create mode 100644 openapi/cmd/sanitize.go create mode 100644 openapi/sanitize.go create mode 100644 openapi/sanitize_test.go create mode 100644 openapi/testdata/sanitize/sanitize_expected.yaml create mode 100644 openapi/testdata/sanitize/sanitize_input.yaml create mode 100644 openapi/testdata/sanitize/sanitize_keep_components_expected.yaml create mode 100644 openapi/testdata/sanitize/sanitize_keep_extensions_expected.yaml create mode 100644 openapi/testdata/sanitize/sanitize_keep_extensions_input.yaml create mode 100644 openapi/testdata/sanitize/sanitize_multi_pattern_expected.yaml create mode 100644 openapi/testdata/sanitize/sanitize_multi_pattern_input.yaml create mode 100644 openapi/testdata/sanitize/sanitize_no_extensions_input.yaml create mode 100644 openapi/testdata/sanitize/sanitize_pattern_config.yaml create mode 100644 openapi/testdata/sanitize/sanitize_pattern_expected.yaml create mode 100644 openapi/testdata/sanitize/sanitize_pattern_input.yaml diff --git a/marshaller/coremodel.go b/marshaller/coremodel.go index 6440242..20db53a 100644 --- a/marshaller/coremodel.go +++ b/marshaller/coremodel.go @@ -24,13 +24,16 @@ type CoreModeler interface { SetConfig(config *yml.Config) GetConfig() *yml.Config Marshal(ctx context.Context, w io.Writer) error + SetUnknownProperties(props []string) + GetUnknownProperties() []string } type CoreModel struct { - RootNode *yaml.Node // RootNode is the node that was unmarshaled into this model - Valid bool // Valid indicates whether the model passed validation, ie all its required fields were present and ValidYaml is true - ValidYaml bool // ValidYaml indicates whether the model's underlying YAML representation is valid, for example a mapping node was received for a model - Config *yml.Config // Generally only set on the top-level model that was unmarshaled + RootNode *yaml.Node // RootNode is the node that was unmarshaled into this model + Valid bool // Valid indicates whether the model passed validation, ie all its required fields were present and ValidYaml is true + ValidYaml bool // ValidYaml indicates whether the model's underlying YAML representation is valid, for example a mapping node was received for a model + Config *yml.Config // Generally only set on the top-level model that was unmarshaled + UnknownProperties []string // UnknownProperties lists property keys that were present in the YAML but not defined in the model (excludes extensions which start with "x-") } var _ CoreModeler = (*CoreModel)(nil) @@ -86,6 +89,18 @@ func (c *CoreModel) GetConfig() *yml.Config { return c.Config } +func (c *CoreModel) SetUnknownProperties(props []string) { + c.UnknownProperties = props +} + +func (c *CoreModel) GetUnknownProperties() []string { + if c.UnknownProperties == nil { + return []string{} + } + + return c.UnknownProperties +} + // GetJSONPointer returns the JSON pointer path from the topLevelRootNode to this CoreModel's RootNode. // Returns an empty string if the node is not found or if either node is nil. // The returned pointer follows RFC6901 format (e.g., "/path/to/node"). diff --git a/marshaller/unmarshaller.go b/marshaller/unmarshaller.go index 9a3a609..302691f 100644 --- a/marshaller/unmarshaller.go +++ b/marshaller/unmarshaller.go @@ -347,6 +347,10 @@ func unmarshalModel(ctx context.Context, parentName string, node *yaml.Node, str jobValidationErrs := make([][]error, numJobs) + // Track unknown properties (non-extension, non-field, non-embedded map properties) + var unknownPropertiesMutex sync.Mutex + var unknownProperties []string + // Mutex to protect concurrent access to extensionsField var extensionsMutex sync.Mutex @@ -381,6 +385,11 @@ func unmarshalModel(ctx context.Context, parentName string, node *yaml.Node, str return nil } jobMapContent[i/2] = append(jobMapContent[i/2], keyNode, valueNode) + } else { + // This is an unknown property (not a recognized field, not an extension, not in embedded map) + unknownPropertiesMutex.Lock() + unknownProperties = append(unknownProperties, key) + unknownPropertiesMutex.Unlock() } } else { // Get field info from cache and field value directly @@ -438,6 +447,11 @@ func unmarshalModel(ctx context.Context, parentName string, node *yaml.Node, str validationErrs = append(validationErrs, embeddedMapValidationErrs...) } + // Store unknown properties in the core model if any were found + if len(unknownProperties) > 0 { + unmarshallable.SetUnknownProperties(unknownProperties) + } + // Use the errors to determine the validity of the model unmarshallable.DetermineValidity(validationErrs) diff --git a/mise-tasks/test-cli b/mise-tasks/test-cli index 285faa6..39bb731 100755 --- a/mise-tasks/test-cli +++ b/mise-tasks/test-cli @@ -26,6 +26,7 @@ $CLI spec validate --help > /dev/null $CLI spec upgrade --help > /dev/null $CLI spec inline --help > /dev/null $CLI spec clean --help > /dev/null +$CLI spec sanitize --help > /dev/null $CLI spec bundle --help > /dev/null $CLI spec join --help > /dev/null $CLI spec bootstrap --help > /dev/null @@ -122,6 +123,23 @@ if ! diff -q dist/test/test-cleaned-empty.yaml openapi/testdata/clean/clean_empt exit 1 fi +# Test sanitize command with known test files +echo " ✓ Testing sanitize command..." +$CLI spec sanitize openapi/testdata/sanitize/sanitize_input.yaml dist/test/test-sanitized.yaml > /dev/null +$CLI spec sanitize --config openapi/testdata/sanitize/sanitize_pattern_config.yaml openapi/testdata/sanitize/sanitize_pattern_input.yaml dist/test/test-sanitized-pattern.yaml > /dev/null + +# Compare sanitize outputs with expected +echo " ✓ Comparing sanitize outputs with expected..." +if ! diff -q dist/test/test-sanitized.yaml openapi/testdata/sanitize/sanitize_expected.yaml > /dev/null; then + echo " ❌ Sanitize output differs from expected" + exit 1 +fi + +if ! diff -q dist/test/test-sanitized-pattern.yaml openapi/testdata/sanitize/sanitize_pattern_expected.yaml > /dev/null; then + echo " ❌ Sanitize pattern output differs from expected" + exit 1 +fi + # Test join command with known test files echo " ✓ Testing join command..." $CLI spec join openapi/testdata/join/main.yaml openapi/testdata/join/subdir/second.yaml openapi/testdata/join/third.yaml dist/test/test-joined-counter.yaml > /dev/null @@ -161,6 +179,8 @@ $CLI spec validate dist/test/test-joined-counter.yaml > /dev/null $CLI spec validate dist/test/test-joined-filepath.yaml > /dev/null $CLI spec validate dist/test/test-cleaned.yaml > /dev/null $CLI spec validate dist/test/test-cleaned-empty.yaml > /dev/null +$CLI spec validate dist/test/test-sanitized.yaml > /dev/null +$CLI spec validate dist/test/test-sanitized-pattern.yaml > /dev/null $CLI spec validate dist/test/test-joined-conflicts.yaml > /dev/null # Test arazzo validation with known test files @@ -232,7 +252,7 @@ echo "✅ All CLI integration tests passed!" echo "📊 Test summary:" echo " - Tested all command help outputs" echo " - Validated known good and bad files" -echo " - Tested bootstrap, upgrade, inline, clean, bundle, join commands" +echo " - Tested bootstrap, upgrade, inline, clean, sanitize, bundle, join commands" echo " - Compared outputs with expected results" echo " - Tested arazzo validation" echo " - Tested overlay validation, apply, and compare" diff --git a/openapi/cmd/README.md b/openapi/cmd/README.md index 31d98a8..90c3e8d 100644 --- a/openapi/cmd/README.md +++ b/openapi/cmd/README.md @@ -12,6 +12,7 @@ OpenAPI specifications define REST APIs in a standard format. These commands hel - [`upgrade`](#upgrade) - [`inline`](#inline) - [`clean`](#clean) + - [`sanitize`](#sanitize) - [`bundle`](#bundle) - [Bundle vs Inline](#bundle-vs-inline) - [`join`](#join) @@ -216,6 +217,166 @@ components: - You're preparing a specification for publication or distribution - You want to reduce document size and complexity - You're maintaining a large specification with many components + +### `sanitize` + +Remove unwanted elements from an OpenAPI specification to create clean, standards-compliant documents. + +```bash +# Default sanitization (remove all extensions and unused components) +openapi spec sanitize ./spec.yaml + +# Sanitize and write to new file +openapi spec sanitize ./spec.yaml ./clean-spec.yaml + +# Sanitize in-place +openapi spec sanitize -w ./spec.yaml + +# Use config file for selective sanitization +openapi spec sanitize --config sanitize-config.yaml ./spec.yaml +``` + +**Default Behavior (no config):** + +By default, sanitize performs aggressive cleanup: + +- Removes ALL x-* vendor extensions throughout the document +- Removes unused components (schemas, responses, parameters, etc.) +- Removes unknown properties not defined in the OpenAPI specification + +**Configuration File Support:** + +Create a YAML configuration file to control sanitization behavior: + +```yaml +# sanitize-config.yaml + +# Remove only specific extension patterns (if not set, removes ALL extensions) +extensionPatterns: + - "x-go-*" + - "x-internal-*" + +# Keep unused components (default: false, removes them) +keepUnusedComponents: true + +# Keep unknown properties (default: false, removes them) +keepUnknownProperties: true +``` + +**What gets sanitized:** + +- **Extensions**: All x-* vendor extensions (info, paths, operations, schemas, etc.) +- **Unused Components**: Schemas, responses, parameters, examples, request bodies, headers, security schemes, links, callbacks, and path items that aren't referenced +- **Unknown Properties**: Properties not defined in the OpenAPI specification + +**Before sanitization:** + +```yaml +openapi: 3.1.0 +info: + title: My API + version: 1.0.0 + x-api-id: internal-123 + x-go-package: myapi +paths: + /users: + get: + operationId: listUsers + x-go-name: ListUsers + x-rate-limit: 100 + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/User' +components: + schemas: + User: + type: object + x-go-type: User + properties: + id: + type: string + UnusedSchema: + type: object + description: Not referenced anywhere +``` + +**After sanitization (default):** + +```yaml +openapi: 3.1.0 +info: + title: My API + version: 1.0.0 +paths: + /users: + get: + operationId: listUsers + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/User' +components: + schemas: + User: + type: object + properties: + id: + type: string +``` + +**After sanitization (with pattern config):** + +Using config with `extensionPatterns: ["x-go-*"]`: + +```yaml +openapi: 3.1.0 +info: + title: My API + version: 1.0.0 + x-api-id: internal-123 # kept (doesn't match x-go-*) +paths: + /users: + get: + operationId: listUsers + x-rate-limit: 100 # kept (doesn't match x-go-*) + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/User' +components: + schemas: + User: + type: object + properties: + id: + type: string +``` + +**Benefits of sanitization:** + +- **Standards compliance**: Remove vendor-specific extensions for clean, standard specs +- **Clean distribution**: Prepare specifications for public sharing or publishing +- **Reduced size**: Remove unnecessary extensions and unused components +- **Selective cleanup**: Use patterns to target specific extension families +- **Flexible control**: Config file allows fine-grained control over what to keep + +**Use Sanitize when:** + +- You want to remove all vendor extensions before publishing +- You're preparing specifications for standards-compliant distribution +- You need to clean up internal annotations before sharing externally +- You want to remove specific extension families (e.g., x-go-*, x-internal-*) +- You're combining extension removal with component cleanup in one operation ### `bundle` diff --git a/openapi/cmd/root.go b/openapi/cmd/root.go index ee20044..ad6f365 100644 --- a/openapi/cmd/root.go +++ b/openapi/cmd/root.go @@ -8,6 +8,7 @@ func Apply(rootCmd *cobra.Command) { rootCmd.AddCommand(upgradeCmd) rootCmd.AddCommand(inlineCmd) rootCmd.AddCommand(cleanCmd) + rootCmd.AddCommand(sanitizeCmd) rootCmd.AddCommand(bundleCmd) rootCmd.AddCommand(joinCmd) rootCmd.AddCommand(bootstrapCmd) diff --git a/openapi/cmd/sanitize.go b/openapi/cmd/sanitize.go new file mode 100644 index 0000000..491f4e5 --- /dev/null +++ b/openapi/cmd/sanitize.go @@ -0,0 +1,195 @@ +package cmd + +import ( + "context" + "errors" + "fmt" + "os" + + "github.com/speakeasy-api/openapi/openapi" + "github.com/spf13/cobra" +) + +var sanitizeCmd = &cobra.Command{ + Use: "sanitize [output-file]", + Short: "Remove unwanted elements from an OpenAPI specification", + Long: `Sanitize an OpenAPI specification by removing unwanted elements such as vendor extensions, +unused components, and unknown properties. + +This command provides comprehensive cleanup of OpenAPI documents to prepare them for +distribution, standardization, or sharing. By default, it performs aggressive cleanup +by removing all extensions and unused components. + +Default behavior (no config): +- Removes ALL x-* extensions throughout the document +- Removes unused components (schemas, responses, parameters, etc.) +- Removes unknown properties not in the OpenAPI specification + +With a configuration file, you can: +- Selectively remove extensions by pattern (e.g., x-go-*, x-internal-*) +- Keep unused components if needed +- Keep unknown properties if needed + +What gets sanitized by default: +- All x-* vendor extensions (info, paths, operations, schemas, etc.) +- Unused schemas in components/schemas +- Unused responses in components/responses +- Unused parameters in components/parameters +- Unused examples in components/examples +- Unused request bodies in components/requestBodies +- Unused headers in components/headers +- Unused security schemes in components/securitySchemes +- Unused links in components/links +- Unused callbacks in components/callbacks +- Unused path items in components/pathItems +- Unknown properties not defined in OpenAPI spec + +Benefits of sanitization: +- **Standards compliance**: Remove vendor-specific extensions for clean, standard specs +- **Clean distribution**: Prepare specifications for public sharing or publishing +- **Reduce document size**: Remove unnecessary extensions and unused components +- **Selective cleanup**: Use patterns to target specific extension families +- **Flexible control**: Config file allows fine-grained control over what to keep + +Configuration file format (YAML): + + # Remove only specific extension patterns (if not set, removes ALL extensions) + extensionPatterns: + - "x-go-*" + - "x-internal-*" + + # Keep unused components (default: false, removes them) + keepUnusedComponents: true + + # Keep unknown properties (default: false, removes them) + keepUnknownProperties: true + +Output options: +- No output file specified: writes to stdout (pipe-friendly) +- Output file specified: writes to the specified file +- --write flag: writes in-place to the input file + +Examples: + # Default sanitization (remove all extensions and unused components) + openapi spec sanitize ./api.yaml + + # Sanitize and write to new file + openapi spec sanitize ./api.yaml ./clean-api.yaml + + # Sanitize in-place + openapi spec sanitize -w ./api.yaml + + # Use config file for selective sanitization + openapi spec sanitize --config sanitize-config.yaml ./api.yaml + + # Combine config and output options + openapi spec sanitize --config sanitize-config.yaml -w ./api.yaml`, + Args: cobra.RangeArgs(1, 2), + Run: runSanitize, +} + +var ( + sanitizeWriteInPlace bool + sanitizeConfigFile string +) + +func init() { + sanitizeCmd.Flags().BoolVarP(&sanitizeWriteInPlace, "write", "w", false, "write result in-place to input file") + sanitizeCmd.Flags().StringVarP(&sanitizeConfigFile, "config", "c", "", "path to sanitize configuration file") +} + +func runSanitize(cmd *cobra.Command, args []string) { + ctx := cmd.Context() + inputFile := args[0] + + var outputFile string + if len(args) > 1 { + outputFile = args[1] + } + + processor, err := NewOpenAPIProcessor(inputFile, outputFile, sanitizeWriteInPlace) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + + if err := sanitizeOpenAPI(ctx, processor); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func sanitizeOpenAPI(ctx context.Context, processor *OpenAPIProcessor) error { + // Load the OpenAPI document + doc, validationErrors, err := processor.LoadDocument(ctx) + if err != nil { + return err + } + if doc == nil { + return errors.New("failed to parse OpenAPI document: document is nil") + } + + // Report validation errors but continue with sanitization + processor.ReportValidationErrors(validationErrors) + + // Load sanitize options from config file if provided + var opts *openapi.SanitizeOptions + if sanitizeConfigFile != "" { + opts, err = openapi.LoadSanitizeConfigFromFile(sanitizeConfigFile) + if err != nil { + return fmt.Errorf("failed to load config file: %w", err) + } + processor.PrintInfo(fmt.Sprintf("Using configuration from %s", sanitizeConfigFile)) + } + + // Perform the sanitization + if err := openapi.Sanitize(ctx, doc, opts); err != nil { + return fmt.Errorf("failed to sanitize OpenAPI document: %w", err) + } + + // Report success + reportSanitizationResults(processor, opts) + + return processor.WriteDocument(ctx, doc) +} + +// reportSanitizationResults reports the sanitization operation +func reportSanitizationResults(processor *OpenAPIProcessor, opts *openapi.SanitizeOptions) { + var messages []string + + // Determine what was done with extensions + if opts == nil || len(opts.ExtensionPatterns) == 0 { + messages = append(messages, "removed all extensions") + } else { + messages = append(messages, fmt.Sprintf("removed extensions matching %v", opts.ExtensionPatterns)) + } + + // Determine what was done with components + if opts == nil || !opts.KeepUnusedComponents { + messages = append(messages, "removed unused components") + } else { + messages = append(messages, "kept all components") + } + + // Determine what was done with unknown properties + if opts != nil && opts.KeepUnknownProperties { + messages = append(messages, "kept unknown properties") + } + + // Build the success message + successMsg := "Successfully sanitized document (" + for i, msg := range messages { + if i > 0 { + successMsg += ", " + } + successMsg += msg + } + successMsg += ")" + + processor.PrintSuccess(successMsg) +} + +// GetSanitizeCommand returns the sanitize command for external use +func GetSanitizeCommand() *cobra.Command { + return sanitizeCmd +} diff --git a/openapi/sanitize.go b/openapi/sanitize.go new file mode 100644 index 0000000..529f68f --- /dev/null +++ b/openapi/sanitize.go @@ -0,0 +1,419 @@ +package openapi + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + + "github.com/speakeasy-api/openapi/extensions" + "github.com/speakeasy-api/openapi/jsonschema/oas3" + "github.com/speakeasy-api/openapi/marshaller" + "gopkg.in/yaml.v3" +) + +// SanitizeOptions configures the sanitization behavior. +// Can be loaded from a YAML config file or constructed programmatically. +// Zero values provide aggressive cleanup (remove everything non-standard). +type SanitizeOptions struct { + // ExtensionPatterns specifies glob patterns for selective extension removal. + // nil: Remove ALL extensions (default) + // []: Keep ALL extensions (empty array) + // ["x-go-*", ...]: Remove only extensions matching these patterns + ExtensionPatterns []string `yaml:"extensionPatterns"` + + // KeepUnusedComponents preserves unused components in the document. + // Default (false): removes unused components. + // Set to true to preserve all components regardless of usage. + KeepUnusedComponents bool `yaml:"keepUnusedComponents,omitempty"` + + // KeepUnknownProperties preserves properties not defined in the OpenAPI specification. + // Default (false): removes unknown/unrecognized properties. + // Set to true to preserve all properties even if not in the OpenAPI spec. + // Note: Extensions (x-*) are handled separately by ExtensionPatterns. + KeepUnknownProperties bool `yaml:"keepUnknownProperties,omitempty"` +} + +// Sanitize cleans an OpenAPI document by removing unwanted elements. +// By default (nil options or zero values), it provides aggressive cleanup: +// - Removes all extensions (x-*) +// - Removes unused components +// - Removes unknown properties +// +// This function modifies the document in place. +// +// Why use Sanitize? +// +// - **Standards compliance**: Remove vendor-specific extensions for standards-compliant specs +// - **Clean distribution**: Prepare specifications for public sharing or publishing +// - **Reduce document size**: Remove unnecessary extensions, components, and properties +// - **Selective cleanup**: Use patterns to target specific extension families +// - **Integration ready**: Combine multiple cleanup operations in one pass +// +// What gets sanitized by default: +// +// - All x-* extensions throughout the document +// - Unused components (schemas, responses, parameters, etc.) +// - Unknown properties not defined in the OpenAPI specification +// +// Extension removal behavior: +// - If opts is nil or opts.ExtensionPatterns is empty: removes ALL x-* extensions +// - If opts.ExtensionPatterns has values: removes only matching extensions +// +// Example usage: +// +// // Default sanitization: remove all extensions, unused components, and unknown properties +// err := Sanitize(ctx, doc, nil) +// if err != nil { +// return fmt.Errorf("failed to sanitize document: %w", err) +// } +// +// // Remove only x-go-* extensions, keep everything else +// opts := &SanitizeOptions{ +// ExtensionPatterns: []string{"x-go-*"}, +// KeepUnusedComponents: true, +// KeepUnknownProperties: true, +// } +// err := Sanitize(ctx, doc, opts) +// if err != nil { +// return fmt.Errorf("failed to sanitize document: %w", err) +// } +// +// // Remove extensions and unknown properties, but keep components +// opts := &SanitizeOptions{ +// KeepUnusedComponents: true, +// } +// err := Sanitize(ctx, doc, opts) +// +// Parameters: +// - ctx: Context for the operation +// - doc: The OpenAPI document to sanitize (modified in place) +// - opts: Sanitization options (nil uses defaults: aggressive cleanup) +// +// Returns: +// - error: Any error that occurred during sanitization +func Sanitize(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) error { + if doc == nil { + return nil + } + + // Use default options if nil + if opts == nil { + opts = &SanitizeOptions{} + } + + // Remove extensions based on configuration + if err := removeExtensions(ctx, doc, opts); err != nil { + return fmt.Errorf("failed to remove extensions: %w", err) + } + + // Remove unknown properties if not keeping them + if !opts.KeepUnknownProperties { + if err := removeUnknownProperties(ctx, doc); err != nil { + return fmt.Errorf("failed to remove unknown properties: %w", err) + } + } + + // Clean unused components if not keeping them + if !opts.KeepUnusedComponents { + if err := Clean(ctx, doc); err != nil { + return fmt.Errorf("failed to clean unused components: %w", err) + } + } + + return nil +} + +// LoadSanitizeConfig loads sanitize configuration from a YAML reader. +func LoadSanitizeConfig(r io.Reader) (*SanitizeOptions, error) { + data, err := io.ReadAll(r) + if err != nil { + return nil, fmt.Errorf("failed to read config: %w", err) + } + + var opts SanitizeOptions + if err := yaml.Unmarshal(data, &opts); err != nil { + return nil, fmt.Errorf("failed to parse config: %w", err) + } + + return &opts, nil +} + +// LoadSanitizeConfigFromFile loads sanitize configuration from a YAML file. +func LoadSanitizeConfigFromFile(path string) (*SanitizeOptions, error) { + f, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("failed to open config file: %w", err) + } + defer f.Close() + + return LoadSanitizeConfig(f) +} + +// removeExtensions walks through the document and removes extensions based on options. +func removeExtensions(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) error { + // Determine removal strategy: + // - nil ExtensionPatterns: remove ALL extensions (default) + // - empty array []: keep ALL extensions (explicit no-op) + // - non-empty array: remove only matching patterns + + if opts != nil && opts.ExtensionPatterns != nil && len(opts.ExtensionPatterns) == 0 { + // Empty array explicitly set = keep all extensions + return nil + } + + var patterns []string + removeAll := true + + if opts != nil && opts.ExtensionPatterns != nil && len(opts.ExtensionPatterns) > 0 { + // Use patterns for selective removal + patterns = opts.ExtensionPatterns + removeAll = false + } + + // Walk through the document and process all Extensions + for item := range Walk(ctx, doc) { + err := item.Match(Matcher{ + Extensions: func(ext *extensions.Extensions) error { + if ext == nil || ext.Len() == 0 { + return nil + } + + // Collect keys to remove + keysToRemove := []string{} + for key := range ext.All() { + shouldRemove := false + + if removeAll { + // Remove all extensions + shouldRemove = true + } else { + // Check if extension matches any pattern + shouldRemove = matchesAnyPattern(key, patterns) + } + + if shouldRemove { + keysToRemove = append(keysToRemove, key) + } + } + + // Remove the identified keys + for _, key := range keysToRemove { + ext.Delete(key) + } + + return nil + }, + }) + if err != nil { + return fmt.Errorf("failed to process extensions: %w", err) + } + } + + return nil +} + +// removeUnknownProperties removes properties that are not defined in the OpenAPI specification. +// It uses the UnknownProperties list tracked during unmarshalling to identify and remove +// unknown keys from the YAML nodes. +func removeUnknownProperties(ctx context.Context, doc *OpenAPI) error { + // Walk through the document and clean unknown properties from all models + // We need specific matchers for wrapped types (Referenced*, JSONSchema) + for item := range Walk(ctx, doc) { + err := item.Match(Matcher{ + Any: func(model any) error { + return cleanUnknownPropertiesFromModel(model) + }, + Schema: func(schema *oas3.JSONSchema[oas3.Referenceable]) error { + return cleanUnknownPropertiesFromJSONSchema(schema) + }, + // Handle all Referenced types by extracting their Object + ReferencedResponse: func(ref *ReferencedResponse) error { + if ref != nil && !ref.IsReference() && ref.Object != nil { + return cleanUnknownPropertiesFromModel(ref.Object) + } + return nil + }, + ReferencedParameter: func(ref *ReferencedParameter) error { + if ref != nil && !ref.IsReference() && ref.Object != nil { + return cleanUnknownPropertiesFromModel(ref.Object) + } + return nil + }, + ReferencedRequestBody: func(ref *ReferencedRequestBody) error { + if ref != nil && !ref.IsReference() && ref.Object != nil { + return cleanUnknownPropertiesFromModel(ref.Object) + } + return nil + }, + ReferencedHeader: func(ref *ReferencedHeader) error { + if ref != nil && !ref.IsReference() && ref.Object != nil { + return cleanUnknownPropertiesFromModel(ref.Object) + } + return nil + }, + ReferencedExample: func(ref *ReferencedExample) error { + if ref != nil && !ref.IsReference() && ref.Object != nil { + return cleanUnknownPropertiesFromModel(ref.Object) + } + return nil + }, + ReferencedLink: func(ref *ReferencedLink) error { + if ref != nil && !ref.IsReference() && ref.Object != nil { + return cleanUnknownPropertiesFromModel(ref.Object) + } + return nil + }, + ReferencedCallback: func(ref *ReferencedCallback) error { + if ref != nil && !ref.IsReference() && ref.Object != nil { + return cleanUnknownPropertiesFromModel(ref.Object) + } + return nil + }, + ReferencedPathItem: func(ref *ReferencedPathItem) error { + if ref != nil && !ref.IsReference() && ref.Object != nil { + return cleanUnknownPropertiesFromModel(ref.Object) + } + return nil + }, + ReferencedSecurityScheme: func(ref *ReferencedSecurityScheme) error { + if ref != nil && !ref.IsReference() && ref.Object != nil { + return cleanUnknownPropertiesFromModel(ref.Object) + } + return nil + }, + }) + if err != nil { + return fmt.Errorf("failed to clean unknown properties: %w", err) + } + } + + return nil +} + +// cleanUnknownPropertiesFromJSONSchema handles JSONSchema wrappers +func cleanUnknownPropertiesFromJSONSchema(js *oas3.JSONSchema[oas3.Referenceable]) error { + if js == nil || !js.IsSchema() { + return nil // Skip boolean schemas + } + + schema := js.GetSchema() + if schema == nil { + return nil + } + + // Clean unknown properties from the schema + return cleanUnknownPropertiesFromModel(schema) +} + +// cleanUnknownPropertiesFromModel removes unknown properties from a model's YAML node +// using the UnknownProperties list tracked during unmarshalling. +func cleanUnknownPropertiesFromModel(model any) error { + // Try to get the core model + core := getCoreModelFromAny(model) + if core == nil { + return nil // No core model found + } + + // Check if core implements CoreModeler (has UnknownProperties) + coreModeler, ok := core.(marshaller.CoreModeler) + if !ok { + return nil // Core doesn't implement CoreModeler + } + + unknownProps := coreModeler.GetUnknownProperties() + if len(unknownProps) == 0 { + return nil // No unknown properties to remove + } + + rootNode := coreModeler.GetRootNode() + if rootNode == nil { + return nil // No root node + } + + // Remove unknown properties from the root node + removePropertiesFromNode(rootNode, unknownProps) + + return nil +} + +// getCoreModelFromAny attempts to extract a core model from various wrapper types +func getCoreModelFromAny(model any) any { + // Try direct core getter + type coreGetter interface { + GetCoreAny() any + } + + if coreModel, ok := model.(coreGetter); ok { + core := coreModel.GetCoreAny() + if core != nil { + return core + } + } + + // Try navigable node (for EitherValue wrappers) + type navigableNoder interface { + GetNavigableNode() (any, error) + } + + if navigable, ok := model.(navigableNoder); ok { + inner, err := navigable.GetNavigableNode() + if err == nil && inner != nil { + // Recursively try to get core from the inner value + return getCoreModelFromAny(inner) + } + } + + return nil +} + +// removePropertiesFromNode removes the specified property keys from a YAML mapping node. +func removePropertiesFromNode(node *yaml.Node, keysToRemove []string) { + if node == nil || node.Kind != yaml.MappingNode { + return + } + + // Build a set of keys to remove for efficient lookup + removeSet := make(map[string]bool, len(keysToRemove)) + for _, key := range keysToRemove { + removeSet[key] = true + } + + // Filter content to exclude keys in the remove set + newContent := make([]*yaml.Node, 0, len(node.Content)) + for i := 0; i < len(node.Content); i += 2 { + if i+1 >= len(node.Content) { + break + } + + keyNode := node.Content[i] + valueNode := node.Content[i+1] + + if keyNode.Kind == yaml.ScalarNode && removeSet[keyNode.Value] { + // Skip this key-value pair (it's unknown) + continue + } + + // Keep this key-value pair + newContent = append(newContent, keyNode, valueNode) + } + + // Update the node's content + node.Content = newContent +} + +// matchesAnyPattern checks if a string matches any of the provided glob patterns. +func matchesAnyPattern(str string, patterns []string) bool { + for _, pattern := range patterns { + matched, err := filepath.Match(pattern, str) + if err != nil { + // Invalid pattern, skip it + continue + } + if matched { + return true + } + } + return false +} diff --git a/openapi/sanitize_test.go b/openapi/sanitize_test.go new file mode 100644 index 0000000..9ec04c8 --- /dev/null +++ b/openapi/sanitize_test.go @@ -0,0 +1,322 @@ +package openapi_test + +import ( + "bytes" + "os" + "strings" + "testing" + + "github.com/speakeasy-api/openapi/openapi" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSanitize_RemoveAllExtensions_Success(t *testing.T) { + t.Parallel() + + ctx := t.Context() + + // Load the input document with various extensions + inputFile, err := os.Open("testdata/sanitize/sanitize_input.yaml") + require.NoError(t, err) + defer inputFile.Close() + + inputDoc, validationErrs, err := openapi.Unmarshal(ctx, inputFile) + require.NoError(t, err) + require.Empty(t, validationErrs, "Input document should be valid") + + // Sanitize with default options (remove all extensions and clean components) + err = openapi.Sanitize(ctx, inputDoc, nil) + require.NoError(t, err) + + // Marshal the sanitized document to YAML + var buf bytes.Buffer + err = openapi.Marshal(ctx, inputDoc, &buf) + require.NoError(t, err) + actualYAML := buf.Bytes() + + // Load the expected output + expectedBytes, err := os.ReadFile("testdata/sanitize/sanitize_expected.yaml") + require.NoError(t, err) + + // Compare the actual output with expected output + assert.Equal(t, string(expectedBytes), string(actualYAML), "Sanitized document should match expected output") +} + +func TestSanitize_PatternBased_Success(t *testing.T) { + t.Parallel() + + ctx := t.Context() + + // Load the input document with various extensions + inputFile, err := os.Open("testdata/sanitize/sanitize_pattern_input.yaml") + require.NoError(t, err) + defer inputFile.Close() + + inputDoc, validationErrs, err := openapi.Unmarshal(ctx, inputFile) + require.NoError(t, err) + require.Empty(t, validationErrs, "Input document should be valid") + + // Sanitize with pattern matching - only remove x-go-* extensions + opts := &openapi.SanitizeOptions{ + ExtensionPatterns: []string{"x-go-*"}, + KeepUnusedComponents: true, + KeepUnknownProperties: true, + } + err = openapi.Sanitize(ctx, inputDoc, opts) + require.NoError(t, err) + + // Marshal the sanitized document to YAML + var buf bytes.Buffer + err = openapi.Marshal(ctx, inputDoc, &buf) + require.NoError(t, err) + actualYAML := buf.Bytes() + + // Load the expected output + expectedBytes, err := os.ReadFile("testdata/sanitize/sanitize_pattern_expected.yaml") + require.NoError(t, err) + + // Compare the actual output with expected output + assert.Equal(t, string(expectedBytes), string(actualYAML), "Pattern-based sanitized document should match expected output") +} + +func TestSanitize_MultiplePatterns_Success(t *testing.T) { + t.Parallel() + + ctx := t.Context() + + // Load the input document with various extensions + inputFile, err := os.Open("testdata/sanitize/sanitize_multi_pattern_input.yaml") + require.NoError(t, err) + defer inputFile.Close() + + inputDoc, validationErrs, err := openapi.Unmarshal(ctx, inputFile) + require.NoError(t, err) + require.Empty(t, validationErrs, "Input document should be valid") + + // Sanitize with multiple patterns + opts := &openapi.SanitizeOptions{ + ExtensionPatterns: []string{"x-go-*", "x-internal-*"}, + KeepUnusedComponents: true, + } + err = openapi.Sanitize(ctx, inputDoc, opts) + require.NoError(t, err) + + // Marshal the sanitized document to YAML + var buf bytes.Buffer + err = openapi.Marshal(ctx, inputDoc, &buf) + require.NoError(t, err) + actualYAML := buf.Bytes() + + // Load the expected output + expectedBytes, err := os.ReadFile("testdata/sanitize/sanitize_multi_pattern_expected.yaml") + require.NoError(t, err) + + // Compare the actual output with expected output + assert.Equal(t, string(expectedBytes), string(actualYAML), "Multi-pattern sanitized document should match expected output") +} + +func TestSanitize_KeepComponents_Success(t *testing.T) { + t.Parallel() + + ctx := t.Context() + + // Load the input document + inputFile, err := os.Open("testdata/sanitize/sanitize_input.yaml") + require.NoError(t, err) + defer inputFile.Close() + + inputDoc, validationErrs, err := openapi.Unmarshal(ctx, inputFile) + require.NoError(t, err) + require.Empty(t, validationErrs, "Input document should be valid") + + // Sanitize but keep unused components + opts := &openapi.SanitizeOptions{ + KeepUnusedComponents: true, + } + err = openapi.Sanitize(ctx, inputDoc, opts) + require.NoError(t, err) + + // Marshal the sanitized document to YAML + var buf bytes.Buffer + err = openapi.Marshal(ctx, inputDoc, &buf) + require.NoError(t, err) + actualYAML := buf.Bytes() + + // Load the expected output (all extensions removed, components kept) + expectedBytes, err := os.ReadFile("testdata/sanitize/sanitize_keep_components_expected.yaml") + require.NoError(t, err) + + // Compare the actual output with expected output + assert.Equal(t, string(expectedBytes), string(actualYAML), "Sanitized document with kept components should match expected output") +} + +func TestSanitize_EmptyDocument_Success(t *testing.T) { + t.Parallel() + + ctx := t.Context() + + // Test with nil document + err := openapi.Sanitize(ctx, nil, nil) + require.NoError(t, err) + + // Test with minimal document (no components, no extensions) + doc := &openapi.OpenAPI{ + OpenAPI: "3.1.0", + Info: openapi.Info{ + Title: "Empty API", + Version: "1.0.0", + }, + } + + err = openapi.Sanitize(ctx, doc, nil) + require.NoError(t, err) +} + +func TestSanitize_NoExtensions_Success(t *testing.T) { + t.Parallel() + + ctx := t.Context() + + // Load document without extensions + inputFile, err := os.Open("testdata/sanitize/sanitize_no_extensions_input.yaml") + require.NoError(t, err) + defer inputFile.Close() + + inputDoc, validationErrs, err := openapi.Unmarshal(ctx, inputFile) + require.NoError(t, err) + require.Empty(t, validationErrs, "Input document should be valid") + + // Sanitize (should be a no-op for extensions) + err = openapi.Sanitize(ctx, inputDoc, nil) + require.NoError(t, err) + + // Document should still be valid + assert.NotNil(t, inputDoc) +} + +func TestLoadSanitizeConfig_Success(t *testing.T) { + t.Parallel() + + configYAML := `extensionPatterns: + - "x-go-*" + - "x-internal-*" +keepUnusedComponents: true +keepUnknownProperties: false +` + + // Load config from reader + opts, err := openapi.LoadSanitizeConfig(strings.NewReader(configYAML)) + require.NoError(t, err) + require.NotNil(t, opts) + + // Verify config was loaded correctly + assert.Equal(t, []string{"x-go-*", "x-internal-*"}, opts.ExtensionPatterns) + assert.True(t, opts.KeepUnusedComponents) + assert.False(t, opts.KeepUnknownProperties) +} + +func TestLoadSanitizeConfig_FileNotFound_Error(t *testing.T) { + t.Parallel() + + // Try to load non-existent config file + _, err := openapi.LoadSanitizeConfigFromFile("testdata/sanitize/nonexistent.yaml") + require.Error(t, err) + assert.Contains(t, err.Error(), "failed to open config file") +} + +func TestLoadSanitizeConfig_InvalidYAML_Error(t *testing.T) { + t.Parallel() + + invalidYAML := `extensionPatterns: + - "x-go-*" + invalid yaml syntax here: [ +keepUnusedComponents: true +` + + // Try to load invalid YAML + _, err := openapi.LoadSanitizeConfig(strings.NewReader(invalidYAML)) + require.Error(t, err) + assert.Contains(t, err.Error(), "failed to parse config") +} + +func TestSanitize_ConfigFile_Success(t *testing.T) { + t.Parallel() + + ctx := t.Context() + + // Load the input document + inputFile, err := os.Open("testdata/sanitize/sanitize_pattern_input.yaml") + require.NoError(t, err) + defer inputFile.Close() + + inputDoc, validationErrs, err := openapi.Unmarshal(ctx, inputFile) + require.NoError(t, err) + require.Empty(t, validationErrs, "Input document should be valid") + + configYAML := `extensionPatterns: + - "x-go-*" +keepUnusedComponents: true +keepUnknownProperties: true +` + + // Load config from reader + opts, err := openapi.LoadSanitizeConfig(strings.NewReader(configYAML)) + require.NoError(t, err) + + // Sanitize using config + err = openapi.Sanitize(ctx, inputDoc, opts) + require.NoError(t, err) + + // Marshal the sanitized document to YAML + var buf bytes.Buffer + err = openapi.Marshal(ctx, inputDoc, &buf) + require.NoError(t, err) + actualYAML := buf.Bytes() + + // Load the expected output + expectedBytes, err := os.ReadFile("testdata/sanitize/sanitize_pattern_expected.yaml") + require.NoError(t, err) + + // Compare the actual output with expected output + assert.Equal(t, string(expectedBytes), string(actualYAML), "Config-based sanitized document should match expected output") +} + +func TestSanitize_KeepExtensionsRemoveUnknownProperties_Success(t *testing.T) { + t.Parallel() + + ctx := t.Context() + + // Load input with both extensions and unknown properties + inputFile, err := os.Open("testdata/sanitize/sanitize_keep_extensions_input.yaml") + require.NoError(t, err) + defer inputFile.Close() + + inputDoc, validationErrs, err := openapi.Unmarshal(ctx, inputFile) + require.NoError(t, err) + require.Empty(t, validationErrs, "Input document should be valid") + + // Configure to keep ALL extensions but remove unknown properties + // Empty array = keep all extensions, nil = remove all + opts := &openapi.SanitizeOptions{ + ExtensionPatterns: []string{}, // Empty array = keep ALL extensions + KeepUnknownProperties: false, // Remove unknown properties + KeepUnusedComponents: true, + } + + err = openapi.Sanitize(ctx, inputDoc, opts) + require.NoError(t, err) + + // Marshal the sanitized document + var buf bytes.Buffer + err = openapi.Marshal(ctx, inputDoc, &buf) + require.NoError(t, err) + actualYAML := buf.Bytes() + + // Load the expected output + expectedBytes, err := os.ReadFile("testdata/sanitize/sanitize_keep_extensions_expected.yaml") + require.NoError(t, err) + + // Compare the actual output with expected output + assert.Equal(t, string(expectedBytes), string(actualYAML), "Should keep extensions but remove unknown properties") +} diff --git a/openapi/testdata/sanitize/sanitize_expected.yaml b/openapi/testdata/sanitize/sanitize_expected.yaml new file mode 100644 index 0000000..1ccc9c1 --- /dev/null +++ b/openapi/testdata/sanitize/sanitize_expected.yaml @@ -0,0 +1,53 @@ +openapi: 3.1.0 +info: + title: Test API with Extensions + version: 1.0.0 + contact: + name: API Support + email: support@example.com + license: + name: MIT +servers: + - url: https://api.example.com + description: Production server +paths: + /users: + get: + summary: List users + operationId: listUsers + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/User" + /products: + post: + summary: Create product + operationId: createProduct + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/Product" + responses: + "201": + description: Created +components: + schemas: + User: + type: object + properties: + id: + type: string + name: + type: string + Product: + type: object + properties: + id: + type: string + name: + type: string diff --git a/openapi/testdata/sanitize/sanitize_input.yaml b/openapi/testdata/sanitize/sanitize_input.yaml new file mode 100644 index 0000000..62d3475 --- /dev/null +++ b/openapi/testdata/sanitize/sanitize_input.yaml @@ -0,0 +1,100 @@ +openapi: 3.1.0 +info: + title: Test API with Extensions + version: 1.0.0 + x-api-id: test-api-123 + x-go-package: testapi + contact: + name: API Support + email: support@example.com + x-slack-channel: api-support + license: + name: MIT + x-internal-id: lic-001 +x-custom-extension: root-level +x-go-name: TestAPI +x-internal-config: + setting1: value1 + setting2: value2 +servers: + - url: https://api.example.com + description: Production server + x-environment: production + x-go-server-name: ProductionServer +paths: + /users: + get: + summary: List users + operationId: listUsers + x-go-name: ListUsers + x-rate-limit: 100 + responses: + "200": + description: Success + x-response-type: UserList + content: + application/json: + schema: + $ref: "#/components/schemas/User" + x-content-encoding: gzip + /products: + x-internal-endpoint: true + post: + summary: Create product + operationId: createProduct + x-go-name: CreateProduct + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/Product" + responses: + "201": + description: Created +components: + schemas: + User: + type: object + x-go-type: User + x-db-table: users + properties: + id: + type: string + x-go-name: ID + name: + type: string + x-validation: required + x-examples: + - id: "123" + name: "John Doe" + Product: + type: object + x-go-type: Product + properties: + id: + type: string + name: + type: string + UnusedSchema: + type: object + description: This schema is not referenced anywhere + x-go-type: UnusedSchema + properties: + field: + type: string + responses: + UnusedResponse: + description: Not used anywhere + x-internal: true + content: + application/json: + schema: + type: object + parameters: + UnusedParam: + name: unused + in: query + schema: + type: string + x-go-name: UnusedParam diff --git a/openapi/testdata/sanitize/sanitize_keep_components_expected.yaml b/openapi/testdata/sanitize/sanitize_keep_components_expected.yaml new file mode 100644 index 0000000..cc8eed2 --- /dev/null +++ b/openapi/testdata/sanitize/sanitize_keep_components_expected.yaml @@ -0,0 +1,72 @@ +openapi: 3.1.0 +info: + title: Test API with Extensions + version: 1.0.0 + contact: + name: API Support + email: support@example.com + license: + name: MIT +servers: + - url: https://api.example.com + description: Production server +paths: + /users: + get: + summary: List users + operationId: listUsers + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/User" + /products: + post: + summary: Create product + operationId: createProduct + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/Product" + responses: + "201": + description: Created +components: + schemas: + User: + type: object + properties: + id: + type: string + name: + type: string + Product: + type: object + properties: + id: + type: string + name: + type: string + UnusedSchema: + type: object + description: This schema is not referenced anywhere + properties: + field: + type: string + responses: + UnusedResponse: + description: Not used anywhere + content: + application/json: + schema: + type: object + parameters: + UnusedParam: + name: unused + in: query + schema: + type: string diff --git a/openapi/testdata/sanitize/sanitize_keep_extensions_expected.yaml b/openapi/testdata/sanitize/sanitize_keep_extensions_expected.yaml new file mode 100644 index 0000000..22b6458 --- /dev/null +++ b/openapi/testdata/sanitize/sanitize_keep_extensions_expected.yaml @@ -0,0 +1,24 @@ +openapi: 3.1.0 +info: + title: Test API with Extensions and Unknown Properties + version: 1.0.0 + x-api-id: test-123 + x-go-package: myapi +paths: + /test: + get: + summary: Test endpoint + operationId: test + x-custom: keep-this + responses: + "200": + description: Success + x-response-code: 200 +components: + schemas: + TestSchema: + type: object + x-go-type: TestSchema + properties: + field: + type: string diff --git a/openapi/testdata/sanitize/sanitize_keep_extensions_input.yaml b/openapi/testdata/sanitize/sanitize_keep_extensions_input.yaml new file mode 100644 index 0000000..20ac669 --- /dev/null +++ b/openapi/testdata/sanitize/sanitize_keep_extensions_input.yaml @@ -0,0 +1,29 @@ +openapi: 3.1.0 +info: + title: Test API with Extensions and Unknown Properties + version: 1.0.0 + x-api-id: test-123 + unknownProperty: should-be-removed + x-go-package: myapi +paths: + /test: + get: + summary: Test endpoint + operationId: test + x-custom: keep-this + anotherUnknown: remove-this + unknownField: also-remove + responses: + "200": + description: Success + x-response-code: 200 + invalidProp: remove-me +components: + schemas: + TestSchema: + type: object + x-go-type: TestSchema + notInSpec: should-go + properties: + field: + type: string diff --git a/openapi/testdata/sanitize/sanitize_multi_pattern_expected.yaml b/openapi/testdata/sanitize/sanitize_multi_pattern_expected.yaml new file mode 100644 index 0000000..1050366 --- /dev/null +++ b/openapi/testdata/sanitize/sanitize_multi_pattern_expected.yaml @@ -0,0 +1,25 @@ +openapi: 3.1.0 +info: + title: Multi-Pattern Test API + version: 1.0.0 + x-speakeasy-retries: 3 + x-other-extension: value +paths: + /test: + get: + summary: Test endpoint + operationId: test + x-speakeasy-pagination: false + x-rate-limit: 100 + responses: + "200": + description: Success +components: + schemas: + TestSchema: + type: object + x-speakeasy-entity: test + x-custom: value + properties: + field: + type: string diff --git a/openapi/testdata/sanitize/sanitize_multi_pattern_input.yaml b/openapi/testdata/sanitize/sanitize_multi_pattern_input.yaml new file mode 100644 index 0000000..fc27d1f --- /dev/null +++ b/openapi/testdata/sanitize/sanitize_multi_pattern_input.yaml @@ -0,0 +1,31 @@ +openapi: 3.1.0 +info: + title: Multi-Pattern Test API + version: 1.0.0 + x-go-package: testapi + x-internal-version: 2.0 + x-speakeasy-retries: 3 + x-other-extension: value +paths: + /test: + get: + summary: Test endpoint + operationId: test + x-go-name: Test + x-internal-cache: true + x-speakeasy-pagination: false + x-rate-limit: 100 + responses: + "200": + description: Success +components: + schemas: + TestSchema: + type: object + x-go-type: TestSchema + x-internal-table: test + x-speakeasy-entity: test + x-custom: value + properties: + field: + type: string diff --git a/openapi/testdata/sanitize/sanitize_no_extensions_input.yaml b/openapi/testdata/sanitize/sanitize_no_extensions_input.yaml new file mode 100644 index 0000000..7eeb473 --- /dev/null +++ b/openapi/testdata/sanitize/sanitize_no_extensions_input.yaml @@ -0,0 +1,28 @@ +openapi: 3.1.0 +info: + title: Clean API without Extensions + version: 1.0.0 + contact: + name: API Support + email: support@example.com +paths: + /users: + get: + summary: List users + operationId: listUsers + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/User" +components: + schemas: + User: + type: object + properties: + id: + type: string + name: + type: string diff --git a/openapi/testdata/sanitize/sanitize_pattern_config.yaml b/openapi/testdata/sanitize/sanitize_pattern_config.yaml new file mode 100644 index 0000000..c260a54 --- /dev/null +++ b/openapi/testdata/sanitize/sanitize_pattern_config.yaml @@ -0,0 +1,2 @@ +extensionPatterns: + - "x-go-*" diff --git a/openapi/testdata/sanitize/sanitize_pattern_expected.yaml b/openapi/testdata/sanitize/sanitize_pattern_expected.yaml new file mode 100644 index 0000000..8045d8f --- /dev/null +++ b/openapi/testdata/sanitize/sanitize_pattern_expected.yaml @@ -0,0 +1,32 @@ +openapi: 3.1.0 +info: + title: Test API with Multiple Extension Types + version: 1.0.0 + x-speakeasy-retries: 3 + x-internal-version: 2.0 +paths: + /users: + get: + summary: List users + operationId: listUsers + x-speakeasy-pagination: true + x-internal-cache-ttl: 300 + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/User" +components: + schemas: + User: + type: object + x-speakeasy-entity: user + x-internal-table: users + properties: + id: + type: string + x-speakeasy-example: "user-123" + name: + type: string diff --git a/openapi/testdata/sanitize/sanitize_pattern_input.yaml b/openapi/testdata/sanitize/sanitize_pattern_input.yaml new file mode 100644 index 0000000..176169c --- /dev/null +++ b/openapi/testdata/sanitize/sanitize_pattern_input.yaml @@ -0,0 +1,37 @@ +openapi: 3.1.0 +info: + title: Test API with Multiple Extension Types + version: 1.0.0 + x-go-package: testapi + x-speakeasy-retries: 3 + x-internal-version: 2.0 +paths: + /users: + get: + summary: List users + operationId: listUsers + x-go-name: ListUsers + x-speakeasy-pagination: true + x-internal-cache-ttl: 300 + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/User" +components: + schemas: + User: + type: object + x-go-type: User + x-speakeasy-entity: user + x-internal-table: users + properties: + id: + type: string + x-go-name: ID + x-speakeasy-example: "user-123" + name: + type: string + x-go-name: Name From b680f235582257ea4387a5ea181ed92fcfc9dfb4 Mon Sep 17 00:00:00 2001 From: Tristan Cartledge Date: Mon, 13 Oct 2025 07:07:59 +0000 Subject: [PATCH 2/8] fix --- marshaller/unmarshaller.go | 7 ++++--- openapi/cmd/sanitize.go | 2 +- openapi/sanitize.go | 13 ++++--------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/marshaller/unmarshaller.go b/marshaller/unmarshaller.go index 302691f..50d4859 100644 --- a/marshaller/unmarshaller.go +++ b/marshaller/unmarshaller.go @@ -367,7 +367,8 @@ func unmarshalModel(ctx context.Context, parentName string, node *yaml.Node, str // Direct field index lookup (eliminates map[string]Field allocation) fieldIndex, ok := fieldMap.FieldIndexes[key] if !ok { - if strings.HasPrefix(key, "x-") && extensionsField != nil { + switch { + case strings.HasPrefix(key, "x-") && extensionsField != nil: // Lock access to extensionsField to prevent concurrent modification extensionsMutex.Lock() defer extensionsMutex.Unlock() @@ -375,7 +376,7 @@ func unmarshalModel(ctx context.Context, parentName string, node *yaml.Node, str if err != nil { return err } - } else if embeddedMap != nil { + case embeddedMap != nil: // Skip alias definitions - these are nodes where: // 1. The value node has an anchor (e.g., &keyAlias) // 2. The key is not an alias reference (doesn't start with *) @@ -385,7 +386,7 @@ func unmarshalModel(ctx context.Context, parentName string, node *yaml.Node, str return nil } jobMapContent[i/2] = append(jobMapContent[i/2], keyNode, valueNode) - } else { + default: // This is an unknown property (not a recognized field, not an extension, not in embedded map) unknownPropertiesMutex.Lock() unknownProperties = append(unknownProperties, key) diff --git a/openapi/cmd/sanitize.go b/openapi/cmd/sanitize.go index 491f4e5..65783ac 100644 --- a/openapi/cmd/sanitize.go +++ b/openapi/cmd/sanitize.go @@ -139,7 +139,7 @@ func sanitizeOpenAPI(ctx context.Context, processor *OpenAPIProcessor) error { if err != nil { return fmt.Errorf("failed to load config file: %w", err) } - processor.PrintInfo(fmt.Sprintf("Using configuration from %s", sanitizeConfigFile)) + processor.PrintInfo("Using configuration from " + sanitizeConfigFile) } // Perform the sanitization diff --git a/openapi/sanitize.go b/openapi/sanitize.go index 529f68f..96c065b 100644 --- a/openapi/sanitize.go +++ b/openapi/sanitize.go @@ -142,7 +142,7 @@ func LoadSanitizeConfig(r io.Reader) (*SanitizeOptions, error) { // LoadSanitizeConfigFromFile loads sanitize configuration from a YAML file. func LoadSanitizeConfigFromFile(path string) (*SanitizeOptions, error) { - f, err := os.Open(path) + f, err := os.Open(path) //nolint:gosec if err != nil { return nil, fmt.Errorf("failed to open config file: %w", err) } @@ -183,8 +183,7 @@ func removeExtensions(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) // Collect keys to remove keysToRemove := []string{} for key := range ext.All() { - shouldRemove := false - + var shouldRemove bool if removeAll { // Remove all extensions shouldRemove = true @@ -222,12 +221,8 @@ func removeUnknownProperties(ctx context.Context, doc *OpenAPI) error { // We need specific matchers for wrapped types (Referenced*, JSONSchema) for item := range Walk(ctx, doc) { err := item.Match(Matcher{ - Any: func(model any) error { - return cleanUnknownPropertiesFromModel(model) - }, - Schema: func(schema *oas3.JSONSchema[oas3.Referenceable]) error { - return cleanUnknownPropertiesFromJSONSchema(schema) - }, + Any: cleanUnknownPropertiesFromModel, + Schema: cleanUnknownPropertiesFromJSONSchema, // Handle all Referenced types by extracting their Object ReferencedResponse: func(ref *ReferencedResponse) error { if ref != nil && !ref.IsReference() && ref.Object != nil { From e3cabdbfe3c55e9de004d43b4d8499c58694282e Mon Sep 17 00:00:00 2001 From: Tristan Cartledge Date: Mon, 13 Oct 2025 07:24:21 +0000 Subject: [PATCH 3/8] fix --- .github/workflows/ci.yaml | 4 ++++ mise-tasks/test-coverage | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8319539..14ff321 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -166,6 +166,10 @@ jobs: timeout 300 go test -coverprofile=main-coverage.out -covermode=atomic ./... > /dev/null 2>&1 || echo "Main branch tests failed or timed out" if [ -f main-coverage.out ]; then + # Filter out cmd and tests folders from main branch coverage (same as current branch) + grep -v -E '/cmd/|/tests/' main-coverage.out > main-coverage.filtered.out || true + mv main-coverage.filtered.out main-coverage.out + MAIN_COVERAGE=$(go tool cover -func=main-coverage.out | grep total | awk '{print $3}' || echo "0.0%") echo "main-coverage=$MAIN_COVERAGE" >> $GITHUB_OUTPUT echo "Main branch coverage: $MAIN_COVERAGE" diff --git a/mise-tasks/test-coverage b/mise-tasks/test-coverage index df6c9f9..f2a72f5 100755 --- a/mise-tasks/test-coverage +++ b/mise-tasks/test-coverage @@ -7,6 +7,14 @@ if ! gotestsum --format testname -- -race -coverprofile=coverage.out -covermode= exit 1 fi +# Filter out cmd and tests folders from coverage report +if [ -f coverage.out ]; then + echo "🔧 Filtering cmd and tests folders from coverage report..." + grep -v -E '/cmd/|/tests/' coverage.out > coverage.filtered.out || true + # Keep original for reference, use filtered for reporting + mv coverage.filtered.out coverage.out +fi + echo "" echo "## 📊 Test Coverage Report" echo "" From fcd97d54ab59b42f578f885796eda8a99b36d2b9 Mon Sep 17 00:00:00 2001 From: Tristan Cartledge Date: Tue, 14 Oct 2025 09:37:53 +1000 Subject: [PATCH 4/8] refactor(sanitize): address PR feedback and improve code quality - Simplify nested conditions in removeExtensions by consolidating checks - Add SanitizeResult type to collect and return warnings for invalid glob patterns - Pre-allocate unknownProperties slice capacity for better performance - Use map[string]struct{} instead of map[string]bool for memory efficiency - Add PrintWarning method to OpenAPIProcessor for warning display - Update all tests and CLI to handle new return signature Addresses Copilot review feedback in PR #59 --- marshaller/unmarshaller.go | 2 +- openapi/cmd/sanitize.go | 8 ++- openapi/cmd/shared.go | 7 +++ openapi/sanitize.go | 100 +++++++++++++++++++++++++++---------- openapi/sanitize_test.go | 27 ++++++---- 5 files changed, 106 insertions(+), 38 deletions(-) diff --git a/marshaller/unmarshaller.go b/marshaller/unmarshaller.go index 50d4859..74cf0f5 100644 --- a/marshaller/unmarshaller.go +++ b/marshaller/unmarshaller.go @@ -349,7 +349,7 @@ func unmarshalModel(ctx context.Context, parentName string, node *yaml.Node, str // Track unknown properties (non-extension, non-field, non-embedded map properties) var unknownPropertiesMutex sync.Mutex - var unknownProperties []string + unknownProperties := make([]string, 0, numJobs) // Mutex to protect concurrent access to extensionsField var extensionsMutex sync.Mutex diff --git a/openapi/cmd/sanitize.go b/openapi/cmd/sanitize.go index 65783ac..43706bb 100644 --- a/openapi/cmd/sanitize.go +++ b/openapi/cmd/sanitize.go @@ -143,10 +143,16 @@ func sanitizeOpenAPI(ctx context.Context, processor *OpenAPIProcessor) error { } // Perform the sanitization - if err := openapi.Sanitize(ctx, doc, opts); err != nil { + result, err := openapi.Sanitize(ctx, doc, opts) + if err != nil { return fmt.Errorf("failed to sanitize OpenAPI document: %w", err) } + // Report any warnings + for _, warning := range result.Warnings { + processor.PrintWarning(warning) + } + // Report success reportSanitizationResults(processor, opts) diff --git a/openapi/cmd/shared.go b/openapi/cmd/shared.go index e059a20..01b5b2a 100644 --- a/openapi/cmd/shared.go +++ b/openapi/cmd/shared.go @@ -114,3 +114,10 @@ func (p *OpenAPIProcessor) PrintInfo(message string) { fmt.Printf("📋 %s\n", message) } } + +// PrintWarning prints a warning message if not writing to stdout +func (p *OpenAPIProcessor) PrintWarning(message string) { + if !p.WriteToStdout { + fmt.Printf("⚠️ Warning: %s\n", message) + } +} diff --git a/openapi/sanitize.go b/openapi/sanitize.go index 96c065b..ce69f6a 100644 --- a/openapi/sanitize.go +++ b/openapi/sanitize.go @@ -35,6 +35,13 @@ type SanitizeOptions struct { KeepUnknownProperties bool `yaml:"keepUnknownProperties,omitempty"` } +// SanitizeResult contains the results of a sanitization operation. +type SanitizeResult struct { + // Warnings contains non-fatal issues encountered during sanitization. + // These typically include invalid glob patterns that were skipped. + Warnings []string +} + // Sanitize cleans an OpenAPI document by removing unwanted elements. // By default (nil options or zero values), it provides aggressive cleanup: // - Removes all extensions (x-*) @@ -64,10 +71,13 @@ type SanitizeOptions struct { // Example usage: // // // Default sanitization: remove all extensions, unused components, and unknown properties -// err := Sanitize(ctx, doc, nil) +// result, err := Sanitize(ctx, doc, nil) // if err != nil { // return fmt.Errorf("failed to sanitize document: %w", err) // } +// for _, warning := range result.Warnings { +// fmt.Fprintf(os.Stderr, "Warning: %s\n", warning) +// } // // // Remove only x-go-* extensions, keep everything else // opts := &SanitizeOptions{ @@ -75,7 +85,7 @@ type SanitizeOptions struct { // KeepUnusedComponents: true, // KeepUnknownProperties: true, // } -// err := Sanitize(ctx, doc, opts) +// result, err := Sanitize(ctx, doc, opts) // if err != nil { // return fmt.Errorf("failed to sanitize document: %w", err) // } @@ -84,7 +94,7 @@ type SanitizeOptions struct { // opts := &SanitizeOptions{ // KeepUnusedComponents: true, // } -// err := Sanitize(ctx, doc, opts) +// result, err := Sanitize(ctx, doc, opts) // // Parameters: // - ctx: Context for the operation @@ -92,10 +102,13 @@ type SanitizeOptions struct { // - opts: Sanitization options (nil uses defaults: aggressive cleanup) // // Returns: +// - *SanitizeResult: Result containing any warnings from the operation // - error: Any error that occurred during sanitization -func Sanitize(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) error { +func Sanitize(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) (*SanitizeResult, error) { + result := &SanitizeResult{} + if doc == nil { - return nil + return result, nil } // Use default options if nil @@ -104,25 +117,27 @@ func Sanitize(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) error { } // Remove extensions based on configuration - if err := removeExtensions(ctx, doc, opts); err != nil { - return fmt.Errorf("failed to remove extensions: %w", err) + warnings, err := removeExtensions(ctx, doc, opts) + if err != nil { + return result, fmt.Errorf("failed to remove extensions: %w", err) } + result.Warnings = append(result.Warnings, warnings...) // Remove unknown properties if not keeping them if !opts.KeepUnknownProperties { if err := removeUnknownProperties(ctx, doc); err != nil { - return fmt.Errorf("failed to remove unknown properties: %w", err) + return result, fmt.Errorf("failed to remove unknown properties: %w", err) } } // Clean unused components if not keeping them if !opts.KeepUnusedComponents { if err := Clean(ctx, doc); err != nil { - return fmt.Errorf("failed to clean unused components: %w", err) + return result, fmt.Errorf("failed to clean unused components: %w", err) } } - return nil + return result, nil } // LoadSanitizeConfig loads sanitize configuration from a YAML reader. @@ -152,21 +167,23 @@ func LoadSanitizeConfigFromFile(path string) (*SanitizeOptions, error) { } // removeExtensions walks through the document and removes extensions based on options. -func removeExtensions(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) error { +// Returns a slice of warnings for any invalid glob patterns encountered. +func removeExtensions(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) ([]string, error) { // Determine removal strategy: // - nil ExtensionPatterns: remove ALL extensions (default) // - empty array []: keep ALL extensions (explicit no-op) // - non-empty array: remove only matching patterns - if opts != nil && opts.ExtensionPatterns != nil && len(opts.ExtensionPatterns) == 0 { - // Empty array explicitly set = keep all extensions - return nil - } - var patterns []string removeAll := true + var invalidPatterns []string - if opts != nil && opts.ExtensionPatterns != nil && len(opts.ExtensionPatterns) > 0 { + // Handle extension patterns if explicitly set + if opts != nil && opts.ExtensionPatterns != nil { + if len(opts.ExtensionPatterns) == 0 { + // Empty array explicitly set = keep all extensions + return nil, nil + } // Use patterns for selective removal patterns = opts.ExtensionPatterns removeAll = false @@ -189,7 +206,14 @@ func removeExtensions(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) shouldRemove = true } else { // Check if extension matches any pattern - shouldRemove = matchesAnyPattern(key, patterns) + matched, invalid := matchesAnyPattern(key, patterns) + shouldRemove = matched + // Collect invalid patterns (only once per pattern) + for _, pattern := range invalid { + if !contains(invalidPatterns, pattern) { + invalidPatterns = append(invalidPatterns, pattern) + } + } } if shouldRemove { @@ -206,11 +230,17 @@ func removeExtensions(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) }, }) if err != nil { - return fmt.Errorf("failed to process extensions: %w", err) + return nil, fmt.Errorf("failed to process extensions: %w", err) } } - return nil + // Convert invalid patterns to warnings + var warnings []string + for _, pattern := range invalidPatterns { + warnings = append(warnings, fmt.Sprintf("invalid glob pattern '%s' was skipped", pattern)) + } + + return warnings, nil } // removeUnknownProperties removes properties that are not defined in the OpenAPI specification. @@ -370,9 +400,9 @@ func removePropertiesFromNode(node *yaml.Node, keysToRemove []string) { } // Build a set of keys to remove for efficient lookup - removeSet := make(map[string]bool, len(keysToRemove)) + removeSet := make(map[string]struct{}, len(keysToRemove)) for _, key := range keysToRemove { - removeSet[key] = true + removeSet[key] = struct{}{} } // Filter content to exclude keys in the remove set @@ -385,9 +415,11 @@ func removePropertiesFromNode(node *yaml.Node, keysToRemove []string) { keyNode := node.Content[i] valueNode := node.Content[i+1] - if keyNode.Kind == yaml.ScalarNode && removeSet[keyNode.Value] { - // Skip this key-value pair (it's unknown) - continue + if keyNode.Kind == yaml.ScalarNode { + if _, shouldRemove := removeSet[keyNode.Value]; shouldRemove { + // Skip this key-value pair (it's unknown) + continue + } } // Keep this key-value pair @@ -399,14 +431,28 @@ func removePropertiesFromNode(node *yaml.Node, keysToRemove []string) { } // matchesAnyPattern checks if a string matches any of the provided glob patterns. -func matchesAnyPattern(str string, patterns []string) bool { +// Returns (matched bool, invalidPatterns []string) where invalidPatterns contains +// any patterns that failed to compile. +func matchesAnyPattern(str string, patterns []string) (bool, []string) { + var invalidPatterns []string for _, pattern := range patterns { matched, err := filepath.Match(pattern, str) if err != nil { - // Invalid pattern, skip it + // Collect invalid pattern + invalidPatterns = append(invalidPatterns, pattern) continue } if matched { + return true, invalidPatterns + } + } + return false, invalidPatterns +} + +// contains checks if a string slice contains a specific string. +func contains(slice []string, str string) bool { + for _, s := range slice { + if s == str { return true } } diff --git a/openapi/sanitize_test.go b/openapi/sanitize_test.go index 9ec04c8..7e98983 100644 --- a/openapi/sanitize_test.go +++ b/openapi/sanitize_test.go @@ -26,8 +26,9 @@ func TestSanitize_RemoveAllExtensions_Success(t *testing.T) { require.Empty(t, validationErrs, "Input document should be valid") // Sanitize with default options (remove all extensions and clean components) - err = openapi.Sanitize(ctx, inputDoc, nil) + result, err := openapi.Sanitize(ctx, inputDoc, nil) require.NoError(t, err) + assert.Empty(t, result.Warnings, "Should not have warnings") // Marshal the sanitized document to YAML var buf bytes.Buffer @@ -63,8 +64,9 @@ func TestSanitize_PatternBased_Success(t *testing.T) { KeepUnusedComponents: true, KeepUnknownProperties: true, } - err = openapi.Sanitize(ctx, inputDoc, opts) + result, err := openapi.Sanitize(ctx, inputDoc, opts) require.NoError(t, err) + assert.Empty(t, result.Warnings, "Should not have warnings") // Marshal the sanitized document to YAML var buf bytes.Buffer @@ -99,8 +101,9 @@ func TestSanitize_MultiplePatterns_Success(t *testing.T) { ExtensionPatterns: []string{"x-go-*", "x-internal-*"}, KeepUnusedComponents: true, } - err = openapi.Sanitize(ctx, inputDoc, opts) + result, err := openapi.Sanitize(ctx, inputDoc, opts) require.NoError(t, err) + assert.Empty(t, result.Warnings, "Should not have warnings") // Marshal the sanitized document to YAML var buf bytes.Buffer @@ -134,8 +137,9 @@ func TestSanitize_KeepComponents_Success(t *testing.T) { opts := &openapi.SanitizeOptions{ KeepUnusedComponents: true, } - err = openapi.Sanitize(ctx, inputDoc, opts) + result, err := openapi.Sanitize(ctx, inputDoc, opts) require.NoError(t, err) + assert.Empty(t, result.Warnings, "Should not have warnings") // Marshal the sanitized document to YAML var buf bytes.Buffer @@ -157,8 +161,9 @@ func TestSanitize_EmptyDocument_Success(t *testing.T) { ctx := t.Context() // Test with nil document - err := openapi.Sanitize(ctx, nil, nil) + result, err := openapi.Sanitize(ctx, nil, nil) require.NoError(t, err) + assert.Empty(t, result.Warnings, "Should not have warnings") // Test with minimal document (no components, no extensions) doc := &openapi.OpenAPI{ @@ -169,8 +174,9 @@ func TestSanitize_EmptyDocument_Success(t *testing.T) { }, } - err = openapi.Sanitize(ctx, doc, nil) + result, err = openapi.Sanitize(ctx, doc, nil) require.NoError(t, err) + assert.Empty(t, result.Warnings, "Should not have warnings") } func TestSanitize_NoExtensions_Success(t *testing.T) { @@ -188,8 +194,9 @@ func TestSanitize_NoExtensions_Success(t *testing.T) { require.Empty(t, validationErrs, "Input document should be valid") // Sanitize (should be a no-op for extensions) - err = openapi.Sanitize(ctx, inputDoc, nil) + result, err := openapi.Sanitize(ctx, inputDoc, nil) require.NoError(t, err) + assert.Empty(t, result.Warnings, "Should not have warnings") // Document should still be valid assert.NotNil(t, inputDoc) @@ -265,8 +272,9 @@ keepUnknownProperties: true require.NoError(t, err) // Sanitize using config - err = openapi.Sanitize(ctx, inputDoc, opts) + result, err := openapi.Sanitize(ctx, inputDoc, opts) require.NoError(t, err) + assert.Empty(t, result.Warnings, "Should not have warnings") // Marshal the sanitized document to YAML var buf bytes.Buffer @@ -304,8 +312,9 @@ func TestSanitize_KeepExtensionsRemoveUnknownProperties_Success(t *testing.T) { KeepUnusedComponents: true, } - err = openapi.Sanitize(ctx, inputDoc, opts) + result, err := openapi.Sanitize(ctx, inputDoc, opts) require.NoError(t, err) + assert.Empty(t, result.Warnings, "Should not have warnings") // Marshal the sanitized document var buf bytes.Buffer From cc559ee989c1980037a5811a48361409ef80661d Mon Sep 17 00:00:00 2001 From: Tristan Cartledge Date: Tue, 14 Oct 2025 10:03:19 +1000 Subject: [PATCH 5/8] refactor(sanitize): improve pattern tracking and reporting logic - Track pattern usage throughout document walk to provide better warnings - Warn about invalid glob patterns (syntax errors) - Warn about patterns that never matched any extensions - Fix reportSanitizationResults to distinguish between nil (remove all) and empty slice (keep all) for ExtensionPatterns - Remove unused matchesAnyPattern and contains helper functions - Inline pattern matching logic in removeExtensions for better tracking This addresses Copilot PR feedback comments about: 1. Invalid patterns being lost after first match 2. Incorrect reporting logic for extension patterns --- openapi/cmd/sanitize.go | 7 +++- openapi/sanitize.go | 73 +++++++++++++++++++---------------------- 2 files changed, 39 insertions(+), 41 deletions(-) diff --git a/openapi/cmd/sanitize.go b/openapi/cmd/sanitize.go index 43706bb..de13842 100644 --- a/openapi/cmd/sanitize.go +++ b/openapi/cmd/sanitize.go @@ -164,9 +164,14 @@ func reportSanitizationResults(processor *OpenAPIProcessor, opts *openapi.Saniti var messages []string // Determine what was done with extensions - if opts == nil || len(opts.ExtensionPatterns) == 0 { + if opts == nil || opts.ExtensionPatterns == nil { + // nil patterns = remove all extensions (default) messages = append(messages, "removed all extensions") + } else if len(opts.ExtensionPatterns) == 0 { + // empty slice = keep all extensions (explicit) + messages = append(messages, "kept all extensions") } else { + // specific patterns = remove matching extensions messages = append(messages, fmt.Sprintf("removed extensions matching %v", opts.ExtensionPatterns)) } diff --git a/openapi/sanitize.go b/openapi/sanitize.go index ce69f6a..0ff98a5 100644 --- a/openapi/sanitize.go +++ b/openapi/sanitize.go @@ -167,7 +167,7 @@ func LoadSanitizeConfigFromFile(path string) (*SanitizeOptions, error) { } // removeExtensions walks through the document and removes extensions based on options. -// Returns a slice of warnings for any invalid glob patterns encountered. +// Returns a slice of warnings for invalid patterns or patterns that matched nothing. func removeExtensions(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) ([]string, error) { // Determine removal strategy: // - nil ExtensionPatterns: remove ALL extensions (default) @@ -176,7 +176,6 @@ func removeExtensions(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) var patterns []string removeAll := true - var invalidPatterns []string // Handle extension patterns if explicitly set if opts != nil && opts.ExtensionPatterns != nil { @@ -189,6 +188,18 @@ func removeExtensions(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) removeAll = false } + // Track pattern usage: map[pattern]MatchInfo + type matchInfo struct { + invalid bool // true if pattern has invalid syntax + matched bool // true if pattern matched at least one extension + } + patternUsage := make(map[string]*matchInfo) + + // Initialize tracking for all patterns + for _, pattern := range patterns { + patternUsage[pattern] = &matchInfo{} + } + // Walk through the document and process all Extensions for item := range Walk(ctx, doc) { err := item.Match(Matcher{ @@ -206,12 +217,18 @@ func removeExtensions(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) shouldRemove = true } else { // Check if extension matches any pattern - matched, invalid := matchesAnyPattern(key, patterns) - shouldRemove = matched - // Collect invalid patterns (only once per pattern) - for _, pattern := range invalid { - if !contains(invalidPatterns, pattern) { - invalidPatterns = append(invalidPatterns, pattern) + for _, pattern := range patterns { + info := patternUsage[pattern] + matched, err := filepath.Match(pattern, key) + if err != nil { + // Mark pattern as invalid + info.invalid = true + continue + } + if matched { + // Mark pattern as having matched something + info.matched = true + shouldRemove = true } } } @@ -234,10 +251,15 @@ func removeExtensions(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) } } - // Convert invalid patterns to warnings + // Generate warnings for invalid patterns and patterns that never matched var warnings []string - for _, pattern := range invalidPatterns { - warnings = append(warnings, fmt.Sprintf("invalid glob pattern '%s' was skipped", pattern)) + for _, pattern := range patterns { + info := patternUsage[pattern] + if info.invalid { + warnings = append(warnings, fmt.Sprintf("invalid glob pattern '%s' was skipped", pattern)) + } else if !info.matched { + warnings = append(warnings, fmt.Sprintf("pattern '%s' did not match any extensions in the document", pattern)) + } } return warnings, nil @@ -429,32 +451,3 @@ func removePropertiesFromNode(node *yaml.Node, keysToRemove []string) { // Update the node's content node.Content = newContent } - -// matchesAnyPattern checks if a string matches any of the provided glob patterns. -// Returns (matched bool, invalidPatterns []string) where invalidPatterns contains -// any patterns that failed to compile. -func matchesAnyPattern(str string, patterns []string) (bool, []string) { - var invalidPatterns []string - for _, pattern := range patterns { - matched, err := filepath.Match(pattern, str) - if err != nil { - // Collect invalid pattern - invalidPatterns = append(invalidPatterns, pattern) - continue - } - if matched { - return true, invalidPatterns - } - } - return false, invalidPatterns -} - -// contains checks if a string slice contains a specific string. -func contains(slice []string, str string) bool { - for _, s := range slice { - if s == str { - return true - } - } - return false -} From c49bf5daf09effd12a013258ebc879a49c9b7e91 Mon Sep 17 00:00:00 2001 From: Tristan Cartledge <108070248+TristanSpeakEasy@users.noreply.github.com> Date: Tue, 14 Oct 2025 10:15:57 +1000 Subject: [PATCH 6/8] Update openapi/cmd/README.md Co-authored-by: Blake Preston --- openapi/cmd/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openapi/cmd/README.md b/openapi/cmd/README.md index 90c3e8d..98cabf7 100644 --- a/openapi/cmd/README.md +++ b/openapi/cmd/README.md @@ -251,7 +251,7 @@ Create a YAML configuration file to control sanitization behavior: ```yaml # sanitize-config.yaml -# Remove only specific extension patterns (if not set, removes ALL extensions) +# Only remove extensions that match these patterns, null will remove ALL extensions, [] will remove no extensions (default: null, removes ALL extensions) extensionPatterns: - "x-go-*" - "x-internal-*" From 1a4f536ce012965949eb239e35ca481a4e69283d Mon Sep 17 00:00:00 2001 From: Tristan Cartledge Date: Tue, 14 Oct 2025 10:20:52 +1000 Subject: [PATCH 7/8] fix --- openapi/cmd/sanitize.go | 7 ++++--- openapi/sanitize.go | 3 +++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/openapi/cmd/sanitize.go b/openapi/cmd/sanitize.go index de13842..7faf193 100644 --- a/openapi/cmd/sanitize.go +++ b/openapi/cmd/sanitize.go @@ -164,13 +164,14 @@ func reportSanitizationResults(processor *OpenAPIProcessor, opts *openapi.Saniti var messages []string // Determine what was done with extensions - if opts == nil || opts.ExtensionPatterns == nil { + switch { + case opts == nil || opts.ExtensionPatterns == nil: // nil patterns = remove all extensions (default) messages = append(messages, "removed all extensions") - } else if len(opts.ExtensionPatterns) == 0 { + case len(opts.ExtensionPatterns) == 0: // empty slice = keep all extensions (explicit) messages = append(messages, "kept all extensions") - } else { + default: // specific patterns = remove matching extensions messages = append(messages, fmt.Sprintf("removed extensions matching %v", opts.ExtensionPatterns)) } diff --git a/openapi/sanitize.go b/openapi/sanitize.go index 0ff98a5..6aecf9b 100644 --- a/openapi/sanitize.go +++ b/openapi/sanitize.go @@ -255,6 +255,9 @@ func removeExtensions(ctx context.Context, doc *OpenAPI, opts *SanitizeOptions) var warnings []string for _, pattern := range patterns { info := patternUsage[pattern] + if info == nil { + continue + } if info.invalid { warnings = append(warnings, fmt.Sprintf("invalid glob pattern '%s' was skipped", pattern)) } else if !info.matched { From e1ccfcaeac8bc92c364395acb224f38a17c00e52 Mon Sep 17 00:00:00 2001 From: Tristan Cartledge <108070248+TristanSpeakEasy@users.noreply.github.com> Date: Tue, 14 Oct 2025 10:29:10 +1000 Subject: [PATCH 8/8] Update openapi/cmd/sanitize.go Co-authored-by: Blake Preston --- openapi/cmd/sanitize.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openapi/cmd/sanitize.go b/openapi/cmd/sanitize.go index 7faf193..6777e87 100644 --- a/openapi/cmd/sanitize.go +++ b/openapi/cmd/sanitize.go @@ -53,7 +53,7 @@ Benefits of sanitization: Configuration file format (YAML): - # Remove only specific extension patterns (if not set, removes ALL extensions) + # Only remove extensions that match these patterns, null will remove ALL extensions, [] will remove no extensions (default: null, removes ALL extensions) extensionPatterns: - "x-go-*" - "x-internal-*"