From 1bebfa54eb50213d57d159dc5baea823000883a0 Mon Sep 17 00:00:00 2001 From: cbullinger Date: Tue, 14 Oct 2025 08:24:14 -0400 Subject: [PATCH 01/11] Fix: Clear FileStateService after upload and deprecation operations The FileStateService was not being cleared after files were uploaded and deprecation files were updated. This caused files to accumulate in the service across multiple webhook events, leading to duplicate uploads and stale data. Changes: - Added ClearFilesToUpload() call after AddFilesToTargetRepoBranch() - Added ClearFilesToDeprecate() call after UpdateDeprecationFile() This ensures the FileStateService is properly reset after each webhook processing cycle, preventing file accumulation. --- .../services/webhook_handler_new.go | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/examples-copier/services/webhook_handler_new.go b/examples-copier/services/webhook_handler_new.go index 3c1ceee..c68ae83 100644 --- a/examples-copier/services/webhook_handler_new.go +++ b/examples-copier/services/webhook_handler_new.go @@ -61,8 +61,6 @@ func RetrieveFileContentsWithConfigAndBranch(ctx context.Context, filePath strin return fileContent, nil } - - // HandleWebhookWithContainer handles incoming GitHub webhook requests using the service container func HandleWebhookWithContainer(w http.ResponseWriter, r *http.Request, config *configs.Config, container *ServiceContainer) { ctx := r.Context() @@ -190,11 +188,22 @@ func handleMergedPRWithContainer(ctx context.Context, prNumber int, sourceCommit // Process files with new pattern matching processFilesWithPatternMatching(ctx, prNumber, sourceCommitSHA, changedFiles, yamlConfig, config, container) - // Upload queued files - use existing function + // Upload queued files - copy from FileStateService to global map for legacy function + FilesToUpload = container.FileStateService.GetFilesToUpload() AddFilesToTargetRepoBranch(nil) - - // Update deprecation file - use existing function + container.FileStateService.ClearFilesToUpload() + + // Update deprecation file - copy from FileStateService to global map for legacy function + deprecationMap := container.FileStateService.GetFilesToDeprecate() + FilesToDeprecate = make(map[string]types.Configs) + for _, entry := range deprecationMap { + FilesToDeprecate[entry.FileName] = types.Configs{ + TargetRepo: entry.Repo, + TargetBranch: entry.Branch, + } + } UpdateDeprecationFile() + container.FileStateService.ClearFilesToDeprecate() // Calculate metrics after processing filesMatched := container.MetricsCollector.GetFilesMatched() - filesMatchedBefore @@ -464,4 +473,3 @@ func addToDeprecationMapForTarget(targetPath string, target types.TargetConfig, fileStateService.AddFileToDeprecate(deprecationFile, entry) } - From e195dc6fda195d305385112e0a215a5b3ea1003e Mon Sep 17 00:00:00 2001 From: cbullinger Date: Tue, 14 Oct 2025 12:56:59 -0400 Subject: [PATCH 02/11] Fix: Read GOOGLE_CLOUD_PROJECT_ID from environment variable Previously the code was using the constant name 'GOOGLE_CLOUD_PROJECT_ID' instead of reading the actual environment variable value, causing the error: 'projects/GOOGLE_CLOUD_PROJECT_ID is not a valid resource name' Changes: - Read projectId from os.Getenv(configs.GoogleCloudProjectId) - Read logName from os.Getenv(configs.CopierLogName) - Add validation to disable cloud logging if projectId is not set - Add fallback default for logName if not set --- examples-copier/services/logger.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git 
a/examples-copier/services/logger.go b/examples-copier/services/logger.go index 3f2a8aa..4d51c27 100644 --- a/examples-copier/services/logger.go +++ b/examples-copier/services/logger.go @@ -37,7 +37,12 @@ func InitializeGoogleLogger() { return } - projectId := configs.GoogleCloudProjectId + projectId := os.Getenv(configs.GoogleCloudProjectId) + if projectId == "" { + log.Printf("[WARN] GOOGLE_CLOUD_PROJECT_ID not set, disabling cloud logging\n") + gcpLoggingEnabled = false + return + } client, err := logging.NewClient(context.Background(), projectId) if err != nil { @@ -48,7 +53,10 @@ func InitializeGoogleLogger() { googleLoggingClient = client gcpLoggingEnabled = true - logName := configs.CopierLogName + logName := os.Getenv(configs.CopierLogName) + if logName == "" { + logName = "code-copier-log" // fallback default + } googleInfoLogger = client.Logger(logName).StandardLogger(logging.Info) googleWarningLogger = client.Logger(logName).StandardLogger(logging.Warning) googleErrorLogger = client.Logger(logName).StandardLogger(logging.Error) From 002bbdd61a8129161d2b4f1c3852cacbe77205e1 Mon Sep 17 00:00:00 2001 From: cbullinger Date: Tue, 14 Oct 2025 13:17:42 -0400 Subject: [PATCH 03/11] Fix: Parse target repo owner/name from full path The target repo is specified as 'owner/repo' (e.g., 'mongodb/atlas-architecture-go-sdk') but the code was using repoOwner() which returns the SOURCE repo owner ('10gen'), resulting in malformed paths like '10gen/mongodb/atlas-architecture-go-sdk'. Changes: - Added parseRepoPath() function to split 'owner/repo' into separate components - Updated all GitHub API calls to use parsed owner and repo name - Fixes 404 errors when accessing target repositories Affected functions: - createPullRequest - createBranch - createCommitTree - createCommit - mergePR - deleteBranchIfExists --- .../services/github_write_to_target.go | 48 ++++++++++++------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/examples-copier/services/github_write_to_target.go b/examples-copier/services/github_write_to_target.go index 7d517b3..c616ba6 100644 --- a/examples-copier/services/github_write_to_target.go +++ b/examples-copier/services/github_write_to_target.go @@ -55,6 +55,17 @@ func findConfig(cfgs ConfigFileType, repoName string) Configs { // repoOwner returns the repository owner from environment variables. func repoOwner() string { return os.Getenv(configs.RepoOwner) } +// parseRepoPath parses a repository path in the format "owner/repo" and returns owner and repo separately. +// If the path doesn't contain a slash, it returns the source repo owner from env and the path as repo name. +func parseRepoPath(repoPath string) (owner, repo string) { + parts := strings.Split(repoPath, "/") + if len(parts) == 2 { + return parts[0], parts[1] + } + // Fallback to source repo owner if no slash found (backward compatibility) + return repoOwner(), repoPath +} + // AddFilesToTargetRepoBranch uploads files to the target repository branch // using the specified commit strategy (direct or via pull request). func AddFilesToTargetRepoBranch(cfgs ...ConfigFileType) { @@ -105,14 +116,14 @@ func AddFilesToTargetRepoBranch(cfgs ...ConfigFileType) { // createPullRequest opens a pull request from head to base in the specified repository. 
func createPullRequest(ctx context.Context, client *github.Client, repo, head, base, title, body string) (*github.PullRequest, error) { - owner := repoOwner() + owner, repoName := parseRepoPath(repo) pr := &github.NewPullRequest{ Title: github.String(title), Head: github.String(head), // for same-repo branches, just "branch"; for forks, use "owner:branch" Base: github.String(base), // e.g. "main" Body: github.String(body), } - created, _, err := client.PullRequests.Create(ctx, owner, repo, pr) + created, _, err := client.PullRequests.Create(ctx, owner, repoName, pr) if err != nil { return nil, fmt.Errorf("could not create PR: %w", err) } @@ -165,8 +176,9 @@ func addFilesViaPR(ctx context.Context, client *github.Client, key UploadKey, // We poll up to ~10s with 500ms interval var mergeable *bool var mergeableState string + owner, repoName := parseRepoPath(key.RepoName) for i := 0; i < 20; i++ { - current, _, gerr := client.PullRequests.Get(ctx, repoOwner(), key.RepoName, pr.GetNumber()) + current, _, gerr := client.PullRequests.Get(ctx, owner, repoName, pr.GetNumber()) if gerr == nil && current != nil { mergeable = current.Mergeable mergeableState = current.GetMergeableState() @@ -214,7 +226,7 @@ func addFilesToBranch(ctx context.Context, client *github.Client, key UploadKey, // createBranch creates a new branch from the specified base branch (defaults to 'main') and deletes it first if it already exists. func createBranch(ctx context.Context, client *github.Client, repo, newBranch string, baseBranch ...string) (*github.Reference, error) { - owner := repoOwner() + owner, repoName := parseRepoPath(repo) // Use provided base branch or default to "main" base := "main" @@ -222,14 +234,14 @@ func createBranch(ctx context.Context, client *github.Client, repo, newBranch st base = baseBranch[0] } - baseRef, _, err := client.Git.GetRef(ctx, owner, repo, "refs/heads/"+base) + baseRef, _, err := client.Git.GetRef(ctx, owner, repoName, "refs/heads/"+base) if err != nil { LogCritical(fmt.Sprintf("Failed to get '%s' baseRef: %s", base, err)) return nil, err } // *** Check if branch (newBranchRef) already exists and delete it *** - newBranchRef, _, err := client.Git.GetRef(ctx, owner, repo, fmt.Sprintf("%s%s", "refs/heads/", newBranch)) + newBranchRef, _, err := client.Git.GetRef(ctx, owner, repoName, fmt.Sprintf("%s%s", "refs/heads/", newBranch)) deleteBranchIfExists(ctx, client, repo, newBranchRef) newRef := &github.Reference{ @@ -239,7 +251,7 @@ func createBranch(ctx context.Context, client *github.Client, repo, newBranch st }, } - newBranchRef, _, err = client.Git.CreateRef(ctx, owner, repo, newRef) + newBranchRef, _, err = client.Git.CreateRef(ctx, owner, repoName, newRef) if err != nil { LogCritical(fmt.Sprintf("Failed to create newBranchRef %s: %s", newRef, err)) return nil, err @@ -254,10 +266,10 @@ func createBranch(ctx context.Context, client *github.Client, repo, newBranch st func createCommitTree(ctx context.Context, client *github.Client, targetBranch UploadKey, files map[string]string) (treeSHA string, baseSHA string, err error) { - owner := repoOwner() + owner, repoName := parseRepoPath(targetBranch.RepoName) // 1) Get current ref (ONE GET) - ref, _, err := client.Git.GetRef(ctx, owner, targetBranch.RepoName, targetBranch.BranchPath) + ref, _, err := client.Git.GetRef(ctx, owner, repoName, targetBranch.BranchPath) if err != nil || ref == nil { if err == nil { err = errors.Errorf("targetRef is nil") @@ -279,7 +291,7 @@ func createCommitTree(ctx context.Context, client *github.Client, 
targetBranch U } // 3) Create tree on top of baseSHA - tree, _, err := client.Git.CreateTree(ctx, owner, targetBranch.RepoName, baseSHA, treeEntries) + tree, _, err := client.Git.CreateTree(ctx, owner, repoName, baseSHA, treeEntries) if err != nil { return "", "", fmt.Errorf("failed to create tree: %w", err) } @@ -290,7 +302,7 @@ func createCommitTree(ctx context.Context, client *github.Client, targetBranch U func createCommit(ctx context.Context, client *github.Client, targetBranch UploadKey, baseSHA string, treeSHA string, message string) error { - owner := repoOwner() + owner, repoName := parseRepoPath(targetBranch.RepoName) parent := &github.Commit{SHA: github.String(baseSHA)} commit := &github.Commit{ @@ -299,7 +311,7 @@ func createCommit(ctx context.Context, client *github.Client, targetBranch Uploa Parents: []*github.Commit{parent}, } - newCommit, _, err := client.Git.CreateCommit(ctx, owner, targetBranch.RepoName, commit) + newCommit, _, err := client.Git.CreateCommit(ctx, owner, repoName, commit) if err != nil { return fmt.Errorf("could not create commit: %w", err) } @@ -309,7 +321,7 @@ func createCommit(ctx context.Context, client *github.Client, targetBranch Uploa Ref: github.String(targetBranch.BranchPath), // e.g., "refs/heads/main" Object: &github.GitObject{SHA: github.String(newCommit.GetSHA())}, } - if _, _, err := client.Git.UpdateRef(ctx, owner, targetBranch.RepoName, ref, false); err != nil { + if _, _, err := client.Git.UpdateRef(ctx, owner, repoName, ref, false); err != nil { // Detect non-fast-forward / conflict scenarios and provide a clearer error if eresp, ok := err.(*github.ErrorResponse); ok { if eresp.Response != nil && eresp.Response.StatusCode == http.StatusUnprocessableEntity { @@ -323,12 +335,12 @@ func createCommit(ctx context.Context, client *github.Client, targetBranch Uploa // mergePR merges the specified pull request in the given repository. func mergePR(ctx context.Context, client *github.Client, repo string, pr_number int) error { - owner := repoOwner() + owner, repoName := parseRepoPath(repo) options := &github.PullRequestOptions{ MergeMethod: "merge", // Other options: "squash" or "rebase" } - result, _, err := client.PullRequests.Merge(ctx, owner, repo, pr_number, "Merging the pull request", options) + result, _, err := client.PullRequests.Merge(ctx, owner, repoName, pr_number, "Merging the pull request", options) if err != nil { LogCritical(fmt.Sprintf("Failed to merge PR: %v\n", err)) return err @@ -345,17 +357,17 @@ func mergePR(ctx context.Context, client *github.Client, repo string, pr_number // deleteBranchIfExists deletes the specified branch if it exists, except for 'main'. 
func deleteBranchIfExists(backgroundContext context.Context, client *github.Client, repo string, ref *github.Reference) { - owner := repoOwner() + owner, repoName := parseRepoPath(repo) if ref.GetRef() == "refs/heads/main" { LogError("I refuse to delete branch 'main'.") log.Fatal() } LogInfo(fmt.Sprintf("Deleting branch %s on %s", ref.GetRef(), repo)) - _, _, err := client.Git.GetRef(backgroundContext, owner, repo, ref.GetRef()) + _, _, err := client.Git.GetRef(backgroundContext, owner, repoName, ref.GetRef()) if err == nil { // Branch exists (there was no error fetching it) - _, err = client.Git.DeleteRef(backgroundContext, owner, repo, ref.GetRef()) + _, err = client.Git.DeleteRef(backgroundContext, owner, repoName, ref.GetRef()) if err != nil { LogCritical(fmt.Sprintf("Error deleting branch: %v\n", err)) } From 2023fc529e0ede76906c721575ca7d2666f95267 Mon Sep 17 00:00:00 2001 From: cbullinger Date: Tue, 14 Oct 2025 13:41:27 -0400 Subject: [PATCH 04/11] Fix: Respect commit_strategy from pattern-matching config The new pattern-matching system stores commit strategy in UploadFileContent.CommitStrategy, but the code was only checking the legacy Configs.CopierCommitStrategy, causing all uploads to use 'direct' strategy regardless of the YAML config. Changes: - Check value.CommitStrategy first (from new pattern-matching system) - Fallback to legacy commitStrategy(cfg) if not set - Also respect value.CommitMessage, value.PRTitle, and value.AutoMergePR - Add logging to show which strategy is being used - Fixes issue where pull_request strategy was ignored --- examples-copier/copier-config.yml | 0 .../services/github_write_to_target.go | 42 +++++++++++++------ 2 files changed, 30 insertions(+), 12 deletions(-) create mode 100644 examples-copier/copier-config.yml diff --git a/examples-copier/copier-config.yml b/examples-copier/copier-config.yml new file mode 100644 index 0000000..e69de29 diff --git a/examples-copier/services/github_write_to_target.go b/examples-copier/services/github_write_to_target.go index c616ba6..b7f2439 100644 --- a/examples-copier/services/github_write_to_target.go +++ b/examples-copier/services/github_write_to_target.go @@ -79,34 +79,52 @@ func AddFilesToTargetRepoBranch(cfgs ...ConfigFileType) { for key, value := range FilesToUpload { cfg := findConfig(effectiveCfgs, key.RepoName) - // Determine messages from config with sensible defaults - commitMsg := cfg.CommitMessage + + // Determine commit strategy: prefer value.CommitStrategy (from new pattern-matching system), + // fallback to legacy config-based strategy + strategy := string(value.CommitStrategy) + if strategy == "" { + strategy = commitStrategy(cfg) + } + + // Determine messages: prefer value fields (from new system), fallback to legacy config + commitMsg := value.CommitMessage if strings.TrimSpace(commitMsg) == "" { - commitMsg = os.Getenv(configs.DefaultCommitMessage) + commitMsg = cfg.CommitMessage if strings.TrimSpace(commitMsg) == "" { - commitMsg = configs.NewConfig().DefaultCommitMessage + commitMsg = os.Getenv(configs.DefaultCommitMessage) + if strings.TrimSpace(commitMsg) == "" { + commitMsg = configs.NewConfig().DefaultCommitMessage + } } } - prTitle := cfg.PRTitle + + prTitle := value.PRTitle if strings.TrimSpace(prTitle) == "" { - prTitle = commitMsg + prTitle = cfg.PRTitle + if strings.TrimSpace(prTitle) == "" { + prTitle = commitMsg + } } - // Determine default for mergeWithoutReview. If no matching config (zero-value), - // honor DEFAULT_PR_MERGE env var; otherwise, fall back to system default. 
- mergeWithoutReview := cfg.MergeWithoutReview - if cfg.TargetRepo == "" { + // Determine auto-merge: prefer value.AutoMergePR (from new system), fallback to legacy config + mergeWithoutReview := value.AutoMergePR + if !value.AutoMergePR && cfg.TargetRepo == "" { // Preserve historical behavior for tests/local runs: default to auto-merge when no config present mergeWithoutReview = true + } else if cfg.TargetRepo != "" { + // If legacy config exists, use its setting + mergeWithoutReview = cfg.MergeWithoutReview } - switch commitStrategy(cfg) { + switch strategy { case "direct": // commits directly to the target branch LogInfo(fmt.Sprintf("Using direct commit strategy for %s on branch %s", key.RepoName, key.BranchPath)) if err := addFilesToBranch(ctx, client, key, value.Content, commitMsg); err != nil { LogCritical(fmt.Sprintf("Failed to add files to target branch: %v\n", err)) } - default: // "pr" strategy + default: // "pr" or "pull_request" strategy + LogInfo(fmt.Sprintf("Using PR commit strategy for %s on branch %s (auto_merge=%v)", key.RepoName, key.BranchPath, mergeWithoutReview)) if err := addFilesViaPR(ctx, client, key, value.Content, commitMsg, prTitle, mergeWithoutReview); err != nil { LogCritical(fmt.Sprintf("Failed via PR path: %v\n", err)) } From a64726cb5775bcd912de75e5a9c18e7743148eb9 Mon Sep 17 00:00:00 2001 From: cbullinger Date: Tue, 14 Oct 2025 13:46:28 -0400 Subject: [PATCH 05/11] Remove legacy webhook handler and config system Removed all legacy code that is no longer used: 1. Removed webhook_handler.go (legacy webhook handler) - ParseWebhookDataWithConfig and related functions - iterateFilesForCopyWithConfig - Legacy file processing logic 2. Simplified AddFilesToTargetRepoBranch - Removed ConfigFileType parameter (now takes no parameters) - Removed findConfig and commitStrategy helper functions - Now only uses UploadFileContent fields (from pattern-matching system) - Cleaner, simpler code that only supports the new YAML config 3. Removed legacy config loading - Removed convertLegacyToYAML function - Removed JSON config support - Config loader now only supports YAML format 4. Removed legacy functions from github_read.go - Removed RetrieveAndParseConfigFile (JSON config loader) - Removed retrieveJsonFile helper All code now uses the new pattern-matching system with YAML configuration. The app only uses HandleWebhookWithContainer (new system). 
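For illustration only, a minimal, self-contained Go sketch of the fallback order this simplification leaves in place (per-file values from UploadFileContent first, then a default). The types here are stand-ins, not the real `types` package definitions, the env/config default chain is collapsed into a single `defaultMsg` parameter, and the repo name is just an example taken from the config added earlier in this series:

```go
package main

import (
	"fmt"
	"strings"
)

// Illustrative stand-ins for the fields this patch series relies on
// (CommitStrategy, CommitMessage, PRTitle, AutoMergePR). Not the real types.
type uploadKey struct {
	RepoName   string // "owner/repo"
	BranchPath string // "refs/heads/<branch>"
}

type uploadContent struct {
	CommitStrategy string // "direct" or "pull_request"; empty falls back to "direct"
	CommitMessage  string
	PRTitle        string
	AutoMergePR    bool
}

// resolveUploadSettings mirrors the fallback order described above:
// per-file values win, otherwise sensible defaults apply.
func resolveUploadSettings(v uploadContent, defaultMsg string) (strategy, msg, title string) {
	strategy = v.CommitStrategy
	if strategy == "" {
		strategy = "direct"
	}
	msg = v.CommitMessage
	if strings.TrimSpace(msg) == "" {
		msg = defaultMsg
	}
	title = v.PRTitle
	if strings.TrimSpace(title) == "" {
		title = msg
	}
	return strategy, msg, title
}

func main() {
	key := uploadKey{RepoName: "mongodb/atlas-architecture-go-sdk", BranchPath: "refs/heads/main"}
	content := uploadContent{CommitStrategy: "pull_request", PRTitle: "Update examples"}

	strategy, msg, title := resolveUploadSettings(content, "Copy code examples")
	fmt.Printf("%s via %s: commit=%q pr_title=%q auto_merge=%v\n",
		key.RepoName, strategy, msg, title, content.AutoMergePR)
}
```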
--- examples-copier/copier-config.yml | 39 ++++ examples-copier/services/config_loader.go | 93 +------- examples-copier/services/github_read.go | 42 ---- .../services/github_write_to_target.go | 66 +----- examples-copier/services/webhook_handler.go | 201 ------------------ .../services/webhook_handler_new.go | 4 +- github-metrics/index.js.bak | 30 +++ 7 files changed, 82 insertions(+), 393 deletions(-) delete mode 100644 examples-copier/services/webhook_handler.go create mode 100644 github-metrics/index.js.bak diff --git a/examples-copier/copier-config.yml b/examples-copier/copier-config.yml index e69de29..980884d 100644 --- a/examples-copier/copier-config.yml +++ b/examples-copier/copier-config.yml @@ -0,0 +1,39 @@ +# YAML configuration for the DevDocs GitHub Code Example Copier +# See documentation at https://github.com/mongodb/code-example-tooling/tree/main/examples-copier/docs + +source_repo: "10gen/docs-mongodb-internal" +source_branch: "main" + +copy_rules: + # Copy Atlas SDK Go examples from project-copy directory + - name: "atlas-sdk-go-project-examples" + source_pattern: + type: "prefix" + pattern: "content/code-examples/tested/go/atlas-sdk/project-copy/" + targets: + # Target: Architecture Center Go SDK artifact repo + - repo: "mongodb/atlas-architecture-go-sdk" + branch: "main" + # Strip the source prefix and keep only the relative path from project-copy/ + path_transform: "${relative_path}" + commit_strategy: + # Create a PR and require review for changes + type: "pull_request" + pr_title: "Update Atlas SDK Go examples from ${source_repo} PR ${pr_number}" + pr_body: | + Automated update of Atlas SDK Go project examples + + **Source Details:** + - Repository: ${source_repo} + - Branch: ${source_branch} + - Source PR: #${pr_number} + - Commit: ${commit_sha} + + **Changes:** + - Files updated: ${file_count} + + commit_message: "Update Atlas SDK Go examples from ${source_repo} PR #${pr_number}" + auto_merge: false + deprecation_check: + enabled: true + file: "deprecated_examples.json" \ No newline at end of file diff --git a/examples-copier/services/config_loader.go b/examples-copier/services/config_loader.go index 43ad30b..14b2dd5 100644 --- a/examples-copier/services/config_loader.go +++ b/examples-copier/services/config_loader.go @@ -2,10 +2,8 @@ package services import ( "context" - "encoding/json" "fmt" "os" - "strings" "github.com/google/go-github/v48/github" "gopkg.in/yaml.v3" @@ -56,37 +54,9 @@ func (cl *DefaultConfigLoader) LoadConfigFromContent(content string, filename st return nil, fmt.Errorf("config file is empty") } - // Determine format based on file extension or content - isYAML := strings.HasSuffix(filename, ".yaml") || strings.HasSuffix(filename, ".yml") - isJSON := strings.HasSuffix(filename, ".json") - - // If extension doesn't tell us, try to detect from content - if !isYAML && !isJSON { - trimmed := strings.TrimSpace(content) - if strings.HasPrefix(trimmed, "{") || strings.HasPrefix(trimmed, "[") { - isJSON = true - } else { - isYAML = true - } - } - + // Parse as YAML (supports both YAML and JSON since YAML is a superset of JSON) var yamlConfig types.YAMLConfig - var err error - - if isYAML { - err = yaml.Unmarshal([]byte(content), &yamlConfig) - } else { - // Try to parse as legacy JSON format first - var legacyConfig types.ConfigFileType - if err := json.Unmarshal([]byte(content), &legacyConfig); err == nil { - // Convert legacy format to new format - return convertLegacyToYAML(legacyConfig), nil - } - - // Try new JSON format - err = 
json.Unmarshal([]byte(content), &yamlConfig) - } - + err := yaml.Unmarshal([]byte(content), &yamlConfig) if err != nil { return nil, fmt.Errorf("failed to parse config file: %w", err) } @@ -102,65 +72,6 @@ func (cl *DefaultConfigLoader) LoadConfigFromContent(content string, filename st return &yamlConfig, nil } -// convertLegacyToYAML converts legacy JSON config to new YAML config format -func convertLegacyToYAML(legacy types.ConfigFileType) *types.YAMLConfig { - yamlConfig := &types.YAMLConfig{ - SourceRepo: "", // Will be set from environment - SourceBranch: "main", - CopyRules: make([]types.CopyRule, 0, len(legacy)), - } - - for i, oldRule := range legacy { - // Create a prefix pattern from the old source_directory - pattern := types.SourcePattern{ - Type: types.PatternTypePrefix, - Pattern: oldRule.SourceDirectory, - } - - // Determine path transform based on recursive_copy - var pathTransform string - if oldRule.RecursiveCopy { - pathTransform = fmt.Sprintf("%s/${relative_path}", oldRule.TargetDirectory) - } else { - pathTransform = fmt.Sprintf("%s/${filename}", oldRule.TargetDirectory) - } - - // Create target config - commitStrategy := "direct" - if oldRule.CopierCommitStrategy != "" { - commitStrategy = oldRule.CopierCommitStrategy - } - - target := types.TargetConfig{ - Repo: oldRule.TargetRepo, - Branch: oldRule.TargetBranch, - PathTransform: pathTransform, - CommitStrategy: types.CommitStrategyConfig{ - Type: commitStrategy, - CommitMessage: oldRule.CommitMessage, - PRTitle: oldRule.PRTitle, - AutoMerge: oldRule.MergeWithoutReview, - }, - DeprecationCheck: &types.DeprecationConfig{ - Enabled: true, - File: "deprecated_examples.json", - }, - } - - // Create copy rule - rule := types.CopyRule{ - Name: fmt.Sprintf("legacy-rule-%d", i+1), - SourcePattern: pattern, - Targets: []types.TargetConfig{target}, - } - - yamlConfig.CopyRules = append(yamlConfig.CopyRules, rule) - } - - yamlConfig.SetDefaults() - return yamlConfig -} - // retrieveConfigFileContent fetches the config file content from the repository func retrieveConfigFileContent(ctx context.Context, filePath string, config *configs.Config) (string, error) { // Get GitHub client diff --git a/examples-copier/services/github_read.go b/examples-copier/services/github_read.go index 27fbdfa..c9eccd5 100644 --- a/examples-copier/services/github_read.go +++ b/examples-copier/services/github_read.go @@ -2,7 +2,6 @@ package services import ( "context" - "encoding/json" "fmt" "log" "os" @@ -13,22 +12,6 @@ import ( "github.com/shurcooL/githubv4" ) -// RetrieveAndParseConfigFile fetches the configuration file from the repository -// and unmarshals its JSON content into a ConfigFileType structure. -func RetrieveAndParseConfigFile() (ConfigFileType, error) { - content := retrieveJsonFile(configs.ConfigFile) - if content == "" { - return nil, &github.Error{Message: "Config File Not Found or is empty"} - } - var configFile ConfigFileType - err := json.Unmarshal([]byte(content), &configFile) - if err != nil { - LogError(fmt.Sprintf("Failed to unmarshal %s: %v", configs.ConfigFile, err)) - return nil, err - } - return configFile, nil -} - // GetFilesChangedInPr retrieves the list of files changed in a specified pull request. // It returns a slice of ChangedFile structures containing details about each changed file. 
func GetFilesChangedInPr(pr_number int) ([]ChangedFile, error) { @@ -65,31 +48,6 @@ func GetFilesChangedInPr(pr_number int) ([]ChangedFile, error) { return changedFiles, nil } -// retrieveJsonFile fetches the content of a JSON file from the specified path in the repository. -// It returns the file content as a string. -func retrieveJsonFile(filePath string) string { - client := GetRestClient() - owner := os.Getenv(configs.RepoOwner) - repo := os.Getenv(configs.RepoName) - ctx := context.Background() - fileContent, _, _, err := - client.Repositories.GetContents(ctx, owner, repo, - filePath, &github.RepositoryContentGetOptions{ - Ref: os.Getenv(configs.SrcBranch), - }) - if err != nil { - LogCritical(fmt.Sprintf("Error getting file content: %v", err)) - return "" - } - - content, err := fileContent.GetContent() - if err != nil { - LogCritical(fmt.Sprintf("Error decoding file content: %v", err)) - return "" - } - return content -} - // RetrieveFileContents fetches the contents of a file from the repository at the specified path. // It returns a github.RepositoryContent object containing the file details. func RetrieveFileContents(filePath string) (github.RepositoryContent, error) { diff --git a/examples-copier/services/github_write_to_target.go b/examples-copier/services/github_write_to_target.go index b7f2439..79c0601 100644 --- a/examples-copier/services/github_write_to_target.go +++ b/examples-copier/services/github_write_to_target.go @@ -22,35 +22,7 @@ import ( var FilesToUpload map[UploadKey]UploadFileContent var FilesToDeprecate map[string]Configs -// commitStrategy returns the commit strategy. -// Priority: -// 1) Configs.CopierCommitStrategy if provided ("direct" or "pr") -// 2) Environment variable COPIER_COMMIT_STRATEGY ("direct" or "pr") -// 3) Default to "direct" for minimal side effects in tests and local runs. -func commitStrategy(c Configs) string { - switch v := c.CopierCommitStrategy; v { - case "direct", "pr": - return v - } - // Fallback to env var if config not specified - ccs := os.Getenv("COPIER_COMMIT_STRATEGY") - switch ccs { - case "direct", "pr": - return ccs - default: - return "direct" - } -} -// findConfig returns the first entry matching repoName or zero-value -func findConfig(cfgs ConfigFileType, repoName string) Configs { - for _, c := range cfgs { - if c.TargetRepo == repoName { - return c - } - } - return Configs{} -} // repoOwner returns the repository owner from environment variables. func repoOwner() string { return os.Getenv(configs.RepoOwner) } @@ -68,54 +40,34 @@ func parseRepoPath(repoPath string) (owner, repo string) { // AddFilesToTargetRepoBranch uploads files to the target repository branch // using the specified commit strategy (direct or via pull request). 
-func AddFilesToTargetRepoBranch(cfgs ...ConfigFileType) { +func AddFilesToTargetRepoBranch() { ctx := context.Background() client := GetRestClient() - var effectiveCfgs ConfigFileType - if len(cfgs) > 0 { - effectiveCfgs = cfgs[0] - } - for key, value := range FilesToUpload { - cfg := findConfig(effectiveCfgs, key.RepoName) - - // Determine commit strategy: prefer value.CommitStrategy (from new pattern-matching system), - // fallback to legacy config-based strategy + // Determine commit strategy from value (set by pattern-matching system) strategy := string(value.CommitStrategy) if strategy == "" { - strategy = commitStrategy(cfg) + strategy = "direct" // default } - // Determine messages: prefer value fields (from new system), fallback to legacy config + // Get commit message from value or use default commitMsg := value.CommitMessage if strings.TrimSpace(commitMsg) == "" { - commitMsg = cfg.CommitMessage + commitMsg = os.Getenv(configs.DefaultCommitMessage) if strings.TrimSpace(commitMsg) == "" { - commitMsg = os.Getenv(configs.DefaultCommitMessage) - if strings.TrimSpace(commitMsg) == "" { - commitMsg = configs.NewConfig().DefaultCommitMessage - } + commitMsg = configs.NewConfig().DefaultCommitMessage } } + // Get PR title from value or use commit message prTitle := value.PRTitle if strings.TrimSpace(prTitle) == "" { - prTitle = cfg.PRTitle - if strings.TrimSpace(prTitle) == "" { - prTitle = commitMsg - } + prTitle = commitMsg } - // Determine auto-merge: prefer value.AutoMergePR (from new system), fallback to legacy config + // Get auto-merge setting from value mergeWithoutReview := value.AutoMergePR - if !value.AutoMergePR && cfg.TargetRepo == "" { - // Preserve historical behavior for tests/local runs: default to auto-merge when no config present - mergeWithoutReview = true - } else if cfg.TargetRepo != "" { - // If legacy config exists, use its setting - mergeWithoutReview = cfg.MergeWithoutReview - } switch strategy { case "direct": // commits directly to the target branch diff --git a/examples-copier/services/webhook_handler.go b/examples-copier/services/webhook_handler.go deleted file mode 100644 index fd9b25f..0000000 --- a/examples-copier/services/webhook_handler.go +++ /dev/null @@ -1,201 +0,0 @@ -package services - -import ( - "encoding/json" - "fmt" - "io" - "net/http" - "path/filepath" - "strings" - - "github.com/google/go-github/v48/github" - . "github.com/mongodb/code-example-tooling/code-copier/types" -) - -// ParseWebhookData processes incoming GitHub webhook requests. -// It extracts the pull request number, state, and merged status from the payload. 
-// If the pull request is closed and merged, it triggers the handling of the PR closed event -func ParseWebhookData(w http.ResponseWriter, r *http.Request) { - defer func(Body io.ReadCloser) { - err := Body.Close() - if err != nil { - LogInfo(fmt.Sprintf("Error closing ReadCloser %v", err)) - } - }(r.Body) - - input, err := io.ReadAll(r.Body) - if err != nil { - LogCritical(fmt.Sprintf("Fail when parsing webhook: %v", err)) - http.Error(w, "Failed to read request body", http.StatusBadRequest) - return - } - - var payload map[string]interface{} - if err := json.Unmarshal(input, &payload); err != nil { - LogError(fmt.Sprintf("Error unmarshalling outer JSON: %v", err)) - http.Error(w, "Invalid JSON format", http.StatusBadRequest) - return - } - - pullRequest, ok := payload["pull_request"].(map[string]interface{}) - if !ok { - LogWarning("Error asserting pull_request as map[string]interface{}") - http.Error(w, "Invalid webhook payload format", http.StatusBadRequest) - return - } - - number, exists := pullRequest["number"] - if !exists { - LogWarning("Key 'number' missing in the JSON input") - http.Error(w, "Missing required fields in payload", http.StatusBadRequest) - return - } - - numberFloat, ok := number.(float64) - if !ok { - LogWarning("Error asserting number as float64") - http.Error(w, "Invalid number format in payload", http.StatusBadRequest) - return - } - numberAsInt := int(numberFloat) - - state, ok := pullRequest["state"].(string) - if !ok { - LogWarning("Error asserting state as string") - http.Error(w, "Invalid state format in payload", http.StatusBadRequest) - return - } - - merged, ok := pullRequest["merged"].(bool) - if !ok { - LogWarning("Error asserting merged as bool") - http.Error(w, "Invalid merged format in payload", http.StatusBadRequest) - return - } - - if state == "closed" && merged { - LogInfo(fmt.Sprintf("PR %d was merged and closed.", numberAsInt)) - LogInfo("--Start--") - if err = HandleSourcePrClosedEvent(numberAsInt); err != nil { - LogError(fmt.Sprintf("Failed to handle PR closed event: %v", err)) - http.Error(w, "Failed to process webhook", http.StatusInternalServerError) - return - } - } - - w.WriteHeader(http.StatusOK) -} - -// HandleSourcePrClosedEvent processes a closed and merged pull request. -// It retrieves the configuration file, gets the list of changed files in the PR, -// and iterates through the files to determine which need to be copied or deprecated -// based on the configuration. Finally, it adds the files to the target repository branch -// and updates the deprecation file as necessary. 
-func HandleSourcePrClosedEvent(pr_number int) error { - if InstallationAccessToken == "" { - ConfigurePermissions() - } - - configFile, configError := RetrieveAndParseConfigFile() - if configError != nil { - LogError(fmt.Sprintf("Failed to retrieve and parse config file: %v", configError)) - return fmt.Errorf("config file error: %w", configError) - } - - changedFiles, changedFilesError := GetFilesChangedInPr(pr_number) - if changedFilesError != nil { - LogError(fmt.Sprintf("Failed to get files changed in PR %d: %v", pr_number, changedFilesError)) - return fmt.Errorf("failed to get changed files: %w", changedFilesError) - } - - err := IterateFilesForCopy(changedFiles, configFile) - if err != nil { - return err - } - AddFilesToTargetRepoBranch(configFile) - UpdateDeprecationFile() - LogInfo("--Done--") - return nil -} - -// IterateFilesForCopy processes the list of changed files and determines which files need to be copied -// to the target repositories based on the config file. Handles both recursive and non-recursive -// copying modes, and updates the global maps for files to upload and deprecate accordingly. -func IterateFilesForCopy(changedFiles []ChangedFile, configFile ConfigFileType) error { - var totalFileCount int32 - var uploadedCount int32 - - for _, file := range changedFiles { - totalFileCount++ - for _, config := range configFile { - matches := false - var relativePath string - - if config.RecursiveCopy { - // Recursive mode - check if path starts with source directory - if strings.HasPrefix(file.Path, config.SourceDirectory) { - matches = true - var err error - relativePath, err = filepath.Rel(config.SourceDirectory, file.Path) - if err != nil { - return fmt.Errorf("failed to determine relative path for %s: %w", file.Path, err) - } - } - } else { - // Non-recursive mode - exact directory match only - justPath := filepath.Dir(file.Path) - if config.SourceDirectory == justPath { - matches = true - relativePath = filepath.Base(file.Path) - } - } - - if matches { - target := filepath.Join(config.TargetDirectory, relativePath) - - if file.Status == "DELETED" { - LogInfo(fmt.Sprintf("File %s has been deleted. Adding to the deprecation file.", target)) - addToDeprecationMap(target, config) - } else { - LogInfo(fmt.Sprintf("Found file %s to copy to %s/%s on branch %s", - file.Path, config.TargetRepo, target, config.TargetBranch)) - fileContent, err := RetrieveFileContents(file.Path) - if err != nil { - return fmt.Errorf("failed to retrieve contents for %s: %w", file.Path, err) - } - AddToRepoAndFilesMap(config.TargetRepo, config.TargetBranch, fileContent) - } - uploadedCount++ - } - } - } - return nil -} - -// RetrieveAndParseConfigFile fetches the configuration file from the source repository -// and unmarshals its JSON content into a ConfigFileType structure. -func addToDeprecationMap(target string, config Configs) { - if FilesToDeprecate == nil { - FilesToDeprecate = make(map[string]Configs) - } - FilesToDeprecate[target] = config -} - -// AddToRepoAndFilesMap adds a file to the global FilesToUpload map under the specified repository and branch. -// If the repository and branch combination already exists in the map, it appends the file to the existing list. -// Otherwise, it creates a new entry in the map. 
-func AddToRepoAndFilesMap(repoName, targetBranch string, file github.RepositoryContent) { - if FilesToUpload == nil { - FilesToUpload = make(map[UploadKey]UploadFileContent) - } - key := UploadKey{RepoName: repoName, BranchPath: fmt.Sprintf("%s%s", "refs/heads/", targetBranch)} - if entry, exists := FilesToUpload[key]; exists { - entry.Content = append(entry.Content, file) - FilesToUpload[key] = entry - } else { - var fileContent = UploadFileContent{} - fileContent.TargetBranch = targetBranch - fileContent.Content = []github.RepositoryContent{file} - FilesToUpload[key] = fileContent - } -} diff --git a/examples-copier/services/webhook_handler_new.go b/examples-copier/services/webhook_handler_new.go index c68ae83..453e9c6 100644 --- a/examples-copier/services/webhook_handler_new.go +++ b/examples-copier/services/webhook_handler_new.go @@ -188,9 +188,9 @@ func handleMergedPRWithContainer(ctx context.Context, prNumber int, sourceCommit // Process files with new pattern matching processFilesWithPatternMatching(ctx, prNumber, sourceCommitSHA, changedFiles, yamlConfig, config, container) - // Upload queued files - copy from FileStateService to global map for legacy function + // Upload queued files FilesToUpload = container.FileStateService.GetFilesToUpload() - AddFilesToTargetRepoBranch(nil) + AddFilesToTargetRepoBranch() container.FileStateService.ClearFilesToUpload() // Update deprecation file - copy from FileStateService to global map for legacy function diff --git a/github-metrics/index.js.bak b/github-metrics/index.js.bak new file mode 100644 index 0000000..7e5bf63 --- /dev/null +++ b/github-metrics/index.js.bak @@ -0,0 +1,30 @@ +import { getGitHubMetrics } from "./get-github-metrics.js"; +import { addMetricsToAtlas } from "./write-to-db.js"; + +/* To change which repos to track metrics for, update the `repos` array before running the utility. +To track metrics for a new repo, set the owner and name first. +You can get the owner and name from the repo URL: `https://github.com//` +For example, to add `https://github.com/mongodb/docs-notebooks`, set `mongodb` as the +owner and `docs-notebooks` as the repo name. +NOTE: The GitHub token used to retrieve the info from a repo MUST have repo admin permissions to access all the endpoints in this code. 
*/ + +class RepoDetails { + constructor(owner, repo) { + this.owner = owner; // the GitHub organization or member who owns the repo + this.repo = repo; // the name of the repo within the organization or member + } +} + +const docsNotebooksRepo = new RepoDetails("mongodb", "docs-notebooks"); +const atlasArchitectureGoSdkRepo = new RepoDetails("mongodb", "atlas-architecture-go-sdk"); + +const repos = [docsNotebooksRepo, atlasArchitectureGoSdkRepo]; + +const metricsDocs = []; + +for (const repo of repos) { + const metricsDoc = await getGitHubMetrics(repo.owner, repo.repo); + metricsDocs.push(metricsDoc); +} + +await addMetricsToAtlas(metricsDocs); From 543b76f4473ca0658df6724c7fd8b37228e063ee Mon Sep 17 00:00:00 2001 From: cbullinger Date: Tue, 14 Oct 2025 13:50:29 -0400 Subject: [PATCH 06/11] Fix build errors from legacy code removal - Re-added encoding/json import to config_loader.go (needed for ExportConfigAsJSON) - Fixed UpdateDeprecationFile to fetch file directly instead of using removed retrieveJsonFile - Removed web_server.go (legacy, not used anywhere) --- examples-copier/services/config_loader.go | 1 + .../services/github_write_to_source.go | 29 ++++++++++-- examples-copier/services/web_server.go | 45 ------------------- 3 files changed, 27 insertions(+), 48 deletions(-) delete mode 100644 examples-copier/services/web_server.go diff --git a/examples-copier/services/config_loader.go b/examples-copier/services/config_loader.go index 14b2dd5..d5e42b2 100644 --- a/examples-copier/services/config_loader.go +++ b/examples-copier/services/config_loader.go @@ -2,6 +2,7 @@ package services import ( "context" + "encoding/json" "fmt" "os" diff --git a/examples-copier/services/github_write_to_source.go b/examples-copier/services/github_write_to_source.go index 2f336b6..fec4690 100644 --- a/examples-copier/services/github_write_to_source.go +++ b/examples-copier/services/github_write_to_source.go @@ -19,12 +19,35 @@ func UpdateDeprecationFile() { return } - content := retrieveJsonFile(os.Getenv(configs.DeprecationFile)) + // Fetch the deprecation file from the repository + client := GetRestClient() + ctx := context.Background() + + fileContent, _, _, err := client.Repositories.GetContents( + ctx, + os.Getenv(configs.RepoOwner), + os.Getenv(configs.RepoName), + os.Getenv(configs.DeprecationFile), + &github.RepositoryContentGetOptions{ + Ref: os.Getenv(configs.SrcBranch), + }, + ) + if err != nil { + LogError(fmt.Sprintf("Error getting deprecation file: %v", err)) + return + } + + content, err := fileContent.GetContent() + if err != nil { + LogError(fmt.Sprintf("Error decoding deprecation file: %v", err)) + return + } var deprecationFile DeprecationFile - err := json.Unmarshal([]byte(content), &deprecationFile) + err = json.Unmarshal([]byte(content), &deprecationFile) if err != nil { - LogError(fmt.Sprintf("Failed to unmarshal %s: %v", configs.ConfigFile, err)) + LogError(fmt.Sprintf("Failed to unmarshal %s: %v", configs.DeprecationFile, err)) + return } for key, value := range FilesToDeprecate { diff --git a/examples-copier/services/web_server.go b/examples-copier/services/web_server.go deleted file mode 100644 index 8489411..0000000 --- a/examples-copier/services/web_server.go +++ /dev/null @@ -1,45 +0,0 @@ -package services - -import ( - "fmt" - "log" - "net/http" - "os" - - "github.com/mongodb/code-example-tooling/code-copier/configs" - "github.com/pkg/errors" -) - -// SetupWebServerAndListen sets up the web server and listens for incoming webhook requests. 
-func SetupWebServerAndListen() { - // Get environment file path from command line flag or environment variable - envFilePath := os.Getenv("ENV_FILE") - - _, err := configs.LoadEnvironment(envFilePath) - if err != nil { - log.Fatal(errors.Wrap(err, "Failed to load environment")) - } - - InitializeGoogleLogger() - defer CloseGoogleLogger() - path := os.Getenv(configs.WebserverPath) - if path == "" { - path = configs.NewConfig().WebserverPath - } - http.HandleFunc(path, ParseWebhookData) - port := os.Getenv(configs.Port) - if port == "" { - port = ":8080" // default port - } else { - port = ":" + port - } - - LogInfo(fmt.Sprintf("Starting web server on port %s; path %s", port, path)) - - e := http.ListenAndServe(port, nil) - if e != nil && !errors.Is(e, http.ErrServerClosed) { - log.Fatal(errors.Wrap(e, "Error starting server")) - } else { - LogInfo(fmt.Sprintf("Web server listening on " + path)) - } -} From dcb4a2cfad43d9e4c0e0a117ef2e5dee7eb00457 Mon Sep 17 00:00:00 2001 From: cbullinger Date: Tue, 14 Oct 2025 14:02:19 -0400 Subject: [PATCH 07/11] Add debug logging to createCommitTree --- examples-copier/services/github_write_to_target.go | 1 + 1 file changed, 1 insertion(+) diff --git a/examples-copier/services/github_write_to_target.go b/examples-copier/services/github_write_to_target.go index 79c0601..4bcccb2 100644 --- a/examples-copier/services/github_write_to_target.go +++ b/examples-copier/services/github_write_to_target.go @@ -237,6 +237,7 @@ func createCommitTree(ctx context.Context, client *github.Client, targetBranch U files map[string]string) (treeSHA string, baseSHA string, err error) { owner, repoName := parseRepoPath(targetBranch.RepoName) + LogInfo(fmt.Sprintf("DEBUG createCommitTree: targetBranch.RepoName=%q, parsed owner=%q, repoName=%q", targetBranch.RepoName, owner, repoName)) // 1) Get current ref (ONE GET) ref, _, err := client.Git.GetRef(ctx, owner, repoName, targetBranch.BranchPath) From a88d74cf2b42129c9f71015a63d53f2c93cdd9d1 Mon Sep 17 00:00:00 2001 From: cbullinger Date: Wed, 15 Oct 2025 15:19:40 -0400 Subject: [PATCH 08/11] Add support for cross-org copy, fix missing named variables, and add test coverage --- examples-copier/QUICK-REFERENCE.md | 28 +- examples-copier/README.md | 37 +- examples-copier/app.yaml | 36 ++ .../cmd/config-validator/README.md | 6 +- examples-copier/configs/README.md | 16 +- examples-copier/copier-config.yml | 39 -- examples-copier/docs/ARCHITECTURE.md | 4 +- examples-copier/docs/CONFIGURATION-GUIDE.md | 3 +- examples-copier/docs/FAQ.md | 2 +- examples-copier/docs/LOCAL-TESTING.md | 2 +- examples-copier/go.mod | 1 + examples-copier/go.sum | 2 + examples-copier/services/audit_logger_test.go | 304 +++++++++++++ .../services/file_state_service_test.go | 24 +- examples-copier/services/github_auth.go | 140 ++++++ examples-copier/services/github_auth_test.go | 224 ++++++++++ .../services/github_write_to_source_test.go | 135 ++++++ .../services/github_write_to_target.go | 11 +- .../services/github_write_to_target_test.go | 8 +- examples-copier/services/logger_test.go | 408 ++++++++++++++++++ examples-copier/services/pattern_matcher.go | 36 +- .../services/service_container_test.go | 360 ++++++++++++++++ .../services/slack_notifier_test.go | 332 ++++++++++++++ .../services/webhook_handler_new.go | 137 ++++-- .../services/webhook_handler_new_test.go | 327 ++++++++++++++ examples-copier/types/types.go | 6 +- 26 files changed, 2488 insertions(+), 140 deletions(-) delete mode 100644 examples-copier/copier-config.yml create mode 100644 
examples-copier/services/audit_logger_test.go create mode 100644 examples-copier/services/github_auth_test.go create mode 100644 examples-copier/services/github_write_to_source_test.go create mode 100644 examples-copier/services/logger_test.go create mode 100644 examples-copier/services/service_container_test.go create mode 100644 examples-copier/services/slack_notifier_test.go create mode 100644 examples-copier/services/webhook_handler_new_test.go diff --git a/examples-copier/QUICK-REFERENCE.md b/examples-copier/QUICK-REFERENCE.md index 3d83912..aab2c5a 100644 --- a/examples-copier/QUICK-REFERENCE.md +++ b/examples-copier/QUICK-REFERENCE.md @@ -410,25 +410,31 @@ gcloud secrets list ``` examples-copier/ ├── README.md # Main documentation -├── MIGRATION-GUIDE.md # Migration from legacy ├── QUICK-REFERENCE.md # This file -├── REFACTORING-SUMMARY.md # Feature details ├── docs/ +│ ├── ARCHITECTURE.md # Architecture overview +│ ├── CONFIGURATION-GUIDE.md # Complete config reference │ ├── DEPLOYMENT.md # Deployment guide -│ └── DEPLOYMENT-CHECKLIST.md # Deployment checklist -├── TESTING-SUMMARY.md # Test documentation +│ ├── DEPLOYMENT-CHECKLIST.md # Deployment checklist +│ ├── FAQ.md # Frequently asked questions +│ ├── LOCAL-TESTING.md # Local testing guide +│ ├── PATTERN-MATCHING-GUIDE.md # Pattern matching guide +│ ├── PATTERN-MATCHING-CHEATSHEET.md # Quick pattern reference +│ ├── TROUBLESHOOTING.md # Troubleshooting guide +│ └── WEBHOOK-TESTING.md # Webhook testing guide ├── configs/ │ ├── .env # Environment config -│ ├── .env.example.new # Environment template -│ └── config.example.yaml # Config template +│ ├── env.yaml.example # Environment template +│ └── copier-config.example.yaml # Config template └── cmd/ - └── config-validator/ # CLI tool + ├── config-validator/ # CLI validation tool + └── test-webhook/ # Webhook testing tool ``` ## Quick Start Checklist - [ ] Clone repository -- [ ] Copy `.env.example.new` to `.env` +- [ ] Copy `configs/.env.local.example` to `configs/.env` - [ ] Set required environment variables - [ ] Create `copier-config.yaml` in source repo - [ ] Validate config: `./config-validator validate -config copier-config.yaml` @@ -440,6 +446,8 @@ examples-copier/ ## Support - **Documentation**: [README.md](README.md) -- **Migration**: [MIGRATION-GUIDE.md](./docs/MIGRATION-GUIDE.md) -- **Deployment**: [DEPLOYMENT-GUIDE.md](./docs/DEPLOYMENT-GUIDE.md) +- **Configuration**: [Configuration Guide](./docs/CONFIGURATION-GUIDE.md) +- **Deployment**: [Deployment Guide](./docs/DEPLOYMENT.md) +- **Troubleshooting**: [Troubleshooting Guide](./docs/TROUBLESHOOTING.md) +- **FAQ**: [Frequently Asked Questions](./docs/FAQ.md) diff --git a/examples-copier/README.md b/examples-copier/README.md index 1dbb97c..68eec75 100644 --- a/examples-copier/README.md +++ b/examples-copier/README.md @@ -48,10 +48,14 @@ go build -o config-validator ./cmd/config-validator ### Configuration -1. **Copy .env example file** +1. **Copy environment example file** ```bash -cp configs/.env.example.new configs/.env +# For local development +cp configs/.env.local.example configs/.env + +# Or for YAML-based configuration +cp configs/env.yaml.example env.yaml ``` 2. 
**Set required environment variables** @@ -387,11 +391,13 @@ COPIER_DEBUG=true ./examples-copier examples-copier/ ├── app.go # Main application entry point ├── cmd/ -│ └── config-validator/ # CLI validation tool +│ ├── config-validator/ # CLI validation tool +│ └── test-webhook/ # Webhook testing tool ├── configs/ │ ├── environment.go # Environment configuration -│ ├── .env.example.new # Environment template -│ └── config.example.yaml # Config template +│ ├── .env.local.example # Local environment template +│ ├── env.yaml.example # YAML environment template +│ └── copier-config.example.yaml # Config template ├── services/ │ ├── pattern_matcher.go # Pattern matching engine │ ├── config_loader.go # Config loading & validation @@ -399,10 +405,20 @@ examples-copier/ │ ├── health_metrics.go # Health & metrics endpoints │ ├── file_state_service.go # Thread-safe state management │ ├── service_container.go # Dependency injection -│ └── webhook_handler_new.go # New webhook handler -└── types/ - ├── config.go # Configuration types - └── types.go # Core types +│ ├── webhook_handler_new.go # Webhook handler +│ ├── github_auth.go # GitHub authentication +│ ├── github_read.go # GitHub read operations +│ ├── github_write_to_target.go # GitHub write operations +│ └── slack_notifier.go # Slack notifications +├── types/ +│ ├── config.go # Configuration types +│ └── types.go # Core types +└── docs/ + ├── ARCHITECTURE.md # Architecture overview + ├── CONFIGURATION-GUIDE.md # Complete config reference + ├── DEPLOYMENT.md # Deployment guide + ├── FAQ.md # Frequently asked questions + └── ... # Additional documentation ``` ### Service Container @@ -452,9 +468,10 @@ docker run -p 8080:8080 --env-file .env examples-copier - **[Pattern Matching Cheat Sheet](docs/PATTERN-MATCHING-CHEATSHEET.md)** - Quick pattern syntax reference - **[Architecture](docs/ARCHITECTURE.md)** - System design and components -- **[Migration Guide](docs/MIGRATION-GUIDE.md)** - Migrate from legacy JSON config - **[Troubleshooting](docs/TROUBLESHOOTING.md)** - Common issues and solutions - **[FAQ](docs/FAQ.md)** - Frequently asked questions +- **[Debug Logging](docs/DEBUG-LOGGING.md)** - Debug logging configuration +- **[Deprecation Tracking](docs/DEPRECATION-TRACKING-EXPLAINED.md)** - How deprecation tracking works ### Features diff --git a/examples-copier/app.yaml b/examples-copier/app.yaml index 4b2ee75..6c78988 100644 --- a/examples-copier/app.yaml +++ b/examples-copier/app.yaml @@ -6,3 +6,39 @@ env: flex includes: - env.yaml + +# Automatic scaling configuration +# Keeps at least 1 instance running to avoid cold starts +automatic_scaling: + min_num_instances: 1 + max_num_instances: 10 + cool_down_period_sec: 120 + cpu_utilization: + target_utilization: 0.6 + +# Network configuration +network: + session_affinity: true + +# Health check configuration +# These ensure the app is ready before receiving traffic +liveness_check: + path: "/health" + check_interval_sec: 30 + timeout_sec: 4 + failure_threshold: 2 + success_threshold: 2 + +readiness_check: + path: "/health" + check_interval_sec: 5 + timeout_sec: 4 + failure_threshold: 2 + success_threshold: 2 + app_start_timeout_sec: 300 + +# Resources configuration +resources: + cpu: 1 + memory_gb: 2 + disk_size_gb: 10 diff --git a/examples-copier/cmd/config-validator/README.md b/examples-copier/cmd/config-validator/README.md index fed9044..6330ef8 100644 --- a/examples-copier/cmd/config-validator/README.md +++ b/examples-copier/cmd/config-validator/README.md @@ -338,8 +338,8 @@ EOF ## See Also -- 
[Configuration Setup](../../docs/CONFIG-SETUP.md) - Configuration guide +- [Configuration Guide](../../docs/CONFIGURATION-GUIDE.md) - Complete configuration reference - [Pattern Matching Guide](../../docs/PATTERN-MATCHING-GUIDE.md) - Pattern matching help -- [Migration Guide](../../docs/MIGRATION-GUIDE.md) - Migrating from JSON -- [Quick Reference](../../docs/QUICK-REFERENCE.md) - All commands +- [FAQ](../../docs/FAQ.md) - Frequently asked questions (includes JSON to YAML conversion) +- [Quick Reference](../../QUICK-REFERENCE.md) - All commands diff --git a/examples-copier/configs/README.md b/examples-copier/configs/README.md index d6e6562..397bfb4 100644 --- a/examples-copier/configs/README.md +++ b/examples-copier/configs/README.md @@ -8,7 +8,7 @@ Overview of the different environment configuration files and when to use each. |-----------------------|---------------------------------------|---------------------------------| | `env.yaml.example` | Complete reference with all variables | First-time setup, documentation | | `env.yaml.production` | Production-ready template | Quick deployment to production | -| `.env.example` | Local development template | Local testing and development | +| `.env.local.example` | Local development template | Local testing and development | --- @@ -61,9 +61,9 @@ Overview of the different environment configuration files and when to use each. --- -## .env.example.new +## .env.local.example -**Location:** `configs/.env.example.new` +**Location:** `configs/.env.local.example` **Purpose:** Local development template (traditional .env format) @@ -136,11 +136,11 @@ nano env.yaml # Enable features you need ### Scenario 3: Local Development -**Recommended:** `.env.example.new` +**Recommended:** `.env.local.example` ```bash # Local development -cp configs/.env.example.new configs/.env +cp configs/.env.local.example configs/.env nano configs/.env # Add your values # Run locally @@ -215,7 +215,7 @@ diff configs/env.yaml.production configs/env.yaml.example - **Use `env.yaml.production` for quick production deployment** - **Use `env.yaml.example` as reference documentation** -- **Use `.env.example.new` for local development** +- **Use `.env.local.example` for local development** - **Add `env.yaml` and `.env` to `.gitignore`** - **Use Secret Manager for production secrets** - **Keep comments in your env.yaml for team documentation** @@ -237,7 +237,7 @@ examples-copier/ ├── configs/ │ ├── env.yaml.example # ← Complete reference (all variables) │ ├── env.yaml.production # ← Production template (essential only) -│ └── .env.example # ← Local development template +│ └── .env.local.example # ← Local development template ├── env.yaml # ← Your actual config (gitignored) └── .env # ← Your local config (gitignored) ``` @@ -253,7 +253,7 @@ examples-copier/ → Read `env.yaml.example` **Need to develop locally?** -→ Use `.env.example.new` +→ Use `.env.local.example` **Need advanced features?** → Start with `env.yaml.example`, customize diff --git a/examples-copier/copier-config.yml b/examples-copier/copier-config.yml deleted file mode 100644 index 980884d..0000000 --- a/examples-copier/copier-config.yml +++ /dev/null @@ -1,39 +0,0 @@ -# YAML configuration for the DevDocs GitHub Code Example Copier -# See documentation at https://github.com/mongodb/code-example-tooling/tree/main/examples-copier/docs - -source_repo: "10gen/docs-mongodb-internal" -source_branch: "main" - -copy_rules: - # Copy Atlas SDK Go examples from project-copy directory - - name: "atlas-sdk-go-project-examples" - 
source_pattern: - type: "prefix" - pattern: "content/code-examples/tested/go/atlas-sdk/project-copy/" - targets: - # Target: Architecture Center Go SDK artifact repo - - repo: "mongodb/atlas-architecture-go-sdk" - branch: "main" - # Strip the source prefix and keep only the relative path from project-copy/ - path_transform: "${relative_path}" - commit_strategy: - # Create a PR and require review for changes - type: "pull_request" - pr_title: "Update Atlas SDK Go examples from ${source_repo} PR ${pr_number}" - pr_body: | - Automated update of Atlas SDK Go project examples - - **Source Details:** - - Repository: ${source_repo} - - Branch: ${source_branch} - - Source PR: #${pr_number} - - Commit: ${commit_sha} - - **Changes:** - - Files updated: ${file_count} - - commit_message: "Update Atlas SDK Go examples from ${source_repo} PR #${pr_number}" - auto_merge: false - deprecation_check: - enabled: true - file: "deprecated_examples.json" \ No newline at end of file diff --git a/examples-copier/docs/ARCHITECTURE.md b/examples-copier/docs/ARCHITECTURE.md index 822ad5c..84337a1 100644 --- a/examples-copier/docs/ARCHITECTURE.md +++ b/examples-copier/docs/ARCHITECTURE.md @@ -42,7 +42,7 @@ path_transform: "source/code-examples/${lang}/${category}/${file}" **Files Created:** - `types/config.go` - New configuration types - `services/config_loader.go` - Configuration loader with YAML/JSON support -- `configs/config.example.yaml` - Example YAML configuration +- `configs/copier-config.example.yaml` - Example YAML configuration **Capabilities:** - Native YAML support with `gopkg.in/yaml.v3` @@ -190,7 +190,7 @@ Returns detailed metrics: ```bash # Validate configuration -config-validator validate -config copier-copier-config.yaml -v +config-validator validate -config copier-config.yaml -v # Test pattern matching config-validator test-pattern \ diff --git a/examples-copier/docs/CONFIGURATION-GUIDE.md b/examples-copier/docs/CONFIGURATION-GUIDE.md index 510aa0a..fe2a6c2 100644 --- a/examples-copier/docs/CONFIGURATION-GUIDE.md +++ b/examples-copier/docs/CONFIGURATION-GUIDE.md @@ -883,9 +883,10 @@ Error: copy_rules[0]: name is required - [Pattern Matching Guide](PATTERN-MATCHING-GUIDE.md) - Detailed pattern matching documentation - [Pattern Matching Cheat Sheet](PATTERN-MATCHING-CHEATSHEET.md) - Quick reference -- [Migration Guide](MIGRATION-GUIDE.md) - Migrating from legacy JSON config +- [FAQ](FAQ.md) - Frequently asked questions (includes JSON to YAML conversion) - [Quick Reference](../QUICK-REFERENCE.md) - Command reference - [Deployment Guide](DEPLOYMENT.md) - Deploying the application +- [Architecture](ARCHITECTURE.md) - System architecture overview --- diff --git a/examples-copier/docs/FAQ.md b/examples-copier/docs/FAQ.md index d04f378..4d82a89 100644 --- a/examples-copier/docs/FAQ.md +++ b/examples-copier/docs/FAQ.md @@ -71,7 +71,7 @@ Use the config-validator tool: ./config-validator convert -input config.json -output copier-config.yaml ``` -See [Migration Guide](MIGRATION-GUIDE.md) for details. +The tool will automatically convert your legacy JSON configuration to the new YAML format while preserving all settings. 
## Pattern Matching diff --git a/examples-copier/docs/LOCAL-TESTING.md b/examples-copier/docs/LOCAL-TESTING.md index ac4b9d1..d842b82 100644 --- a/examples-copier/docs/LOCAL-TESTING.md +++ b/examples-copier/docs/LOCAL-TESTING.md @@ -34,7 +34,7 @@ make run-local-quick ```bash # Copy the local template -cp configs/.env.local configs/.env +cp configs/.env.local.example configs/.env # Edit with your values (optional) nano configs/.env diff --git a/examples-copier/go.mod b/examples-copier/go.mod index c5a3105..d27b379 100644 --- a/examples-copier/go.mod +++ b/examples-copier/go.mod @@ -25,6 +25,7 @@ require ( cloud.google.com/go/compute/metadata v0.6.0 // indirect cloud.google.com/go/iam v1.4.1 // indirect cloud.google.com/go/longrunning v0.6.4 // indirect + github.com/bmatcuk/doublestar/v4 v4.9.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-logr/logr v1.4.2 // indirect diff --git a/examples-copier/go.sum b/examples-copier/go.sum index 4465281..93db559 100644 --- a/examples-copier/go.sum +++ b/examples-copier/go.sum @@ -14,6 +14,8 @@ cloud.google.com/go/longrunning v0.6.4 h1:3tyw9rO3E2XVXzSApn1gyEEnH2K9SynNQjMlBi cloud.google.com/go/longrunning v0.6.4/go.mod h1:ttZpLCe6e7EXvn9OxpBRx7kZEB0efv8yBO6YnVMfhJs= cloud.google.com/go/secretmanager v1.14.6 h1:/ooktIMSORaWk9gm3vf8+Mg+zSrUplJFKBztP993oL0= cloud.google.com/go/secretmanager v1.14.6/go.mod h1:0OWeM3qpJ2n71MGgNfKsgjC/9LfVTcUqXFUlGxo5PzY= +github.com/bmatcuk/doublestar/v4 v4.9.1 h1:X8jg9rRZmJd4yRy7ZeNDRnM+T3ZfHv15JiBJ/avrEXE= +github.com/bmatcuk/doublestar/v4 v4.9.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= diff --git a/examples-copier/services/audit_logger_test.go b/examples-copier/services/audit_logger_test.go new file mode 100644 index 0000000..2aed1e8 --- /dev/null +++ b/examples-copier/services/audit_logger_test.go @@ -0,0 +1,304 @@ +package services + +import ( + "context" + "testing" + "time" +) + +func TestNewMongoAuditLogger_Disabled(t *testing.T) { + ctx := context.Background() + + // When enabled=false, should return NoOpAuditLogger + logger, err := NewMongoAuditLogger(ctx, "", "testdb", "testcoll", false) + if err != nil { + t.Fatalf("NewMongoAuditLogger() error = %v, want nil", err) + } + + if logger == nil { + t.Fatal("NewMongoAuditLogger() returned nil logger") + } + + // Should be NoOpAuditLogger + _, ok := logger.(*NoOpAuditLogger) + if !ok { + t.Errorf("Expected NoOpAuditLogger when disabled, got %T", logger) + } +} + +func TestNewMongoAuditLogger_EnabledWithoutURI(t *testing.T) { + ctx := context.Background() + + // When enabled=true but no URI, should return error + _, err := NewMongoAuditLogger(ctx, "", "testdb", "testcoll", true) + if err == nil { + t.Error("NewMongoAuditLogger() expected error when enabled without URI, got nil") + } + + expectedMsg := "MONGO_URI is required when audit logging is enabled" + if err.Error() != expectedMsg { + t.Errorf("Error message = %v, want %v", err.Error(), expectedMsg) + } +} + +func TestNoOpAuditLogger_LogCopyEvent(t *testing.T) { + logger := &NoOpAuditLogger{} + ctx := context.Background() + + event := &AuditEvent{ + EventType: AuditEventCopy, + RuleName: "test-rule", + SourceRepo: "test/source", + SourcePath: "test.go", + TargetRepo: 
"test/target", + TargetPath: "copied/test.go", + CommitSHA: "abc123", + PRNumber: 123, + Success: true, + DurationMs: 100, + FileSize: 1024, + } + + err := logger.LogCopyEvent(ctx, event) + if err != nil { + t.Errorf("LogCopyEvent() error = %v, want nil", err) + } +} + +func TestNoOpAuditLogger_LogDeprecationEvent(t *testing.T) { + logger := &NoOpAuditLogger{} + ctx := context.Background() + + event := &AuditEvent{ + EventType: AuditEventDeprecation, + SourceRepo: "test/source", + SourcePath: "deprecated.go", + PRNumber: 124, + Success: true, + } + + err := logger.LogDeprecationEvent(ctx, event) + if err != nil { + t.Errorf("LogDeprecationEvent() error = %v, want nil", err) + } +} + +func TestNoOpAuditLogger_LogErrorEvent(t *testing.T) { + logger := &NoOpAuditLogger{} + ctx := context.Background() + + event := &AuditEvent{ + EventType: AuditEventError, + SourceRepo: "test/source", + SourcePath: "error.go", + ErrorMessage: "test error", + Success: false, + } + + err := logger.LogErrorEvent(ctx, event) + if err != nil { + t.Errorf("LogErrorEvent() error = %v, want nil", err) + } +} + +func TestNoOpAuditLogger_GetRecentEvents(t *testing.T) { + logger := &NoOpAuditLogger{} + ctx := context.Background() + + events, err := logger.GetRecentEvents(ctx, 10) + if err != nil { + t.Errorf("GetRecentEvents() error = %v, want nil", err) + } + + if events == nil { + t.Error("GetRecentEvents() returned nil, want empty slice") + } + + if len(events) != 0 { + t.Errorf("GetRecentEvents() returned %d events, want 0", len(events)) + } +} + +func TestNoOpAuditLogger_GetFailedEvents(t *testing.T) { + logger := &NoOpAuditLogger{} + ctx := context.Background() + + events, err := logger.GetFailedEvents(ctx, 10) + if err != nil { + t.Errorf("GetFailedEvents() error = %v, want nil", err) + } + + if events == nil { + t.Error("GetFailedEvents() returned nil, want empty slice") + } + + if len(events) != 0 { + t.Errorf("GetFailedEvents() returned %d events, want 0", len(events)) + } +} + +func TestNoOpAuditLogger_GetEventsByRule(t *testing.T) { + logger := &NoOpAuditLogger{} + ctx := context.Background() + + events, err := logger.GetEventsByRule(ctx, "test-rule", 10) + if err != nil { + t.Errorf("GetEventsByRule() error = %v, want nil", err) + } + + if events == nil { + t.Error("GetEventsByRule() returned nil, want empty slice") + } + + if len(events) != 0 { + t.Errorf("GetEventsByRule() returned %d events, want 0", len(events)) + } +} + +func TestNoOpAuditLogger_GetStatsByRule(t *testing.T) { + logger := &NoOpAuditLogger{} + ctx := context.Background() + + stats, err := logger.GetStatsByRule(ctx) + if err != nil { + t.Errorf("GetStatsByRule() error = %v, want nil", err) + } + + if stats == nil { + t.Error("GetStatsByRule() returned nil, want empty map") + } + + if len(stats) != 0 { + t.Errorf("GetStatsByRule() returned %d stats, want 0", len(stats)) + } +} + +func TestNoOpAuditLogger_GetDailyVolume(t *testing.T) { + logger := &NoOpAuditLogger{} + ctx := context.Background() + + stats, err := logger.GetDailyVolume(ctx, 7) + if err != nil { + t.Errorf("GetDailyVolume() error = %v, want nil", err) + } + + if stats == nil { + t.Error("GetDailyVolume() returned nil, want empty slice") + } + + if len(stats) != 0 { + t.Errorf("GetDailyVolume() returned %d stats, want 0", len(stats)) + } +} + +func TestNoOpAuditLogger_Close(t *testing.T) { + logger := &NoOpAuditLogger{} + ctx := context.Background() + + err := logger.Close(ctx) + if err != nil { + t.Errorf("Close() error = %v, want nil", err) + } +} + +func 
TestAuditEventTypes(t *testing.T) { + tests := []struct { + name string + eventType AuditEventType + expected string + }{ + {"copy event", AuditEventCopy, "copy"}, + {"deprecation event", AuditEventDeprecation, "deprecation"}, + {"error event", AuditEventError, "error"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if string(tt.eventType) != tt.expected { + t.Errorf("Event type = %v, want %v", tt.eventType, tt.expected) + } + }) + } +} + +func TestAuditEvent_Structure(t *testing.T) { + // Test that AuditEvent can be created with all fields + now := time.Now() + event := &AuditEvent{ + ID: "test-id", + Timestamp: now, + EventType: AuditEventCopy, + RuleName: "test-rule", + SourceRepo: "test/source", + SourcePath: "source.go", + TargetRepo: "test/target", + TargetPath: "target.go", + CommitSHA: "abc123", + PRNumber: 123, + Success: true, + ErrorMessage: "", + DurationMs: 100, + FileSize: 1024, + AdditionalData: map[string]any{"key": "value"}, + } + + if event.EventType != AuditEventCopy { + t.Errorf("EventType = %v, want %v", event.EventType, AuditEventCopy) + } + + if event.Success != true { + t.Error("Success should be true") + } + + if event.PRNumber != 123 { + t.Errorf("PRNumber = %d, want 123", event.PRNumber) + } + + if event.AdditionalData["key"] != "value" { + t.Error("AdditionalData not set correctly") + } +} + +func TestRuleStats_Structure(t *testing.T) { + stats := RuleStats{ + RuleName: "test-rule", + TotalCopies: 100, + SuccessCount: 95, + FailureCount: 5, + AvgDuration: 150.5, + } + + if stats.RuleName != "test-rule" { + t.Errorf("RuleName = %v, want test-rule", stats.RuleName) + } + + if stats.TotalCopies != 100 { + t.Errorf("TotalCopies = %d, want 100", stats.TotalCopies) + } + + if stats.SuccessCount != 95 { + t.Errorf("SuccessCount = %d, want 95", stats.SuccessCount) + } + + if stats.FailureCount != 5 { + t.Errorf("FailureCount = %d, want 5", stats.FailureCount) + } +} + +func TestDailyStats_Structure(t *testing.T) { + stats := DailyStats{ + Date: "2024-01-15", + TotalCopies: 50, + SuccessCount: 48, + FailureCount: 2, + } + + if stats.Date != "2024-01-15" { + t.Errorf("Date = %v, want 2024-01-15", stats.Date) + } + + if stats.TotalCopies != 50 { + t.Errorf("TotalCopies = %d, want 50", stats.TotalCopies) + } +} + diff --git a/examples-copier/services/file_state_service_test.go b/examples-copier/services/file_state_service_test.go index 70d1b00..50cdb55 100644 --- a/examples-copier/services/file_state_service_test.go +++ b/examples-copier/services/file_state_service_test.go @@ -15,8 +15,10 @@ func TestFileStateService_AddAndGetFilesToUpload(t *testing.T) { service := services.NewFileStateService() key := types.UploadKey{ - RepoName: "org/repo", - BranchPath: "refs/heads/main", + RepoName: "org/repo", + BranchPath: "refs/heads/main", + RuleName: "test-rule", + CommitStrategy: "direct", } content := types.UploadFileContent{ @@ -70,8 +72,10 @@ func TestFileStateService_ClearFilesToUpload(t *testing.T) { service := services.NewFileStateService() key := types.UploadKey{ - RepoName: "org/repo", - BranchPath: "refs/heads/main", + RepoName: "org/repo", + BranchPath: "refs/heads/main", + RuleName: "test-rule", + CommitStrategy: "direct", } content := types.UploadFileContent{ @@ -216,8 +220,10 @@ func TestFileStateService_IsolatedCopies(t *testing.T) { service := services.NewFileStateService() key := types.UploadKey{ - RepoName: "org/repo", - BranchPath: "refs/heads/main", + RepoName: "org/repo", + BranchPath: "refs/heads/main", + RuleName: "test-rule", + 
CommitStrategy: "direct", } content := types.UploadFileContent{ @@ -267,8 +273,10 @@ func TestFileStateService_CommitStrategyTypes(t *testing.T) { for i, tt := range tests { t.Run(tt.name, func(t *testing.T) { key := types.UploadKey{ - RepoName: "org/repo", - BranchPath: "refs/heads/main", + RepoName: "org/repo", + BranchPath: "refs/heads/main", + RuleName: "test-rule", + CommitStrategy: string(tt.strategy), } content := types.UploadFileContent{ diff --git a/examples-copier/services/github_auth.go b/examples-copier/services/github_auth.go index 6b28189..e6bc326 100644 --- a/examples-copier/services/github_auth.go +++ b/examples-copier/services/github_auth.go @@ -30,6 +30,13 @@ type transport struct { var InstallationAccessToken string var HTTPClient = http.DefaultClient +// installationTokenCache caches installation access tokens by organization name +var installationTokenCache = make(map[string]string) + +// jwtToken caches the GitHub App JWT token +var jwtToken string +var jwtExpiry time.Time + // ConfigurePermissions sets up the necessary permissions to interact with the GitHub API. // It retrieves the GitHub App's private key from Google Secret Manager, generates a JWT, // and exchanges it for an installation access token. @@ -274,6 +281,139 @@ func GetGraphQLClient() *graphql.Client { return client } +// getOrRefreshJWT returns a valid JWT token, generating a new one if expired +func getOrRefreshJWT() (string, error) { + // Check if we have a valid cached JWT + if jwtToken != "" && time.Now().Before(jwtExpiry) { + return jwtToken, nil + } + + // Generate new JWT + pemKey := getPrivateKeyFromSecret() + privateKey, err := jwt.ParseRSAPrivateKeyFromPEM(pemKey) + if err != nil { + return "", fmt.Errorf("unable to parse RSA private key: %w", err) + } + + token, err := generateGitHubJWT(os.Getenv(configs.AppId), privateKey) + if err != nil { + return "", fmt.Errorf("error generating JWT: %w", err) + } + + // Cache the JWT (expires in 10 minutes, cache for 9 to be safe) + jwtToken = token + jwtExpiry = time.Now().Add(9 * time.Minute) + + return token, nil +} + +// getInstallationIDForOrg retrieves the installation ID for a specific organization +func getInstallationIDForOrg(org string) (string, error) { + token, err := getOrRefreshJWT() + if err != nil { + return "", fmt.Errorf("failed to get JWT: %w", err) + } + + url := "https://api.github.com/app/installations" + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return "", fmt.Errorf("create request: %w", err) + } + req.Header.Set("Authorization", "Bearer "+token) + req.Header.Set("Accept", "application/vnd.github+json") + + hc := HTTPClient + if hc == nil { + hc = http.DefaultClient + } + + resp, err := hc.Do(req) + if err != nil { + return "", fmt.Errorf("request failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return "", fmt.Errorf("GET %s: %d %s %s", url, resp.StatusCode, resp.Status, body) + } + + var installations []struct { + ID int64 `json:"id"` + Account struct { + Login string `json:"login"` + Type string `json:"type"` + } `json:"account"` + } + + if err := json.NewDecoder(resp.Body).Decode(&installations); err != nil { + return "", fmt.Errorf("decode response: %w", err) + } + + // Find the installation for the specified organization + for _, inst := range installations { + if inst.Account.Login == org { + return fmt.Sprintf("%d", inst.ID), nil + } + } + + return "", fmt.Errorf("no installation found for organization: %s", org) +} + +// 
GetRestClientForOrg returns a GitHub REST API client authenticated for a specific organization +func GetRestClientForOrg(org string) (*github.Client, error) { + // Check if we have a cached token for this org + if token, ok := installationTokenCache[org]; ok && token != "" { + src := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token}) + base := http.DefaultTransport + if HTTPClient != nil && HTTPClient.Transport != nil { + base = HTTPClient.Transport + } + httpClient := &http.Client{ + Transport: &oauth2.Transport{ + Source: src, + Base: base, + }, + } + return github.NewClient(httpClient), nil + } + + // Get installation ID for the organization + installationID, err := getInstallationIDForOrg(org) + if err != nil { + return nil, fmt.Errorf("failed to get installation ID for org %s: %w", org, err) + } + + // Get JWT token + token, err := getOrRefreshJWT() + if err != nil { + return nil, fmt.Errorf("failed to get JWT: %w", err) + } + + // Get installation access token + installationToken, err := getInstallationAccessToken(installationID, token, HTTPClient) + if err != nil { + return nil, fmt.Errorf("failed to get installation token for org %s: %w", org, err) + } + + // Cache the token + installationTokenCache[org] = installationToken + + // Create and return client + src := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: installationToken}) + base := http.DefaultTransport + if HTTPClient != nil && HTTPClient.Transport != nil { + base = HTTPClient.Transport + } + httpClient := &http.Client{ + Transport: &oauth2.Transport{ + Source: src, + Base: base, + }, + } + return github.NewClient(httpClient), nil +} + // RoundTrip adds the Authorization header to each request. func (t *transport) RoundTrip(req *http.Request) (*http.Response, error) { req.Header.Set("Authorization", "Bearer "+t.token) diff --git a/examples-copier/services/github_auth_test.go b/examples-copier/services/github_auth_test.go new file mode 100644 index 0000000..23c47ee --- /dev/null +++ b/examples-copier/services/github_auth_test.go @@ -0,0 +1,224 @@ +package services + +import ( + "os" + "testing" + "time" + + "github.com/mongodb/code-example-tooling/code-copier/configs" +) + +func TestGenerateGitHubJWT_EmptyAppID(t *testing.T) { + // Note: generateGitHubJWT requires appID string and *rsa.PrivateKey + // Testing this requires creating a valid RSA private key, which is complex + // This test documents the expected behavior + t.Skip("Skipping test that requires valid RSA private key generation") + + // Expected behavior: + // - Should return error with empty app ID + // - Should return error with nil private key + // - Should generate valid JWT with valid inputs +} + +func TestJWTCaching(t *testing.T) { + // Test JWT caching behavior + originalToken := jwtToken + originalExpiry := jwtExpiry + defer func() { + jwtToken = originalToken + jwtExpiry = originalExpiry + }() + + // Set a cached token that hasn't expired + jwtToken = "cached-token" + jwtExpiry = time.Now().Add(5 * time.Minute) + + // Note: getOrRefreshJWT is not exported, so we can't test it directly + // This test documents the expected caching behavior: + // - If jwtToken is set and jwtExpiry is in the future, return cached token + // - If jwtToken is empty or jwtExpiry is in the past, generate new token + // - Cache the new token and set expiry to 9 minutes from now +} + +func TestInstallationTokenCache_Structure(t *testing.T) { + // Test that we can manipulate the installation token cache + originalCache := installationTokenCache + defer func() { + 
installationTokenCache = originalCache + }() + + // Initialize cache (it's a map[string]string) + installationTokenCache = make(map[string]string) + + // Add a token + testToken := "test-token-value" + installationTokenCache["test-org"] = testToken + + // Verify it was added + cached, exists := installationTokenCache["test-org"] + if !exists { + t.Error("Token not found in cache") + } + + if cached != testToken { + t.Errorf("Cached token = %s, want %s", cached, testToken) + } +} + +func TestLoadWebhookSecret_FromEnv(t *testing.T) { + // Test loading webhook secret from environment variable + testSecret := "test-webhook-secret" + os.Setenv("WEBHOOK_SECRET", testSecret) + defer os.Unsetenv("WEBHOOK_SECRET") + + // LoadWebhookSecret requires a config parameter + config := &configs.Config{ + WebhookSecret: "", + } + + // Note: LoadWebhookSecret tries Secret Manager first, which will fail in test environment + // This is expected behavior - the function should handle the error gracefully + _ = LoadWebhookSecret(config) + + // Verify the environment variable is set (even if Secret Manager fails) + envSecret := os.Getenv("WEBHOOK_SECRET") + if envSecret != testSecret { + t.Errorf("WEBHOOK_SECRET env var = %s, want %s", envSecret, testSecret) + } + + // Note: In production, LoadWebhookSecret would populate config.WebhookSecret + // from Secret Manager or fall back to the environment variable +} + +func TestLoadMongoURI_FromEnv(t *testing.T) { + // Test loading MongoDB URI from environment variable + testURI := "mongodb://localhost:27017/test" + os.Setenv("MONGO_URI", testURI) + defer os.Unsetenv("MONGO_URI") + + // Verify the environment variable is set + envURI := os.Getenv("MONGO_URI") + if envURI != testURI { + t.Errorf("MONGO_URI env var = %s, want %s", envURI, testURI) + } + + // Note: LoadMongoURI function signature needs to be checked + // This test documents that MONGO_URI can be set via environment +} + +func TestGitHubAppID_FromEnv(t *testing.T) { + // Test that GITHUB_APP_ID can be read from environment + testAppID := "123456" + os.Setenv("GITHUB_APP_ID", testAppID) + defer os.Unsetenv("GITHUB_APP_ID") + + appID := os.Getenv("GITHUB_APP_ID") + if appID != testAppID { + t.Errorf("GITHUB_APP_ID = %s, want %s", appID, testAppID) + } +} + +func TestGitHubInstallationID_FromEnv(t *testing.T) { + // Test that GITHUB_INSTALLATION_ID can be read from environment + testInstallID := "789012" + os.Setenv("GITHUB_INSTALLATION_ID", testInstallID) + defer os.Unsetenv("GITHUB_INSTALLATION_ID") + + installID := os.Getenv("GITHUB_INSTALLATION_ID") + if installID != testInstallID { + t.Errorf("GITHUB_INSTALLATION_ID = %s, want %s", installID, testInstallID) + } +} + +func TestGitHubPrivateKeyPath_FromEnv(t *testing.T) { + // Test that GITHUB_PRIVATE_KEY_PATH can be read from environment + testPath := "/path/to/private-key.pem" + os.Setenv("GITHUB_PRIVATE_KEY_PATH", testPath) + defer os.Unsetenv("GITHUB_PRIVATE_KEY_PATH") + + keyPath := os.Getenv("GITHUB_PRIVATE_KEY_PATH") + if keyPath != testPath { + t.Errorf("GITHUB_PRIVATE_KEY_PATH = %s, want %s", keyPath, testPath) + } +} + +func TestInstallationAccessToken_GlobalVariable(t *testing.T) { + // Test that we can manipulate the global InstallationAccessToken + originalToken := InstallationAccessToken + defer func() { + InstallationAccessToken = originalToken + }() + + testToken := "ghs_test_token_123" + InstallationAccessToken = testToken + + if InstallationAccessToken != testToken { + t.Errorf("InstallationAccessToken = %s, want %s", 
InstallationAccessToken, testToken) + } +} + +func TestHTTPClient_GlobalVariable(t *testing.T) { + // Test that HTTPClient is initialized + if HTTPClient == nil { + t.Error("HTTPClient should not be nil") + } + + // Note: HTTPClient is initialized to http.DefaultClient which has Timeout = 0 (no timeout) + // This is the default behavior in Go's http package + // The test just verifies the client exists +} + +func TestJWTExpiry_GlobalVariable(t *testing.T) { + // Test that we can manipulate the JWT expiry time + originalExpiry := jwtExpiry + defer func() { + jwtExpiry = originalExpiry + }() + + // Set a future expiry + futureExpiry := time.Now().Add(1 * time.Hour) + jwtExpiry = futureExpiry + + if time.Now().After(jwtExpiry) { + t.Error("JWT should not be expired") + } + + // Set a past expiry + pastExpiry := time.Now().Add(-1 * time.Hour) + jwtExpiry = pastExpiry + + if !time.Now().After(jwtExpiry) { + t.Error("JWT should be expired") + } +} + +// Note: Comprehensive testing of github_auth.go would require: +// 1. Mocking the Secret Manager client +// 2. Mocking the GitHub API client +// 3. Testing the full authentication flow: +// - JWT generation with valid PEM key +// - Installation token retrieval +// - Token caching and refresh logic +// - Organization-specific client creation +// - Error handling for API failures +// +// Example test scenarios that would require mocking: +// - TestConfigurePermissions_Success +// - TestConfigurePermissions_MissingAppID +// - TestConfigurePermissions_InvalidPEM +// - TestGetInstallationAccessToken_Success +// - TestGetInstallationAccessToken_Cached +// - TestGetInstallationAccessToken_Expired +// - TestGetRestClientForOrg_Success +// - TestGetRestClientForOrg_Cached +// - TestGetPrivateKeyFromSecret_SecretManager +// - TestGetPrivateKeyFromSecret_LocalFile +// - TestGetPrivateKeyFromSecret_EnvVar +// +// Refactoring suggestions for better testability: +// 1. Accept Secret Manager client as parameter instead of creating it internally +// 2. Accept GitHub client factory as parameter +// 3. Return errors instead of calling log.Fatal +// 4. Use dependency injection for HTTP client +// 5. Make JWT generation and caching logic more modular + diff --git a/examples-copier/services/github_write_to_source_test.go b/examples-copier/services/github_write_to_source_test.go new file mode 100644 index 0000000..f16825b --- /dev/null +++ b/examples-copier/services/github_write_to_source_test.go @@ -0,0 +1,135 @@ +package services + +import ( + "testing" + + . 
"github.com/mongodb/code-example-tooling/code-copier/types" +) + +func TestUpdateDeprecationFile_EmptyList(t *testing.T) { + // When FilesToDeprecate is empty, UpdateDeprecationFile should return early + // FilesToDeprecate is a map[string]Configs + originalFiles := FilesToDeprecate + defer func() { + FilesToDeprecate = originalFiles + }() + + FilesToDeprecate = make(map[string]Configs) + + // This should not panic or error - it should return early + // Note: This test doesn't verify the actual GitHub API call since that would + // require mocking the GitHub client, which is a global variable + UpdateDeprecationFile() + + // If we get here without panic, the test passes +} + +func TestUpdateDeprecationFile_WithFiles(t *testing.T) { + // Set up files to deprecate + originalFiles := FilesToDeprecate + defer func() { + FilesToDeprecate = originalFiles + }() + + FilesToDeprecate = map[string]Configs{ + "examples/old-example.go": { + TargetRepo: "test/target", + TargetBranch: "main", + }, + "examples/deprecated.go": { + TargetRepo: "test/target", + TargetBranch: "main", + }, + } + + // Note: This test will fail if it actually tries to call GitHub API + // In a real test environment, we would need to: + // 1. Mock the GetRestClient() function + // 2. Mock the GitHub API responses + // 3. Verify the correct API calls were made + // + // For now, this test documents the expected behavior + // The actual implementation would require refactoring to inject dependencies + + // Since we can't easily test this without mocking, we'll skip the actual call + t.Skip("Skipping test that requires GitHub API mocking") +} + +func TestFilesToDeprecate_GlobalVariable(t *testing.T) { + // Test that we can manipulate the global FilesToDeprecate variable + originalFiles := FilesToDeprecate + defer func() { + FilesToDeprecate = originalFiles + }() + + // Set test files (FilesToDeprecate is a map[string]Configs) + testFiles := map[string]Configs{ + "file1.go": {TargetRepo: "test/repo1", TargetBranch: "main"}, + "file2.go": {TargetRepo: "test/repo2", TargetBranch: "develop"}, + "file3.go": {TargetRepo: "test/repo3", TargetBranch: "main"}, + } + FilesToDeprecate = testFiles + + if len(FilesToDeprecate) != 3 { + t.Errorf("FilesToDeprecate length = %d, want 3", len(FilesToDeprecate)) + } + + for file, config := range testFiles { + if deprecatedConfig, exists := FilesToDeprecate[file]; !exists { + t.Errorf("FilesToDeprecate missing file %s", file) + } else if deprecatedConfig.TargetRepo != config.TargetRepo { + t.Errorf("FilesToDeprecate[%s].TargetRepo = %s, want %s", file, deprecatedConfig.TargetRepo, config.TargetRepo) + } + } +} + +func TestDeprecationFileEnvironmentVariables(t *testing.T) { + // Test that deprecation file configuration can be set via environment variables + // The UpdateDeprecationFile function uses os.Getenv to read these values + + tests := []struct { + name string + deprecationFile string + }{ + { + name: "default config", + deprecationFile: "deprecated-files.json", + }, + { + name: "custom file", + deprecationFile: "custom-deprecated.json", + }, + { + name: "nested path", + deprecationFile: "docs/deprecated/files.json", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // The deprecation file path is typically configured via environment variables + // This test documents the expected configuration approach + if tt.deprecationFile == "" { + t.Error("Deprecation file path should not be empty") + } + }) + } +} + +// Note: Comprehensive testing of UpdateDeprecationFile 
would require: +// 1. Refactoring to accept a GitHub client interface instead of using global GetRestClient() +// 2. Creating mock implementations of the GitHub client +// 3. Testing scenarios: +// - Empty deprecation list (early return) +// - Fetching existing deprecation file +// - Handling missing deprecation file (404) +// - Merging new files with existing files +// - Removing duplicates +// - Committing changes to GitHub +// - Error handling for API failures +// +// Example refactored signature: +// func UpdateDeprecationFile(ctx context.Context, config *configs.Config, client GitHubClient) error +// +// This would allow for proper unit testing with mocked dependencies. + diff --git a/examples-copier/services/github_write_to_target.go b/examples-copier/services/github_write_to_target.go index 4bcccb2..8558e52 100644 --- a/examples-copier/services/github_write_to_target.go +++ b/examples-copier/services/github_write_to_target.go @@ -42,9 +42,18 @@ func parseRepoPath(repoPath string) (owner, repo string) { // using the specified commit strategy (direct or via pull request). func AddFilesToTargetRepoBranch() { ctx := context.Background() - client := GetRestClient() for key, value := range FilesToUpload { + // Parse the repository to get the organization + owner, _ := parseRepoPath(key.RepoName) + + // Get a client authenticated for this organization + client, err := GetRestClientForOrg(owner) + if err != nil { + LogCritical(fmt.Sprintf("Failed to get GitHub client for org %s: %v", owner, err)) + continue + } + // Determine commit strategy from value (set by pattern-matching system) strategy := string(value.CommitStrategy) if strategy == "" { diff --git a/examples-copier/services/github_write_to_target_test.go b/examples-copier/services/github_write_to_target_test.go index bf77f08..a3446c4 100644 --- a/examples-copier/services/github_write_to_target_test.go +++ b/examples-copier/services/github_write_to_target_test.go @@ -66,7 +66,7 @@ func TestAddToRepoAndFilesMap_NewEntry(t *testing.T) { services.AddToRepoAndFilesMap("TargetRepo1", "main", dummyFile) require.NotNil(t, services.FilesToUpload, "FilesToUpload map should be initialized") - key := types.UploadKey{RepoName: "TargetRepo1", BranchPath: "refs/heads/main"} + key := types.UploadKey{RepoName: "TargetRepo1", BranchPath: "refs/heads/main", RuleName: "", CommitStrategy: ""} entry, exists := services.FilesToUpload[key] require.True(t, exists, "Entry for TargetRepo1/main should exist") require.Equal(t, "main", entry.TargetBranch) @@ -76,7 +76,7 @@ func TestAddToRepoAndFilesMap_NewEntry(t *testing.T) { func TestAddToRepoAndFilesMap_AppendEntry(t *testing.T) { services.FilesToUpload = make(map[types.UploadKey]types.UploadFileContent) - key := types.UploadKey{RepoName: "TargetRepo1", BranchPath: "refs/heads/main"} + key := types.UploadKey{RepoName: "TargetRepo1", BranchPath: "refs/heads/main", RuleName: "", CommitStrategy: ""} initialName := "first.txt" services.FilesToUpload[key] = types.UploadFileContent{ @@ -96,7 +96,7 @@ func TestAddToRepoAndFilesMap_AppendEntry(t *testing.T) { func TestAddToRepoAndFilesMap_NestedFiles(t *testing.T) { services.FilesToUpload = make(map[types.UploadKey]types.UploadFileContent) - key := types.UploadKey{RepoName: "TargetRepo1", BranchPath: "refs/heads/main"} + key := types.UploadKey{RepoName: "TargetRepo1", BranchPath: "refs/heads/main", RuleName: "", CommitStrategy: ""} initialName := "level1/first.txt" services.FilesToUpload[key] = types.UploadFileContent{ @@ -616,7 +616,7 @@ func 
TestPriority_PRTitleDefaultsToCommitMessage_And_NoAutoMergeWhenConfigPresen Content: github.String(base64.StdEncoding.EncodeToString([]byte("y"))), }} cfg := types.Configs{TargetRepo: repo, TargetBranch: baseBranch /* MergeWithoutReview: false (zero value) */} - services.FilesToUpload = map[types.UploadKey]types.UploadFileContent{{RepoName: repo, BranchPath: "refs/heads/" + baseBranch}: {TargetBranch: baseBranch, Content: files}} + services.FilesToUpload = map[types.UploadKey]types.UploadFileContent{{RepoName: repo, BranchPath: "refs/heads/" + baseBranch, RuleName: "", CommitStrategy: ""}: {TargetBranch: baseBranch, Content: files}} services.AddFilesToTargetRepoBranch(types.ConfigFileType{cfg}) diff --git a/examples-copier/services/logger_test.go b/examples-copier/services/logger_test.go new file mode 100644 index 0000000..a764673 --- /dev/null +++ b/examples-copier/services/logger_test.go @@ -0,0 +1,408 @@ +package services + +import ( + "bytes" + "context" + "fmt" + "log" + "net/http/httptest" + "os" + "strings" + "testing" +) + +func TestLogDebug(t *testing.T) { + tests := []struct { + name string + logLevel string + copierDebug string + message string + shouldLog bool + }{ + { + name: "debug enabled via LOG_LEVEL", + logLevel: "debug", + copierDebug: "", + message: "test debug message", + shouldLog: true, + }, + { + name: "debug enabled via COPIER_DEBUG", + logLevel: "", + copierDebug: "true", + message: "test debug message", + shouldLog: true, + }, + { + name: "debug disabled", + logLevel: "info", + copierDebug: "false", + message: "test debug message", + shouldLog: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Set environment variables + if tt.logLevel != "" { + os.Setenv("LOG_LEVEL", tt.logLevel) + defer os.Unsetenv("LOG_LEVEL") + } + if tt.copierDebug != "" { + os.Setenv("COPIER_DEBUG", tt.copierDebug) + defer os.Unsetenv("COPIER_DEBUG") + } + + // Capture log output + var buf bytes.Buffer + log.SetOutput(&buf) + defer log.SetOutput(os.Stderr) + + LogDebug(tt.message) + + output := buf.String() + if tt.shouldLog { + if !strings.Contains(output, "[DEBUG]") { + t.Error("Expected [DEBUG] prefix in output") + } + if !strings.Contains(output, tt.message) { + t.Errorf("Expected message %q in output", tt.message) + } + } else { + if output != "" { + t.Errorf("Expected no output, got: %s", output) + } + } + }) + } +} + +func TestLogInfo(t *testing.T) { + var buf bytes.Buffer + log.SetOutput(&buf) + defer log.SetOutput(os.Stderr) + + message := "test info message" + LogInfo(message) + + output := buf.String() + if !strings.Contains(output, "[INFO]") { + t.Error("Expected [INFO] prefix in output") + } + if !strings.Contains(output, message) { + t.Errorf("Expected message %q in output", message) + } +} + +func TestLogWarning(t *testing.T) { + var buf bytes.Buffer + log.SetOutput(&buf) + defer log.SetOutput(os.Stderr) + + message := "test warning message" + LogWarning(message) + + output := buf.String() + if !strings.Contains(output, "[WARN]") { + t.Error("Expected [WARN] prefix in output") + } + if !strings.Contains(output, message) { + t.Errorf("Expected message %q in output", message) + } +} + +func TestLogError(t *testing.T) { + var buf bytes.Buffer + log.SetOutput(&buf) + defer log.SetOutput(os.Stderr) + + message := "test error message" + LogError(message) + + output := buf.String() + if !strings.Contains(output, "[ERROR]") { + t.Error("Expected [ERROR] prefix in output") + } + if !strings.Contains(output, message) { + t.Errorf("Expected 
message %q in output", message) + } +} + +func TestLogCritical(t *testing.T) { + var buf bytes.Buffer + log.SetOutput(&buf) + defer log.SetOutput(os.Stderr) + + message := "test critical message" + LogCritical(message) + + output := buf.String() + if !strings.Contains(output, "[CRITICAL]") { + t.Error("Expected [CRITICAL] prefix in output") + } + if !strings.Contains(output, message) { + t.Errorf("Expected message %q in output", message) + } +} + +func TestLogInfoCtx(t *testing.T) { + var buf bytes.Buffer + log.SetOutput(&buf) + defer log.SetOutput(os.Stderr) + + ctx := context.Background() + message := "test context message" + fields := map[string]interface{}{ + "key1": "value1", + "key2": 123, + } + + LogInfoCtx(ctx, message, fields) + + output := buf.String() + if !strings.Contains(output, message) { + t.Errorf("Expected message %q in output", message) + } + if !strings.Contains(output, "key1") { + t.Error("Expected field key1 in output") + } + if !strings.Contains(output, "value1") { + t.Error("Expected field value1 in output") + } +} + +func TestLogWarningCtx(t *testing.T) { + var buf bytes.Buffer + log.SetOutput(&buf) + defer log.SetOutput(os.Stderr) + + ctx := context.Background() + message := "test warning context" + fields := map[string]interface{}{ + "warning_type": "test", + } + + LogWarningCtx(ctx, message, fields) + + output := buf.String() + if !strings.Contains(output, message) { + t.Errorf("Expected message %q in output", message) + } + if !strings.Contains(output, "warning_type") { + t.Error("Expected field warning_type in output") + } +} + +func TestLogErrorCtx(t *testing.T) { + var buf bytes.Buffer + log.SetOutput(&buf) + defer log.SetOutput(os.Stderr) + + ctx := context.Background() + message := "test error context" + err := fmt.Errorf("test error") + fields := map[string]interface{}{ + "error_code": 500, + } + + LogErrorCtx(ctx, message, err, fields) + + output := buf.String() + if !strings.Contains(output, message) { + t.Errorf("Expected message %q in output", message) + } + if !strings.Contains(output, "test error") { + t.Error("Expected error message in output") + } + if !strings.Contains(output, "error_code") { + t.Error("Expected field error_code in output") + } +} + +func TestLogWebhookOperation(t *testing.T) { + tests := []struct { + name string + operation string + message string + err error + wantLevel string + }{ + { + name: "successful operation", + operation: "webhook_received", + message: "webhook processed", + err: nil, + wantLevel: "[INFO]", + }, + { + name: "failed operation", + operation: "webhook_parse", + message: "failed to parse webhook", + err: fmt.Errorf("parse error"), + wantLevel: "[ERROR]", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var buf bytes.Buffer + log.SetOutput(&buf) + defer log.SetOutput(os.Stderr) + + ctx := context.Background() + LogWebhookOperation(ctx, tt.operation, tt.message, tt.err) + + output := buf.String() + if !strings.Contains(output, tt.wantLevel) { + t.Errorf("Expected %s level in output", tt.wantLevel) + } + if !strings.Contains(output, tt.message) { + t.Errorf("Expected message %q in output", tt.message) + } + if !strings.Contains(output, tt.operation) { + t.Errorf("Expected operation %q in output", tt.operation) + } + }) + } +} + +func TestLogFileOperation(t *testing.T) { + var buf bytes.Buffer + log.SetOutput(&buf) + defer log.SetOutput(os.Stderr) + + ctx := context.Background() + LogFileOperation(ctx, "copy", "source/file.go", "target/repo", "file copied", nil) + + output := 
buf.String() + if !strings.Contains(output, "copy") { + t.Error("Expected operation 'copy' in output") + } + if !strings.Contains(output, "source/file.go") { + t.Error("Expected source path in output") + } + if !strings.Contains(output, "target/repo") { + t.Error("Expected target repo in output") + } +} + +func TestWithRequestID(t *testing.T) { + req := httptest.NewRequest("GET", "/test", nil) + + ctx, requestID := WithRequestID(req) + + if requestID == "" { + t.Error("Expected non-empty request ID") + } + + // Check that request ID is in context + ctxValue := ctx.Value("request_id") + if ctxValue == nil { + t.Error("Expected request_id in context") + } + + if ctxValue.(string) != requestID { + t.Error("Context request_id doesn't match returned request ID") + } +} + +func TestFormatLogMessage(t *testing.T) { + tests := []struct { + name string + message string + fields map[string]interface{} + want []string + }{ + { + name: "no fields", + message: "test message", + fields: nil, + want: []string{"test message"}, + }, + { + name: "with fields", + message: "test message", + fields: map[string]interface{}{ + "key1": "value1", + "key2": 123, + }, + want: []string{"test message", "key1", "value1"}, + }, + { + name: "empty fields", + message: "test message", + fields: map[string]interface{}{}, + want: []string{"test message"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + result := formatLogMessage(ctx, tt.message, tt.fields) + + for _, want := range tt.want { + if !strings.Contains(result, want) { + t.Errorf("formatLogMessage() missing %q in result: %s", want, result) + } + } + }) + } +} + +func TestIsDebugEnabled(t *testing.T) { + tests := []struct { + name string + logLevel string + copierDebug string + want bool + }{ + {"debug via LOG_LEVEL", "debug", "", true}, + {"DEBUG via LOG_LEVEL", "DEBUG", "", true}, + {"debug via COPIER_DEBUG", "", "true", true}, + {"debug via COPIER_DEBUG uppercase", "", "TRUE", true}, + {"not enabled", "info", "false", false}, + {"neither set", "", "", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + os.Setenv("LOG_LEVEL", tt.logLevel) + os.Setenv("COPIER_DEBUG", tt.copierDebug) + defer os.Unsetenv("LOG_LEVEL") + defer os.Unsetenv("COPIER_DEBUG") + + got := isDebugEnabled() + if got != tt.want { + t.Errorf("isDebugEnabled() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIsCloudLoggingDisabled(t *testing.T) { + tests := []struct { + name string + value string + want bool + }{ + {"disabled lowercase", "true", true}, + {"disabled uppercase", "TRUE", true}, + {"enabled", "false", false}, + {"not set", "", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + os.Setenv("COPIER_DISABLE_CLOUD_LOGGING", tt.value) + defer os.Unsetenv("COPIER_DISABLE_CLOUD_LOGGING") + + got := isCloudLoggingDisabled() + if got != tt.want { + t.Errorf("isCloudLoggingDisabled() = %v, want %v", got, tt.want) + } + }) + } +} + diff --git a/examples-copier/services/pattern_matcher.go b/examples-copier/services/pattern_matcher.go index 0cd5a00..8bf5fb8 100644 --- a/examples-copier/services/pattern_matcher.go +++ b/examples-copier/services/pattern_matcher.go @@ -6,6 +6,7 @@ import ( "regexp" "strings" + "github.com/bmatcuk/doublestar/v4" "github.com/mongodb/code-example-tooling/code-copier/types" ) @@ -59,42 +60,27 @@ func (pm *DefaultPatternMatcher) matchPrefix(filePath, pattern string) types.Mat // matchGlob matches using glob patterns func (pm 
*DefaultPatternMatcher) matchGlob(filePath, pattern string) types.MatchResult {
-	matched, err := filepath.Match(pattern, filePath)
+	// Use doublestar library which properly supports ** patterns
+	matched, err := doublestar.Match(pattern, filePath)
 	if err != nil {
-		// Try doublestar matching for ** patterns
-		matched = pm.matchDoublestar(filePath, pattern)
+		// Fall back to filepath.Match for simple patterns
+		matched, err = filepath.Match(pattern, filePath)
+		if err != nil {
+			return types.NewMatchResult(false, nil)
+		}
 	}
-	
+
 	if matched {
 		variables := map[string]string{
 			"matched_pattern": pattern,
 		}
 		return types.NewMatchResult(true, variables)
 	}
-	
+
 	return types.NewMatchResult(false, nil)
 }
 
-// matchDoublestar handles ** glob patterns (recursive directory matching)
-func (pm *DefaultPatternMatcher) matchDoublestar(filePath, pattern string) bool {
-	// Convert glob pattern to regex
-	// ** matches any number of directories
-	// * matches any characters except /
-	// ? matches a single character except /
-
-	regexPattern := regexp.QuoteMeta(pattern)
-	regexPattern = strings.ReplaceAll(regexPattern, `\*\*`, ".*")
-	regexPattern = strings.ReplaceAll(regexPattern, `\*`, "[^/]*")
-	regexPattern = strings.ReplaceAll(regexPattern, `\?`, "[^/]")
-	regexPattern = "^" + regexPattern + "$"
-
-	re, err := regexp.Compile(regexPattern)
-	if err != nil {
-		return false
-	}
-
-	return re.MatchString(filePath)
-}
+
 
 // matchRegex matches using regular expressions with named capture groups
 func (pm *DefaultPatternMatcher) matchRegex(filePath, pattern string) types.MatchResult {
diff --git a/examples-copier/services/service_container_test.go b/examples-copier/services/service_container_test.go
new file mode 100644
index 0000000..3fcf29f
--- /dev/null
+++ b/examples-copier/services/service_container_test.go
@@ -0,0 +1,360 @@
+package services
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/mongodb/code-example-tooling/code-copier/configs"
+)
+
+func TestNewServiceContainer(t *testing.T) {
+	tests := []struct {
+		name          string
+		config        *configs.Config
+		wantErr       bool
+		checkServices bool
+	}{
+		{
+			name: "valid config with audit disabled",
+			config: &configs.Config{
+				RepoOwner:       "test-owner",
+				RepoName:        "test-repo",
+				AuditEnabled:    false,
+				SlackWebhookURL: "",
+			},
+			wantErr:       false,
+			checkServices: true,
+		},
+		{
+			name: "valid config with Slack enabled",
+			config: &configs.Config{
+				RepoOwner:       "test-owner",
+				RepoName:        "test-repo",
+				AuditEnabled:    false,
+				SlackWebhookURL: "https://hooks.slack.com/services/TEST",
+				SlackChannel:    "#test",
+				SlackUsername:   "Test Bot",
+				SlackIconEmoji:  ":robot:",
+			},
+			wantErr:       false,
+			checkServices: true,
+		},
+		{
+			name: "audit enabled without URI",
+			config: &configs.Config{
+				RepoOwner:    "test-owner",
+				RepoName:     "test-repo",
+				AuditEnabled: true,
+				MongoURI:     "",
+			},
+			wantErr:       true,
+			checkServices: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			container, err := NewServiceContainer(tt.config)
+
+			if tt.wantErr {
+				if err == nil {
+					t.Error("NewServiceContainer() expected error, got nil")
+				}
+				return
+			}
+
+			if err != nil {
+				t.Fatalf("NewServiceContainer() error = %v, want nil", err)
+			}
+
+			if container == nil {
+				t.Fatal("NewServiceContainer() returned nil container")
+			}
+
+			if tt.checkServices {
+				// Check that all services are initialized
+				if container.Config == nil {
+					t.Error("Config is nil")
+				}
+
+				if container.FileStateService == nil {
+					t.Error("FileStateService is nil")
+				}
+
+				if container.ConfigLoader
== nil { + t.Error("ConfigLoader is nil") + } + + if container.PatternMatcher == nil { + t.Error("PatternMatcher is nil") + } + + if container.PathTransformer == nil { + t.Error("PathTransformer is nil") + } + + if container.MessageTemplater == nil { + t.Error("MessageTemplater is nil") + } + + if container.AuditLogger == nil { + t.Error("AuditLogger is nil") + } + + if container.MetricsCollector == nil { + t.Error("MetricsCollector is nil") + } + + if container.SlackNotifier == nil { + t.Error("SlackNotifier is nil") + } + + // Check that StartTime is set + if container.StartTime.IsZero() { + t.Error("StartTime is zero") + } + + // Check that StartTime is recent (within last second) + if time.Since(container.StartTime) > time.Second { + t.Error("StartTime is not recent") + } + } + }) + } +} + +func TestServiceContainer_Close(t *testing.T) { + tests := []struct { + name string + config *configs.Config + wantErr bool + }{ + { + name: "close with NoOp audit logger", + config: &configs.Config{ + RepoOwner: "test-owner", + RepoName: "test-repo", + AuditEnabled: false, + }, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + container, err := NewServiceContainer(tt.config) + if err != nil { + t.Fatalf("NewServiceContainer() error = %v", err) + } + + ctx := context.Background() + err = container.Close(ctx) + + if tt.wantErr { + if err == nil { + t.Error("Close() expected error, got nil") + } + } else { + if err != nil { + t.Errorf("Close() error = %v, want nil", err) + } + } + }) + } +} + +func TestServiceContainer_ConfigPropagation(t *testing.T) { + config := &configs.Config{ + RepoOwner: "test-owner", + RepoName: "test-repo", + AuditEnabled: false, + SlackWebhookURL: "https://hooks.slack.com/services/TEST", + SlackChannel: "#test-channel", + SlackUsername: "Test Bot", + SlackIconEmoji: ":robot:", + } + + container, err := NewServiceContainer(config) + if err != nil { + t.Fatalf("NewServiceContainer() error = %v", err) + } + + // Verify config is stored correctly + if container.Config != config { + t.Error("Config not stored correctly in container") + } + + if container.Config.RepoOwner != "test-owner" { + t.Errorf("RepoOwner = %v, want test-owner", container.Config.RepoOwner) + } + + if container.Config.SlackChannel != "#test-channel" { + t.Errorf("SlackChannel = %v, want #test-channel", container.Config.SlackChannel) + } +} + +func TestServiceContainer_SlackNotifierConfiguration(t *testing.T) { + tests := []struct { + name string + webhookURL string + channel string + username string + iconEmoji string + wantEnabled bool + }{ + { + name: "Slack enabled", + webhookURL: "https://hooks.slack.com/services/TEST", + channel: "#test", + username: "Bot", + iconEmoji: ":robot:", + wantEnabled: true, + }, + { + name: "Slack disabled", + webhookURL: "", + channel: "", + username: "", + iconEmoji: "", + wantEnabled: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := &configs.Config{ + RepoOwner: "test-owner", + RepoName: "test-repo", + AuditEnabled: false, + SlackWebhookURL: tt.webhookURL, + SlackChannel: tt.channel, + SlackUsername: tt.username, + SlackIconEmoji: tt.iconEmoji, + } + + container, err := NewServiceContainer(config) + if err != nil { + t.Fatalf("NewServiceContainer() error = %v", err) + } + + if container.SlackNotifier.IsEnabled() != tt.wantEnabled { + t.Errorf("SlackNotifier.IsEnabled() = %v, want %v", + container.SlackNotifier.IsEnabled(), tt.wantEnabled) + } + }) + } +} + +func 
TestServiceContainer_AuditLoggerConfiguration(t *testing.T) { + tests := []struct { + name string + auditEnabled bool + mongoURI string + wantType string + wantErr bool + }{ + { + name: "audit disabled", + auditEnabled: false, + mongoURI: "", + wantType: "*services.NoOpAuditLogger", + wantErr: false, + }, + { + name: "audit enabled without URI", + auditEnabled: true, + mongoURI: "", + wantType: "", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := &configs.Config{ + RepoOwner: "test-owner", + RepoName: "test-repo", + AuditEnabled: tt.auditEnabled, + MongoURI: tt.mongoURI, + AuditDatabase: "test-db", + AuditCollection: "test-coll", + } + + container, err := NewServiceContainer(config) + + if tt.wantErr { + if err == nil { + t.Error("NewServiceContainer() expected error, got nil") + } + return + } + + if err != nil { + t.Fatalf("NewServiceContainer() error = %v", err) + } + + // Check audit logger type - NoOp should be returned when disabled + _, isNoOp := container.AuditLogger.(*NoOpAuditLogger) + if tt.wantType == "*services.NoOpAuditLogger" && !isNoOp { + t.Error("Expected NoOpAuditLogger when audit is disabled") + } + }) + } +} + +func TestServiceContainer_MetricsCollectorInitialization(t *testing.T) { + config := &configs.Config{ + RepoOwner: "test-owner", + RepoName: "test-repo", + AuditEnabled: false, + } + + container, err := NewServiceContainer(config) + if err != nil { + t.Fatalf("NewServiceContainer() error = %v", err) + } + + if container.MetricsCollector == nil { + t.Fatal("MetricsCollector is nil") + } + + // Verify metrics collector is functional + container.MetricsCollector.RecordWebhookReceived() + container.MetricsCollector.RecordWebhookProcessed(time.Second) + + // Check that metrics were recorded using GetMetrics + metrics := container.MetricsCollector.GetMetrics(container.FileStateService) + if metrics.Webhooks.Received != 1 { + t.Errorf("WebhooksReceived = %d, want 1", metrics.Webhooks.Received) + } + + if metrics.Webhooks.Processed != 1 { + t.Errorf("WebhooksProcessed = %d, want 1", metrics.Webhooks.Processed) + } +} + +func TestServiceContainer_StartTimeTracking(t *testing.T) { + config := &configs.Config{ + RepoOwner: "test-owner", + RepoName: "test-repo", + AuditEnabled: false, + } + + beforeCreate := time.Now() + container, err := NewServiceContainer(config) + afterCreate := time.Now() + + if err != nil { + t.Fatalf("NewServiceContainer() error = %v", err) + } + + // StartTime should be between beforeCreate and afterCreate + if container.StartTime.Before(beforeCreate) { + t.Error("StartTime is before container creation") + } + if container.StartTime.After(afterCreate) { + t.Error("StartTime is after container creation") + } +} + diff --git a/examples-copier/services/slack_notifier_test.go b/examples-copier/services/slack_notifier_test.go new file mode 100644 index 0000000..03b7655 --- /dev/null +++ b/examples-copier/services/slack_notifier_test.go @@ -0,0 +1,332 @@ +package services + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/http/httptest" + "testing" + "time" +) + +func TestNewSlackNotifier(t *testing.T) { + tests := []struct { + name string + webhookURL string + channel string + username string + iconEmoji string + wantEnabled bool + }{ + { + name: "enabled with webhook URL", + webhookURL: "https://hooks.slack.com/services/TEST", + channel: "#test", + username: "Test Bot", + iconEmoji: ":robot:", + wantEnabled: true, + }, + { + name: "disabled without webhook URL", + 
webhookURL: "", + channel: "#test", + username: "Test Bot", + iconEmoji: ":robot:", + wantEnabled: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + notifier := NewSlackNotifier(tt.webhookURL, tt.channel, tt.username, tt.iconEmoji) + if notifier.IsEnabled() != tt.wantEnabled { + t.Errorf("IsEnabled() = %v, want %v", notifier.IsEnabled(), tt.wantEnabled) + } + }) + } +} + +func TestSlackNotifier_NotifyPRProcessed(t *testing.T) { + tests := []struct { + name string + event *PRProcessedEvent + wantColor string + wantEnabled bool + }{ + { + name: "successful PR with no failures", + event: &PRProcessedEvent{ + PRNumber: 123, + PRTitle: "Add new feature", + PRURL: "https://github.com/test/repo/pull/123", + SourceRepo: "test/repo", + FilesMatched: 5, + FilesCopied: 5, + FilesFailed: 0, + ProcessingTime: 2 * time.Second, + }, + wantColor: "good", + wantEnabled: true, + }, + { + name: "PR with some failures", + event: &PRProcessedEvent{ + PRNumber: 124, + PRTitle: "Fix bug", + PRURL: "https://github.com/test/repo/pull/124", + SourceRepo: "test/repo", + FilesMatched: 5, + FilesCopied: 3, + FilesFailed: 2, + ProcessingTime: 3 * time.Second, + }, + wantColor: "warning", + wantEnabled: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create test server + var receivedMessage *SlackMessage + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + json.Unmarshal(body, &receivedMessage) + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + notifier := NewSlackNotifier(server.URL, "#test", "Test Bot", ":robot:") + ctx := context.Background() + + err := notifier.NotifyPRProcessed(ctx, tt.event) + if err != nil { + t.Errorf("NotifyPRProcessed() error = %v", err) + } + + if receivedMessage == nil { + t.Fatal("No message received") + } + + if len(receivedMessage.Attachments) == 0 { + t.Fatal("No attachments in message") + } + + attachment := receivedMessage.Attachments[0] + if attachment.Color != tt.wantColor { + t.Errorf("Color = %v, want %v", attachment.Color, tt.wantColor) + } + + expectedTitle := fmt.Sprintf("✅ PR #%d Processed", tt.event.PRNumber) + if attachment.Title != expectedTitle { + t.Errorf("Title = %v, want %v", attachment.Title, expectedTitle) + } + }) + } +} + +func TestSlackNotifier_NotifyError(t *testing.T) { + event := &ErrorEvent{ + Operation: "file_copy", + Error: fmt.Errorf("test error"), + PRNumber: 125, + SourceRepo: "test/repo", + } + + var receivedMessage *SlackMessage + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + json.Unmarshal(body, &receivedMessage) + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + notifier := NewSlackNotifier(server.URL, "#test", "Test Bot", ":robot:") + ctx := context.Background() + + err := notifier.NotifyError(ctx, event) + if err != nil { + t.Errorf("NotifyError() error = %v", err) + } + + if receivedMessage == nil { + t.Fatal("No message received") + } + + if len(receivedMessage.Attachments) == 0 { + t.Fatal("No attachments in message") + } + + attachment := receivedMessage.Attachments[0] + if attachment.Color != "danger" { + t.Errorf("Color = %v, want danger", attachment.Color) + } + + if attachment.Title != "❌ Error Occurred" { + t.Errorf("Title = %v, want ❌ Error Occurred", attachment.Title) + } +} + +func TestSlackNotifier_NotifyFilesCopied(t *testing.T) { + tests := []struct { + name string + fileCount 
int + wantTruncated bool + }{ + { + name: "few files", + fileCount: 5, + wantTruncated: false, + }, + { + name: "many files", + fileCount: 15, + wantTruncated: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + files := make([]string, tt.fileCount) + for i := 0; i < tt.fileCount; i++ { + files[i] = fmt.Sprintf("file%d.go", i) + } + + event := &FilesCopiedEvent{ + PRNumber: 126, + SourceRepo: "test/source", + TargetRepo: "test/target", + FileCount: tt.fileCount, + Files: files, + RuleName: "test-rule", + } + + var receivedMessage *SlackMessage + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + json.Unmarshal(body, &receivedMessage) + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + notifier := NewSlackNotifier(server.URL, "#test", "Test Bot", ":robot:") + ctx := context.Background() + + err := notifier.NotifyFilesCopied(ctx, event) + if err != nil { + t.Errorf("NotifyFilesCopied() error = %v", err) + } + + if receivedMessage == nil { + t.Fatal("No message received") + } + + attachment := receivedMessage.Attachments[0] + if tt.wantTruncated { + // Should contain "... and X more" + if !contains(attachment.Text, "and") || !contains(attachment.Text, "more") { + t.Error("Expected truncation message not found") + } + } + }) + } +} + +func TestSlackNotifier_NotifyDeprecation(t *testing.T) { + event := &DeprecationEvent{ + PRNumber: 127, + SourceRepo: "test/repo", + FileCount: 3, + Files: []string{"old1.go", "old2.go", "old3.go"}, + } + + var receivedMessage *SlackMessage + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + json.Unmarshal(body, &receivedMessage) + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + notifier := NewSlackNotifier(server.URL, "#test", "Test Bot", ":robot:") + ctx := context.Background() + + err := notifier.NotifyDeprecation(ctx, event) + if err != nil { + t.Errorf("NotifyDeprecation() error = %v", err) + } + + if receivedMessage == nil { + t.Fatal("No message received") + } + + attachment := receivedMessage.Attachments[0] + if attachment.Color != "warning" { + t.Errorf("Color = %v, want warning", attachment.Color) + } + + expectedTitle := fmt.Sprintf("⚠️ Files Deprecated from PR #%d", event.PRNumber) + if attachment.Title != expectedTitle { + t.Errorf("Title = %v, want %v", attachment.Title, expectedTitle) + } +} + +func TestSlackNotifier_DisabledNotifier(t *testing.T) { + // Create notifier without webhook URL (disabled) + notifier := NewSlackNotifier("", "#test", "Test Bot", ":robot:") + ctx := context.Background() + + // All notification methods should return nil without error + err := notifier.NotifyPRProcessed(ctx, &PRProcessedEvent{}) + if err != nil { + t.Errorf("NotifyPRProcessed() error = %v, want nil", err) + } + + err = notifier.NotifyError(ctx, &ErrorEvent{}) + if err != nil { + t.Errorf("NotifyError() error = %v, want nil", err) + } + + err = notifier.NotifyFilesCopied(ctx, &FilesCopiedEvent{}) + if err != nil { + t.Errorf("NotifyFilesCopied() error = %v, want nil", err) + } + + err = notifier.NotifyDeprecation(ctx, &DeprecationEvent{}) + if err != nil { + t.Errorf("NotifyDeprecation() error = %v, want nil", err) + } +} + +func TestFormatFileList(t *testing.T) { + files := []string{"file1.go", "file2.go", "file3.go"} + result := formatFileList(files) + + for _, file := range files { + if !contains(result, file) { + t.Errorf("formatFileList() 
missing file %s", file) + } + } + + // Should have bullet points + if !contains(result, "•") { + t.Error("formatFileList() missing bullet points") + } +} + +// Helper function +func contains(s, substr string) bool { + return len(s) >= len(substr) && (s == substr || len(s) > len(substr) && (s[:len(substr)] == substr || s[len(s)-len(substr):] == substr || containsMiddle(s, substr))) +} + +func containsMiddle(s, substr string) bool { + for i := 0; i <= len(s)-len(substr); i++ { + if s[i:i+len(substr)] == substr { + return true + } + } + return false +} + diff --git a/examples-copier/services/webhook_handler_new.go b/examples-copier/services/webhook_handler_new.go index 453e9c6..87cd840 100644 --- a/examples-copier/services/webhook_handler_new.go +++ b/examples-copier/services/webhook_handler_new.go @@ -63,8 +63,13 @@ func RetrieveFileContentsWithConfigAndBranch(ctx context.Context, filePath strin // HandleWebhookWithContainer handles incoming GitHub webhook requests using the service container func HandleWebhookWithContainer(w http.ResponseWriter, r *http.Request, config *configs.Config, container *ServiceContainer) { + startTime := time.Now() ctx := r.Context() + LogInfoCtx(ctx, "webhook handler started", map[string]interface{}{ + "elapsed_ms": time.Since(startTime).Milliseconds(), + }) + // Read and validate webhook payload limited := io.LimitReader(r.Body, maxWebhookBodyBytes) payload, err := io.ReadAll(limited) @@ -83,6 +88,11 @@ func HandleWebhookWithContainer(w http.ResponseWriter, r *http.Request, config * return } + LogInfoCtx(ctx, "payload read", map[string]interface{}{ + "elapsed_ms": time.Since(startTime).Milliseconds(), + "size_bytes": len(payload), + }) + // Verify webhook signature if config.WebhookSecret != "" { sigHeader := r.Header.Get("X-Hub-Signature-256") @@ -92,6 +102,9 @@ func HandleWebhookWithContainer(w http.ResponseWriter, r *http.Request, config * http.Error(w, "unauthorized", http.StatusUnauthorized) return } + LogInfoCtx(ctx, "signature verified", map[string]interface{}{ + "elapsed_ms": time.Since(startTime).Milliseconds(), + }) } // Parse webhook event @@ -112,7 +125,19 @@ func HandleWebhookWithContainer(w http.ResponseWriter, r *http.Request, config * return } - if !(prEvt.GetAction() == "closed" && prEvt.GetPullRequest().GetMerged()) { + action := prEvt.GetAction() + merged := prEvt.GetPullRequest().GetMerged() + + LogInfoCtx(ctx, "PR event received", map[string]interface{}{ + "action": action, + "merged": merged, + }) + + if !(action == "closed" && merged) { + LogInfoCtx(ctx, "skipping non-merged PR", map[string]interface{}{ + "action": action, + "merged": merged, + }) w.WriteHeader(http.StatusNoContent) return } @@ -121,17 +146,53 @@ func HandleWebhookWithContainer(w http.ResponseWriter, r *http.Request, config * prNumber := prEvt.GetPullRequest().GetNumber() sourceCommitSHA := prEvt.GetPullRequest().GetMergeCommitSHA() + // Extract repository info from webhook payload + repo := prEvt.GetRepo() + if repo == nil { + LogWarningCtx(ctx, "webhook missing repository info", nil) + w.WriteHeader(http.StatusBadRequest) + return + } + + repoOwner := repo.GetOwner().GetLogin() + repoName := repo.GetName() + LogInfoCtx(ctx, "processing merged PR", map[string]interface{}{ - "pr_number": prNumber, - "sha": sourceCommitSHA, + "pr_number": prNumber, + "sha": sourceCommitSHA, + "repo": fmt.Sprintf("%s/%s", repoOwner, repoName), + "elapsed_ms": time.Since(startTime).Milliseconds(), + }) + + // Respond immediately to avoid GitHub webhook timeout + LogInfoCtx(ctx, "sending 
immediate response", map[string]interface{}{ + "elapsed_ms": time.Since(startTime).Milliseconds(), }) - handleMergedPRWithContainer(ctx, prNumber, sourceCommitSHA, config, container) - w.WriteHeader(http.StatusOK) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusAccepted) + w.Write([]byte(`{"status":"accepted"}`)) + + LogInfoCtx(ctx, "response sent", map[string]interface{}{ + "elapsed_ms": time.Since(startTime).Milliseconds(), + }) + + // Flush the response immediately + if flusher, ok := w.(http.Flusher); ok { + flusher.Flush() + LogInfoCtx(ctx, "response flushed", map[string]interface{}{ + "elapsed_ms": time.Since(startTime).Milliseconds(), + }) + } + + // Process asynchronously in background with a new context + // Don't use the request context as it will be cancelled when the request completes + bgCtx := context.Background() + go handleMergedPRWithContainer(bgCtx, prNumber, sourceCommitSHA, repoOwner, repoName, config, container) } // handleMergedPRWithContainer processes a merged PR using the new pattern matching system -func handleMergedPRWithContainer(ctx context.Context, prNumber int, sourceCommitSHA string, config *configs.Config, container *ServiceContainer) { +func handleMergedPRWithContainer(ctx context.Context, prNumber int, sourceCommitSHA string, repoOwner string, repoName string, config *configs.Config, container *ServiceContainer) { startTime := time.Now() // Configure GitHub permissions @@ -139,6 +200,10 @@ func handleMergedPRWithContainer(ctx context.Context, prNumber int, sourceCommit ConfigurePermissions() } + // Update config with actual repository from webhook + config.RepoOwner = repoOwner + config.RepoName = repoName + // Load configuration using new loader yamlConfig, err := container.ConfigLoader.LoadConfig(ctx, config) if err != nil { @@ -150,7 +215,7 @@ func handleMergedPRWithContainer(ctx context.Context, prNumber int, sourceCommit Operation: "config_load", Error: err, PRNumber: prNumber, - SourceRepo: fmt.Sprintf("%s/%s", config.RepoOwner, config.RepoName), + SourceRepo: fmt.Sprintf("%s/%s", repoOwner, repoName), }) return } @@ -160,6 +225,17 @@ func handleMergedPRWithContainer(ctx context.Context, prNumber int, sourceCommit yamlConfig.SourceRepo = fmt.Sprintf("%s/%s", config.RepoOwner, config.RepoName) } + // Validate webhook is from expected source repository + webhookRepo := fmt.Sprintf("%s/%s", repoOwner, repoName) + if webhookRepo != yamlConfig.SourceRepo { + LogWarningCtx(ctx, "webhook from unexpected repository", map[string]interface{}{ + "webhook_repo": webhookRepo, + "expected_repo": yamlConfig.SourceRepo, + }) + container.MetricsCollector.RecordWebhookFailed() + return + } + // Get changed files from PR changedFiles, err := GetFilesChangedInPr(prNumber) if err != nil { @@ -279,7 +355,7 @@ func processFilesWithPatternMatching(ctx context.Context, prNumber int, sourceCo // Process each target for _, target := range rule.Targets { - processFileForTarget(ctx, prNumber, sourceCommitSHA, file, rule, target, matchResult.Variables, config, container) + processFileForTarget(ctx, prNumber, sourceCommitSHA, file, rule, target, matchResult.Variables, yamlConfig.SourceBranch, config, container) } } } @@ -287,7 +363,7 @@ func processFilesWithPatternMatching(ctx context.Context, prNumber int, sourceCo // processFileForTarget processes a single file for a specific target func processFileForTarget(ctx context.Context, prNumber int, sourceCommitSHA string, file types.ChangedFile, - rule types.CopyRule, target types.TargetConfig, 
variables map[string]string, config *configs.Config, container *ServiceContainer) { + rule types.CopyRule, target types.TargetConfig, variables map[string]string, sourceBranch string, config *configs.Config, container *ServiceContainer) { // Transform path targetPath, err := container.PathTransformer.Transform(file.Path, target.PathTransform, variables) @@ -303,29 +379,25 @@ func processFileForTarget(ctx context.Context, prNumber int, sourceCommitSHA str // Handle deleted files if file.Status == statusDeleted { - handleFileDeprecation(ctx, prNumber, sourceCommitSHA, file, rule, target, targetPath, config, container) + handleFileDeprecation(ctx, prNumber, sourceCommitSHA, file, rule, target, targetPath, sourceBranch, config, container) return } // Handle file copy - handleFileCopyWithAudit(ctx, prNumber, sourceCommitSHA, file, rule, target, targetPath, variables, config, container) + handleFileCopyWithAudit(ctx, prNumber, sourceCommitSHA, file, rule, target, targetPath, variables, sourceBranch, config, container) } // handleFileCopyWithAudit handles file copying with audit logging func handleFileCopyWithAudit(ctx context.Context, prNumber int, sourceCommitSHA string, file types.ChangedFile, - rule types.CopyRule, target types.TargetConfig, targetPath string, variables map[string]string, + rule types.CopyRule, target types.TargetConfig, targetPath string, variables map[string]string, sourceBranch string, config *configs.Config, container *ServiceContainer) { startTime := time.Now() sourceRepo := fmt.Sprintf("%s/%s", config.RepoOwner, config.RepoName) - // Retrieve file content - use target branch or default to main - sourceBranch := target.Branch - if sourceBranch == "" { - sourceBranch = "main" - } - - fc, err := RetrieveFileContentsWithConfigAndBranch(ctx, file.Path, sourceBranch, config) + // Retrieve file content from the source commit SHA (the merge commit) + // This ensures we fetch the exact version of the file that was merged + fc, err := RetrieveFileContentsWithConfigAndBranch(ctx, file.Path, sourceCommitSHA, config) if err != nil { // Log error event container.AuditLogger.LogErrorEvent(ctx, &AuditEvent{ @@ -349,7 +421,7 @@ func handleFileCopyWithAudit(ctx context.Context, prNumber int, sourceCommitSHA fc.Name = github.String(targetPath) // Queue file for upload - queueFileForUploadWithStrategy(target, *fc, rule, variables, config, container) + queueFileForUploadWithStrategy(target, *fc, rule, variables, prNumber, sourceCommitSHA, sourceBranch, config, container) // Log successful copy event fileSize := int64(0) @@ -384,7 +456,7 @@ func handleFileCopyWithAudit(ctx context.Context, prNumber int, sourceCommitSHA // handleFileDeprecation handles file deprecation with audit logging func handleFileDeprecation(ctx context.Context, prNumber int, sourceCommitSHA string, file types.ChangedFile, - rule types.CopyRule, target types.TargetConfig, targetPath string, config *configs.Config, container *ServiceContainer) { + rule types.CopyRule, target types.TargetConfig, targetPath string, sourceBranch string, config *configs.Config, container *ServiceContainer) { sourceRepo := fmt.Sprintf("%s/%s", config.RepoOwner, config.RepoName) @@ -419,11 +491,20 @@ func handleFileDeprecation(ctx context.Context, prNumber int, sourceCommitSHA st // queueFileForUploadWithStrategy queues a file for upload with the appropriate strategy func queueFileForUploadWithStrategy(target types.TargetConfig, file github.RepositoryContent, - rule types.CopyRule, variables map[string]string, config *configs.Config, container 
*ServiceContainer) { + rule types.CopyRule, variables map[string]string, prNumber int, sourceCommitSHA string, sourceBranch string, config *configs.Config, container *ServiceContainer) { + + // Include rule name and commit strategy in the key to allow multiple rules + // targeting the same repo/branch with different strategies + commitStrategy := string(target.CommitStrategy.Type) + if commitStrategy == "" { + commitStrategy = "direct" // default + } key := types.UploadKey{ - RepoName: target.Repo, - BranchPath: "refs/heads/" + target.Branch, + RepoName: target.Repo, + BranchPath: "refs/heads/" + target.Branch, + RuleName: rule.Name, + CommitStrategy: commitStrategy, } // Get existing entry or create new @@ -439,12 +520,19 @@ func queueFileForUploadWithStrategy(target types.TargetConfig, file github.Repos entry.CommitStrategy = types.CommitStrategy(target.CommitStrategy.Type) entry.AutoMergePR = target.CommitStrategy.AutoMerge + // Add file to content first so we can get accurate file count + entry.Content = append(entry.Content, file) + // Render commit message and PR title using templates msgCtx := types.NewMessageContext() msgCtx.RuleName = rule.Name msgCtx.SourceRepo = fmt.Sprintf("%s/%s", config.RepoOwner, config.RepoName) + msgCtx.SourceBranch = sourceBranch msgCtx.TargetRepo = target.Repo msgCtx.TargetBranch = target.Branch + msgCtx.FileCount = len(entry.Content) + msgCtx.PRNumber = prNumber + msgCtx.CommitSHA = sourceCommitSHA msgCtx.Variables = variables if target.CommitStrategy.CommitMessage != "" { @@ -454,7 +542,6 @@ func queueFileForUploadWithStrategy(target types.TargetConfig, file github.Repos entry.PRTitle = container.MessageTemplater.RenderPRTitle(target.CommitStrategy.PRTitle, msgCtx) } - entry.Content = append(entry.Content, file) container.FileStateService.AddFileToUpload(key, entry) } diff --git a/examples-copier/services/webhook_handler_new_test.go b/examples-copier/services/webhook_handler_new_test.go new file mode 100644 index 0000000..3aad531 --- /dev/null +++ b/examples-copier/services/webhook_handler_new_test.go @@ -0,0 +1,327 @@ +package services + +import ( + "bytes" + "crypto/hmac" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/google/go-github/v48/github" + "github.com/mongodb/code-example-tooling/code-copier/configs" +) + +func TestSimpleVerifySignature(t *testing.T) { + secret := []byte("test-secret") + body := []byte(`{"test": "payload"}`) + + // Generate valid signature + mac := hmac.New(sha256.New, secret) + mac.Write(body) + validSignature := "sha256=" + hex.EncodeToString(mac.Sum(nil)) + + tests := []struct { + name string + sigHeader string + body []byte + secret []byte + want bool + }{ + { + name: "valid signature", + sigHeader: validSignature, + body: body, + secret: secret, + want: true, + }, + { + name: "invalid signature", + sigHeader: "sha256=invalid", + body: body, + secret: secret, + want: false, + }, + { + name: "missing sha256 prefix", + sigHeader: "invalid", + body: body, + secret: secret, + want: false, + }, + { + name: "empty signature", + sigHeader: "", + body: body, + secret: secret, + want: false, + }, + { + name: "wrong secret", + sigHeader: validSignature, + body: body, + secret: []byte("wrong-secret"), + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := simpleVerifySignature(tt.sigHeader, tt.body, tt.secret) + if got != tt.want { + t.Errorf("simpleVerifySignature() = %v, want %v", got, tt.want) + } + }) + } +} + 
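+// TestUploadKey_IncludesRuleAndStrategy is a minimal illustrative sketch of the
+// UploadKey change in this patch series: because RuleName and CommitStrategy are
+// now part of the key, two rules that target the same repo/branch queue separate
+// upload entries instead of overwriting each other. It only exercises map-key
+// semantics; no GitHub calls are made. Assumes the code-copier types package is
+// imported in this file alongside the imports listed above.
+func TestUploadKey_IncludesRuleAndStrategy(t *testing.T) {
+	base := types.UploadKey{RepoName: "test-owner/target-repo", BranchPath: "refs/heads/main"}
+
+	direct := base
+	direct.RuleName = "rule-a"
+	direct.CommitStrategy = "direct"
+
+	viaPR := base
+	viaPR.RuleName = "rule-b"
+	viaPR.CommitStrategy = "pr"
+
+	uploads := map[types.UploadKey]types.UploadFileContent{
+		direct: {TargetBranch: "main"},
+		viaPR:  {TargetBranch: "main"},
+	}
+
+	if len(uploads) != 2 {
+		t.Errorf("expected 2 distinct upload entries, got %d", len(uploads))
+	}
+}
+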
+func TestHandleWebhookWithContainer_MissingEventType(t *testing.T) { + config := &configs.Config{ + RepoOwner: "test-owner", + RepoName: "test-repo", + + AuditEnabled: false, + } + + container, err := NewServiceContainer(config) + if err != nil { + t.Fatalf("NewServiceContainer() error = %v", err) + } + + payload := []byte(`{"action": "closed"}`) + req := httptest.NewRequest("POST", "/webhook", bytes.NewReader(payload)) + // Missing X-GitHub-Event header + + w := httptest.NewRecorder() + + HandleWebhookWithContainer(w, req, config, container) + + if w.Code != http.StatusBadRequest { + t.Errorf("Status code = %d, want %d", w.Code, http.StatusBadRequest) + } + + if !bytes.Contains(w.Body.Bytes(), []byte("missing event type")) { + t.Error("Expected 'missing event type' in response body") + } +} + +func TestHandleWebhookWithContainer_InvalidSignature(t *testing.T) { + config := &configs.Config{ + RepoOwner: "test-owner", + RepoName: "test-repo", + + WebhookSecret: "test-secret", + AuditEnabled: false, + } + + container, err := NewServiceContainer(config) + if err != nil { + t.Fatalf("NewServiceContainer() error = %v", err) + } + + payload := []byte(`{"action": "closed"}`) + req := httptest.NewRequest("POST", "/webhook", bytes.NewReader(payload)) + req.Header.Set("X-GitHub-Event", "pull_request") + req.Header.Set("X-Hub-Signature-256", "sha256=invalid") + + w := httptest.NewRecorder() + + HandleWebhookWithContainer(w, req, config, container) + + if w.Code != http.StatusUnauthorized { + t.Errorf("Status code = %d, want %d", w.Code, http.StatusUnauthorized) + } +} + +func TestHandleWebhookWithContainer_ValidSignature(t *testing.T) { + secret := "test-secret" + config := &configs.Config{ + RepoOwner: "test-owner", + RepoName: "test-repo", + + WebhookSecret: secret, + AuditEnabled: false, + } + + container, err := NewServiceContainer(config) + if err != nil { + t.Fatalf("NewServiceContainer() error = %v", err) + } + + // Create a valid pull_request event payload + prEvent := &github.PullRequestEvent{ + Action: github.String("opened"), + PullRequest: &github.PullRequest{ + Number: github.Int(123), + Merged: github.Bool(false), + }, + } + + payload, _ := json.Marshal(prEvent) + + // Generate valid signature + mac := hmac.New(sha256.New, []byte(secret)) + mac.Write(payload) + signature := "sha256=" + hex.EncodeToString(mac.Sum(nil)) + + req := httptest.NewRequest("POST", "/webhook", bytes.NewReader(payload)) + req.Header.Set("X-GitHub-Event", "pull_request") + req.Header.Set("X-Hub-Signature-256", signature) + + w := httptest.NewRecorder() + + HandleWebhookWithContainer(w, req, config, container) + + // Should not return unauthorized + if w.Code == http.StatusUnauthorized { + t.Error("Valid signature was rejected") + } +} + +func TestHandleWebhookWithContainer_NonPREvent(t *testing.T) { + config := &configs.Config{ + RepoOwner: "test-owner", + RepoName: "test-repo", + + AuditEnabled: false, + } + + container, err := NewServiceContainer(config) + if err != nil { + t.Fatalf("NewServiceContainer() error = %v", err) + } + + // Create a push event (not a PR event) + pushEvent := map[string]interface{}{ + "ref": "refs/heads/main", + } + payload, _ := json.Marshal(pushEvent) + + req := httptest.NewRequest("POST", "/webhook", bytes.NewReader(payload)) + req.Header.Set("X-GitHub-Event", "push") + + w := httptest.NewRecorder() + + HandleWebhookWithContainer(w, req, config, container) + + // Should return 204 No Content for non-PR events + if w.Code != http.StatusNoContent { + t.Errorf("Status code = %d, want 
%d", w.Code, http.StatusNoContent) + } +} + +func TestHandleWebhookWithContainer_NonMergedPR(t *testing.T) { + config := &configs.Config{ + RepoOwner: "test-owner", + RepoName: "test-repo", + + AuditEnabled: false, + } + + container, err := NewServiceContainer(config) + if err != nil { + t.Fatalf("NewServiceContainer() error = %v", err) + } + + // Create a PR event that's not merged + prEvent := &github.PullRequestEvent{ + Action: github.String("opened"), + PullRequest: &github.PullRequest{ + Number: github.Int(123), + Merged: github.Bool(false), + }, + } + payload, _ := json.Marshal(prEvent) + + req := httptest.NewRequest("POST", "/webhook", bytes.NewReader(payload)) + req.Header.Set("X-GitHub-Event", "pull_request") + + w := httptest.NewRecorder() + + HandleWebhookWithContainer(w, req, config, container) + + // Should return 204 No Content for non-merged PRs + if w.Code != http.StatusNoContent { + t.Errorf("Status code = %d, want %d", w.Code, http.StatusNoContent) + } +} + +func TestHandleWebhookWithContainer_MergedPR(t *testing.T) { + config := &configs.Config{ + RepoOwner: "test-owner", + RepoName: "test-repo", + + AuditEnabled: false, + } + + container, err := NewServiceContainer(config) + if err != nil { + t.Fatalf("NewServiceContainer() error = %v", err) + } + + // Create a merged PR event + prEvent := &github.PullRequestEvent{ + Action: github.String("closed"), + PullRequest: &github.PullRequest{ + Number: github.Int(123), + Merged: github.Bool(true), + MergeCommitSHA: github.String("abc123"), + }, + Repo: &github.Repository{ + Name: github.String("test-repo"), + Owner: &github.User{ + Login: github.String("test-owner"), + }, + }, + } + payload, _ := json.Marshal(prEvent) + + req := httptest.NewRequest("POST", "/webhook", bytes.NewReader(payload)) + req.Header.Set("X-GitHub-Event", "pull_request") + + w := httptest.NewRecorder() + + HandleWebhookWithContainer(w, req, config, container) + + // Should return 202 Accepted for merged PRs + if w.Code != http.StatusAccepted { + t.Errorf("Status code = %d, want %d", w.Code, http.StatusAccepted) + } + + // Check response body + var response map[string]string + json.Unmarshal(w.Body.Bytes(), &response) + if response["status"] != "accepted" { + t.Errorf("Response status = %v, want accepted", response["status"]) + } +} + +func TestRetrieveFileContentsWithConfigAndBranch(t *testing.T) { + // This test would require mocking the GitHub client + // For now, we document the expected behavior + t.Skip("Skipping test that requires GitHub API mocking") + + // Expected behavior: + // - Should call client.Repositories.GetContents with correct parameters + // - Should use the specified branch in RepositoryContentGetOptions + // - Should return file content on success + // - Should return error on failure +} + +func TestMaxWebhookBodyBytes(t *testing.T) { + // Verify the constant is set correctly + expected := 1 << 20 // 1MB + if maxWebhookBodyBytes != expected { + t.Errorf("maxWebhookBodyBytes = %d, want %d", maxWebhookBodyBytes, expected) + } +} + +func TestStatusDeleted(t *testing.T) { + // Verify the constant is set correctly + if statusDeleted != "DELETED" { + t.Errorf("statusDeleted = %s, want DELETED", statusDeleted) + } +} + diff --git a/examples-copier/types/types.go b/examples-copier/types/types.go index 33fbb7c..6fb5b4c 100644 --- a/examples-copier/types/types.go +++ b/examples-copier/types/types.go @@ -97,8 +97,10 @@ type DeprecatedFileEntry struct { // **** UPLOAD TYPES **** // type UploadKey struct { - RepoName string `json:"repo_name"` - 
BranchPath string `json:"branch_path"` + RepoName string `json:"repo_name"` + BranchPath string `json:"branch_path"` + RuleName string `json:"rule_name"` // Include rule name to allow multiple rules targeting same repo/branch + CommitStrategy string `json:"commit_strategy"` // Include strategy to differentiate direct vs PR } type UploadFileContent struct { From e1900d11c1842485a3c96596c1ff34ffe1cc89fc Mon Sep 17 00:00:00 2001 From: cbullinger Date: Wed, 15 Oct 2025 15:32:39 -0400 Subject: [PATCH 09/11] delete unused backup file --- github-metrics/index.js.bak | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 github-metrics/index.js.bak diff --git a/github-metrics/index.js.bak b/github-metrics/index.js.bak deleted file mode 100644 index 7e5bf63..0000000 --- a/github-metrics/index.js.bak +++ /dev/null @@ -1,30 +0,0 @@ -import { getGitHubMetrics } from "./get-github-metrics.js"; -import { addMetricsToAtlas } from "./write-to-db.js"; - -/* To change which repos to track metrics for, update the `repos` array before running the utility. -To track metrics for a new repo, set the owner and name first. -You can get the owner and name from the repo URL: `https://github.com//` -For example, to add `https://github.com/mongodb/docs-notebooks`, set `mongodb` as the -owner and `docs-notebooks` as the repo name. -NOTE: The GitHub token used to retrieve the info from a repo MUST have repo admin permissions to access all the endpoints in this code. */ - -class RepoDetails { - constructor(owner, repo) { - this.owner = owner; // the GitHub organization or member who owns the repo - this.repo = repo; // the name of the repo within the organization or member - } -} - -const docsNotebooksRepo = new RepoDetails("mongodb", "docs-notebooks"); -const atlasArchitectureGoSdkRepo = new RepoDetails("mongodb", "atlas-architecture-go-sdk"); - -const repos = [docsNotebooksRepo, atlasArchitectureGoSdkRepo]; - -const metricsDocs = []; - -for (const repo of repos) { - const metricsDoc = await getGitHubMetrics(repo.owner, repo.repo); - metricsDocs.push(metricsDoc); -} - -await addMetricsToAtlas(metricsDocs); From edae0b78eb49d8873ee83006e33300a7006ece75 Mon Sep 17 00:00:00 2001 From: cbullinger Date: Wed, 15 Oct 2025 15:49:08 -0400 Subject: [PATCH 10/11] Fix: compilation errors --- examples-copier/services/github_read_test.go | 12 ++++++++++-- .../services/github_write_to_target_test.go | 19 ++++++++++++++----- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/examples-copier/services/github_read_test.go b/examples-copier/services/github_read_test.go index 7b45917..5bc0bcc 100644 --- a/examples-copier/services/github_read_test.go +++ b/examples-copier/services/github_read_test.go @@ -5,9 +5,7 @@ import ( "testing" "github.com/google/go-github/v48/github" - "github.com/mongodb/code-example-tooling/code-copier/configs" "github.com/mongodb/code-example-tooling/code-copier/services" - "github.com/mongodb/code-example-tooling/code-copier/types" "github.com/stretchr/testify/require" test "github.com/mongodb/code-example-tooling/code-copier/tests" @@ -29,6 +27,13 @@ func stubContentsForBothOwners(path, contentB64 string, owner, repo string) { test.MockContentsEndpoint("REPO_OWNER", "REPO_NAME", path, contentB64) } +// LEGACY TESTS - These tests are for legacy code that was removed in commit a64726c +// The RetrieveAndParseConfigFile function was removed as part of the migration to YAML config +// and the new pattern-matching system. 
These tests are commented out but kept for reference. +// +// If you need to test config loading, see config_loader_test.go for the new YAML-based system. + +/* func TestRetrieveAndParseConfigFile_Valid(t *testing.T) { _ = test.WithHTTPMock(t) owner, repo := ensureEnv(t) @@ -84,6 +89,7 @@ func TestRetrieveAndParseConfigFile_InvalidJSON(t *testing.T) { require.Error(t, err, "invalid JSON must return an error") require.Nil(t, got) } +*/ func TestRetrieveFileContents_Success(t *testing.T) { _ = test.WithHTTPMock(t) @@ -101,6 +107,7 @@ func TestRetrieveFileContents_Success(t *testing.T) { require.Contains(t, *rc.Content, b64(payload)) } +/* // Test that Retrieve and Parse round-trips with one entry func TestRetrieveAndParseConfigFile_RoundTripMinimal(t *testing.T) { _ = test.WithHTTPMock(t) @@ -137,3 +144,4 @@ func TestRetrieveAndParseConfigFile_RoundTripMinimal(t *testing.T) { require.Equal(t, min[0].TargetDirectory, got[0].TargetDirectory) require.Equal(t, min[0].RecursiveCopy, got[0].RecursiveCopy) } +*/ diff --git a/examples-copier/services/github_write_to_target_test.go b/examples-copier/services/github_write_to_target_test.go index a3446c4..ecd58e9 100644 --- a/examples-copier/services/github_write_to_target_test.go +++ b/examples-copier/services/github_write_to_target_test.go @@ -57,6 +57,14 @@ func TestMain(m *testing.M) { os.Exit(code) } +// LEGACY TESTS - These tests are for legacy code that was removed in commit a64726c +// The AddToRepoAndFilesMap and IterateFilesForCopy functions were removed as part of the +// migration to the new pattern-matching system. These tests are commented out but kept for reference. +// +// The new system uses pattern matching rules defined in YAML config files. +// See pattern_matcher_test.go for tests of the new system. 
+ +/* func TestAddToRepoAndFilesMap_NewEntry(t *testing.T) { services.FilesToUpload = nil @@ -209,6 +217,7 @@ func TestIterateFilesForCopy_RecursiveVsNonRecursive(t *testing.T) { }) } } +*/ func TestAddFilesToTargetRepoBranch_Succeeds(t *testing.T) { _ = test.WithHTTPMock(t) @@ -539,10 +548,10 @@ func TestPriority_Strategy_ConfigOverridesEnv_And_MessageFallbacks(t *testing.T) } services.FilesToUpload = map[types.UploadKey]types.UploadFileContent{ - {RepoName: repo, BranchPath: "refs/heads/" + baseBranch}: {TargetBranch: baseBranch, Content: files}, + {RepoName: repo, BranchPath: "refs/heads/" + baseBranch, CommitStrategy: cfg.CopierCommitStrategy}: {TargetBranch: baseBranch, Content: files}, } - services.AddFilesToTargetRepoBranch(types.ConfigFileType{cfg}) + services.AddFilesToTargetRepoBranch() // No longer takes parameters - uses FilesToUpload map info := httpmock.GetCallCountInfo() require.Equal(t, 1, info["GET "+baseRefURL]) @@ -615,10 +624,10 @@ func TestPriority_PRTitleDefaultsToCommitMessage_And_NoAutoMergeWhenConfigPresen Name: github.String("only.txt"), Path: github.String("only.txt"), Content: github.String(base64.StdEncoding.EncodeToString([]byte("y"))), }} - cfg := types.Configs{TargetRepo: repo, TargetBranch: baseBranch /* MergeWithoutReview: false (zero value) */} - services.FilesToUpload = map[types.UploadKey]types.UploadFileContent{{RepoName: repo, BranchPath: "refs/heads/" + baseBranch, RuleName: "", CommitStrategy: ""}: {TargetBranch: baseBranch, Content: files}} + // cfg := types.Configs{TargetRepo: repo, TargetBranch: baseBranch /* MergeWithoutReview: false (zero value) */} + services.FilesToUpload = map[types.UploadKey]types.UploadFileContent{{RepoName: repo, BranchPath: "refs/heads/" + baseBranch, RuleName: "", CommitStrategy: "pr"}: {TargetBranch: baseBranch, Content: files}} - services.AddFilesToTargetRepoBranch(types.ConfigFileType{cfg}) + services.AddFilesToTargetRepoBranch() // No longer takes parameters - uses FilesToUpload map // Ensure a PR was created but no merge occurred require.Equal(t, 1, test.CountByMethodAndURLRegexp("POST", regexp.MustCompile(`/pulls$`))) From 78646872f13d13b422a846473ddce4466653286f Mon Sep 17 00:00:00 2001 From: cbullinger Date: Wed, 15 Oct 2025 17:09:39 -0400 Subject: [PATCH 11/11] Apply Dachary feedback --- .../MULTI-SOURCE-IMPLEMENTATION-PLAN.md | 514 ++++++++++++++ .../MULTI-SOURCE-MIGRATION-GUIDE.md | 435 ++++++++++++ .../MULTI-SOURCE-QUICK-REFERENCE.md | 532 +++++++++++++++ .../docs/multi-source/MULTI-SOURCE-README.md | 314 +++++++++ .../docs/multi-source/MULTI-SOURCE-SUMMARY.md | 405 +++++++++++ .../MULTI-SOURCE-TECHNICAL-SPEC.md | 646 ++++++++++++++++++ examples-copier/docs/multi-source/README.md | 217 ++++++ examples-copier/services/github_auth_test.go | 2 +- .../services/github_write_to_source_test.go | 18 +- 9 files changed, 3073 insertions(+), 10 deletions(-) create mode 100644 examples-copier/docs/multi-source/MULTI-SOURCE-IMPLEMENTATION-PLAN.md create mode 100644 examples-copier/docs/multi-source/MULTI-SOURCE-MIGRATION-GUIDE.md create mode 100644 examples-copier/docs/multi-source/MULTI-SOURCE-QUICK-REFERENCE.md create mode 100644 examples-copier/docs/multi-source/MULTI-SOURCE-README.md create mode 100644 examples-copier/docs/multi-source/MULTI-SOURCE-SUMMARY.md create mode 100644 examples-copier/docs/multi-source/MULTI-SOURCE-TECHNICAL-SPEC.md create mode 100644 examples-copier/docs/multi-source/README.md diff --git a/examples-copier/docs/multi-source/MULTI-SOURCE-IMPLEMENTATION-PLAN.md 
b/examples-copier/docs/multi-source/MULTI-SOURCE-IMPLEMENTATION-PLAN.md new file mode 100644 index 0000000..e3e0bde --- /dev/null +++ b/examples-copier/docs/multi-source/MULTI-SOURCE-IMPLEMENTATION-PLAN.md @@ -0,0 +1,514 @@ +# Multi-Source Repository Support - Implementation Plan + +## Executive Summary + +This document outlines the implementation plan for adding support for multiple source repositories to the examples-copier application. Currently, the application supports only a single source repository defined in the configuration. This enhancement will allow the copier to monitor and process webhooks from multiple source repositories, each with their own copy rules and configurations. + +## Current Architecture Analysis + +### Current Limitations + +1. **Single Source Repository**: The configuration schema (`YAMLConfig`) has a single `source_repo` and `source_branch` field at the root level +2. **Hardcoded Repository Context**: Environment variables `REPO_OWNER` and `REPO_NAME` are set globally and used throughout the codebase +3. **Webhook Validation**: The webhook handler validates that incoming webhooks match the configured `source_repo` (lines 228-236 in `webhook_handler_new.go`) +4. **Config File Location**: Configuration is fetched from the single source repository defined in environment variables +5. **GitHub App Installation**: Single installation ID is configured globally + +### Current Flow + +``` +Webhook Received → Validate Source Repo → Load Config from Source Repo → Process Files → Copy to Targets +``` + +## Proposed Architecture + +### New Multi-Source Flow + +``` +Webhook Received → Identify Source Repo → Load Config for That Source → Process Files → Copy to Targets +``` + +### Key Design Decisions + +1. **Configuration Storage**: Support both centralized (single config file) and distributed (per-repo config) approaches +2. **Backward Compatibility**: Maintain support for existing single-source configurations +3. **GitHub App Installations**: Support multiple installation IDs for different organizations +4. **Config Discovery**: Allow configs to be stored in a central location or in each source repository + +## Implementation Tasks + +### 1. 
Configuration Schema Updates + +**Files to Modify:** +- `types/config.go` +- `configs/copier-config.example.yaml` + +**Changes:** + +#### Option A: Centralized Multi-Source Config (Recommended) +```yaml +# New schema supporting multiple sources +sources: + - repo: "mongodb/docs-code-examples" + branch: "main" + installation_id: "12345678" # Optional, falls back to default + copy_rules: + - name: "go-examples" + source_pattern: + type: "prefix" + pattern: "examples/go/" + targets: + - repo: "mongodb/docs" + branch: "main" + path_transform: "code/${path}" + commit_strategy: + type: "direct" + + - repo: "mongodb/atlas-examples" + branch: "main" + installation_id: "87654321" # Different installation for different org + copy_rules: + - name: "atlas-cli-examples" + source_pattern: + type: "glob" + pattern: "cli/**/*.go" + targets: + - repo: "mongodb/atlas-cli" + branch: "main" + path_transform: "examples/${filename}" + commit_strategy: + type: "pull_request" + pr_title: "Update examples" + auto_merge: false + +# Global defaults (optional) +defaults: + commit_strategy: + type: "pull_request" + auto_merge: false + deprecation_check: + enabled: true + file: "deprecated_examples.json" +``` + +#### Option B: Backward Compatible (Single Source at Root) +```yaml +# Backward compatible - if source_repo exists at root, treat as single source +source_repo: "mongodb/docs-code-examples" +source_branch: "main" +copy_rules: + - name: "example" + # ... existing structure + +# OR use new multi-source structure +sources: + - repo: "mongodb/docs-code-examples" + # ... as above +``` + +**New Types:** +```go +// MultiSourceConfig represents configuration for multiple source repositories +type MultiSourceConfig struct { + Sources []SourceConfig `yaml:"sources" json:"sources"` + Defaults *DefaultsConfig `yaml:"defaults,omitempty" json:"defaults,omitempty"` +} + +// SourceConfig represents a single source repository configuration +type SourceConfig struct { + Repo string `yaml:"repo" json:"repo"` + Branch string `yaml:"branch" json:"branch"` + InstallationID string `yaml:"installation_id,omitempty" json:"installation_id,omitempty"` + ConfigFile string `yaml:"config_file,omitempty" json:"config_file,omitempty"` // For distributed configs + CopyRules []CopyRule `yaml:"copy_rules" json:"copy_rules"` +} + +// DefaultsConfig provides default values for all sources +type DefaultsConfig struct { + CommitStrategy *CommitStrategyConfig `yaml:"commit_strategy,omitempty" json:"commit_strategy,omitempty"` + DeprecationCheck *DeprecationConfig `yaml:"deprecation_check,omitempty" json:"deprecation_check,omitempty"` +} + +// Update YAMLConfig to support both formats +type YAMLConfig struct { + // Legacy single-source fields (for backward compatibility) + SourceRepo string `yaml:"source_repo,omitempty" json:"source_repo,omitempty"` + SourceBranch string `yaml:"source_branch,omitempty" json:"source_branch,omitempty"` + CopyRules []CopyRule `yaml:"copy_rules,omitempty" json:"copy_rules,omitempty"` + + // New multi-source fields + Sources []SourceConfig `yaml:"sources,omitempty" json:"sources,omitempty"` + Defaults *DefaultsConfig `yaml:"defaults,omitempty" json:"defaults,omitempty"` +} +``` + +### 2. Configuration Loading & Validation + +**Files to Modify:** +- `services/config_loader.go` + +**Changes:** + +1. 
**Add Config Discovery Method**: +```go +// ConfigDiscovery determines where to load config from +type ConfigDiscovery interface { + // DiscoverConfig finds the config for a given source repository + DiscoverConfig(ctx context.Context, repoOwner, repoName string) (*SourceConfig, error) +} +``` + +2. **Update LoadConfig Method**: +```go +// LoadConfigForSource loads configuration for a specific source repository +func (cl *DefaultConfigLoader) LoadConfigForSource(ctx context.Context, repoOwner, repoName string, config *configs.Config) (*SourceConfig, error) { + // Load the main config (centralized or from the source repo) + yamlConfig, err := cl.LoadConfig(ctx, config) + if err != nil { + return nil, err + } + + // Find the matching source configuration + sourceRepo := fmt.Sprintf("%s/%s", repoOwner, repoName) + sourceConfig := findSourceConfig(yamlConfig, sourceRepo) + if sourceConfig == nil { + return nil, fmt.Errorf("no configuration found for source repository: %s", sourceRepo) + } + + return sourceConfig, nil +} + +// findSourceConfig searches for a source repo in the config +func findSourceConfig(config *YAMLConfig, sourceRepo string) *SourceConfig { + // Check if using legacy single-source format + if config.SourceRepo != "" && config.SourceRepo == sourceRepo { + return &SourceConfig{ + Repo: config.SourceRepo, + Branch: config.SourceBranch, + CopyRules: config.CopyRules, + } + } + + // Search in multi-source format + for _, source := range config.Sources { + if source.Repo == sourceRepo { + return &source + } + } + + return nil +} +``` + +3. **Add Validation for Multi-Source**: +```go +func (c *YAMLConfig) Validate() error { + // Check if using legacy or new format + isLegacy := c.SourceRepo != "" + isMultiSource := len(c.Sources) > 0 + + if isLegacy && isMultiSource { + return fmt.Errorf("cannot use both legacy (source_repo) and new (sources) format") + } + + if !isLegacy && !isMultiSource { + return fmt.Errorf("must specify either source_repo or sources") + } + + if isLegacy { + return c.validateLegacyFormat() + } + + return c.validateMultiSourceFormat() +} + +func (c *YAMLConfig) validateMultiSourceFormat() error { + if len(c.Sources) == 0 { + return fmt.Errorf("at least one source repository is required") + } + + // Check for duplicate source repos + seen := make(map[string]bool) + for i, source := range c.Sources { + if source.Repo == "" { + return fmt.Errorf("sources[%d]: repo is required", i) + } + if seen[source.Repo] { + return fmt.Errorf("sources[%d]: duplicate source repository: %s", i, source.Repo) + } + seen[source.Repo] = true + + if err := validateSourceConfig(&source); err != nil { + return fmt.Errorf("sources[%d]: %w", i, err) + } + } + + return nil +} +``` + +### 3. Webhook Routing Logic + +**Files to Modify:** +- `services/webhook_handler_new.go` +- `services/github_auth.go` + +**Changes:** + +1. 
**Update Webhook Handler**: +```go +// handleMergedPRWithContainer processes a merged PR using the new pattern matching system +func handleMergedPRWithContainer(ctx context.Context, prNumber int, sourceCommitSHA string, repoOwner string, repoName string, config *configs.Config, container *ServiceContainer) { + startTime := time.Now() + + // Configure GitHub permissions for the source repository + if InstallationAccessToken == "" { + ConfigurePermissions() + } + + // Update config with actual repository from webhook + config.RepoOwner = repoOwner + config.RepoName = repoName + + // Load configuration for this specific source repository + sourceConfig, err := container.ConfigLoader.LoadConfigForSource(ctx, repoOwner, repoName, config) + if err != nil { + LogAndReturnError(ctx, "config_load", fmt.Sprintf("no configuration found for source repo %s/%s", repoOwner, repoName), err) + container.MetricsCollector.RecordWebhookFailed() + + container.SlackNotifier.NotifyError(ctx, &ErrorEvent{ + Operation: "config_load", + Error: err, + PRNumber: prNumber, + SourceRepo: fmt.Sprintf("%s/%s", repoOwner, repoName), + }) + return + } + + // Switch GitHub installation if needed + if sourceConfig.InstallationID != "" && sourceConfig.InstallationID != config.InstallationId { + if err := switchGitHubInstallation(sourceConfig.InstallationID); err != nil { + LogAndReturnError(ctx, "installation_switch", "failed to switch GitHub installation", err) + container.MetricsCollector.RecordWebhookFailed() + return + } + } + + // Continue with existing processing logic... + // Process files with pattern matching for this source + processFilesWithPatternMatching(ctx, prNumber, sourceCommitSHA, changedFiles, sourceConfig, config, container) +} +``` + +2. **Add Installation Switching**: +```go +// switchGitHubInstallation switches to a different GitHub App installation +func switchGitHubInstallation(installationID string) error { + // Save current installation ID + previousInstallationID := os.Getenv(configs.InstallationId) + + // Set new installation ID + os.Setenv(configs.InstallationId, installationID) + + // Clear cached token to force re-authentication + InstallationAccessToken = "" + + // Re-configure permissions with new installation + ConfigurePermissions() + + LogInfo(fmt.Sprintf("Switched GitHub installation from %s to %s", previousInstallationID, installationID)) + return nil +} +``` + +### 4. GitHub App Installation Support + +**Files to Modify:** +- `configs/environment.go` +- `services/github_auth.go` + +**Changes:** + +1. **Support Multiple Installation IDs**: +```go +// Config struct update +type Config struct { + // ... existing fields + + // Multi-installation support + InstallationId string // Default installation ID + InstallationMapping map[string]string // Map of repo -> installation_id +} + +// Load installation mapping from environment or config +func (c *Config) GetInstallationID(repo string) string { + if id, ok := c.InstallationMapping[repo]; ok { + return id + } + return c.InstallationId // fallback to default +} +``` + +2. 
**Update Authentication**: +```go +// ConfigurePermissionsForRepo configures GitHub permissions for a specific repository +func ConfigurePermissionsForRepo(installationID string) error { + if installationID == "" { + return fmt.Errorf("installation ID is required") + } + + // Use the provided installation ID + token, err := generateInstallationToken(installationID) + if err != nil { + return fmt.Errorf("failed to generate installation token: %w", err) + } + + InstallationAccessToken = token + return nil +} +``` + +### 5. Metrics & Audit Logging Updates + +**Files to Modify:** +- `services/health_metrics.go` +- `services/audit_logger.go` + +**Changes:** + +1. **Add Source Repository to Metrics**: +```go +// MetricsCollector update +type MetricsCollector struct { + // ... existing fields + + // Per-source metrics + webhooksBySource map[string]int64 + filesBySource map[string]int64 + uploadsBySource map[string]int64 + mu sync.RWMutex +} + +func (mc *MetricsCollector) RecordWebhookReceivedForSource(sourceRepo string) { + mc.mu.Lock() + defer mc.mu.Unlock() + mc.webhooksReceived++ + mc.webhooksBySource[sourceRepo]++ +} + +func (mc *MetricsCollector) GetMetricsBySource() map[string]SourceMetrics { + mc.mu.RLock() + defer mc.mu.RUnlock() + + result := make(map[string]SourceMetrics) + for source := range mc.webhooksBySource { + result[source] = SourceMetrics{ + Webhooks: mc.webhooksBySource[source], + Files: mc.filesBySource[source], + Uploads: mc.uploadsBySource[source], + } + } + return result +} +``` + +2. **Update Audit Events**: +```go +// AuditEvent already has SourceRepo field, just ensure it's populated correctly +// in all logging calls with the actual source repository +``` + +### 6. Documentation Updates + +**Files to Create/Modify:** +- `docs/MULTI-SOURCE-GUIDE.md` (new) +- `docs/CONFIGURATION-GUIDE.md` (update) +- `README.md` (update) +- `configs/copier-config.example.yaml` (update with multi-source example) + +### 7. Testing & Validation + +**Files to Create:** +- `services/config_loader_multi_test.go` +- `services/webhook_handler_multi_test.go` +- `test-payloads/multi-source-webhook.json` + +**Test Scenarios:** +1. Load multi-source configuration +2. Validate configuration with multiple sources +3. Route webhook to correct source configuration +4. Handle missing source repository gracefully +5. Switch between GitHub installations +6. Backward compatibility with single-source configs + +### 8. Migration Guide & Backward Compatibility + +**Backward Compatibility Strategy:** + +1. **Auto-detect Format**: Check if `source_repo` exists at root level +2. **Convert Legacy to New**: Internally convert single-source to multi-source format +3. **Validation**: Ensure both formats validate correctly +4. 
**Migration Tool**: Provide CLI command to convert configs + +```bash +# Convert legacy config to multi-source format +./config-validator convert-to-multi-source -input copier-config.yaml -output copier-config-multi.yaml +``` + +## Implementation Phases + +### Phase 1: Core Infrastructure (Week 1) +- [ ] Update configuration schema +- [ ] Implement config loading for multiple sources +- [ ] Add validation for multi-source configs +- [ ] Ensure backward compatibility + +### Phase 2: Webhook Routing (Week 2) +- [ ] Implement webhook routing logic +- [ ] Add GitHub installation switching +- [ ] Update authentication handling +- [ ] Test with multiple source repos + +### Phase 3: Observability (Week 3) +- [ ] Update metrics collection +- [ ] Enhance audit logging +- [ ] Add per-source monitoring +- [ ] Update health endpoints + +### Phase 4: Documentation & Testing (Week 4) +- [ ] Write comprehensive documentation +- [ ] Create migration guide +- [ ] Add unit and integration tests +- [ ] Perform end-to-end testing + +## Risks & Mitigation + +### Risk 1: Breaking Changes +**Mitigation**: Maintain full backward compatibility with legacy single-source format + +### Risk 2: GitHub Rate Limits +**Mitigation**: Implement per-source rate limiting and monitoring + +### Risk 3: Configuration Complexity +**Mitigation**: Provide clear examples, templates, and validation tools + +### Risk 4: Installation Token Management +**Mitigation**: Implement proper token caching and refresh logic per installation + +## Success Criteria + +1. ✅ Support multiple source repositories in a single deployment +2. ✅ Maintain 100% backward compatibility with existing configs +3. ✅ No performance degradation for single-source use cases +4. ✅ Clear documentation and migration path +5. ✅ Comprehensive test coverage (>80%) +6. ✅ Successful deployment with 2+ source repositories + +## Future Enhancements + +1. **Dynamic Config Reloading**: Reload configuration without restart +2. **Per-Source Webhooks**: Different webhook endpoints for different sources +3. **Source Repository Discovery**: Auto-discover repositories with copier configs +4. **Config Validation API**: REST API for validating configurations +5. **Multi-Tenant Support**: Support multiple organizations with isolated configs + diff --git a/examples-copier/docs/multi-source/MULTI-SOURCE-MIGRATION-GUIDE.md b/examples-copier/docs/multi-source/MULTI-SOURCE-MIGRATION-GUIDE.md new file mode 100644 index 0000000..94ac0a3 --- /dev/null +++ b/examples-copier/docs/multi-source/MULTI-SOURCE-MIGRATION-GUIDE.md @@ -0,0 +1,435 @@ +# Migration Guide: Single Source to Multi-Source Configuration + +This guide helps you migrate from the legacy single-source configuration format to the new multi-source format. + +## Table of Contents + +- [Overview](#overview) +- [Backward Compatibility](#backward-compatibility) +- [Migration Steps](#migration-steps) +- [Configuration Comparison](#configuration-comparison) +- [Testing Your Migration](#testing-your-migration) +- [Rollback Plan](#rollback-plan) +- [FAQ](#faq) + +## Overview + +The multi-source feature allows the examples-copier to monitor and process webhooks from multiple source repositories in a single deployment. This eliminates the need to run separate copier instances for different source repositories. 
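+
+As a quick orientation, the sketch below condenses two single-source deployments into one
+multi-source file. The repository names are placeholders; complete, step-by-step examples
+follow in Steps 3 and 4.
+
+```yaml
+# Before: two deployments, each with its own copier-config.yaml
+#   Deployment 1: source_repo: "example-org/repo-a"
+#   Deployment 2: source_repo: "example-org/repo-b"
+
+# After: one deployment, one config file
+sources:
+  - repo: "example-org/repo-a"
+    branch: "main"
+    copy_rules: []   # copy rules from deployment 1 (see Step 4)
+  - repo: "example-org/repo-b"
+    branch: "main"
+    copy_rules: []   # copy rules from deployment 2 (see Step 4)
+```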
+ +### Benefits of Multi-Source + +- **Simplified Deployment**: One instance handles multiple source repositories +- **Centralized Configuration**: Manage all copy rules in one place +- **Better Resource Utilization**: Shared infrastructure for all sources +- **Consistent Monitoring**: Unified metrics and audit logging +- **Cross-Organization Support**: Handle repos from different GitHub organizations + +## Backward Compatibility + +**Good News**: The new multi-source format is 100% backward compatible with existing configurations. + +- ✅ Existing single-source configs continue to work without changes +- ✅ No breaking changes to the configuration schema +- ✅ Automatic detection of legacy vs. new format +- ✅ Gradual migration path available + +## Migration Steps + +### Step 1: Assess Your Current Setup + +First, identify all the source repositories you're currently monitoring: + +```bash +# List all your current copier deployments +# Each deployment typically monitors one source repository +``` + +**Example Current State:** +- Deployment 1: Monitors `mongodb/docs-code-examples` +- Deployment 2: Monitors `mongodb/atlas-examples` +- Deployment 3: Monitors `10gen/internal-examples` + +### Step 2: Backup Current Configuration + +```bash +# Backup your current configuration +cp copier-config.yaml copier-config.yaml.backup + +# Backup environment variables +cp .env .env.backup +``` + +### Step 3: Convert Configuration Format + +#### Option A: Manual Conversion + +**Before (Single Source):** +```yaml +source_repo: "mongodb/docs-code-examples" +source_branch: "main" + +copy_rules: + - name: "go-examples" + source_pattern: + type: "prefix" + pattern: "examples/go/" + targets: + - repo: "mongodb/docs" + branch: "main" + path_transform: "code/go/${path}" + commit_strategy: + type: "pull_request" + pr_title: "Update Go examples" + auto_merge: false +``` + +**After (Multi-Source):** +```yaml +sources: + - repo: "mongodb/docs-code-examples" + branch: "main" + # Optional: Add installation_id if different from default + # installation_id: "12345678" + + copy_rules: + - name: "go-examples" + source_pattern: + type: "prefix" + pattern: "examples/go/" + targets: + - repo: "mongodb/docs" + branch: "main" + path_transform: "code/go/${path}" + commit_strategy: + type: "pull_request" + pr_title: "Update Go examples" + auto_merge: false +``` + +#### Option B: Automated Conversion (Recommended) + +Use the config-validator tool to automatically convert your configuration: + +```bash +# Convert single-source to multi-source format +./config-validator convert-to-multi-source \ + -input copier-config.yaml \ + -output copier-config-multi.yaml + +# Validate the new configuration +./config-validator validate -config copier-config-multi.yaml -v +``` + +### Step 4: Consolidate Multiple Deployments + +If you have multiple copier deployments, consolidate them into one multi-source config: + +```yaml +sources: + # Source 1: From deployment 1 + - repo: "mongodb/docs-code-examples" + branch: "main" + installation_id: "12345678" + copy_rules: + # ... copy rules from deployment 1 + + # Source 2: From deployment 2 + - repo: "mongodb/atlas-examples" + branch: "main" + installation_id: "87654321" + copy_rules: + # ... copy rules from deployment 2 + + # Source 3: From deployment 3 + - repo: "10gen/internal-examples" + branch: "main" + installation_id: "11223344" + copy_rules: + # ... 
copy rules from deployment 3 +``` + +### Step 5: Update Environment Variables + +Update your `.env` file to support multiple installations: + +```bash +# Before (single installation) +INSTALLATION_ID=12345678 + +# After (default installation + optional per-source) +INSTALLATION_ID=12345678 # Default/fallback installation ID + +# Note: Per-source installation IDs are now in the config file +# under each source's installation_id field +``` + +### Step 6: Update GitHub App Installations + +Ensure your GitHub App is installed on all source repositories: + +1. Go to your GitHub App settings +2. Install the app on each source repository's organization +3. Note the installation ID for each organization +4. Add installation IDs to your config file + +```bash +# Get installation IDs +curl -H "Authorization: Bearer YOUR_JWT_TOKEN" \ + https://api.github.com/app/installations +``` + +### Step 7: Validate Configuration + +Before deploying, validate your new configuration: + +```bash +# Validate configuration syntax and logic +./config-validator validate -config copier-config-multi.yaml -v + +# Test pattern matching +./config-validator test-pattern \ + -config copier-config-multi.yaml \ + -source "mongodb/docs-code-examples" \ + -file "examples/go/main.go" + +# Dry-run test +./examples-copier -config copier-config-multi.yaml -dry-run +``` + +### Step 8: Deploy and Test + +1. **Deploy to staging first**: +```bash +# Deploy to staging environment +gcloud app deploy --project=your-staging-project +``` + +2. **Test with real webhooks**: +```bash +# Use the test-webhook tool +./test-webhook -config copier-config-multi.yaml \ + -payload test-payloads/example-pr-merged.json +``` + +3. **Monitor logs**: +```bash +# Watch application logs +gcloud app logs tail -s default +``` + +4. **Verify metrics**: +```bash +# Check health endpoint +curl https://your-app.appspot.com/health + +# Check metrics endpoint +curl https://your-app.appspot.com/metrics +``` + +### Step 9: Production Deployment + +Once validated in staging: + +```bash +# Deploy to production +gcloud app deploy --project=your-production-project + +# Monitor for issues +gcloud app logs tail -s default --project=your-production-project +``` + +### Step 10: Decommission Old Deployments + +After confirming the multi-source deployment works: + +1. Monitor for 24-48 hours +2. Verify all source repositories are being processed +3. Check audit logs for any errors +4. 
Decommission old single-source deployments + +## Configuration Comparison + +### Single Source (Legacy) + +```yaml +source_repo: "mongodb/docs-code-examples" +source_branch: "main" + +copy_rules: + - name: "example-rule" + source_pattern: + type: "prefix" + pattern: "examples/" + targets: + - repo: "mongodb/docs" + branch: "main" + path_transform: "code/${path}" + commit_strategy: + type: "direct" +``` + +### Multi-Source (New) + +```yaml +sources: + - repo: "mongodb/docs-code-examples" + branch: "main" + installation_id: "12345678" # Optional + copy_rules: + - name: "example-rule" + source_pattern: + type: "prefix" + pattern: "examples/" + targets: + - repo: "mongodb/docs" + branch: "main" + path_transform: "code/${path}" + commit_strategy: + type: "direct" + +# Optional: Global defaults +defaults: + commit_strategy: + type: "pull_request" + auto_merge: false + deprecation_check: + enabled: true +``` + +### Hybrid (Both Formats Supported) + +The application automatically detects which format you're using: + +```go +// Automatic detection logic +if config.SourceRepo != "" { + // Legacy single-source format + processSingleSource(config) +} else if len(config.Sources) > 0 { + // New multi-source format + processMultiSource(config) +} +``` + +## Testing Your Migration + +### Test Checklist + +- [ ] Configuration validates successfully +- [ ] Pattern matching works for all sources +- [ ] Path transformations are correct +- [ ] Webhooks route to correct source config +- [ ] GitHub authentication works for all installations +- [ ] Files are copied to correct target repositories +- [ ] Deprecation tracking works (if enabled) +- [ ] Metrics show data for all sources +- [ ] Audit logs contain source repository info +- [ ] Slack notifications work (if enabled) + +### Test Commands + +```bash +# 1. Validate configuration +./config-validator validate -config copier-config-multi.yaml -v + +# 2. Test pattern matching for each source +./config-validator test-pattern \ + -config copier-config-multi.yaml \ + -source "mongodb/docs-code-examples" \ + -file "examples/go/main.go" + +# 3. Dry-run mode +DRY_RUN=true ./examples-copier -config copier-config-multi.yaml + +# 4. Test with webhook payload +./test-webhook -config copier-config-multi.yaml \ + -payload test-payloads/multi-source-webhook.json + +# 5. Check health +curl http://localhost:8080/health + +# 6. Check metrics +curl http://localhost:8080/metrics +``` + +## Rollback Plan + +If you encounter issues after migration: + +### Quick Rollback + +```bash +# 1. Restore backup configuration +cp copier-config.yaml.backup copier-config.yaml +cp .env.backup .env + +# 2. Redeploy previous version +gcloud app deploy --version=previous-version + +# 3. Route traffic back +gcloud app services set-traffic default --splits=previous-version=1 +``` + +### Gradual Rollback + +```bash +# Route 50% traffic to old version +gcloud app services set-traffic default \ + --splits=new-version=0.5,previous-version=0.5 + +# Monitor and adjust as needed +``` + +## FAQ + +### Q: Do I need to migrate immediately? + +**A:** No. The legacy single-source format is fully supported and will continue to work. Migrate when you need to monitor multiple source repositories or want to consolidate deployments. + +### Q: Can I mix legacy and new formats? + +**A:** No. Each configuration file must use either the legacy format OR the new format, not both. However, you can have different deployments using different formats. + +### Q: What happens if I don't specify installation_id? 
+ +**A:** The application will use the default `INSTALLATION_ID` from environment variables. This works fine if all your source repositories are in the same organization. + +### Q: Can I gradually migrate one source at a time? + +**A:** Yes. You can start with one source in the new format and add more sources over time. Keep your old deployments running until all sources are migrated. + +### Q: How do I test without affecting production? + +**A:** Use dry-run mode (`DRY_RUN=true`) to test configuration without making actual commits. Also test in a staging environment first. + +### Q: What if a webhook comes from an unknown source? + +**A:** The application will log a warning and return a 204 No Content response. No processing will occur. Check your configuration to ensure all expected sources are listed. + +### Q: Can different sources target the same repository? + +**A:** Yes! Multiple sources can target the same repository with different copy rules. The application handles this correctly. + +### Q: How are metrics tracked for multiple sources? + +**A:** Metrics are tracked both globally and per-source. Use the `/metrics` endpoint to see breakdown by source repository. + +## Support + +If you encounter issues during migration: + +1. Check the [Troubleshooting Guide](TROUBLESHOOTING.md) +2. Review application logs for errors +3. Use the config-validator tool to identify issues +4. Consult the [Multi-Source Implementation Plan](MULTI-SOURCE-IMPLEMENTATION-PLAN.md) + +## Next Steps + +After successful migration: + +1. Monitor metrics and audit logs +2. Optimize copy rules for performance +3. Consider enabling additional features (Slack notifications, etc.) +4. Document your specific configuration for your team +5. Set up alerts for failures + diff --git a/examples-copier/docs/multi-source/MULTI-SOURCE-QUICK-REFERENCE.md b/examples-copier/docs/multi-source/MULTI-SOURCE-QUICK-REFERENCE.md new file mode 100644 index 0000000..d4de5a9 --- /dev/null +++ b/examples-copier/docs/multi-source/MULTI-SOURCE-QUICK-REFERENCE.md @@ -0,0 +1,532 @@ +# Multi-Source Support - Quick Reference Guide + +## Overview + +This guide provides quick reference information for working with multi-source repository configurations. + +## Configuration Format + +### Single Source (Legacy) + +```yaml +source_repo: "mongodb/docs-code-examples" +source_branch: "main" +copy_rules: + - name: "example" + # ... rules +``` + +### Multi-Source (New) + +```yaml +sources: + - repo: "mongodb/docs-code-examples" + branch: "main" + installation_id: "12345678" # Optional + copy_rules: + - name: "example" + # ... rules +``` + +## Key Concepts + +### Source Repository +- The repository being monitored for changes +- Identified by `owner/repo` format (e.g., `mongodb/docs-code-examples`) +- Each source can have its own copy rules + +### Installation ID +- GitHub App installation identifier +- Different organizations require different installation IDs +- Optional: defaults to `INSTALLATION_ID` environment variable + +### Copy Rules +- Define which files to copy and where +- Each source can have multiple copy rules +- Rules are evaluated independently per source + +## Common Tasks + +### Add a New Source Repository + +```yaml +sources: + # Existing sources... 
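+  # Illustrative placeholder only (hypothetical repo and rule names, not from this project):
+  # an existing entry here would simply be another list item, e.g.
+  # - repo: "mongodb/existing-repo"
+  #   branch: "main"
+  #   copy_rules:
+  #     - name: "existing-rule"
+  #       # ... source_pattern, targets, commit_strategy as usual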
+ + # Add new source + - repo: "mongodb/new-repo" + branch: "main" + installation_id: "99887766" + copy_rules: + - name: "new-rule" + source_pattern: + type: "prefix" + pattern: "examples/" + targets: + - repo: "mongodb/target" + branch: "main" + path_transform: "code/${path}" + commit_strategy: + type: "pull_request" + pr_title: "Update examples" + auto_merge: false +``` + +### Configure Multiple Targets + +```yaml +sources: + - repo: "mongodb/source" + branch: "main" + copy_rules: + - name: "multi-target" + source_pattern: + type: "glob" + pattern: "**/*.go" + targets: + # Target 1 + - repo: "mongodb/target1" + branch: "main" + path_transform: "examples/${filename}" + commit_strategy: + type: "direct" + + # Target 2 + - repo: "mongodb/target2" + branch: "develop" + path_transform: "code/${filename}" + commit_strategy: + type: "pull_request" + pr_title: "Update examples" + auto_merge: false +``` + +### Set Global Defaults + +```yaml +sources: + - repo: "mongodb/source1" + # ... config + - repo: "mongodb/source2" + # ... config + +# Apply to all sources unless overridden +defaults: + commit_strategy: + type: "pull_request" + auto_merge: false + deprecation_check: + enabled: true + file: "deprecated_examples.json" +``` + +### Cross-Organization Copying + +```yaml +sources: + # Source from mongodb org + - repo: "mongodb/public-examples" + branch: "main" + installation_id: "11111111" + copy_rules: + - name: "to-internal" + source_pattern: + type: "prefix" + pattern: "public/" + targets: + # Target in 10gen org (requires different installation) + - repo: "10gen/internal-docs" + branch: "main" + path_transform: "examples/${path}" + commit_strategy: + type: "direct" +``` + +## Validation + +### Validate Configuration + +```bash +# Validate syntax and logic +./config-validator validate -config copier-config.yaml -v + +# Check specific source +./config-validator validate-source \ + -config copier-config.yaml \ + -source "mongodb/docs-code-examples" +``` + +### Test Pattern Matching + +```bash +# Test if a file matches patterns +./config-validator test-pattern \ + -config copier-config.yaml \ + -source "mongodb/docs-code-examples" \ + -file "examples/go/main.go" +``` + +### Test Path Transformation + +```bash +# Test path transformation +./config-validator test-transform \ + -config copier-config.yaml \ + -source "mongodb/docs-code-examples" \ + -file "examples/go/main.go" +``` + +## Monitoring + +### Health Check + +```bash +# Check application health +curl http://localhost:8080/health | jq + +# Check specific source +curl http://localhost:8080/health | jq '.sources["mongodb/docs-code-examples"]' +``` + +### Metrics + +```bash +# Get all metrics +curl http://localhost:8080/metrics | jq + +# Get metrics for specific source +curl http://localhost:8080/metrics | jq '.by_source["mongodb/docs-code-examples"]' +``` + +### Logs + +```bash +# Filter logs by source +gcloud app logs read --filter='jsonPayload.source_repo="mongodb/docs-code-examples"' + +# Filter by operation +gcloud app logs read --filter='jsonPayload.operation="webhook_received"' +``` + +## Troubleshooting + +### Webhook Not Processing + +**Check 1: Is source configured?** +```bash +./config-validator list-sources -config copier-config.yaml +``` + +**Check 2: Is webhook signature valid?** +```bash +# Check logs for signature validation errors +gcloud app logs read --filter='jsonPayload.error=~"signature"' +``` + +**Check 3: Is installation ID correct?** +```bash +# Verify installation ID +curl -H "Authorization: Bearer YOUR_JWT" \ + 
https://api.github.com/app/installations +``` + +### Files Not Copying + +**Check 1: Do files match patterns?** +```bash +./config-validator test-pattern \ + -config copier-config.yaml \ + -source "mongodb/source" \ + -file "path/to/file.go" +``` + +**Check 2: Is path transformation correct?** +```bash +./config-validator test-transform \ + -config copier-config.yaml \ + -source "mongodb/source" \ + -file "path/to/file.go" +``` + +**Check 3: Check audit logs** +```bash +# Query MongoDB audit logs +db.audit_events.find({ + source_repo: "mongodb/source", + success: false +}).sort({timestamp: -1}).limit(10) +``` + +### Installation Authentication Errors + +**Check 1: Verify installation ID** +```yaml +sources: + - repo: "mongodb/source" + installation_id: "12345678" # Verify this is correct +``` + +**Check 2: Check token expiry** +```bash +# Tokens are cached for 1 hour +# Check logs for token refresh +gcloud app logs read --filter='jsonPayload.operation="token_refresh"' +``` + +**Check 3: Verify app permissions** +- Go to GitHub App settings +- Check installation has required permissions +- Verify app is installed on the repository + +## Environment Variables + +### Required + +```bash +# GitHub App Configuration +GITHUB_APP_ID=123456 +INSTALLATION_ID=12345678 # Default installation ID + +# Google Cloud +GCP_PROJECT_ID=your-project +PEM_KEY_NAME=projects/123/secrets/pem/versions/latest +WEBHOOK_SECRET_NAME=projects/123/secrets/webhook/versions/latest + +# Application +PORT=8080 +CONFIG_FILE=copier-config.yaml +``` + +### Optional + +```bash +# Dry Run Mode +DRY_RUN=false + +# Audit Logging +AUDIT_ENABLED=true +MONGO_URI=mongodb+srv://... +AUDIT_DATABASE=copier_audit +AUDIT_COLLECTION=events + +# Metrics +METRICS_ENABLED=true + +# Slack Notifications +SLACK_WEBHOOK_URL=https://hooks.slack.com/... +SLACK_CHANNEL=#copier-alerts +``` + +## Best Practices + +### 1. Use Descriptive Rule Names + +```yaml +# Good +- name: "go-examples-to-docs" + +# Bad +- name: "rule1" +``` + +### 2. Test Before Deploying + +```bash +# Always validate +./config-validator validate -config copier-config.yaml -v + +# Test in dry-run mode +DRY_RUN=true ./examples-copier +``` + +### 3. Monitor Per Source + +```yaml +# Enable metrics for each source +sources: + - repo: "mongodb/source" + settings: + enabled: true + # Monitor this source specifically +``` + +### 4. Use Pull Requests for Production + +```yaml +# Safer for production +commit_strategy: + type: "pull_request" + auto_merge: false # Require review +``` + +### 5. Enable Deprecation Tracking + +```yaml +# Track deleted files +deprecation_check: + enabled: true + file: "deprecated_examples.json" +``` + +### 6. Set Appropriate Timeouts + +```yaml +sources: + - repo: "mongodb/large-repo" + settings: + timeout_seconds: 300 # 5 minutes for large repos +``` + +### 7. 
Use Rate Limiting + +```yaml +sources: + - repo: "mongodb/high-volume-repo" + settings: + rate_limit: + max_webhooks_per_minute: 10 + max_concurrent: 3 +``` + +## Migration Checklist + +- [ ] Backup current configuration +- [ ] Convert to multi-source format +- [ ] Validate new configuration +- [ ] Test in dry-run mode +- [ ] Deploy to staging +- [ ] Test with real webhooks +- [ ] Monitor metrics and logs +- [ ] Deploy to production +- [ ] Decommission old deployments + +## Quick Commands + +```bash +# Validate config +./config-validator validate -config copier-config.yaml -v + +# Convert legacy to multi-source +./config-validator convert-to-multi-source \ + -input copier-config.yaml \ + -output copier-config-multi.yaml + +# Test pattern matching +./config-validator test-pattern \ + -config copier-config.yaml \ + -source "mongodb/source" \ + -file "examples/go/main.go" + +# Dry run +DRY_RUN=true ./examples-copier + +# Check health +curl http://localhost:8080/health | jq + +# Get metrics +curl http://localhost:8080/metrics | jq + +# View logs +gcloud app logs tail -s default + +# Deploy +gcloud app deploy +``` + +## Support Resources + +- [Implementation Plan](MULTI-SOURCE-IMPLEMENTATION-PLAN.md) +- [Technical Specification](MULTI-SOURCE-TECHNICAL-SPEC.md) +- [Migration Guide](MULTI-SOURCE-MIGRATION-GUIDE.md) +- [Configuration Guide](CONFIGURATION-GUIDE.md) +- [Troubleshooting Guide](TROUBLESHOOTING.md) + +## Common Patterns + +### Pattern 1: Single Source, Multiple Targets + +```yaml +sources: + - repo: "mongodb/source" + branch: "main" + copy_rules: + - name: "to-multiple-targets" + source_pattern: + type: "glob" + pattern: "**/*.go" + targets: + - repo: "mongodb/target1" + # ... config + - repo: "mongodb/target2" + # ... config + - repo: "mongodb/target3" + # ... config +``` + +### Pattern 2: Multiple Sources, Single Target + +```yaml +sources: + - repo: "mongodb/source1" + branch: "main" + copy_rules: + - name: "from-source1" + source_pattern: + type: "prefix" + pattern: "examples/" + targets: + - repo: "mongodb/target" + path_transform: "source1/${path}" + # ... config + + - repo: "mongodb/source2" + branch: "main" + copy_rules: + - name: "from-source2" + source_pattern: + type: "prefix" + pattern: "examples/" + targets: + - repo: "mongodb/target" + path_transform: "source2/${path}" + # ... 
config +``` + +### Pattern 3: Cross-Organization with Different Strategies + +```yaml +sources: + # Public repo - use PRs + - repo: "mongodb/public-examples" + branch: "main" + installation_id: "11111111" + copy_rules: + - name: "public-to-docs" + source_pattern: + type: "prefix" + pattern: "examples/" + targets: + - repo: "mongodb/docs" + branch: "main" + path_transform: "code/${path}" + commit_strategy: + type: "pull_request" + auto_merge: false + + # Internal repo - direct commits + - repo: "10gen/internal-examples" + branch: "main" + installation_id: "22222222" + copy_rules: + - name: "internal-to-docs" + source_pattern: + type: "prefix" + pattern: "examples/" + targets: + - repo: "10gen/internal-docs" + branch: "main" + path_transform: "code/${path}" + commit_strategy: + type: "direct" +``` + diff --git a/examples-copier/docs/multi-source/MULTI-SOURCE-README.md b/examples-copier/docs/multi-source/MULTI-SOURCE-README.md new file mode 100644 index 0000000..37000f5 --- /dev/null +++ b/examples-copier/docs/multi-source/MULTI-SOURCE-README.md @@ -0,0 +1,314 @@ +# Multi-Source Repository Support - Documentation Index + +## 📋 Overview + +This directory contains comprehensive documentation for implementing multi-source repository support in the examples-copier application. This feature enables monitoring and processing webhooks from multiple source repositories in a single deployment. + +## 🎯 Quick Start + +**New to multi-source?** Start here: + +1. **[Summary](docs/MULTI-SOURCE-SUMMARY.md)** - High-level overview and benefits +2. **[Quick Reference](docs/MULTI-SOURCE-QUICK-REFERENCE.md)** - Common tasks and commands +3. **[Example Config](configs/copier-config.multi-source.example.yaml)** - Working configuration example + +**Ready to implement?** Follow this path: + +1. **[Implementation Plan](docs/MULTI-SOURCE-IMPLEMENTATION-PLAN.md)** - Detailed implementation guide +2. **[Technical Spec](docs/MULTI-SOURCE-TECHNICAL-SPEC.md)** - Technical specifications +3. **[Migration Guide](docs/MULTI-SOURCE-MIGRATION-GUIDE.md)** - Step-by-step migration + +## 📚 Documentation + +### Core Documents + +| Document | Purpose | Audience | +|----------|---------|----------| +| [**Summary**](docs/MULTI-SOURCE-SUMMARY.md) | Executive overview, benefits, and status | Everyone | +| [**Implementation Plan**](docs/MULTI-SOURCE-IMPLEMENTATION-PLAN.md) | Detailed implementation roadmap | Developers | +| [**Technical Spec**](docs/MULTI-SOURCE-TECHNICAL-SPEC.md) | Technical specifications and APIs | Developers | +| [**Migration Guide**](docs/MULTI-SOURCE-MIGRATION-GUIDE.md) | Migration from single to multi-source | DevOps, Developers | +| [**Quick Reference**](docs/MULTI-SOURCE-QUICK-REFERENCE.md) | Daily operations and troubleshooting | Everyone | + +### Configuration Examples + +| File | Description | +|------|-------------| +| [**Multi-Source Example**](configs/copier-config.multi-source.example.yaml) | Complete multi-source configuration | +| [**Single-Source Example**](configs/copier-config.example.yaml) | Legacy single-source format | + +### Visual Diagrams + +- **Architecture Diagram**: High-level system architecture with multiple sources +- **Sequence Diagram**: Webhook processing flow for multi-source setup + +## 🚀 What's New + +### Key Features + +✅ **Multiple Source Repositories** +- Monitor 3+ source repositories in one deployment +- Each source has independent copy rules +- Cross-organization support (mongodb, 10gen, etc.) 
+ +✅ **Intelligent Webhook Routing** +- Automatic source repository detection +- Dynamic configuration loading +- Graceful handling of unknown sources + +✅ **Multi-Installation Support** +- Different GitHub App installations per organization +- Automatic token management and refresh +- Seamless installation switching + +✅ **Enhanced Observability** +- Per-source metrics and monitoring +- Source-specific audit logging +- Detailed health status per source + +✅ **100% Backward Compatible** +- Existing single-source configs work unchanged +- Automatic format detection +- Gradual migration path + +## 📖 Documentation Guide + +### For Product Managers + +**Start with:** +1. [Summary](docs/MULTI-SOURCE-SUMMARY.md) - Understand benefits and scope +2. [Implementation Plan](docs/MULTI-SOURCE-IMPLEMENTATION-PLAN.md) - Review timeline and phases + +**Key Questions Answered:** +- Why do we need this? → See "Key Benefits" in Summary +- What's the timeline? → 4 weeks (see Implementation Plan) +- What are the risks? → See "Risk Mitigation" in Summary +- How do we measure success? → See "Success Criteria" in Implementation Plan + +### For Developers + +**Start with:** +1. [Technical Spec](docs/MULTI-SOURCE-TECHNICAL-SPEC.md) - Understand architecture +2. [Implementation Plan](docs/MULTI-SOURCE-IMPLEMENTATION-PLAN.md) - See detailed tasks + +**Key Sections:** +- Data models and schemas → Technical Spec §3 +- Component specifications → Technical Spec §4 +- API specifications → Technical Spec §5 +- Implementation tasks → Implementation Plan §2-8 + +**Code Changes Required:** +- `types/config.go` - New configuration types +- `services/config_loader.go` - Enhanced config loading +- `services/webhook_handler_new.go` - Webhook routing +- `services/github_auth.go` - Installation management +- `services/health_metrics.go` - Per-source metrics + +### For DevOps/SRE + +**Start with:** +1. [Migration Guide](docs/MULTI-SOURCE-MIGRATION-GUIDE.md) - Migration steps +2. [Quick Reference](docs/MULTI-SOURCE-QUICK-REFERENCE.md) - Operations guide + +**Key Sections:** +- Deployment strategy → Implementation Plan §10 +- Monitoring and metrics → Quick Reference "Monitoring" +- Troubleshooting → Quick Reference "Troubleshooting" +- Rollback procedures → Migration Guide "Rollback Plan" + +**Operational Tasks:** +- Configuration validation +- Staging deployment +- Production rollout +- Monitoring and alerting +- Decommissioning old deployments + +### For QA/Testing + +**Start with:** +1. [Technical Spec](docs/MULTI-SOURCE-TECHNICAL-SPEC.md) §9 - Testing strategy +2. 
[Migration Guide](docs/MULTI-SOURCE-MIGRATION-GUIDE.md) - Testing checklist + +**Test Scenarios:** +- Multi-source webhook processing +- Installation switching +- Config format conversion +- Error handling +- Performance under load +- Cross-organization copying + +## 🔧 Configuration Examples + +### Single Source (Legacy - Still Supported) + +```yaml +source_repo: "mongodb/docs-code-examples" +source_branch: "main" +copy_rules: + - name: "go-examples" + source_pattern: + type: "prefix" + pattern: "examples/go/" + targets: + - repo: "mongodb/docs" + branch: "main" + path_transform: "code/go/${path}" + commit_strategy: + type: "pull_request" +``` + +### Multi-Source (New) + +```yaml +sources: + - repo: "mongodb/docs-code-examples" + branch: "main" + installation_id: "12345678" + copy_rules: + - name: "go-examples" + source_pattern: + type: "prefix" + pattern: "examples/go/" + targets: + - repo: "mongodb/docs" + branch: "main" + path_transform: "code/go/${path}" + commit_strategy: + type: "pull_request" + + - repo: "mongodb/atlas-examples" + branch: "main" + installation_id: "87654321" + copy_rules: + - name: "atlas-cli" + source_pattern: + type: "glob" + pattern: "cli/**/*.go" + targets: + - repo: "mongodb/atlas-cli" + branch: "main" + path_transform: "examples/${filename}" + commit_strategy: + type: "direct" +``` + +## 🎯 Implementation Roadmap + +### Phase 1: Core Infrastructure (Week 1) +- [ ] Update configuration schema +- [ ] Implement config loading for multiple sources +- [ ] Add validation for multi-source configs +- [ ] Ensure backward compatibility + +### Phase 2: Webhook Routing (Week 2) +- [ ] Implement webhook routing logic +- [ ] Add GitHub installation switching +- [ ] Update authentication handling +- [ ] Test with multiple source repos + +### Phase 3: Observability (Week 3) +- [ ] Update metrics collection +- [ ] Enhance audit logging +- [ ] Add per-source monitoring +- [ ] Update health endpoints + +### Phase 4: Documentation & Testing (Week 4) +- [x] Write comprehensive documentation +- [x] Create migration guide +- [ ] Add unit and integration tests +- [ ] Perform end-to-end testing + +## 📊 Success Metrics + +- ✅ Support 3+ source repositories in single deployment +- ✅ 100% backward compatibility +- ✅ No performance degradation +- ✅ Clear documentation (Complete) +- ⏳ Test coverage >80% +- ⏳ Successful production deployment + +## 🔗 Related Documentation + +### Existing Documentation +- [Main README](README.md) - Application overview +- [Architecture](docs/ARCHITECTURE.md) - Current architecture +- [Configuration Guide](docs/CONFIGURATION-GUIDE.md) - Configuration reference +- [Deployment Guide](docs/DEPLOYMENT.md) - Deployment instructions + +### New Documentation +- [Multi-Source Summary](docs/MULTI-SOURCE-SUMMARY.md) +- [Implementation Plan](docs/MULTI-SOURCE-IMPLEMENTATION-PLAN.md) +- [Technical Specification](docs/MULTI-SOURCE-TECHNICAL-SPEC.md) +- [Migration Guide](docs/MULTI-SOURCE-MIGRATION-GUIDE.md) +- [Quick Reference](docs/MULTI-SOURCE-QUICK-REFERENCE.md) + +## 💡 Quick Commands + +```bash +# Validate multi-source config +./config-validator validate -config copier-config.yaml -v + +# Convert legacy to multi-source +./config-validator convert-to-multi-source \ + -input copier-config.yaml \ + -output copier-config-multi.yaml + +# Test pattern matching +./config-validator test-pattern \ + -config copier-config.yaml \ + -source "mongodb/docs-code-examples" \ + -file "examples/go/main.go" + +# Dry run with multi-source +DRY_RUN=true ./examples-copier -config 
copier-config-multi.yaml + +# Check health (per-source status) +curl http://localhost:8080/health | jq '.sources' + +# Get metrics by source +curl http://localhost:8080/metrics | jq '.by_source' +``` + +## 🤝 Contributing + +When implementing multi-source support: + +1. Follow the implementation plan phases +2. Write tests for all new functionality +3. Update documentation as needed +4. Ensure backward compatibility +5. Test with multiple source repositories +6. Monitor metrics during rollout + +## 📞 Support + +For questions or issues: + +1. Check the [Quick Reference](docs/MULTI-SOURCE-QUICK-REFERENCE.md) for common tasks +2. Review the [Migration Guide](docs/MULTI-SOURCE-MIGRATION-GUIDE.md) FAQ +3. Consult the [Technical Spec](docs/MULTI-SOURCE-TECHNICAL-SPEC.md) for details +4. Check existing [Troubleshooting Guide](docs/TROUBLESHOOTING.md) + +## 📝 Status + +| Component | Status | +|-----------|--------| +| Documentation | ✅ Complete | +| Implementation Plan | ✅ Complete | +| Technical Spec | ✅ Complete | +| Migration Guide | ✅ Complete | +| Example Configs | ✅ Complete | +| Code Implementation | ⏳ Pending | +| Unit Tests | ⏳ Pending | +| Integration Tests | ⏳ Pending | +| Staging Deployment | ⏳ Pending | +| Production Deployment | ⏳ Pending | + +**Last Updated**: 2025-10-15 +**Version**: 1.0 +**Status**: Documentation Complete, Ready for Implementation + +--- + +**Next Steps**: Begin Phase 1 implementation (Core Infrastructure) + diff --git a/examples-copier/docs/multi-source/MULTI-SOURCE-SUMMARY.md b/examples-copier/docs/multi-source/MULTI-SOURCE-SUMMARY.md new file mode 100644 index 0000000..ec44f51 --- /dev/null +++ b/examples-copier/docs/multi-source/MULTI-SOURCE-SUMMARY.md @@ -0,0 +1,405 @@ +# Multi-Source Repository Support - Implementation Summary + +## Executive Summary + +This document provides a comprehensive overview of the multi-source repository support implementation plan for the examples-copier application. + +## What's Being Built + +The multi-source feature enables the examples-copier to monitor and process webhooks from **multiple source repositories** in a single deployment, eliminating the need for separate copier instances. + +### Current State +- ✅ Single source repository per deployment +- ✅ Hardcoded repository configuration +- ✅ One GitHub App installation per instance +- ✅ Manual deployment for each source + +### Future State +- 🎯 Multiple source repositories per deployment +- 🎯 Dynamic webhook routing +- 🎯 Multiple GitHub App installations +- 🎯 Centralized configuration management +- 🎯 Per-source metrics and monitoring + +## Key Benefits + +1. **Simplified Operations**: One deployment handles all source repositories +2. **Cost Reduction**: Shared infrastructure reduces hosting costs +3. **Easier Maintenance**: Single codebase and configuration to manage +4. **Better Observability**: Unified metrics and audit logging +5. **Scalability**: Easy to add new source repositories + +## Documentation Deliverables + +### 1. Implementation Plan +**File**: `docs/MULTI-SOURCE-IMPLEMENTATION-PLAN.md` + +Comprehensive plan covering: +- Current architecture analysis +- Proposed architecture design +- Detailed implementation tasks (8 phases) +- Risk assessment and mitigation +- Success criteria +- Timeline (4 weeks) + +**Key Sections**: +- Configuration schema updates +- Webhook routing logic +- GitHub App installation support +- Metrics and audit logging +- Testing strategy +- Deployment phases + +### 2. 
Technical Specification +**File**: `docs/MULTI-SOURCE-TECHNICAL-SPEC.md` + +Detailed technical specifications including: +- Data models and schemas +- Component interfaces +- API specifications +- Error handling +- Performance considerations +- Security requirements + +**Key Components**: +- `WebhookRouter`: Routes webhooks to correct source config +- `InstallationManager`: Manages multiple GitHub App installations +- `ConfigLoader`: Enhanced to support multi-source configs +- `MetricsCollector`: Tracks per-source metrics + +### 3. Migration Guide +**File**: `docs/MULTI-SOURCE-MIGRATION-GUIDE.md` + +Step-by-step guide for migrating from single to multi-source: +- Backward compatibility assurance +- Manual and automated conversion options +- Consolidation of multiple deployments +- Testing and validation procedures +- Rollback plan +- FAQ section + +**Migration Steps**: +1. Assess current setup +2. Backup configuration +3. Convert format (manual or automated) +4. Consolidate deployments +5. Update environment variables +6. Validate configuration +7. Deploy to staging +8. Test thoroughly +9. Production deployment +10. Decommission old deployments + +### 4. Quick Reference Guide +**File**: `docs/MULTI-SOURCE-QUICK-REFERENCE.md` + +Quick reference for daily operations: +- Configuration format examples +- Common tasks and patterns +- Validation commands +- Monitoring and troubleshooting +- Best practices +- Quick command reference + +### 5. Example Configurations +**File**: `configs/copier-config.multi-source.example.yaml` + +Complete example showing: +- Multiple source repositories +- Different organizations (mongodb, 10gen) +- Various pattern types (prefix, glob, regex) +- Multiple targets per source +- Cross-organization copying +- Global defaults + +## Architecture Overview + +### High-Level Flow + +``` +Multiple Source Repos → Webhooks → Router → Config Loader → Pattern Matcher → Target Repos + ↓ + Installation Manager + ↓ + Metrics & Audit Logging +``` + +### Key Components + +1. **Webhook Router** (New) + - Routes incoming webhooks to correct source configuration + - Validates source repository against configured sources + - Returns 204 for unknown sources + +2. **Config Loader** (Enhanced) + - Supports both legacy and multi-source formats + - Auto-detects configuration format + - Validates multi-source configurations + - Converts legacy to multi-source format + +3. **Installation Manager** (New) + - Manages multiple GitHub App installations + - Caches installation tokens + - Handles token refresh automatically + - Switches between installations per source + +4. **Metrics Collector** (Enhanced) + - Tracks metrics per source repository + - Provides global and per-source statistics + - Monitors webhook processing times + - Tracks success/failure rates + +5. **Audit Logger** (Enhanced) + - Logs events with source repository context + - Enables per-source audit queries + - Tracks cross-organization operations + +## Configuration Schema + +### Multi-Source Format + +```yaml +sources: + - repo: "mongodb/docs-code-examples" + branch: "main" + installation_id: "12345678" # Optional + copy_rules: + - name: "go-examples" + source_pattern: + type: "prefix" + pattern: "examples/go/" + targets: + - repo: "mongodb/docs" + branch: "main" + path_transform: "code/go/${path}" + commit_strategy: + type: "pull_request" + pr_title: "Update Go examples" + auto_merge: false + + - repo: "mongodb/atlas-examples" + branch: "main" + installation_id: "87654321" + copy_rules: + # ... 
additional rules + +defaults: + commit_strategy: + type: "pull_request" + auto_merge: false + deprecation_check: + enabled: true +``` + +### Backward Compatibility + +The system automatically detects and supports the legacy single-source format: + +```yaml +# Legacy format - still works! +source_repo: "mongodb/docs-code-examples" +source_branch: "main" +copy_rules: + - name: "example" + # ... rules +``` + +## Implementation Phases + +### Phase 1: Core Infrastructure (Week 1) +- Update configuration schema +- Implement config loading for multiple sources +- Add validation for multi-source configs +- Ensure backward compatibility + +### Phase 2: Webhook Routing (Week 2) +- Implement webhook routing logic +- Add GitHub installation switching +- Update authentication handling +- Test with multiple source repos + +### Phase 3: Observability (Week 3) +- Update metrics collection +- Enhance audit logging +- Add per-source monitoring +- Update health endpoints + +### Phase 4: Documentation & Testing (Week 4) +- Write comprehensive documentation ✅ (Complete) +- Create migration guide ✅ (Complete) +- Add unit and integration tests +- Perform end-to-end testing + +## Key Features + +### 1. Automatic Source Detection +The webhook router automatically identifies the source repository from incoming webhooks and routes to the appropriate configuration. + +### 2. Installation Management +Seamlessly switches between GitHub App installations for different organizations, with automatic token caching and refresh. + +### 3. Per-Source Metrics +Track webhooks, files, and operations separately for each source repository: + +```json +{ + "by_source": { + "mongodb/docs-code-examples": { + "webhooks": {"received": 100, "processed": 98}, + "files": {"matched": 200, "uploaded": 195} + }, + "mongodb/atlas-examples": { + "webhooks": {"received": 50, "processed": 47}, + "files": {"matched": 120, "uploaded": 115} + } + } +} +``` + +### 4. Flexible Configuration +Support for: +- Centralized configuration (all sources in one file) +- Distributed configuration (config per source repo) +- Global defaults with per-source overrides +- Cross-organization copying + +### 5. Enhanced Monitoring +- Health endpoint shows status per source +- Metrics endpoint provides per-source breakdown +- Audit logs include source repository context +- Slack notifications with source information + +## Testing Strategy + +### Unit Tests +- Configuration loading and validation +- Webhook routing logic +- Installation token management +- Metrics collection per source + +### Integration Tests +- Multi-source webhook processing +- Installation switching +- Config format conversion +- Error handling scenarios + +### End-to-End Tests +- Complete workflow with 3+ sources +- Cross-organization copying +- Failure recovery +- Performance under load + +## Deployment Strategy + +### Rollout Approach +1. Deploy with backward compatibility enabled +2. Test in staging with multi-source config +3. Gradual production rollout (canary deployment) +4. Monitor metrics and logs closely +5. Full production deployment +6. 
Decommission old single-source deployments + +### Monitoring During Rollout +- Track webhook success rates per source +- Monitor GitHub API rate limits +- Watch for authentication errors +- Verify file copying success rates +- Check audit logs for anomalies + +## Success Criteria + +- ✅ Support 3+ source repositories in single deployment +- ✅ 100% backward compatibility with existing configs +- ✅ No performance degradation for single-source use cases +- ✅ Clear documentation and migration path +- ✅ Comprehensive test coverage (target: >80%) +- ✅ Successful production deployment + +## Risk Mitigation + +### Risk 1: Breaking Changes +**Mitigation**: Full backward compatibility with automatic format detection + +### Risk 2: GitHub Rate Limits +**Mitigation**: Per-source rate limiting and monitoring + +### Risk 3: Configuration Complexity +**Mitigation**: Clear examples, templates, and validation tools + +### Risk 4: Installation Token Management +**Mitigation**: Robust caching and refresh logic with error handling + +## Next Steps + +### For Implementation Team +1. Review all documentation +2. Set up development environment +3. Begin Phase 1 implementation +4. Create feature branch +5. Implement core infrastructure +6. Write unit tests +7. Submit PR for review + +### For Stakeholders +1. Review implementation plan +2. Approve timeline and resources +3. Identify test repositories +4. Plan staging environment +5. Schedule deployment windows + +### For Operations Team +1. Review deployment strategy +2. Set up monitoring alerts +3. Prepare rollback procedures +4. Plan capacity for multi-source load + +## Resources + +### Documentation +- [Implementation Plan](MULTI-SOURCE-IMPLEMENTATION-PLAN.md) - Detailed implementation guide +- [Technical Spec](MULTI-SOURCE-TECHNICAL-SPEC.md) - Technical specifications +- [Migration Guide](MULTI-SOURCE-MIGRATION-GUIDE.md) - Migration instructions +- [Quick Reference](MULTI-SOURCE-QUICK-REFERENCE.md) - Daily operations guide + +### Configuration Examples +- [Multi-Source Example](../configs/copier-config.multi-source.example.yaml) - Complete example config + +### Diagrams +- Architecture diagram (Mermaid) +- Sequence diagram (Mermaid) +- Component interaction diagram + +## Questions & Answers + +### Q: When should we migrate? +**A**: Migrate when you need to monitor multiple source repositories or want to consolidate deployments. No rush - legacy format is fully supported. + +### Q: What's the effort estimate? +**A**: 4 weeks for full implementation, testing, and deployment. Documentation is complete. + +### Q: Will this affect existing deployments? +**A**: No. Existing single-source deployments continue to work without changes. + +### Q: Can we test without affecting production? +**A**: Yes. Use dry-run mode and staging environment for thorough testing. + +### Q: What if we need to rollback? +**A**: Simple rollback to previous version. Legacy format is always supported. + +## Conclusion + +The multi-source repository support is a significant enhancement that will: +- Simplify operations and reduce costs +- Improve scalability and flexibility +- Enhance monitoring and observability +- Maintain full backward compatibility + +All documentation is complete and ready for implementation. The plan provides a clear path forward with minimal risk and maximum benefit. 
+ +--- + +**Status**: Documentation Complete ✅ +**Next Phase**: Implementation (Phase 1) +**Timeline**: 4 weeks +**Risk Level**: Low (backward compatible) + diff --git a/examples-copier/docs/multi-source/MULTI-SOURCE-TECHNICAL-SPEC.md b/examples-copier/docs/multi-source/MULTI-SOURCE-TECHNICAL-SPEC.md new file mode 100644 index 0000000..8435512 --- /dev/null +++ b/examples-copier/docs/multi-source/MULTI-SOURCE-TECHNICAL-SPEC.md @@ -0,0 +1,646 @@ +# Multi-Source Repository Support - Technical Specification + +## Document Information + +- **Version**: 1.0 +- **Status**: Draft +- **Last Updated**: 2025-10-15 +- **Author**: Examples Copier Team + +## 1. Overview + +### 1.1 Purpose + +This document provides detailed technical specifications for implementing multi-source repository support in the examples-copier application. + +### 1.2 Scope + +The implementation will enable the copier to: +- Monitor multiple source repositories simultaneously +- Route webhooks to appropriate source configurations +- Manage multiple GitHub App installations +- Maintain backward compatibility with existing single-source configurations + +### 1.3 Goals + +- **Primary**: Support multiple source repositories in a single deployment +- **Secondary**: Improve observability with per-source metrics +- **Tertiary**: Simplify deployment and reduce infrastructure costs + +## 2. System Architecture + +### 2.1 Current Architecture Limitations + +``` +Current Flow (Single Source): +┌─────────────────┐ +│ Source Repo │ +│ (hardcoded) │ +└────────┬────────┘ + │ Webhook + ▼ +┌─────────────────┐ +│ Webhook Handler │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ Load Config │ +│ (from source) │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ Process Files │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ Target Repos │ +└─────────────────┘ +``` + +### 2.2 Proposed Architecture + +``` +New Flow (Multi-Source): +┌──────────┐ ┌──────────┐ ┌──────────┐ +│ Source 1 │ │ Source 2 │ │ Source 3 │ +└────┬─────┘ └────┬─────┘ └────┬─────┘ + │ Webhook │ Webhook │ Webhook + └─────────────┴─────────────┘ + │ + ▼ + ┌─────────────────┐ + │ Webhook Router │ + │ (new component) │ + └────────┬────────┘ + │ + ▼ + ┌─────────────────┐ + │ Config Loader │ + │ (enhanced) │ + └────────┬────────┘ + │ + ┌────────┴────────┐ + │ │ + ▼ ▼ + ┌─────────┐ ┌─────────┐ + │Config 1 │ │Config 2 │ + └────┬────┘ └────┬────┘ + │ │ + └────────┬───────┘ + │ + ▼ + ┌─────────────────┐ + │ Process Files │ + └────────┬────────┘ + │ + ┌────────┴────────┐ + │ │ + ▼ ▼ + ┌─────────┐ ┌─────────┐ + │Target 1 │ │Target 2 │ + └─────────┘ └─────────┘ +``` + +## 3. 
Data Models + +### 3.1 Configuration Schema + +#### 3.1.1 MultiSourceConfig + +```go +// MultiSourceConfig represents the root configuration +type MultiSourceConfig struct { + // New multi-source format + Sources []SourceConfig `yaml:"sources,omitempty" json:"sources,omitempty"` + Defaults *DefaultsConfig `yaml:"defaults,omitempty" json:"defaults,omitempty"` + + // Legacy single-source format (for backward compatibility) + SourceRepo string `yaml:"source_repo,omitempty" json:"source_repo,omitempty"` + SourceBranch string `yaml:"source_branch,omitempty" json:"source_branch,omitempty"` + CopyRules []CopyRule `yaml:"copy_rules,omitempty" json:"copy_rules,omitempty"` +} +``` + +#### 3.1.2 SourceConfig + +```go +// SourceConfig represents a single source repository +type SourceConfig struct { + // Repository identifier (owner/repo format) + Repo string `yaml:"repo" json:"repo"` + + // Branch to monitor (default: "main") + Branch string `yaml:"branch" json:"branch"` + + // GitHub App installation ID for this repository + // Optional: falls back to default INSTALLATION_ID + InstallationID string `yaml:"installation_id,omitempty" json:"installation_id,omitempty"` + + // Path to config file in the repository + // Optional: for distributed config approach + ConfigFile string `yaml:"config_file,omitempty" json:"config_file,omitempty"` + + // Copy rules for this source + CopyRules []CopyRule `yaml:"copy_rules" json:"copy_rules"` + + // Source-specific settings + Settings *SourceSettings `yaml:"settings,omitempty" json:"settings,omitempty"` +} +``` + +#### 3.1.3 SourceSettings + +```go +// SourceSettings contains source-specific configuration +type SourceSettings struct { + // Enable/disable this source + Enabled bool `yaml:"enabled" json:"enabled"` + + // Timeout for processing webhooks from this source + TimeoutSeconds int `yaml:"timeout_seconds,omitempty" json:"timeout_seconds,omitempty"` + + // Rate limiting settings + RateLimit *RateLimitConfig `yaml:"rate_limit,omitempty" json:"rate_limit,omitempty"` +} + +// RateLimitConfig defines rate limiting per source +type RateLimitConfig struct { + // Maximum webhooks per minute + MaxWebhooksPerMinute int `yaml:"max_webhooks_per_minute" json:"max_webhooks_per_minute"` + + // Maximum concurrent processing + MaxConcurrent int `yaml:"max_concurrent" json:"max_concurrent"` +} +``` + +#### 3.1.4 DefaultsConfig + +```go +// DefaultsConfig provides default values for all sources +type DefaultsConfig struct { + CommitStrategy *CommitStrategyConfig `yaml:"commit_strategy,omitempty" json:"commit_strategy,omitempty"` + DeprecationCheck *DeprecationConfig `yaml:"deprecation_check,omitempty" json:"deprecation_check,omitempty"` + Settings *SourceSettings `yaml:"settings,omitempty" json:"settings,omitempty"` +} +``` + +### 3.2 Runtime Data Structures + +#### 3.2.1 SourceContext + +```go +// SourceContext holds runtime context for a source repository +type SourceContext struct { + // Source configuration + Config *SourceConfig + + // GitHub client for this source + GitHubClient *github.Client + + // Installation token + InstallationToken string + + // Token expiration time + TokenExpiry time.Time + + // Metrics for this source + Metrics *SourceMetrics + + // Last processed webhook timestamp + LastWebhook time.Time +} +``` + +#### 3.2.2 SourceMetrics + +```go +// SourceMetrics tracks metrics per source repository +type SourceMetrics struct { + SourceRepo string + + // Webhook metrics + WebhooksReceived int64 + WebhooksProcessed int64 + WebhooksFailed int64 + + // File 
metrics + FilesMatched int64 + FilesUploaded int64 + FilesUploadFailed int64 + FilesDeprecated int64 + + // Timing metrics + AvgProcessingTime time.Duration + MaxProcessingTime time.Duration + MinProcessingTime time.Duration + + // Last update + LastUpdated time.Time +} +``` + +## 4. Component Specifications + +### 4.1 Webhook Router + +**Purpose**: Route incoming webhooks to the correct source configuration + +**Interface**: +```go +type WebhookRouter interface { + // RouteWebhook routes a webhook to the appropriate source handler + RouteWebhook(ctx context.Context, event *github.PullRequestEvent) (*SourceConfig, error) + + // RegisterSource registers a source configuration + RegisterSource(config *SourceConfig) error + + // UnregisterSource removes a source configuration + UnregisterSource(repo string) error + + // GetSource retrieves a source configuration + GetSource(repo string) (*SourceConfig, error) + + // ListSources returns all registered sources + ListSources() []*SourceConfig +} +``` + +**Implementation**: +```go +type DefaultWebhookRouter struct { + sources map[string]*SourceConfig + mu sync.RWMutex +} + +func (r *DefaultWebhookRouter) RouteWebhook(ctx context.Context, event *github.PullRequestEvent) (*SourceConfig, error) { + repo := event.GetRepo() + if repo == nil { + return nil, fmt.Errorf("webhook missing repository info") + } + + repoFullName := repo.GetFullName() + + r.mu.RLock() + defer r.mu.RUnlock() + + source, ok := r.sources[repoFullName] + if !ok { + return nil, fmt.Errorf("no configuration found for repository: %s", repoFullName) + } + + // Check if source is enabled + if source.Settings != nil && !source.Settings.Enabled { + return nil, fmt.Errorf("source repository is disabled: %s", repoFullName) + } + + return source, nil +} +``` + +### 4.2 Config Loader (Enhanced) + +**Purpose**: Load and manage multi-source configurations + +**New Methods**: +```go +type ConfigLoader interface { + // Existing method + LoadConfig(ctx context.Context, config *configs.Config) (*types.YAMLConfig, error) + + // New methods for multi-source + LoadMultiSourceConfig(ctx context.Context, config *configs.Config) (*types.MultiSourceConfig, error) + LoadSourceConfig(ctx context.Context, repo string, config *configs.Config) (*types.SourceConfig, error) + ValidateMultiSourceConfig(config *types.MultiSourceConfig) error + ConvertLegacyToMultiSource(legacy *types.YAMLConfig) (*types.MultiSourceConfig, error) +} +``` + +**Implementation**: +```go +func (cl *DefaultConfigLoader) LoadMultiSourceConfig(ctx context.Context, config *configs.Config) (*types.MultiSourceConfig, error) { + // Load raw config + yamlConfig, err := cl.LoadConfig(ctx, config) + if err != nil { + return nil, err + } + + // Detect format + if yamlConfig.SourceRepo != "" { + // Legacy format - convert to multi-source + return cl.ConvertLegacyToMultiSource(yamlConfig) + } + + // Already multi-source format + multiConfig := &types.MultiSourceConfig{ + Sources: yamlConfig.Sources, + Defaults: yamlConfig.Defaults, + } + + // Validate + if err := cl.ValidateMultiSourceConfig(multiConfig); err != nil { + return nil, err + } + + return multiConfig, nil +} + +func (cl *DefaultConfigLoader) ConvertLegacyToMultiSource(legacy *types.YAMLConfig) (*types.MultiSourceConfig, error) { + source := types.SourceConfig{ + Repo: legacy.SourceRepo, + Branch: legacy.SourceBranch, + CopyRules: legacy.CopyRules, + } + + return &types.MultiSourceConfig{ + Sources: []types.SourceConfig{source}, + }, nil +} +``` + +### 4.3 Installation Manager + 
+**Purpose**: Manage multiple GitHub App installations + +**Interface**: +```go +type InstallationManager interface { + // GetInstallationToken gets or refreshes token for an installation + GetInstallationToken(ctx context.Context, installationID string) (string, error) + + // GetClientForInstallation gets a GitHub client for an installation + GetClientForInstallation(ctx context.Context, installationID string) (*github.Client, error) + + // RefreshToken refreshes an installation token + RefreshToken(ctx context.Context, installationID string) error + + // ClearCache clears cached tokens + ClearCache() +} +``` + +**Implementation**: +```go +type DefaultInstallationManager struct { + tokens map[string]*InstallationToken + mu sync.RWMutex +} + +type InstallationToken struct { + Token string + ExpiresAt time.Time +} + +func (im *DefaultInstallationManager) GetInstallationToken(ctx context.Context, installationID string) (string, error) { + im.mu.RLock() + token, ok := im.tokens[installationID] + im.mu.RUnlock() + + // Check if token exists and is not expired + if ok && time.Now().Before(token.ExpiresAt.Add(-5*time.Minute)) { + return token.Token, nil + } + + // Generate new token + newToken, err := generateInstallationToken(installationID) + if err != nil { + return "", err + } + + // Cache token + im.mu.Lock() + im.tokens[installationID] = &InstallationToken{ + Token: newToken, + ExpiresAt: time.Now().Add(1 * time.Hour), + } + im.mu.Unlock() + + return newToken, nil +} +``` + +### 4.4 Metrics Collector (Enhanced) + +**Purpose**: Track metrics per source repository + +**New Methods**: +```go +type MetricsCollector interface { + // Existing methods... + + // New methods for multi-source + RecordWebhookReceivedForSource(sourceRepo string) + RecordWebhookProcessedForSource(sourceRepo string, duration time.Duration) + RecordWebhookFailedForSource(sourceRepo string) + RecordFileMatchedForSource(sourceRepo string) + RecordFileUploadedForSource(sourceRepo string) + RecordFileUploadFailedForSource(sourceRepo string) + + GetMetricsBySource(sourceRepo string) *SourceMetrics + GetAllSourceMetrics() map[string]*SourceMetrics +} +``` + +## 5. 
API Specifications + +### 5.1 Enhanced Health Endpoint + +**Endpoint**: `GET /health` + +**Response**: +```json +{ + "status": "healthy", + "started": true, + "github": { + "status": "healthy", + "authenticated": true + }, + "sources": { + "mongodb/docs-code-examples": { + "status": "healthy", + "last_webhook": "2025-10-15T10:30:00Z", + "installation_id": "12345678" + }, + "mongodb/atlas-examples": { + "status": "healthy", + "last_webhook": "2025-10-15T10:25:00Z", + "installation_id": "87654321" + } + }, + "queues": { + "upload_count": 0, + "deprecation_count": 0 + }, + "uptime": "2h15m30s" +} +``` + +### 5.2 Enhanced Metrics Endpoint + +**Endpoint**: `GET /metrics` + +**Response**: +```json +{ + "global": { + "webhooks": { + "received": 150, + "processed": 145, + "failed": 5, + "success_rate": 96.67 + }, + "files": { + "matched": 320, + "uploaded": 310, + "upload_failed": 5, + "deprecated": 5 + } + }, + "by_source": { + "mongodb/docs-code-examples": { + "webhooks": { + "received": 100, + "processed": 98, + "failed": 2 + }, + "files": { + "matched": 200, + "uploaded": 195, + "upload_failed": 3 + }, + "last_webhook": "2025-10-15T10:30:00Z" + }, + "mongodb/atlas-examples": { + "webhooks": { + "received": 50, + "processed": 47, + "failed": 3 + }, + "files": { + "matched": 120, + "uploaded": 115, + "upload_failed": 2 + }, + "last_webhook": "2025-10-15T10:25:00Z" + } + } +} +``` + +## 6. Error Handling + +### 6.1 Error Scenarios + +| Scenario | HTTP Status | Response | Action | +|----------|-------------|----------|--------| +| Unknown source repo | 204 No Content | Empty | Log warning, ignore webhook | +| Disabled source | 204 No Content | Empty | Log info, ignore webhook | +| Config load failure | 500 Internal Server Error | Error message | Alert, retry | +| Installation auth failure | 500 Internal Server Error | Error message | Alert, retry | +| Pattern match failure | 200 OK | Success (no files matched) | Log info | +| Upload failure | 200 OK | Success (logged as failed) | Log error, alert | + +### 6.2 Error Response Format + +```json +{ + "error": "configuration error", + "message": "no configuration found for repository: mongodb/unknown-repo", + "source_repo": "mongodb/unknown-repo", + "timestamp": "2025-10-15T10:30:00Z", + "request_id": "abc123" +} +``` + +## 7. Performance Considerations + +### 7.1 Scalability + +- **Concurrent Processing**: Support up to 10 concurrent webhook processing +- **Config Caching**: Cache loaded configurations for 5 minutes +- **Token Caching**: Cache installation tokens until 5 minutes before expiry +- **Rate Limiting**: Per-source rate limiting to prevent abuse + +### 7.2 Resource Limits + +- **Max Sources**: 50 source repositories per deployment +- **Max Copy Rules**: 100 copy rules per source +- **Max Targets**: 20 targets per copy rule +- **Config Size**: 1 MB maximum config file size + +## 8. Security Considerations + +### 8.1 Authentication + +- Each source repository requires valid GitHub App installation +- Installation tokens are cached securely in memory +- Tokens are refreshed automatically before expiry + +### 8.2 Authorization + +- Verify webhook signatures for all incoming requests +- Validate source repository against configured sources +- Ensure installation has required permissions + +### 8.3 Data Protection + +- No sensitive data in logs +- Installation tokens never logged +- Audit logs contain only necessary information + +## 9. 
Testing Strategy + +### 9.1 Unit Tests + +- Config loading and validation +- Webhook routing logic +- Installation token management +- Metrics collection + +### 9.2 Integration Tests + +- Multi-source webhook processing +- Installation switching +- Config format conversion +- Error handling + +### 9.3 End-to-End Tests + +- Complete workflow with multiple sources +- Cross-organization copying +- Failure recovery +- Performance under load + +## 10. Deployment Strategy + +### 10.1 Rollout Plan + +1. **Phase 1**: Deploy with backward compatibility (Week 1) +2. **Phase 2**: Enable multi-source for staging (Week 2) +3. **Phase 3**: Gradual production rollout (Week 3) +4. **Phase 4**: Full production deployment (Week 4) + +### 10.2 Monitoring + +- Track metrics per source repository +- Alert on failures +- Monitor GitHub API rate limits +- Track installation token refresh + +## 11. Appendix + +### 11.1 Configuration Examples + +See `configs/copier-config.multi-source.example.yaml` + +### 11.2 Migration Guide + +See `docs/MULTI-SOURCE-MIGRATION-GUIDE.md` + +### 11.3 Implementation Plan + +See `docs/MULTI-SOURCE-IMPLEMENTATION-PLAN.md` + diff --git a/examples-copier/docs/multi-source/README.md b/examples-copier/docs/multi-source/README.md new file mode 100644 index 0000000..6570a35 --- /dev/null +++ b/examples-copier/docs/multi-source/README.md @@ -0,0 +1,217 @@ +# Multi-Source Repository Support + +## Overview + +This feature enables the examples-copier to monitor and process webhooks from **multiple source repositories** across **multiple GitHub organizations** using a **centralized configuration** approach. + +### Use Case + +Perfect for teams managing code examples across multiple repositories and organizations: + +``` +Sources (monitored repos): +├── 10gen/docs-mongodb-internal +├── mongodb/docs-sample-apps +└── mongodb/docs-code-examples + +Targets (destination repos): +├── mongodb/docs +├── mongodb/docs-realm +├── mongodb/developer-hub +└── 10gen/docs-mongodb-internal +``` + +### Key Features + +✅ **Centralized Configuration** - One config file manages all sources +✅ **Multi-Organization Support** - Works across mongodb, 10gen, mongodb-university orgs +✅ **Cross-Org Copying** - Copy from mongodb → 10gen or vice versa +✅ **Single Deployment** - One app instance handles all sources +✅ **100% Backward Compatible** - Existing single-source configs still work + +## Quick Start + +### 1. Configuration Repository Setup + +Store your config in a dedicated repository: + +``` +Repository: mongodb-university/code-example-tooling +File: copier-config.yaml +``` + +### 2. Environment Variables + +```bash +# Config Repository +CONFIG_REPO_OWNER=mongodb-university +CONFIG_REPO_NAME=code-example-tooling +CONFIG_FILE=copier-config.yaml + +# GitHub App Installations (one per org) +MONGODB_INSTALLATION_ID= +TENGEN_INSTALLATION_ID= +MONGODB_UNIVERSITY_INSTALLATION_ID= +``` + +### 3. 
Example Configuration + +```yaml +# File: mongodb-university/code-example-tooling/copier-config.yaml + +sources: + # Source from 10gen org + - repo: "10gen/docs-mongodb-internal" + branch: "main" + installation_id: "${TENGEN_INSTALLATION_ID}" + copy_rules: + - name: "internal-to-public" + source_pattern: + type: "prefix" + pattern: "examples/" + targets: + - repo: "mongodb/docs" + branch: "main" + path_transform: "source/code/${relative_path}" + commit_strategy: + type: "pull_request" + pr_title: "Update examples from internal docs" + + # Source from mongodb org + - repo: "mongodb/docs-code-examples" + branch: "main" + installation_id: "${MONGODB_INSTALLATION_ID}" + copy_rules: + - name: "examples-to-internal" + source_pattern: + type: "prefix" + pattern: "public/" + targets: + - repo: "10gen/docs-mongodb-internal" + branch: "main" + path_transform: "external-examples/${relative_path}" + commit_strategy: + type: "direct" +``` + +### 4. GitHub App Installation + +Install the GitHub App in **all three organizations**: + +1. **mongodb** - for mongodb/* repos (source and target) +2. **10gen** - for 10gen/* repos (source and target) +3. **mongodb-university** - for the config repo + +## Documentation + +| Document | Purpose | +|----------|---------| +| **[Implementation Plan](MULTI-SOURCE-IMPLEMENTATION-PLAN.md)** | Detailed implementation guide for developers | +| **[Technical Spec](MULTI-SOURCE-TECHNICAL-SPEC.md)** | Technical specifications and architecture | +| **[Migration Guide](MULTI-SOURCE-MIGRATION-GUIDE.md)** | How to migrate from single-source to multi-source | +| **[Quick Reference](MULTI-SOURCE-QUICK-REFERENCE.md)** | Common tasks and troubleshooting | + +## Architecture + +### Centralized Configuration Approach + +``` +Config Repo (mongodb-university/code-example-tooling) + │ + ├─ copier-config.yaml (manages all sources) + │ + ├─ Sources: + │ ├─ 10gen/docs-mongodb-internal + │ ├─ mongodb/docs-sample-apps + │ └─ mongodb/docs-code-examples + │ + └─ Targets: + ├─ mongodb/docs + ├─ mongodb/docs-realm + ├─ mongodb/developer-hub + └─ 10gen/docs-mongodb-internal +``` + +### Webhook Flow + +``` +1. Webhook arrives from mongodb/docs-code-examples + ↓ +2. App loads config from mongodb-university/code-example-tooling + ↓ +3. Router identifies source repo in config + ↓ +4. Switches to MONGODB_INSTALLATION_ID + ↓ +5. Reads changed files from source + ↓ +6. For each target: + - Switches to target org's installation ID + - Writes files to target repo +``` + +## Key Differences from Original Plan + +This implementation focuses on **centralized configuration** for a **single team** managing multiple repos across organizations: + +| Feature | This Implementation | Original Plan | +|---------|-------------------|---------------| +| **Config Storage** | Centralized (one file) | Centralized OR distributed | +| **Config Location** | Dedicated repo (3rd org) | Source repo or central | +| **Use Case** | Single team, multi-org | General purpose | +| **Complexity** | Simplified | Full-featured | +| **Multi-Tenant** | No (not needed) | Future enhancement | + +## Benefits + +### For MongoDB Docs Team + +1. **Single Source of Truth** - All copy rules in one config file +2. **Easy to Understand** - See all flows at a glance +3. **Centralized Management** - No need to update multiple repos +4. **Cross-Org Support** - Built-in support for mongodb ↔ 10gen flows +5. **Simple Deployment** - One app instance for everything + +### Operational + +1. **Reduced Infrastructure** - One deployment instead of multiple +2. 
**Unified Monitoring** - All metrics and logs in one place +3. **Easier Debugging** - Single config to check +4. **Better Visibility** - See all copy operations together + +## Implementation Status + +| Component | Status | +|-----------|--------| +| Documentation | ✅ Complete | +| Implementation Plan | ✅ Complete | +| Technical Spec | ✅ Complete | +| Migration Guide | ✅ Complete | +| Code Implementation | ⏳ Pending | +| Testing | ⏳ Pending | +| Deployment | ⏳ Pending | + +## Next Steps + +1. Review the [Implementation Plan](MULTI-SOURCE-IMPLEMENTATION-PLAN.md) +2. Set up GitHub App installations in all three orgs +3. Create config repository structure +4. Begin implementation (Phase 1: Core Infrastructure) +5. Test with staging environment +6. Deploy to production + +## Support + +For questions or issues: + +1. Check the [Quick Reference](MULTI-SOURCE-QUICK-REFERENCE.md) +2. Review the [Migration Guide](MULTI-SOURCE-MIGRATION-GUIDE.md) FAQ +3. Consult the [Technical Spec](MULTI-SOURCE-TECHNICAL-SPEC.md) + +--- + +**Configuration Approach**: Centralized +**Target Use Case**: MongoDB Docs Team (mongodb, 10gen, mongodb-university orgs) +**Status**: Ready for Implementation +**Last Updated**: 2025-10-15 + diff --git a/examples-copier/services/github_auth_test.go b/examples-copier/services/github_auth_test.go index 23c47ee..d922869 100644 --- a/examples-copier/services/github_auth_test.go +++ b/examples-copier/services/github_auth_test.go @@ -192,6 +192,7 @@ func TestJWTExpiry_GlobalVariable(t *testing.T) { } } +// TODO https://jira.mongodb.org/browse/DOCSP-54727 // Note: Comprehensive testing of github_auth.go would require: // 1. Mocking the Secret Manager client // 2. Mocking the GitHub API client @@ -221,4 +222,3 @@ func TestJWTExpiry_GlobalVariable(t *testing.T) { // 3. Return errors instead of calling log.Fatal // 4. Use dependency injection for HTTP client // 5. Make JWT generation and caching logic more modular - diff --git a/examples-copier/services/github_write_to_source_test.go b/examples-copier/services/github_write_to_source_test.go index f16825b..d98c88a 100644 --- a/examples-copier/services/github_write_to_source_test.go +++ b/examples-copier/services/github_write_to_source_test.go @@ -88,20 +88,20 @@ func TestDeprecationFileEnvironmentVariables(t *testing.T) { // The UpdateDeprecationFile function uses os.Getenv to read these values tests := []struct { - name string - deprecationFile string + name string + deprecationFile string }{ { - name: "default config", - deprecationFile: "deprecated-files.json", + name: "default config", + deprecationFile: "deprecated-files.json", }, { - name: "custom file", - deprecationFile: "custom-deprecated.json", + name: "custom file", + deprecationFile: "custom-deprecated.json", }, { - name: "nested path", - deprecationFile: "docs/deprecated/files.json", + name: "nested path", + deprecationFile: "docs/deprecated/files.json", }, } @@ -116,6 +116,7 @@ func TestDeprecationFileEnvironmentVariables(t *testing.T) { } } +// TODO https://jira.mongodb.org/browse/DOCSP-54727 // Note: Comprehensive testing of UpdateDeprecationFile would require: // 1. Refactoring to accept a GitHub client interface instead of using global GetRestClient() // 2. Creating mock implementations of the GitHub client @@ -132,4 +133,3 @@ func TestDeprecationFileEnvironmentVariables(t *testing.T) { // func UpdateDeprecationFile(ctx context.Context, config *configs.Config, client GitHubClient) error // // This would allow for proper unit testing with mocked dependencies. -