diff --git a/gh-cli/README.md b/gh-cli/README.md index 1de05b4..aa51f94 100644 --- a/gh-cli/README.md +++ b/gh-cli/README.md @@ -601,41 +601,102 @@ Gets the status of Actions on a repository (ie, if Actions are disabled) Returns a list of all actions used in an organization using the SBOM API -Example output: +Usage: + +- `./get-actions-usage-in-organization.sh <org> [count-by-version|count-by-action] [txt|csv|md] [--resolve-shas] [--dedupe-by-repo]` + +Examples: + +- `./get-actions-usage-in-organization.sh joshjohanning-org count-by-version txt > output.txt` +- `./get-actions-usage-in-organization.sh joshjohanning-org count-by-action md > output.md` +- `./get-actions-usage-in-organization.sh joshjohanning-org count-by-version txt --resolve-shas > output.txt` +- `./get-actions-usage-in-organization.sh joshjohanning-org count-by-action txt --dedupe-by-repo > output.txt` + +Output formats: + +- `txt` (default) - Plain text format +- `csv` - Comma-separated values +- `md` - Markdown table format + +Count methods: + +- `count-by-version` (default) - Count actions by version (actions/checkout@v2 separate from actions/checkout@v3) +- `count-by-action` - Count actions by name only (versions stripped) + +Optional flags: + +- `--resolve-shas` - Resolve commit SHAs to their corresponding tags (works with count-by-version only) +- `--dedupe-by-repo` - Count unique repositories per action (works with count-by-action only) + +Example output (count-by-version) (with `--resolve-shas`): ```csv -71 actions/checkout@3 -42 actions/checkout@2 -13 actions/upload-artifact@2 -13 actions/setup-node@3 +Count,Action +4,actions/upload-artifact@v4 +3,actions/setup-node@v3 +2,actions/checkout@v4.3.0 +2,actions/checkout@main +2,actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # sha not associated to tag +2,actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 +2,actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 +1,actions/dependency-review-action@v4 
+1,actions/checkout@v4 ``` -Or (`count-by-action` option to count by action as opposed to action@version): +Example output (count-by-action) (with `--dedupe-by-repo`): ```csv -130 actions/checkout -35 actions/upload-artifact -27 actions/github-script -21 actions/setup-node +Count,Action +3,actions/checkout +2,actions/upload-artifact +2,actions/setup-node +1,actions/dependency-review-action ``` +> [!TIP] +> If outputting to `txt` or `md`, you'll see a warning message for each repository that returned an error (typically because Dependency Graph is disabled or you lack permission). You will also see an informational message providing context around what the count is returning. `csv` returns clean data. + > [!NOTE] -> The count returned is the # of repositories that use the action - if single a repository uses the action 2x times, it will only be counted 1x +> The count returned is the # of repositories that use the `action@version` combination - if a single repository uses the same `action@version` combination twice, it is only counted once (unless using `count-by-action` in combination with `--dedupe-by-repo`, which counts unique repositories per action). Conversely, if different `action@version` combinations are being used, they will be counted separately (for example, if the same action appears twice in a repository but one uses `@v2` and one uses `@v3`, by default they will be counted separately unless using `count-by-action` in combination with `--dedupe-by-repo`). + +> [!NOTE] +> Using `--resolve-shas` makes additional API calls, but we attempt to cache tag lookups to improve performance. The cache is stored in temporary files and automatically cleaned up when the script exits. 
### get-actions-usage-in-repository.sh Returns a list of all actions used in a repository using the SBOM API -Example output: +Usage: + +- `./get-actions-usage-in-repository.sh <org> <repo> [--resolve-shas]` + +Examples: + +- `./get-actions-usage-in-repository.sh joshjohanning-org ghas-demo` +- `./get-actions-usage-in-repository.sh joshjohanning-org ghas-demo --resolve-shas` + +Optional flags: + +- `--resolve-shas` - Resolve commit SHAs to their corresponding tags + +Example output (with `--resolve-shas`): ```csv -actions/checkout@3 -github/codeql-action/analyze@2 -github/codeql-action/autobuild@2 -github/codeql-action/init@2 -actions/dependency-review-action@3 +actions/checkout@v4 +actions/dependency-review-action@v4 +ossf/scorecard-action@e38b1902ae4f44df626f11ba0734b14fb91f8f86 # sha not associated to tag +actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 +actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8 # v3.1.0 +github/codeql-action/upload-sarif@17573ee1cc1b9d061760f3a006fc4aac4f944fd5 # sha not associated to tag +actions/checkout@v3 +github/codeql-action/analyze@v2 +github/codeql-action/autobuild@v2 +github/codeql-action/init@v2 ``` +> [!NOTE] +> Using `--resolve-shas` will add significant time to resolve commit SHAs to their corresponding tags + ### get-all-users-in-repository.sh Gets all users who have created an issue, pull request, issue comment, or pull request comment in a repository. 
diff --git a/gh-cli/get-actions-usage-in-organization.sh b/gh-cli/get-actions-usage-in-organization.sh
index 0c5d2ea..cdf2288 100755
--- a/gh-cli/get-actions-usage-in-organization.sh
+++ b/gh-cli/get-actions-usage-in-organization.sh
@@ -3,8 +3,10 @@
 # Returns a list of all actions used in an organization using the SBOM API
 
 # Example usage:
-# - ./get-actions-usage-in-repository.sh joshjohanning-org count-by-version txt > output.txt
-# - ./get-actions-usage-in-repository.sh joshjohanning-org count-by-action md > output.md
+# - ./get-actions-usage-in-organization.sh joshjohanning-org count-by-version txt > output.txt
+# - ./get-actions-usage-in-organization.sh joshjohanning-org count-by-action md > output.md
+# - ./get-actions-usage-in-organization.sh joshjohanning-org count-by-version txt --resolve-shas > output.txt
+# - ./get-actions-usage-in-organization.sh joshjohanning-org count-by-action txt --dedupe-by-repo > output.txt
 
 # count-by-version (default): returns a count of actions by version; actions/checkout@v2 would be counted separately from actions/checkout@v3
 # count-by-action: returns a count of actions by action name; only care about actions/checkout usage, not the version
@@ -12,15 +14,38 @@
 # Notes:
 # - The count returned is the # of repositories that use the action - if a single repository uses the action 2x times, it will only be counted 1x
 # - The script will take about 1 minute per 100 repositories
+# - Using --resolve-shas will add significant time to resolve commit SHAs to their corresponding tags
 
-if [ $# -lt 1 ] || [ $# -gt 3 ] ; then
-  echo "Usage: $0 | "
+usage="Usage: $0 <org> [count-by-version|count-by-action] [txt|csv|md] [--resolve-shas] [--dedupe-by-repo]"
+if [ $# -lt 1 ] || [ $# -gt 5 ] ; then
+  echo "$usage"
   exit 1
 fi
 
-org=$1
-count_method=$2
-report_format=$3
+resolve_shas=""
+dedupe_by_repo=""
+
+# Parse parameters and flags. Flags may appear in any position; every other
+# argument is positional (<org> [count-method] [report-format]), so a flag is
+# never mistaken for the count method or the report format.
+positional=()
+for arg in "$@"; do
+  case "$arg" in
+    --resolve-shas) resolve_shas="true" ;;
+    --dedupe-by-repo) dedupe_by_repo="true" ;;
+    --*) echo "Error: unknown flag: $arg" >&2; echo "$usage" >&2; exit 1 ;;
+    *) positional+=("$arg") ;;
+  esac
+done
+
+if [ ${#positional[@]} -lt 1 ] || [ ${#positional[@]} -gt 3 ]; then
+  echo "$usage"
+  exit 1
+fi
+
+org=${positional[0]}
+count_method=${positional[1]}
+report_format=${positional[2]}
 
 if [ -z "$count_method" ]; then
   count_method="count-by-version"
@@ -30,6 +55,117 @@ if [ -z "$report_format" ]; then
   report_format="txt"
 fi
 
+# Validate the count method and report format so a typo fails loudly instead of
+# silently falling through every format/count branch
+case "$count_method" in
+  count-by-version | count-by-action) ;;
+  *)
+    echo "Error: count method must be count-by-version or count-by-action (got: $count_method)" >&2
+    exit 1
+    ;;
+esac
+
+case "$report_format" in
+  txt | csv | md) ;;
+  *)
+    echo "Error: report format must be txt, csv, or md (got: $report_format)" >&2
+    exit 1
+    ;;
+esac
+
+# Validate that --resolve-shas only works with count-by-version
+if [ "$resolve_shas" == "true" ] && [ "$count_method" == "count-by-action" ]; then
+  echo "Error: --resolve-shas can only be used with count-by-version (not count-by-action)" >&2
+  exit 1
+fi
+
+# Validate that --dedupe-by-repo only works with count-by-action
+if [ "$dedupe_by_repo" == "true" ] && [ "$count_method" != "count-by-action" ]; then
+  echo "Error: --dedupe-by-repo can only be used with count-by-action" >&2
+  exit 1
+fi
+
+# Create temporary files for caching (compatible with bash 3.2)
+sha_cache_file=$(mktemp)
+action_tags_cache_dir=$(mktemp -d)
+
+# Cleanup function to remove temp files
+cleanup_cache() {
+  rm -f "$sha_cache_file" 2>/dev/null
+  rm -rf "$action_tags_cache_dir" 2>/dev/null
+}
+trap cleanup_cache EXIT
+
+# Function to resolve SHA to tag for a given action (with caching)
+# Arguments: $1 - action reference, "owner/repo[/path]@ref"
+# Outputs:   the reference unchanged when ref is not a 40-character hex SHA,
+#            otherwise the reference followed by " # <tag>" or
+#            " # sha not associated to tag"
+resolve_sha_to_tag() {
+  local action_with_sha="$1"
+  local action_name sha repo_slug safe_action_name safe_repo_name cache_key
+  local action_cache_file tag_name result
+
+  action_name=$(echo "$action_with_sha" | cut -d'@' -f1)
+  sha=$(echo "$action_with_sha" | cut -d'@' -f2)
+
+  # Tags belong to the repository, so drop any sub-path before calling the API
+  # (github/codeql-action/analyze -> github/codeql-action)
+  repo_slug=$(echo "$action_name" | cut -d'/' -f1-2)
+
+  # Create safe filenames for the caches (replace / with _)
+  safe_action_name=$(echo "$action_name" | tr '/' '_')
+  safe_repo_name=$(echo "$repo_slug" | tr '/' '_')
+  cache_key="${safe_action_name}@${sha}"
+
+  # Check SHA cache first
+  if grep -q "^${cache_key}|" "$sha_cache_file" 2>/dev/null; then
+    grep "^${cache_key}|" "$sha_cache_file" | cut -d'|' -f2- | head -1
+    return
+  fi
+
+  # Only process if it looks like a SHA (40 character hex string)
+  if [[ ${#sha} -eq 40 && "$sha" =~ ^[a-f0-9]+$ ]]; then
+    action_cache_file="${action_tags_cache_dir}/${safe_repo_name}"
+
+    # Check if we have tags cached for this repository
+    if [ ! -f "$action_cache_file" ]; then
+      # Fetch and cache all tags as "commit-sha|tag". The "list repository tags"
+      # endpoint reports the commit each tag points at, so annotated tags match
+      # too (git/refs/tags returns the tag object's SHA for those)
+      if ! gh api "repos/${repo_slug}/tags?per_page=100" --paginate \
+        --jq '.[] | "\(.commit.sha)|\(.name)"' > "$action_cache_file" 2>/dev/null; then
+        # Lookup failed: leave an empty cache file so we do not retry per SHA
+        : > "$action_cache_file"
+      fi
+    fi
+
+    # Look up the SHA in the cached tags
+    tag_name=""
+    if [ -s "$action_cache_file" ]; then
+      # First try to find a semantic version tag (prefer v1.2.3 over v1)
+      tag_name=$(grep "^${sha}|" "$action_cache_file" | cut -d'|' -f2 | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+' | head -1)
+
+      # If no semantic version found, fall back to any tag
+      if [ -z "$tag_name" ]; then
+        tag_name=$(grep "^${sha}|" "$action_cache_file" | cut -d'|' -f2 | head -1)
+      fi
+    fi
+
+    if [ -n "$tag_name" ] && [ "$tag_name" != "null" ]; then
+      result="$action_with_sha # $tag_name"
+    else
+      result="$action_with_sha # sha not associated to tag"
+    fi
+  else
+    # Not a SHA, cache and return as-is
+    result="$action_with_sha"
+  fi
+
+  echo "${cache_key}|${result}" >> "$sha_cache_file"
+  echo "$result"
+}
+
 repos=$(gh api graphql --paginate -F org="$org" -f query='query($org: String!$endCursor: String){
   organization(login:$org) {
     repositories(first:100,after: $endCursor) {
@@ -57,28 +193,121 @@ elif [ "$report_format" == "csv" ]; then
   echo "Count,Action"
 fi
 
-actions=()
+actions=""
+repos_without_dependency_graph=()
 
 for repo in $repos; do
-  actions+=$(gh api repos/$repo/dependency-graph/sbom --jq '.sbom.packages[].externalRefs.[0].referenceLocator' 2>&1 | grep "pkg:githubactions" | sed 's/pkg:githubactions\///') || true
-  actions+="\n"
+  # Try to get SBOM data - a failed call means the dependency graph is likely
+  # disabled (or we lack permission); use the exit status rather than
+  # pattern-matching the error text
+  if ! sbom_data=$(gh api "repos/$repo/dependency-graph/sbom" --jq '.sbom.packages[].externalRefs.[0].referenceLocator' 2>&1); then
+    repos_without_dependency_graph+=("$repo")
+    continue
+  fi
+
+  repo_actions=$(echo "$sbom_data" | grep "pkg:githubactions" | sed 's/pkg:githubactions\///' | sed 's/%2A/*/g' 2>/dev/null || true)
+  if [ "$dedupe_by_repo" == "true" ]; then
+    # For dedupe mode, prefix each action with the repo name so we can track repo usage
+    # Use awk to avoid sed delimiter issues; NF skips the empty line produced
+    # when a repository uses no actions
+    repo_actions=$(echo "$repo_actions" | awk -v repo="$repo" 'NF {print repo "|" $0}')
+  fi
+  actions+="$repo_actions"$'\n'
 done
 
 # clean up extra spaces
-results=$(echo -e "${actions[@]}" | tr -s '\n' '\n' | sed 's/\n\n/\n/g')
+results=$(echo -e "$actions" | tr -s '\n' '\n' | sed 's/\n\n/\n/g')
+
+# convert version patterns like 4.*.* to v4 format (major may be multi-digit)
+results=$(echo -e "$results" | sed 's/@\([0-9][0-9]*\)\.\*\.\*/@v\1/g')
+
+# convert semantic version numbers like @4.3.0 to @v4.3.0 (but not if they already have v, are branches, or are SHAs)
+results=$(echo -e "$results" | sed 's/@\([0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\)/@v\1/g')
+
+# resolve SHAs to tags if requested
+if [ "$resolve_shas" == "true" ]; then
+  # Accumulate the resolved lines
+  temp_results=""
+
+  # Process each line and resolve SHAs
+  while IFS= read -r line; do
+    if [ -n "$line" ] && [ "$line" != " " ]; then
+      resolved_line=$(resolve_sha_to_tag "$line")
+      if [ -n "$resolved_line" ] && [ "$resolved_line" != " " ]; then
+        temp_results+="$resolved_line"$'\n'
+      fi
+    fi
+  done <<< "$results"
+
+  # Clean up any trailing newlines
+  results=$(echo -e "$temp_results" | sed '/^$/d')
+fi
 
 # if count_method=count-by-action, then remove the version from the action name
 if [ "$count_method" == "count-by-action" ]; then
-  results=$(echo -e "${results[@]}" | sed 's/@.*//g')
+  results=$(echo -e "$results" | sed 's/@.*//g')
+
+  # If dedupe-by-repo is enabled, count unique repositories per action
+  if [ "$dedupe_by_repo" == "true" ]; then
+    # Each line now looks like: "repo|action"
+    # Drop duplicate pairs, then count the distinct repos left per action
+    # (a plain pipeline: no word splitting, no action names used as regexes)
+    results=$(echo -e "$results" | sed '/^$/d' | sort -u | cut -d'|' -f2- | sort | uniq -c)
+  else
+    # Strip repo prefixes if they exist (but shouldn't in non-dedupe mode)
+    results=$(echo -e "$results" | sed 's/^[^|]*|//')
+  fi
 fi
 
-results=$(echo -e "$results" | sort | uniq -c | sort -nr | awk '{print $1 " " $2}')
+if [ "$count_method" == "count-by-action" ] && [ "$dedupe_by_repo" == "true" ]; then
+  # Results are already "count action" lines from the dedupe logic
+  results=$(echo -e "$results" | sed '/^$/d' | sort -nr | awk '{$1=$1; print $1 " " substr($0, index($0, $2))}')
+else
+  # Standard processing: count occurrences
+  results=$(echo -e "$results" | sed '/^$/d' | sort | uniq -c | sort -nr | awk '{$1=$1; print $1 " " substr($0, index($0, $2))}')
+fi
 
 # if report_format = md
 if [ "$report_format" == "md" ]; then
-  echo -e "${results[@]}" | awk '{print "| " $1 " | " $2 " |"}'
+  echo -e "$results" | awk '{print "| " $1 " | " substr($0, index($0, $2)) " |"}'
 elif [ "$report_format" == "csv" ]; then
-  echo -e "${results[@]}" | awk '{print $1 "," $2}'
+  echo -e "$results" | awk '{print $1 "," substr($0, index($0, $2))}'
 else
-  echo -e "${results[@]}"
+  echo -e "$results"
+fi
+
+# Add explanatory note for count-by-action mode (but not for CSV)
+if [ "$count_method" == "count-by-action" ] && [ "$report_format" != "csv" ]; then
+  if [ "$dedupe_by_repo" == "true" ]; then
+    note_text="Count represents the number of repositories using each action (deduplicated per repository)."
+  else
+    note_text="Count represents unique action@version combinations (versions stripped). Each repository using different versions of the same action contributes multiple counts."
+  fi
+  echo ""
+  if [ "$report_format" == "md" ]; then
+    echo "📝 **Note**: $note_text"
+  elif [ "$report_format" == "txt" ]; then
+    echo "📝 Note: $note_text"
+  fi
+fi
+
+# Add explanatory note for count-by-version mode (but not for CSV)
+if [ "$count_method" == "count-by-version" ] && [ "$report_format" != "csv" ]; then
+  note_text="Count represents unique action@version combinations (with each unique action@version combination only showing up once per repository)."
+  echo ""
+  if [ "$report_format" == "md" ]; then
+    echo "📝 **Note**: $note_text"
+  elif [ "$report_format" == "txt" ]; then
+    echo "📝 Note: $note_text"
+  fi
+fi
+
+# Show warning about repos that couldn't be analyzed (but not for CSV)
+if [ ${#repos_without_dependency_graph[@]} -gt 0 ] && [ "$report_format" != "csv" ]; then
+  echo "" >&2
+  echo "⚠️ Warning: The following repositories could not be analyzed (likely due to disabled Dependency Graph or permissions):" >&2
+  for repo in "${repos_without_dependency_graph[@]}"; do
+    echo "  - $repo" >&2
+  done
+  echo "" >&2
 fi
diff --git a/gh-cli/get-actions-usage-in-repository.sh b/gh-cli/get-actions-usage-in-repository.sh
index 7fd3845..af8717f 100755
--- a/gh-cli/get-actions-usage-in-repository.sh
+++ b/gh-cli/get-actions-usage-in-repository.sh
@@ -4,13 +4,107 @@
 
 # Example usage:
 # - ./get-actions-usage-in-repository.sh joshjohanning-org ghas-demo
+# - ./get-actions-usage-in-repository.sh joshjohanning-org ghas-demo --resolve-shas
 
-if [ $# -ne "2" ]; then
-  echo "Usage: $0 "
+# Notes:
+# - Using --resolve-shas will add significant time to resolve commit SHAs to their corresponding tags
+
+if [ $# -lt 2 ] || [ $# -gt 3 ]; then
+  echo "Usage: $0 <org> <repo> [--resolve-shas]"
   exit 1
 fi
 
 org=$1
 repo=$2
+resolve_shas=""
+
+# Parse the optional third argument; reject anything that is not a known flag
+# instead of silently ignoring it
+if [ $# -eq 3 ]; then
+  if [ "$3" == "--resolve-shas" ]; then
+    resolve_shas="true"
+  else
+    echo "Error: unknown argument: $3" >&2
+    exit 1
+  fi
+fi
+
+# Function to resolve SHA to tag for a given action
+# Arguments: $1 - action reference, "owner/repo[/path]@ref"
+# Outputs:   the reference unchanged when ref is not a 40-character hex SHA,
+#            otherwise the reference followed by " # <tag>" or
+#            " # sha not associated to tag"
+resolve_sha_to_tag() {
+  local action_with_sha="$1"
+  local action_name sha repo_slug tags tag_name
+
+  action_name=$(echo "$action_with_sha" | cut -d'@' -f1)
+  sha=$(echo "$action_with_sha" | cut -d'@' -f2)
+
+  # Only process if it looks like a SHA (40 character hex string)
+  if [[ ${#sha} -eq 40 && "$sha" =~ ^[a-f0-9]+$ ]]; then
+    # Tags belong to the repository, so drop any sub-path before calling the API
+    # (github/codeql-action/analyze -> github/codeql-action)
+    repo_slug=$(echo "$action_name" | cut -d'/' -f1-2)
+
+    # Fetch the names of the tags pointing at this commit once. The "list
+    # repository tags" endpoint reports the commit each tag points at, so
+    # annotated tags match too (git/refs/tags returns the tag object's SHA).
+    # $sha is validated as hex above, so embedding it in the filter is safe.
+    tags=$(gh api "repos/${repo_slug}/tags?per_page=100" --paginate \
+      --jq ".[] | select(.commit.sha == \"$sha\") | .name" 2>/dev/null) || tags=""
+
+    # First try to find a semantic version tag (prefer v1.2.3 over v1)
+    tag_name=$(echo "$tags" | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+' | head -1)
+
+    # If no semantic version found, fall back to any tag
+    if [ -z "$tag_name" ]; then
+      tag_name=$(echo "$tags" | head -1)
+    fi
+
+    if [ -n "$tag_name" ] && [ "$tag_name" != "null" ]; then
+      echo "$action_with_sha # $tag_name"
+    else
+      echo "$action_with_sha # sha not associated to tag"
+    fi
+  else
+    echo "$action_with_sha"
+  fi
+}
+
+# Try to get SBOM data - a failed call means the dependency graph is likely
+# disabled (or we lack permission); use the exit status rather than
+# pattern-matching the error text
+if ! sbom_data=$(gh api "repos/$org/$repo/dependency-graph/sbom" --jq '.sbom.packages[].externalRefs.[0].referenceLocator' 2>&1); then
+  echo "❌ Error: Unable to access SBOM data for repository $org/$repo" >&2
+  echo "   This may be due to insufficient permissions or the Dependency Graph being disabled." >&2
+  exit 1
+fi
+
+results=$(echo "$sbom_data" | grep "pkg:githubactions" | sed 's/pkg:githubactions\///' | sed 's/%2A/*/g' 2>/dev/null || true)
+
+# convert version patterns like 4.*.* to v4 format (major may be multi-digit)
+results=$(echo -e "$results" | sed 's/@\([0-9][0-9]*\)\.\*\.\*/@v\1/g')
+
+# convert semantic version numbers like @4.3.0 to @v4.3.0 (but not if they already have v, are branches, or are SHAs)
+results=$(echo -e "$results" | sed 's/@\([0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\)/@v\1/g')
+
+# resolve SHAs to tags if requested
+if [ "$resolve_shas" == "true" ]; then
+  # Accumulate the resolved lines
+  temp_results=""
+
+  # Process each line and resolve SHAs
+  while IFS= read -r line; do
+    if [ -n "$line" ] && [ "$line" != " " ]; then
+      resolved_line=$(resolve_sha_to_tag "$line")
+      if [ -n "$resolved_line" ] && [ "$resolved_line" != " " ]; then
+        temp_results+="$resolved_line"$'\n'
+      fi
+    fi
+  done <<< "$results"
+
+  # Clean up any trailing newlines
+  results=$(echo -e "$temp_results" | sed '/^$/d')
+fi
 
-gh api repos/$org/$repo/dependency-graph/sbom --jq '.sbom.packages[].externalRefs.[0].referenceLocator' | grep "pkg:githubactions" | sed 's/pkg:githubactions\///'
+echo -e "$results"