Skip to content

Commit

Permalink
Create tool for getting DCO sign off emails
Browse files Browse the repository at this point in the history
This tool digs through commit messages and parses out names and email
addresses from the `Signed-off-by:` tags, collects all unique email
addresses and outputs a CSV of name/email pairs.

Signed-off-by: Andrew Ross <andrross@amazon.com>
  • Loading branch information
andrross committed Nov 11, 2022
1 parent 8abfdfd commit 6ce2d02
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 0 deletions.
8 changes: 8 additions & 0 deletions bin/commands/contributors.rb
Expand Up @@ -32,4 +32,12 @@
end
end
end

# Registers the `dco-csv` subcommand: prints the CSV produced by
# Commits#unique_dco_signers_csv for the organization built from the
# command-line options.
g.desc 'Create name,email CSV list of all DCO signers'
g.command 'dco-csv' do |c|
  c.action do |_global_options, options, _args|
    organization = GitHub::Organization.new(options)
    signers_csv = organization.commits(options).unique_dco_signers_csv
    puts signers_csv
  end
end
end
17 changes: 17 additions & 0 deletions lib/github/commit.rb
@@ -0,0 +1,17 @@
# frozen_string_literal: true

module GitHub
  # One commit item. Relies on `commit`, `repository` and `sha`, which are not
  # defined in this class (presumably supplied by Item -- confirm there).
  class Commit < Item
    # Scans the commit message for `Signed-off-by: Name <email>` trailers.
    #
    # Returns an array of [name, email] pairs, one per match, in the order they
    # appear in the message; the array is empty when nothing matches.
    def dco_signoff_names_and_mails
      signoff_pattern = /Signed-off-by: (.+) <([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]+)>/
      message = commit.message
      message.scan(signoff_pattern)
    end

    # One-line summary: "<repository full name> - <author email> - <sha>".
    def to_s
      repo_name = repository.full_name
      author_email = commit.author.email
      "#{repo_name} - #{author_email} - #{sha}"
    end
  end
end
58 changes: 58 additions & 0 deletions lib/github/commits.rb
@@ -0,0 +1,58 @@
# frozen_string_literal: true

module GitHub
  # Collection of commits found through the commit search in #page.
  class Commits < Items
    # Forwards arr_or_options to Items#initialize together with GitHub::Commit
    # (presumably the class used to wrap each entry -- confirm against Items).
    def initialize(arr_or_options)
      super arr_or_options, GitHub::Commit
    end

    # Builds a CSV string of "name,email" rows, one row per unique email
    # address found in the DCO sign-offs of every commit in this collection.
    #
    # Fields that contain a comma, a double quote or a line break are wrapped
    # in double quotes (embedded quotes doubled) so that names such as
    # "Lennon, John" still yield a well-formed two-column row.
    #
    # Returns a String with rows separated by "\n" (empty when there are no
    # sign-offs).
    def unique_dco_signers_csv
      # Flat list of [name, email] pairs, e.g.:
      # [['John Lennon', 'john@beatles.com'], ['Paul McCartney', 'paul@beatles.com']]
      signers = each.flat_map(&:dco_signoff_names_and_mails)

      # De-dupe all entries by email address into an email => [name1, name2] hash
      names_by_email = signers.each_with_object({}) do |(name, email), grouped|
        (grouped[email] ||= []) << name
      end

      # For each email pick the best name and put the name first
      rows = names_by_email.map { |email, names| [single_best_name(names), email] }

      # Sort all "noreply" email addresses to the bottom (for manual curation),
      # then sort by lower-cased name
      sorted = rows.sort_by { |name, email| [email.include?('noreply') ? 1 : 0, name.downcase] }

      sorted.map { |row| row.map { |field| csv_field(field) }.join(',') }.join("\n")
    end

    # Runs one commit search for the given options and returns the resulting
    # items, minus commits whose author email contains "[bot]".
    #
    # Raises RuntimeError when the search returns 1000 or more items.
    # NOTE(review): $github is a global defined elsewhere, presumably the API
    # client -- confirm.
    def page(options)
      data = $github.search_commits(query(options), per_page: 1000).items
      raise 'There are 1000+ commits returned from a single query, reduce --page.' if data.size >= 1000

      data.reject do |commit|
        commit.commit.author.email.include?('[bot]')
      end
    end

    # Builds the search query string: the terms from GitHub::Searchables plus
    # a committer-date range taken from options[:from] and options[:to],
    # joined with single spaces.
    def query(options = {})
      GitHub::Searchables.new(options).to_a.concat(
        [
          "committer-date:#{options[:from]}..#{options[:to]}"
        ]
      ).compact.join(' ')
    end

    private

    # This is a simple heuristic for picking the "best" name by choosing the
    # one with the most words. For example, if we find both "paul" and
    # "Paul McCartney" then we'll choose "Paul McCartney".
    def single_best_name(names)
      names.max_by { |name| name.split.length }
    end

    # Returns value unchanged unless it contains a character that would break
    # a CSV row, in which case it is quoted with embedded quotes doubled.
    def csv_field(value)
      return value unless value.match?(/[",\r\n]/)

      "\"#{value.gsub('"', '""')}\""
    end
  end
end
4 changes: 4 additions & 0 deletions lib/github/organization.rb
Expand Up @@ -28,6 +28,10 @@ def pull_requests(options = {})
@pull_requests ||= GitHub::PullRequests.new({ org: name, status: :merged }.merge(options))
end

# Returns the GitHub::Commits collection for this organization, building it
# on first use from `{ org: name }` merged with the given options (keys in
# options win). The result is memoized in @commits, so options passed on
# later calls are ignored.
def commits(options = {})
  @commits ||= begin
    search_options = { org: name }.merge(options)
    GitHub::Commits.new(search_options)
  end
end

# Returns the GitHub::Issues collection for this organization, building it
# on first use from `{ org: name }` merged with the given options (keys in
# options win). The result is memoized in @issues, so options passed on
# later calls are ignored.
def issues(options = {})
  @issues ||= begin
    search_options = { org: name }.merge(options)
    GitHub::Issues.new(search_options)
  end
end
Expand Down
2 changes: 2 additions & 0 deletions lib/tools.rb
Expand Up @@ -19,6 +19,8 @@
require_relative 'github/repos'
require_relative 'github/pull_requests'
require_relative 'github/pull_request'
require_relative 'github/commits'
require_relative 'github/commit'
require_relative 'github/contributors'
require_relative 'github/contributor'
require_relative 'github/maintainers'
Expand Down

0 comments on commit 6ce2d02

Please sign in to comment.