Permalink
Browse files

Add support for multialiases; treat "Carlhuda" and "tomhuda" as multialiases.
  • Loading branch information...
fxn committed Jan 11, 2015
1 parent 498012b commit d8b9749620bebabac7788e6ef1817de4ccb81a2e
@@ -123,7 +123,7 @@ def handle_false_positives(names)
end

# Maps every raw name in +names+ to its canonical form and returns one flat
# array of canonical names.
#
# NamesManager.canonical_name_for returns either a single name or, for a
# multialias such as "Carlhuda", an array of names. flat_map splices those
# arrays into the result, so each name is canonicalized exactly once.
def canonicalize(names)
  names.flat_map { |name| NamesManager.canonical_name_for(name, author_email) }
end

def extract_contributor_names_from_message
@@ -1,5 +1,66 @@
module NamesManager
module CanonicalNames
# Resolves +name+ to its canonical form.
#
# The name is sanitized first: the first <...> segment (an email address in
# angles) is dropped, leading/trailing whitespace is stripped, and one
# leading and one trailing Markdown * are removed. The sanitized name is then
# looked up, in this order, via email-based disambiguation, the multialias
# table, and CANONICAL_NAME_FOR. If no equivalence is known the sanitized
# string is the canonical name by definition.
#
# The result is a string, or an array of strings for a multialias.
def canonical_name_for(name, email)
  # sub and strip return copies, so the caller's string is never mutated.
  sanitized = name.sub(/<[^>]+>/, '').strip

  # Commit 28d52c59f2cb32180ca24770bf95597ea3ad8198 for example uses
  # Markdown in the commit message: [*Godfrey Chan*, *Aaron Patterson*].
  # That is exceptional, so only the surrounding *s are special-cased
  # instead of doing anything more generic.
  sanitized = sanitized.sub(/\A\*/, '').sub(/\*\z/, '')

  resolved = disambiguate(sanitized, email)
  resolved ||= multialias(sanitized)
  resolved ||= CANONICAL_NAME_FOR[sanitized]
  resolved || sanitized
end

private

# Resolves names that are only meaningful together with the author email.
#
# Returns the canonical name for a known (name, email) pair, and nil for
# every other combination.
def disambiguate(name, email)
  # "\100" is the octal escape for "@".
  case [name, email]
  when ['abhishek', 'abhishek.jain@vinsol.com'] then 'Abhishek Jain'
  when ['abhishek', 'bigbeliever@gmail.com'] then 'Abhishek Yadav'
  when ['Sam', 'sam.saffron@gmail.com'] then 'Sam Saffron'
  when ['root', "mohamed.o.alnagdy\100gmail.com"] then 'Mohamed Osama'
  when ['unknown', "agrimm\100.(none)"] then 'Andrew Grimm'
  when ['unknown', "jeko1\100.npfit.nhs.uk"] then 'Jens Kolind'
  when ['David', "david\100loudthinking.com"] then 'David Heinemeier Hansson'
  when ['David', "DevilDavidWang\100gmail.com"] then 'David Wang'
  when ['', "JRadosz\100gmail.com"] then 'Jarek Radosz'
  end
end

# Expands a multialias, a single name that stands for two contributors.
#
# Returns a new two-element array of canonical names for 'Carlhuda' and
# 'tomhuda', and nil for any other name.
def multialias(name)
  # Both known multialiases pair Yehuda Katz with one other person.
  partner = { 'Carlhuda' => 'Carl Lerche', 'tomhuda' => 'Tom Dale' }[name]
  ['Yehuda Katz', partner] if partner
end

# canonical name => handlers, emails, typos, etc.
SEEN_ALSO_AS = {}
def self.map(canonical_name, *also_as)
@@ -958,55 +1019,5 @@ def self.map(canonical_name, *also_as)
end
end
end

# Computes the canonical name for +name+.
#
# Sanitizing removes the first <...> segment (an email address in angles),
# leading/trailing whitespace, and one leading and one trailing Markdown *.
# A name with no known equivalence is canonical, by definition, in its
# sanitized form.
def canonical_name_for(name, email)
  # Commit 28d52c59f2cb32180ca24770bf95597ea3ad8198 for example uses
  # Markdown in the commit message: [*Godfrey Chan*, *Aaron Patterson*].
  # This is really exceptional, hence the two narrow * removals instead of
  # anything more generic.
  clean = name.sub(/<[^>]+>/, '').strip.sub(/\A\*/, '').sub(/\*\z/, '')

  # An email-based match takes precedence over the generic table.
  by_email = disambiguate(clean, email)
  return by_email if by_email

  CANONICAL_NAME_FOR[clean] || clean
end

# Tells apart contributors who share the same name by looking at the author
# email.
#
# Returns the canonical name registered for the given name and email, or
# nil when the name or the email is not listed.
def disambiguate(name, email)
  # name => { email => canonical name }; "\100" is the octal escape for "@".
  known = {
    'abhishek' => {
      'abhishek.jain@vinsol.com' => 'Abhishek Jain',
      'bigbeliever@gmail.com' => 'Abhishek Yadav'
    },
    'Sam' => {
      'sam.saffron@gmail.com' => 'Sam Saffron'
    },
    'root' => {
      "mohamed.o.alnagdy\100gmail.com" => 'Mohamed Osama'
    },
    'unknown' => {
      "agrimm\100.(none)" => 'Andrew Grimm',
      "jeko1\100.npfit.nhs.uk" => 'Jens Kolind'
    },
    'David' => {
      "david\100loudthinking.com" => 'David Heinemeier Hansson',
      "DevilDavidWang\100gmail.com" => 'David Wang'
    },
    '' => {
      "JRadosz\100gmail.com" => 'Jarek Radosz'
    }
  }

  by_email = known[name]
  by_email && by_email[email]
end
end
end
@@ -36,10 +36,6 @@ def handle_false_positives(name)
when /skip[ -]ci/i
when 'key'
when '.lock'
when 'Carlhuda'
['Yehuda Katz', 'Carl Lerche']
when 'tomhuda'
['Yehuda Katz', 'Tom Dale']
when "schoenm\100earthlink.net sandra.metz\100duke.edu"
name.split
when '=?utf-8?q?Adam=20Cig=C3=A1nek?='
@@ -556,6 +556,10 @@ class CanonicalNamesTest < ActiveSupport::TestCase
assert_contributor_names '50ee332', 'Dane Jensen'
end

# 'Carlhuda' is a multialias: the single name must credit both
# Yehuda Katz and Carl Lerche.
test 'Carlhuda' do
assert_contributor_names 'c102db9', 'Yehuda Katz', 'Carl Lerche'
end

# The name 'CassioMarques' must resolve to the accented canonical name.
# NOTE(review): '053afbe' looks like an abbreviated commit SHA; confirm
# against assert_contributor_names.
test 'CassioMarques' do
assert_contributor_names '053afbe', 'Cássio Marques'
end
@@ -4168,6 +4172,10 @@ class CanonicalNamesTest < ActiveSupport::TestCase
assert_contributor_names 'ccb87e2', 'Tom Ward'
end

# 'tomhuda' is a multialias: the single name must credit both
# Yehuda Katz and Tom Dale.
test 'tomhuda' do
assert_contributor_names '00be5bd', 'Yehuda Katz', 'Tom Dale'
end

# The name 'TomK32' must resolve to the canonical name 'Thomas R. Koll'.
# NOTE(review): '235cd21' looks like an abbreviated commit SHA; confirm
# against assert_contributor_names.
test 'TomK32' do
assert_contributor_names '235cd21', 'Thomas R. Koll'
end
@@ -149,14 +149,6 @@ class FalsePositivesTest < ActiveSupport::TestCase
assert_contributor_names 'c71b961', 'Ryan Bigg'
end

# The single name 'Carlhuda' stands for two people and must be credited
# to both Yehuda Katz and Carl Lerche.
test 'Carlhuda' do
assert_contributor_names 'c102db9', 'Yehuda Katz', 'Carl Lerche'
end

# The single name 'tomhuda' stands for two people and must be credited
# to both Yehuda Katz and Tom Dale.
test 'tomhuda' do
assert_contributor_names '00be5bd', 'Yehuda Katz', 'Tom Dale'
end

# The extracted name consists of two email addresses separated by a space
# ("\100" is the octal escape for "@"); each address must be credited to
# its own contributor.
test "schoenm\100earthlink.net sandra.metz\100duke.edu" do
assert_contributor_names '242cd06', 'Michael Schoen', 'Sandi Metz'
end

0 comments on commit d8b9749

Please sign in to comment.