forked from scikit-image/scikit-image
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mailmap.py
executable file
·70 lines (50 loc) · 1.65 KB
/
mailmap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python
# Requires the packages 'editdistance' and 'numpy'.
# A mailmap file is used (by GitHub and other tools) to associate multiple
# commit emails with one user. This helps when counting commits,
# contributors, etc.
import subprocess
import shlex
import numpy as np
from collections import defaultdict
from editdistance import eval as dist
# Author names whose edit distance is strictly below this value are merged
# into a single name.
threshold = 5
def call(cmd):
    """Run ``cmd`` and return its standard output as a list of lines.

    The command string is tokenised with ``shlex.split`` and executed
    directly (no shell). The output is read as text and split on newline
    characters, so the last element is an empty string whenever the output
    ends with a newline.
    """
    argv = shlex.split(cmd)
    output = subprocess.check_output(argv, universal_newlines=True)
    return output.split('\n')
def _clean_email(email):
if not '@' in email:
return
name, domain = email.split('@')
name = name.split('+', 1)[0]
return '{}@{}'.format(name, domain).lower()
# Git's '%aN' / '%aE' placeholders honour an existing .mailmap, so delete it
# first in order to work from the raw author names and emails.
call("rm -f .mailmap")
authors = call("git log --format='%aN::%aE'")

# Collect one (name, email) pair per distinct email, in log order.
names, emails = [], []
seen_emails = set()
for author in authors:
    if not author.strip():
        continue
    # Split on the last '::' so that a name which itself contains '::'
    # cannot break the two-way unpacking.
    name, email = author.rsplit('::', 1)
    if email not in seen_emails:
        seen_emails.add(email)
        names.append(name)
        emails.append(email)

# Pairwise edit distances between names. Only the strict lower triangle is
# filled in; every other entry stays infinite and so never passes the
# threshold test below.
N = len(names)
D = np.full((N, N), np.inf)
for i in range(1, N):
    for j in range(i):
        D[i, j] = dist(names[i], names[j])

# Every later name that is fewer than `threshold` edits away from name i
# takes over the spelling currently stored for name i.
for i in range(N):
    dupes, = np.where(D[:, i] < threshold)
    for j in dupes:
        names[j] = names[i]

# Group the normalised emails under their (possibly merged) author name.
mailmap = defaultdict(set)
for name, email in zip(names, emails):
    email = _clean_email(email)
    if email:
        mailmap[name].add(email)

# Keep only authors with at least two distinct emails and a name made of
# at least two words.
mailmap = {
    name: addresses
    for name, addresses in mailmap.items()
    if len(addresses) >= 2 and len(name.split()) >= 2
}

# Emails are sorted because set iteration order is not stable between runs;
# sorting makes the generated output reproducible.
entries = [
    [name] + ['<{}>'.format(address) for address in sorted(addresses)]
    for name, addresses in mailmap.items()
]
# Order the lines by the last word of each author name.
entries.sort(key=lambda entry: entry[0].split()[-1])

for entry in entries:
    print(' '.join(entry))