In [1]:
import datetime
import pandas
import pathlib
from email.parser import HeaderParser

In [2]:
PEP_DIR = pathlib.Path.home() / "devel/peps"
_header_parser = HeaderParser()
print(datetime.datetime.utcnow())

2017-11-25 06:47:08.387126


In [3]:
import re
def _get_names_for_field(value):
    # Gets a list of names for the BDFL-Delegate or Author field
    lines = value.splitlines()
    entries = []
    for line in lines:
        entries.extend(line.split(","))
    names = []
    for entry in entries:
        entry = entry.strip().strip(',').strip()
        # Strip "name <email address>" email addresses
        entry = re.sub(" <.*?>", "", entry)
        # Strip "email address (name)" email addresses
        entry = re.sub(r"[^@]+?@.*? \((.*?)\)", r"\1", entry)
        if entry:
            names.append(entry)
    return names

def extract_headers(pep):   
    headers = {field.lower().replace("-", "_"):value for field, value in pep.items()}
    # First normalise BDFL delegate, and handle co-delegates
    bdfl_delegate = headers.get("bdfl_delegate")
    headers_by_bdfl_delegate = []
    if bdfl_delegate is not None:
        delegate_names = _get_names_for_field(bdfl_delegate)
        for delegate in delegate_names:
            delegate_headers = headers.copy()
            delegate_headers["bdfl_delegate"] = delegate
            headers_by_bdfl_delegate.append(delegate_headers)
    else:
        headers_by_bdfl_delegate.append(headers)
        
    authors = _get_names_for_field(headers["author"])
    headers_by_author = []
    for delegate_headers in headers_by_bdfl_delegate:
        for author in authors:
            author_headers = delegate_headers.copy()
            author_headers["author"] = author
            headers_by_author.append(author_headers)
    return headers_by_author

# With credit to http://beneathdata.com/how-to/email-behavior-analysis/
pep_headers = [_header_parser.parse(pep.open()) for pep in PEP_DIR.glob("*.txt") if not pep.name.endswith("0000.txt")]
all_peps = pandas.DataFrame(header_set for pep in pep_headers for header_set in extract_headers(pep))
final_or_active_peps = all_peps.query("status in ['Final', 'Active']")

In [4]:
def print_metrics(pep_data, prefix = "All"):
    print(prefix + " PEPs:", pep_data.pep.nunique())
    print(prefix + " PEPs authored or co-authored by Guido:", pep_data.author.value_counts()["Guido van Rossum"])
    print(prefix + " PEPs authored or co-authored by me:", pep_data.author.value_counts()["Nick Coghlan"])
    with_delegate = pep_data[pep_data["bdfl_delegate"].notnull()]
    print(prefix + " PEPs with BDFL-Delegate:", with_delegate.pep.nunique())
    print(prefix + " PEPs delegated to me:", with_delegate.bdfl_delegate.value_counts()["Nick Coghlan"])

print_metrics(all_peps)

All PEPs: 428
All PEPs authored or co-authored by Guido: 40
All PEPs authored or co-authored by me: 41
All PEPs with BDFL-Delegate: 49
All PEPs delegated to me: 27


In [5]:
print_metrics(final_or_active_peps, "Final or Active")

Final or Active PEPs: 221
Final or Active PEPs authored or co-authored by Guido: 31
Final or Active PEPs authored or co-authored by me: 15
Final or Active PEPs with BDFL-Delegate: 23
Final or Active PEPs delegated to me: 9


In [6]:
has_bdfl_delegate = all_peps["bdfl_delegate"].notnull()
all_peps[has_bdfl_delegate].bdfl_delegate.value_counts()

Nick Coghlan               27
Richard Jones              12
Donald Stufft               6
Barry Warsaw                6
Benjamin Peterson           5
Antoine Pitrou              4
Mark Shannon                3
Eric Snow                   3
INADA Naoki                 2
Martin von Löwis            2
Paul Moore                  2
Raymond Hettinger           1
Georg Brandl                1
Victor Stinner              1
Charles-François Natali     1
Brett Cannon                1
Name: bdfl_delegate, dtype: int64

In [7]:
final_or_active_peps.author.value_counts()

Guido van Rossum                             31
Barry Warsaw                                 20
Nick Coghlan                                 15
Brett Cannon                                 15
Raymond Hettinger                             9
Victor Stinner                                8
Georg Brandl                                  8
Martin von Löwis                              8
Tim Peters                                    7
Marc-André Lemburg                            6
Antoine Pitrou                                6
A.M. Kuchling                                 5
Eric Snow                                     5
Martin v. Löwis                               4
Talin                                         4
Ka-Ping Yee                                   4
Jeremy Hylton                                 4
Yury Selivanov                                4
Anthony Baxter                                4
Paul Moore                                    4
Collin Winter                           

In [8]:
withdrawn_or_deferred_peps = all_peps.query("status in ['Withdrawn', 'Deferred']")

In [9]:
withdrawn_or_deferred_peps.author.value_counts()

Nick Coghlan                            16
Barry Warsaw                             4
A.M. Kuchling                            4
Antoine Pitrou                           2
Georg Brandl                             2
Guido van Rossum                         2
Jeremy Hylton                            2
Skip Montanaro                           2
Donald Stufft                            2
Alex Martelli                            2
Paul Prescod                             2
Reid Kleckner                            1
Phillip J. Eby                           1
Sean Reifschneider                       1
Benoit Bryon                             1
Jack Diederich                           1
Victor Stinner                           1
Martin von Löwis                         1
Ben Finney                               1
Chris McDonough                          1
Clark C. Evans                           1
Armin Ronacher                           1
Jervis Whitley                           1
Collin Wint