In [4]:
import pandas
import pathlib
from email.parser import HeaderParser

In [15]:
PEP_DIR = pathlib.Path.home() / "devel/peps"
_header_parser = HeaderParser()

In [134]:
import re
def _get_names_for_field(value):
    # Gets a list of names for the BDFL-Delegate or Author field
    lines = value.splitlines()
    entries = []
    for line in lines:
        entries.extend(line.split(","))
    names = []
    for entry in entries:
        entry = entry.strip().strip(',').strip()
        # Strip "name <email address>" email addresses
        entry = re.sub(" <.*?>", "", entry)
        # Strip "email address (name)" email addresses
        entry = re.sub(r"[^@]+?@.*? \((.*?)\)", r"\1", entry)
        if entry:
            names.append(entry)
    return names

def extract_headers(pep):   
    headers = {field.lower().replace("-", "_"):value for field, value in pep.items()}
    # First normalise BDFL delegate, and handle co-delegates
    bdfl_delegate = headers.get("bdfl_delegate")
    headers_by_bdfl_delegate = []
    if bdfl_delegate is not None:
        delegate_names = _get_names_for_field(bdfl_delegate)
        for delegate in delegate_names:
            delegate_headers = headers.copy()
            delegate_headers["bdfl_delegate"] = delegate
            headers_by_bdfl_delegate.append(delegate_headers)
    else:
        headers_by_bdfl_delegate.append(headers)
        
    authors = _get_names_for_field(headers["author"])
    headers_by_author = []
    for delegate_headers in headers_by_bdfl_delegate:
        for author in authors:
            author_headers = delegate_headers.copy()
            author_headers["author"] = author
            headers_by_author.append(author_headers)
    return headers_by_author

# With credit to http://beneathdata.com/how-to/email-behavior-analysis/
pep_headers = [_header_parser.parse(pep.open()) for pep in PEP_DIR.glob("*.txt") if not pep.name.endswith("0000.txt")]
all_peps = pandas.DataFrame(header_set for pep in pep_headers for header_set in extract_headers(pep))
final_or_active_peps = all_peps.query("status in ['Final', 'Active']")

In [155]:
def print_metrics(pep_data, prefix = "All"):
    print(prefix + " PEPs:", pep_data.pep.nunique())
    print(prefix + " PEPs authored or co-authored by Guido:", pep_data.author.value_counts()["Guido van Rossum"])
    print(prefix + " PEPs authored or co-authored by me:", pep_data.author.value_counts()["Nick Coghlan"])
    with_delegate = pep_data[pep_data["bdfl_delegate"].notnull()]
    print(prefix + " PEPs with BDFL-Delegate:", with_delegate.pep.nunique())
    print(prefix + " PEPs delegated to me:", with_delegate.bdfl_delegate.value_counts()["Nick Coghlan"])

print_metrics(all_peps)

All PEPs: 426
All PEPs authored or co-authored by Guido: 40
All PEPs authored or co-authored by me: 41
All PEPs with BDFL-Delegate: 47
All PEPs delegated to me: 27


In [156]:
print_metrics(final_or_active_peps, "Final or Active")

Final or Active PEPs: 217
Final or Active PEPs authored or co-authored by Guido: 31
Final or Active PEPs authored or co-authored by me: 14
Final or Active PEPs with BDFL-Delegate: 22
Final or Active PEPs delegated to me: 9


In [158]:
has_bdfl_delegate = all_peps["bdfl_delegate"].notnull()
all_peps[has_bdfl_delegate].bdfl_delegate.value_counts()

Nick Coghlan               27
Richard Jones              12
Barry Warsaw                7
Donald Stufft               6
Antoine Pitrou              4
Eric Snow                   3
Mark Shannon                3
Benjamin Peterson           3
Martin von Löwis            2
Paul Moore                  2
Charles-François Natali     1
Raymond Hettinger           1
Brett Cannon                1
Victor Stinner              1
Georg Brandl                1
Name: bdfl_delegate, dtype: int64

In [159]:
final_or_active_peps.author.value_counts()

Guido van Rossum      31
Barry Warsaw          20
Brett Cannon          15
Nick Coghlan          14
Raymond Hettinger      9
Martin von Löwis       8
Victor Stinner         7
Georg Brandl           7
Tim Peters             6
Marc-André Lemburg     6
Antoine Pitrou         6
A.M. Kuchling          5
Eric Snow              5
Ka-Ping Yee            4
Paul Moore             4
Talin                  4
Anthony Baxter         4
Jeremy Hylton          4
Collin Winter          4
Yury Selivanov         4
David Goodger          4
Martin v. Löwis        4
Benjamin Peterson      3
Steve Dower            3
Ethan Furman           3
Larry Hastings         3
Moshe Zadka            3
Neil Schemenauer       3
Neal Norwitz           3
Steven Bethard         2
                      ..
Jason Orendorff        1
Jeffrey Yasskin        1
Koos Zevenhoven        1
Daniel Holth           1
Jim J. Jewett          1
Kevin Altis            1
Ivan Levkivskyi        1
Christopher Barker     1
Mike Verdone           1


In [160]:
withdrawn_or_deferred_peps = all_peps.query("status in ['Withdrawn', 'Deferred']")

In [161]:
withdrawn_or_deferred_peps.author.value_counts()

Nick Coghlan                            15
A.M. Kuchling                            4
Barry Warsaw                             4
Skip Montanaro                           2
Guido van Rossum                         2
Jeremy Hylton                            2
Georg Brandl                             2
Antoine Pitrou                           2
Alex Martelli                            2
Paul Prescod                             2
Greg Wilson                              1
Ben Finney                               1
Collin Winter                            1
Clark C. Evans                           1
Steven Bethard                           1
Reid Kleckner                            1
Victor Stinner                           1
Roman Suzi                               1
Jonathan Riehl                           1
Thomas Heller                            1
Paul Barrett                             1
Lennart Regebro                          1
xscottg at yahoo.com (Scott Gilbert)     1
Oleg Broytm