In [3]:
import datetime
import pandas
import pathlib
from email.parser import HeaderParser

In [4]:
PEP_DIR = pathlib.Path.home() / "devel/peps"
_header_parser = HeaderParser()
print(datetime.datetime.utcnow())

2019-01-08 12:27:50.649452


In [5]:
import re
def _get_names_for_field(value):
    # Gets a list of names for the BDFL-Delegate or Author field
    lines = value.splitlines()
    entries = []
    for line in lines:
        entries.extend(line.split(","))
    names = []
    for entry in entries:
        entry = entry.strip().strip(',').strip()
        # Strip "name <email address>" email addresses
        entry = re.sub(" <.*?>", "", entry)
        # Strip "email address (name)" email addresses
        entry = re.sub(r"[^@]+?@.*? \((.*?)\)", r"\1", entry)
        if entry:
            names.append(entry)
    return names

def extract_headers(pep):   
    headers = {field.lower().replace("-", "_"):value for field, value in pep.items()}
    # First normalise BDFL delegate, and handle co-delegates
    bdfl_delegate = headers.get("bdfl_delegate")
    headers_by_bdfl_delegate = []
    if bdfl_delegate is not None:
        delegate_names = _get_names_for_field(bdfl_delegate)
        for delegate in delegate_names:
            delegate_headers = headers.copy()
            delegate_headers["bdfl_delegate"] = delegate
            headers_by_bdfl_delegate.append(delegate_headers)
    else:
        headers_by_bdfl_delegate.append(headers)
        
    authors = _get_names_for_field(headers["author"])
    headers_by_author = []
    for delegate_headers in headers_by_bdfl_delegate:
        for author in authors:
            author_headers = delegate_headers.copy()
            author_headers["author"] = author
            headers_by_author.append(author_headers)
    return headers_by_author

# With credit to http://beneathdata.com/how-to/email-behavior-analysis/
pep_headers = [_header_parser.parse(pep.open()) for pep in PEP_DIR.glob("*.txt") if not pep.name.endswith("0000.txt")]
all_peps = pandas.DataFrame(header_set for pep in pep_headers for header_set in extract_headers(pep))
final_or_active_peps = all_peps.query("status in ['Final', 'Active']")

In [6]:
def print_metrics(pep_data, prefix = "All"):
    print(prefix + " PEPs:", pep_data.pep.nunique())
    print(prefix + " PEPs authored or co-authored by Guido:", pep_data.author.value_counts()["Guido van Rossum"])
    print(prefix + " PEPs authored or co-authored by me:", pep_data.author.value_counts()["Nick Coghlan"])
    with_delegate = pep_data[pep_data["bdfl_delegate"].notnull()]
    print(prefix + " PEPs with BDFL-Delegate:", with_delegate.pep.nunique())
    print(prefix + " PEPs delegated to me:", with_delegate.bdfl_delegate.value_counts()["Nick Coghlan"])

print_metrics(all_peps)

All PEPs: 429
All PEPs authored or co-authored by Guido: 40
All PEPs authored or co-authored by me: 41
All PEPs with BDFL-Delegate: 49
All PEPs delegated to me: 27


In [7]:
print_metrics(final_or_active_peps, "Final or Active")

Final or Active PEPs: 221
Final or Active PEPs authored or co-authored by Guido: 31
Final or Active PEPs authored or co-authored by me: 15
Final or Active PEPs with BDFL-Delegate: 23
Final or Active PEPs delegated to me: 9


In [8]:
has_bdfl_delegate = all_peps["bdfl_delegate"].notnull()
all_peps[has_bdfl_delegate].bdfl_delegate.value_counts()

Nick Coghlan               27
Richard Jones              12
Barry Warsaw                6
Donald Stufft               6
Benjamin Peterson           5
Antoine Pitrou              4
Eric Snow                   3
Mark Shannon                3
Paul Moore                  2
Martin von Löwis            2
INADA Naoki                 2
Charles-François Natali     1
Brett Cannon                1
Raymond Hettinger           1
Victor Stinner              1
Georg Brandl                1
Name: bdfl_delegate, dtype: int64

In [9]:
final_or_active_peps.author.value_counts()

Guido van Rossum        31
Barry Warsaw            21
Brett Cannon            15
Nick Coghlan            15
Raymond Hettinger        9
Georg Brandl             8
Martin von Löwis         8
Tim Peters               7
Victor Stinner           7
Antoine Pitrou           6
Marc-André Lemburg       6
Eric Snow                5
A.M. Kuchling            5
Jeremy Hylton            4
David Goodger            4
Anthony Baxter           4
Paul Moore               4
Talin                    4
Ka-Ping Yee              4
Yury Selivanov           4
Martin v. Löwis          4
Collin Winter            4
Benjamin Peterson        3
Moshe Zadka              3
Larry Hastings           3
Neil Schemenauer         3
Ethan Furman             3
Neal Norwitz             3
Steve Dower              3
Thomas Wouters           2
                        ..
Neil Hodgson             1
Peter Moody              1
Ben Hoyt                 1
Walter Dörwald           1
Atsuo Ishimoto           1
Alexander Belopolsky     1
T

In [10]:
withdrawn_or_deferred_peps = all_peps.query("status in ['Withdrawn', 'Deferred']")

In [11]:
withdrawn_or_deferred_peps.author.value_counts()

Nick Coghlan                            15
A.M. Kuchling                            4
Barry Warsaw                             4
Guido van Rossum                         2
Skip Montanaro                           2
Antoine Pitrou                           2
Jeremy Hylton                            2
Alex Martelli                            2
Paul Prescod                             2
Georg Brandl                             2
Donald Stufft                            1
Jack Diederich                           1
xscottg at yahoo.com (Scott Gilbert)     1
Victor Stinner                           1
Paul Barrett                             1
Peter Schneider-Kamp                     1
Michael P. Dubner                        1
Gordon McMillan                          1
Clark C. Evans                           1
Lennart Regebro                          1
Charles R. McCreary                      1
Benoit Bryon                             1
Terence Way                              1
Travis Olip