<a href="https://colab.research.google.com/github/psb-david-petty/google-colaboratory/blob/master/odrisamer_despaigne_club.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Unique MLB Players' Names

The [*Odrisamer Despaigne Club*](https://www.baseball-reference.com/players/d/despaod01.shtml) is a club of [Major League Baseball](https://www.mlb.com/) players whose first and last names are unique. As of 2021/08 there are 399 members.

- This [Jupyter](http://jupyter.org/) notebook accesses and parses the player database on [https://www.baseball-reference.com/players/](https://www.baseball-reference.com/players/) and creates sets of first and last names.
- It then looks for players whose first names are unique among first names *and* whose last names are unique among last names (including the 405 players listed by last name only).
- Finally, it prints the names of the club members &mdash; strict and non-strict.

Some favorites: Creepy Crespi, Pickles Dillhoefer, Pretzels Getzien, Vinegar Bend Mizell, and Twink Twining. Non-strict members Cisco Carlos and Ewing Waddy have unique first and last names, but their first names also occur as another player's last name *and* their last names also occur as another player's first name.

In [1]:
#!/usr/bin/env python3
#
# Look up baseball players and find members of the Odrisamer Despaigne Club.
#
import lxml.etree, lxml.html, os, sys, urllib.request

!pip install cssselect

def names(alphabet='abcdefghijklmnopqrstuvwxyz', verbose=False):
    """Return a dictionary of player names scraped from the alphabetical
    listings on baseball-reference.com.

    For every character of alphabet (lower-cased), the matching index page
    is fetched and the text of each anchor selected by '#div_players_ a' is
    collected. The result maps the lower-case letter to that list of texts.
    When verbose is true, the count, letter and list are printed per page."""
    # One parser instance is shared by every page request.
    parser = lxml.etree.HTMLParser(encoding='latin1')
    result = {}
    for letter in alphabet.lower():
        url = f"https://www.baseball-reference.com/players/{letter}/index.shtml"
        with urllib.request.urlopen(url) as response:
            doc = lxml.html.parse(response, parser=parser).getroot()
            names = [elem.text for elem in doc.cssselect('#div_players_ a')]
        if verbose:
            print(f"{len(names):5d} '{letter.upper()}' {names}")
        result[letter] = names
    return result

def initials(l):
    """Return l with two separated initial letters formatted into a single
    name, e.g. ['J', 'C', 'HARTMAN'] -> ['J.C.', 'HARTMAN'].

    The (up to) two leading items are inspected with their periods removed.
    If any of them is not exactly one character long, l itself is returned
    unchanged; otherwise they are joined with periods, a trailing period is
    added, and the remaining items follow."""
    leading = [s.replace('.', '') for s in l[:2]]
    for letters in leading:
        if len(letters) != 1:
            # Not a pair of bare initials: hand back the input untouched.
            return l
    return ['.'.join(leading) + '.'] + l[2:]

def parse(name):
    """Return first, last parsed from name on whitespace, both upper-cased.

    The name is split on any run of whitespace, so doubled, leading or
    trailing spaces never produce empty name parts. A name with only one
    part is returned as ('', LAST). There are a number of special cases:
    - When there are 3 parts, it often means multiple first names, except a
      last name w/ one of prefix3 (second part) or suffix3 (third part).
    - When there are 4 parts, it often means multiple first names, or a
      middle initial, except a last name w/ one of prefix4 (second part).
    - There is some attempt at normalization (for comparison), including:
    -- 'J C Hartman' -> 'J.C.', 'HARTMAN'
    -- 'Steel Arm Johnny Taylor' -> 'STEEL-ARM-JOHNNY', 'TAYLOR'
    - Formatting on baseball-reference.com is fairly consistent, so there is
      no attempt to comprehensively cover all cases."""
    prefix3 = {'DE', 'DEL', 'DEN', 'DES', 'LA', 'LO', 'SANTO', 'ST.',
        'VAN', 'VANDE', 'VANDER', 'VON', 'YELLOW', }
    suffix3 = {'III', 'JR', 'JR.', 'SR.', }
    prefix4 = {'DE', 'PONCE', 'VAN', }
    # split() with no argument collapses whitespace runs and ignores leading
    # and trailing whitespace; split(' ') would yield '' parts instead.
    tokens = name.upper().split()
    names = initials(tokens)
    if len(names) == 1:
        # Last name only; return the name as written (whitespace-normalized)
        # rather than the initials-formatted token.
        return '', ' '.join(tokens)
    if len(names) == 2:
        # First Last
        return names[0], names[1]
    if len(names) == 3 and (names[1] in prefix3 or names[2] in suffix3):
        # First + two-part last name (prefixed or suffixed).
        return names[0], ' '.join(names[1: ])
    if len(names) == 4 and names[1] in prefix4:
        # First + three-part last name.
        return names[0], ' '.join(names[1: ])
    # Single Last name; everything before it is a hyphen-joined first name.
    return '-'.join(names[ :-1]), names[-1]

def odrisamer_despaigne(names, strict=False):
    """Return dictionaries for first and last, and list of unique, names.

    names is a dictionary mapping a letter to a list of player-name strings
    (the structure returned by names()). Every name is split with parse().

    Returns a tuple (first_dict, last_dict, unique):
    - first_dict maps each first name to the list of last names seen with it
      (players listed by last name only are under the key '').
    - last_dict maps each last name to the list of first names seen with it.
    - unique is a list of (first, last) tuples where the first name has
      exactly one last name and that last name has exactly one first name.
      When strict is true, the first name must additionally not occur as a
      last name and the last name must not occur as a first name."""
    # Create first and last name dictionaries from the names passed in.
    first_dict, last_dict = dict(), dict()
    for listing in names.values():
        for name in listing:
            first, last = parse(name)
            first_dict.setdefault(first, []).append(last)
            last_dict.setdefault(last, []).append(first)
    # Last names of players listed by last name only (possibly none).
    last_only = first_dict.get('', [])
    # Calculate unique names where 1 LN for a FN and 1 FN for that LN.
    unique = [(k, v[0], ) for k, v in first_dict.items()
        if len(v) == 1 and len(last_dict[v[0]]) == 1 and v[0] not in last_only
            and (not strict or (k not in last_dict and v[0] not in first_dict))]
    return first_dict, last_dict, unique

if __name__ == '__main__':
    # Only run the (slow, network-bound) lookup in an interactive environment:
    # IDLE, PyCharm (PYCHARM environment variable set non-zero), or a context
    # such as Jupyter where __file__ is not defined.
    is_idle, is_pycharm, is_jupyter = (
        'idlelib' in sys.modules,
        int(os.getenv('PYCHARM', 0)),
        '__file__' not in globals()
    )
    if any((is_idle, is_pycharm, is_jupyter,)):
        # Parse MLB players from https://www.baseball-reference.com/players/.
        players = names(verbose=True)
        total = sum(len(listing) for listing in players.values())
        print(f"{total:5d} {players}")
        # Find members of the Odrisamer Despaigne club and print them.
        first, last, unique = odrisamer_despaigne(players)
        last_only = first.get('', [])
        print(f"{len(first):5d}     FIRST: {first}")
        print(f"{len(last):5d}      LAST: {last}")
        print(f"{len(last_only):5d} LAST ONLY: {last_only}")

        print("################ Odrisamer Despaigne Club members:")
        print(f"{len(unique):5d} {unique}")
        for fn, ln in unique:
            # Annotate members that would not pass the strict test.
            print(f"* {fn} {ln}"
                f"{'' if fn not in last else f' ({fn} is a last name)'}"
                f"{'' if ln not in first else f' ({ln} is a first name)'}")
        first, last, strict = odrisamer_despaigne(players, strict=True)
        print(f"{len(strict):5d} strict members: {strict}")
        nonstrict = set(unique) - set(strict)
        print(f"{len(nonstrict):5d} non-strict members: {nonstrict}")

  694 'A' ['David Aardsma', 'Henry Aaron', 'Tommie Aaron', 'Don Aase', 'Andy Abad', 'Fernando Abad', 'John Abadie', 'Ed Abbaticchio', 'Bert Abbey', 'Charlie Abbey', 'Cory Abbott', 'Dan Abbott', 'Fred Abbott', 'Glenn Abbott', 'Jeff Abbott', 'Jim Abbott', 'Kurt Abbott', 'Kyle Abbott', 'Ody Abbott', 'Paul Abbott', 'Al Aber', 'Frank Abercrombie', 'Reggie Abercrombie', 'Bill Abernathie', 'Brent Abernathy', 'James Abernathy', 'Ted Abernathy', 'Ted Abernathy', 'Woody Abernathy', 'Cliff Aberson', 'Harry Ables', 'Shawn Abner', 'Cal Abrams', 'George Abrams', 'Johnny Abrego', 'Albert Abreu', 'Bobby Abreu', 'Bryan Abreu', 'Eufemio Abreu', 'Joe Abreu', 'José Abreu', 'Juan Abreu', 'Tony Abreu', 'Winston Abreu', 'Bill Abstein', 'Jeremy Accardo', 'Domingo Acevedo', 'Jose Acevedo', 'Juan Acevedo', 'Alfredo Aceves', 'A.J. Achter', 'Jim Acker', 'Tom Acker', 'Dustin Ackley', 'Fritz Ackley', 'Cy Acosta', 'Ed Acosta', 'José Acosta', 'Manny Acosta', 'Merito Acosta', 'Mark Acre', 'Ronald Acuna Jr.', 'Jerry Ad