# Which player has played in the most no-hitters?

Kyle Seager played in his 8th no-hitter this week against John Means and the Orioles.  Is that a record?  That can be straightforward to query from retrosheet data:

* Generate a list of no-hitters (any row in gamelog-teams where a team had 0 hits)
* Get all the player appearances for those games (by merging the list of no-hitters with the dailies table)
* Count up each player's appearances and sort

In [1]:
import pandas as pd
import numpy as np
import boxball_loader as bbl
import baseball_stats_utils as  utils

In [2]:
# Get a list of no-hitters (from gamelog-teams)

def get_no_hitters():
    glt = bbl.load_gamelog_teams(bbl.GameType.ALL, seasons=bbl.Eras.All)
    nonos = glt[glt['h']==0]
    cols = ['game_id', 'date', 'yr', 'game_type', 'team', 'opp', 'HA']
    return nonos[cols].rename(columns={'team': 'tm_bat', 'opp': 'tm_pit'})


In [3]:
nonos = get_no_hitters()
nonos

Unnamed: 0,game_id,date,yr,game_type,tm_bat,tm_pit,HA
67264,MLA190106300,1901-06-30,1901,RS,MLA,CLE,H
67373,SLN190107150,1901-07-15,1901,RS,SLN,NY1,H
215669,CHA190209201,1902-09-20,1902,RS,DET,CHA,A
193594,SLN190309142,1903-09-14,1903,RS,SLN,NY1,H
193617,CHN190309182,1903-09-18,1903,RS,CHN,PHI,H
...,...,...,...,...,...,...,...
7867,ANA201907120,2019-07-12,2019,RS,SEA,ANA,A
8167,HOU201908030,2019-08-03,2019,RS,SEA,HOU,A
8566,TOR201909010,2019-09-01,2019,RS,TOR,HOU,H
161088,CHA202008250,2020-08-25,2020,RS,PIT,CHA,A


In [4]:
# Find all players who've played in no-hitters (from dailies)
def get_players_in_games(games):
    df = bbl.load_dailies(game_types=bbl.GameType.ALL)
    appearances = pd.merge(left=games, right=df[['game_id', 'player_id', 'game_dt', 'team_id']], on='game_id')
    return appearances

In [5]:
nono_players = get_players_in_games(nonos)
nono_players

Unnamed: 0,game_id,date,yr,game_type,tm_bat,tm_pit,HA,player_id,game_dt,team_id
0,MLA190106300,1901-06-30,1901,RS,MLA,CLE,H,andej101,1901-06-30,MLA
1,MLA190106300,1901-06-30,1901,RS,MLA,CLE,H,burkj104,1901-06-30,MLA
2,MLA190106300,1901-06-30,1901,RS,MLA,CLE,H,conrw101,1901-06-30,MLA
3,MLA190106300,1901-06-30,1901,RS,MLA,CLE,H,frieb103,1901-06-30,MLA
4,MLA190106300,1901-06-30,1901,RS,MLA,CLE,H,gilbb103,1901-06-30,MLA
...,...,...,...,...,...,...,...,...,...,...
6419,MIL202009130,2020-09-13,2020,RS,MIL,CHN,H,heywj001,2020-09-13,CHN
6420,MIL202009130,2020-09-13,2020,RS,MIL,CHN,H,kipnj001,2020-09-13,CHN
6421,MIL202009130,2020-09-13,2020,RS,MIL,CHN,H,milla003,2020-09-13,CHN
6422,MIL202009130,2020-09-13,2020,RS,MIL,CHN,H,rizza001,2020-09-13,CHN


In [6]:
# Aggregate the appearances by player
# Select the first and latest nono appearance
# Count the number of no-hitters "for" and "against"
def get_nono_leaders(nono_players):
    nono_players['dt2'] = nono_players['game_dt']
    nono_players['for'] = nono_players['team_id'] == nono_players['tm_pit']
    nono_players['against'] = nono_players['team_id'] == nono_players['tm_bat']
    nono_agg = nono_players.groupby('player_id').agg({'game_id': len, 'game_dt': min, 'dt2': max, 'for': sum, 'against': sum})
    return nono_agg.rename(columns={'game_id': 'num', 'game_dt': 'first', 'dt2': 'latest'}).sort_values(by='num', ascending=False)

In [7]:
nono_leaders = get_nono_leaders(nono_players)
nono_leaders['name'] = utils.get_player_names_df(nono_leaders, 'retro_id')
nono_leaders

Unnamed: 0_level_0,num,first,latest,for,against,name
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
campb101,11,1968-05-08,1983-07-04,5,6,Bert Campaneris
jackr001,9,1968-05-08,1986-09-19,4,5,Reggie Jackson
alouf101,9,1959-06-12,1970-09-21,4,5,Felipe Alou
hooph101,8,1911-07-29,1922-04-30,6,2,Harry Hooper
davig102,8,1901-07-15,1908-10-02,5,3,George Davis
...,...,...,...,...,...,...
cunnb102,1,1924-07-17,1924-07-17,0,1,Bill Cunningham
mannr101,1,1908-06-30,1908-06-30,0,1,Rube Manning
manoj001,1,2006-10-01,2006-10-01,0,1,Julio Manon
mansj001,1,2014-05-25,2014-05-25,0,1,Jeff Manship


In [8]:
# Format into markdown
def print_nono_leaders(nono_leaders):
    df = nono_leaders[['name', 'num', 'first', 'latest', 'for', 'against']]
    df['first'] = df['first'].apply(lambda x: x.date())
    df['latest'] = df['latest'].apply(lambda x: x.date())
    md = df.to_markdown(index=False)
    print(md)

In [9]:
# Players in most no-hitters
print_nono_leaders(nono_leaders.head(20))

| name            |   num | first      | latest     |   for |   against |
|:----------------|------:|:-----------|:-----------|------:|----------:|
| Bert Campaneris |    11 | 1968-05-08 | 1983-07-04 |     5 |         6 |
| Reggie Jackson  |     9 | 1968-05-08 | 1986-09-19 |     4 |         5 |
| Felipe Alou     |     9 | 1959-06-12 | 1970-09-21 |     4 |         5 |
| Harry Hooper    |     8 | 1911-07-29 | 1922-04-30 |     6 |         2 |
| George Davis    |     8 | 1901-07-15 | 1908-10-02 |     5 |         3 |
| Orlando Cepeda  |     8 | 1959-06-12 | 1969-08-19 |     4 |         4 |
| Johnny Callison |     8 | 1960-08-18 | 1971-06-03 |     2 |         6 |
| Pee Wee Reese   |     8 | 1940-04-30 | 1956-10-08 |     6 |         2 |
| Pete Runnels    |     8 | 1952-05-15 | 1964-04-23 |     4 |         4 |
| Billy Maloney   |     8 | 1901-06-30 | 1908-08-06 |     1 |         7 |
| Burt Shotton    |     8 | 1911-07-29 | 1919-05-11 |     3 |         5 |
| Chase Utley     |     8 | 2003-04-27

In [10]:
# Most no-hitters against
print_nono_leaders(nono_leaders.sort_values(by='against', ascending=False).head(10))

| name            |   num | first      | latest     |   for |   against |
|:----------------|------:|:-----------|:-----------|------:|----------:|
| Billy Maloney   |     8 | 1901-06-30 | 1908-08-06 |     1 |         7 |
| Bert Campaneris |    11 | 1968-05-08 | 1983-07-04 |     5 |         6 |
| Johnny Callison |     8 | 1960-08-18 | 1971-06-03 |     2 |         6 |
| Doc Casey       |     7 | 1903-09-18 | 1907-09-20 |     1 |         6 |
| Tony Taylor     |     7 | 1960-08-18 | 1969-04-17 |     1 |         6 |
| Sal Bando       |     8 | 1968-05-08 | 1976-07-28 |     3 |         5 |
| John Hummel     |     6 | 1906-05-01 | 1915-04-15 |     1 |         5 |
| Deron Johnson   |     7 | 1964-04-23 | 1973-07-30 |     2 |         5 |
| Phil Lewis      |     7 | 1906-05-01 | 1908-09-05 |     2 |         5 |
| Tim Jordan      |     7 | 1906-05-01 | 1908-09-05 |     2 |         5 |


In [11]:
# Look at Campy's 11 no-hitters
nono_players[nono_players['player_id']=='campb101']

Unnamed: 0,game_id,date,yr,game_type,tm_bat,tm_pit,HA,player_id,game_dt,team_id,dt2,for,against
2892,OAK196805080,1968-05-08,1968,RS,MIN,OAK,A,campb101,1968-05-08,OAK,1968-05-08,True,False
3040,BAL196908130,1969-08-13,1969,RS,OAK,BAL,A,campb101,1969-08-13,OAK,1969-08-13,False,True
3141,CAL197007030,1970-07-03,1970,RS,OAK,CAL,A,campb101,1970-07-03,OAK,1970-07-03,False,True
3189,OAK197009210,1970-09-21,1970,RS,MIN,OAK,A,campb101,1970-09-21,OAK,1970-09-21,True,False
3403,OAK197307300,1973-07-30,1973,RS,OAK,TEX,H,campb101,1973-07-30,OAK,1973-07-30,False,True
3472,CLE197407190,1974-07-19,1974,RS,OAK,CLE,A,campb101,1974-07-19,OAK,1974-07-19,False,True
3565,OAK197509280,1975-09-28,1975,RS,CAL,OAK,A,campb101,1975-09-28,OAK,1975-09-28,True,False
3625,OAK197607280,1976-07-28,1976,RS,OAK,CHA,H,campb101,1976-07-28,OAK,1976-07-28,False,True
3698,KCA197705140,1977-05-14,1977,RS,TEX,KCA,A,campb101,1977-05-14,TEX,1977-05-14,False,True
3755,CAL197709220,1977-09-22,1977,RS,CAL,TEX,H,campb101,1977-09-22,TEX,1977-09-22,True,False
