Mike Trout homered on his birthday tonight, and it is being reported as his 5th birthday HR.  How does that compare to baseball history?  Here are the leaders through 2019.

In [1]:
import pandas as pd
import calendar

In [2]:
# We need two tables: daily for the game-by-game stats, and people to get the players' birthdays (and their names)
dailies = pd.read_parquet('../data/mine/daily.parquet')

# Ask the parquet reader for only the columns used below, rather than loading
# the whole people table and slicing it afterwards.
people = pd.read_parquet(
    '../data/bd/people.parquet',
    columns=['retro_id', 'name_first', 'name_last', 'birth_month', 'birth_day'],
)

In [3]:
# Find games where batters homered, and extract the month/day to join on birthday.
# .loc[...] + .copy() gives hr_games its own data, so adding columns below does not
# write into a slice of dailies (and does not raise SettingWithCopyWarning).
hr_games = dailies.loc[dailies['b_hr'] > 0, ['game_dt', 'player_id', 'b_hr']].copy()

# pd.to_datetime accepts both datetime64 columns and columns of date objects,
# so the vectorised .dt accessor can replace the per-row lambdas.
game_dates = pd.to_datetime(hr_games['game_dt'])
hr_games['mo'] = game_dates.dt.month
hr_games['day'] = game_dates.dt.day
hr_games

Unnamed: 0,game_dt,player_id,b_hr,mo,day
10,1933-07-06,ruthb101,1.0,7,6
17,1933-07-06,frisf101,1.0,7,6
52,1935-07-08,foxxj101,1.0,7,8
68,1937-07-07,gehrl101,1.0,7,7
96,1939-07-11,dimaj101,1.0,7,11
...,...,...,...,...,...
5107018,1871-07-08,burrh101,1.0,7,8
5107021,1871-07-08,hallg101,2.0,7,8
5107023,1871-07-08,mille105,1.0,7,8
5107033,1871-07-08,selmf101,1.0,7,8


In [4]:
# Find HRs on players' birthdays, by merging the daily HR table with the people table.
# The merge keys pair (player, month, day) of the game with (player, birth month, birth day),
# so only home-run games played on the batter's birthday survive the join.
bd_hr = pd.merge(
    left=hr_games,
    right=people,
    left_on=['player_id', 'mo', 'day'],
    right_on=['retro_id', 'birth_month', 'birth_day'],
)

# Total up for careers, keeping the name and birthday fields (using max to get a value, but they should all be the same).
# Aggregations are given by name ('sum', 'max') instead of passing the Python builtins:
# recent pandas warns about builtin callables in agg and plans to change how they are applied.
bd_hr_career = (
    bd_hr.groupby(['player_id'])
    .agg({'b_hr': 'sum', 'mo': 'max', 'day': 'max', 'name_first': 'max', 'name_last': 'max'})
    .sort_values(by='b_hr', ascending=False)
)
bd_hr_career

Unnamed: 0_level_0,b_hr,mo,day,name_first,name_last
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
rodra001,6.0,7,27,Alex,Rodriguez
reynm001,6.0,8,3,Mark,Reynolds
simma101,5.0,5,22,Al,Simmons
heltt001,5.0,8,20,Todd,Helton
lee-d002,5.0,9,6,Derrek,Lee
...,...,...,...,...,...
hayev001,1.0,8,31,Von,Hayes
harsj101,1.0,7,12,Jack,Harshman
harrw001,1.0,6,22,Willie,Harris
harrj101,1.0,9,13,John,Harris


In [5]:
# Write out the players with 4 or more birthday homers

def format_and_print_row(row):
    """Print one leaderboard line for a player.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series or dict)
        Must provide the keys 'name_first', 'name_last', 'mo', 'day' and 'b_hr'.

    The line is laid out as: full name left-aligned in 17 columns, the home-run
    total (truncated to int) right-aligned in 3 columns, then the birthday in
    parentheses as an abbreviated month plus a day right-aligned in 2 columns.
    Returns None; the only effect is the printed line.
    """
    full_name = '{} {}'.format(row['name_first'], row['name_last'])

    # calendar.month_abbr maps a 1-based month number to its short name.
    month_label = calendar.month_abbr[row['mo']]
    birthday = f'{month_label} {row["day"]:>2}'

    homers = int(row['b_hr'])

    line = f'{full_name:<17} {homers:>3}   ({birthday:<6})'
    print(line)
    
# Print one line per player at or above the threshold, in the order bd_hr_career is already sorted.
# An explicit loop is used because the call is made only for its printed output;
# unlike DataFrame.apply it builds no result to discard, so no trailing `None` is needed.
MIN_BIRTHDAY_HR = 4
for _player_id, leader_row in bd_hr_career[bd_hr_career['b_hr'] >= MIN_BIRTHDAY_HR].iterrows():
    format_and_print_row(leader_row)

Alex Rodriguez      6   (Jul 27)
Mark Reynolds       6   (Aug  3)
Al Simmons          5   (May 22)
Todd Helton         5   (Aug 20)
Derrek Lee          5   (Sep  6)
Chipper Jones       5   (Apr 24)
Jason Thompson      4   (Jul  6)
Lou Gehrig          4   (Jun 19)
Aramis Ramirez      4   (Jun 25)
Nomar Garciaparra   4   (Jul 23)
Duke Snider         4   (Sep 19)
Tony Perez          4   (May 14)
Andruw Jones        4   (Apr 23)
Lance Parrish       4   (Jun 15)
Kirk Gibson         4   (May 28)
Jose Canseco        4   (Jul  2)
Gus Zernial         4   (Jun 27)
Kendrys Morales     4   (Jun 20)
Mike Trout          4   (Aug  7)
Joe Morgan          4   (Sep 19)
Justin Morneau      4   (May 15)
Garret Anderson     4   (Jun 30)
