In [1]:
import pandas as pd
import boxball_loader as bbl
import baseball_stats_utils as bsu

In [2]:
# Load the award vote-share table from the Lahman database and keep only the MVP rows.
mvp_votes = (
    pd.read_parquet('../data/baseballdatabank/awards_share_players.parquet')
    .query('award_id=="MVP"')
)
# Preview a random handful of rows; the seed is pinned so that a
# Restart & Run All reproduces the same preview every time.
mvp_votes.sample(10, random_state=0)

Unnamed: 0,award_id,year_id,lg_id,player_id,points_won,points_max,votes_first
2725,MVP,1956,NL,labincl01,1.0,336,0.0
5226,MVP,2003,NL,bondsba01,426.0,448,28.0
1818,MVP,1942,AL,hughste01,92.0,336,0.0
2686,MVP,1956,AL,foxne01,28.0,336,0.0
3280,MVP,1966,NL,brocklo01,2.0,280,0.0
1406,MVP,1934,NL,wanerpa01,50.0,80,
2178,MVP,1947,NL,kurowwh01,45.0,336,0.0
3097,MVP,1963,NL,callijo01,11.0,280,0.0
3523,MVP,1970,NL,grangwa01,1.0,336,0.0
6839,MVP,2016,NL,hendrky01,2.0,420,0.0


In [3]:
# Latest season covered by the MVP voting data (used later to cap the winners list)
max_year = mvp_votes.loc[:, 'year_id'].max()
max_year

2016

In [4]:
# Per-player summary of the MVP vote table, indexed by player_id:
#   first_yr    - earliest year_id in which the player has a row
#   last_yr     - latest year_id in which the player has a row
#   appearances - number of rows the player has in the vote table
# String aliases are used instead of the builtins min/max/len: pandas >= 2.1
# warns when builtin callables are passed to agg, and 'size' counts rows
# exactly like len() without a per-group Python call.
vote_summary = mvp_votes.groupby('player_id').agg(
    first_yr=('year_id', 'min'),
    last_yr=('year_id', 'max'),
    appearances=('year_id', 'size'),
)
vote_summary

Unnamed: 0_level_0,first_yr,last_yr,appearances
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
aaronha01,1955,1973,19
abernte02,1967,1967,1
abramca01,1954,1954,1
abreubo01,1999,2009,7
abreujo02,2014,2015,2
...,...,...,...
zimmehe01,1912,1913,2
zimmery01,2009,2012,3
ziskri01,1974,1977,3
zitoba01,2001,2002,2


In [5]:
# Build the table of MVP winners:
#   1. read the awards table and keep "Most Valuable Player" rows up to max_year,
#   2. attach each winner's vote summary (inner join on player_id),
#   3. add a display-name column via the bsu helper.
mvp_winners = (
    pd.read_parquet('../data/baseballdatabank/awards_players.parquet')
    .query('award_id=="Most Valuable Player" and year_id<=@max_year')
    .merge(vote_summary, on='player_id')
    .assign(name=lambda winners: bsu.get_player_names_col(winners['player_id'], 'player_id'))
)
mvp_winners


Unnamed: 0,player_id,award_id,year_id,lg_id,tie,notes,first_yr,last_yr,appearances,name
0,cobbty01,Most Valuable Player,1911,AL,,,1911,1914,4,Ty Cobb
1,schulfr01,Most Valuable Player,1911,NL,,,1911,1911,1,Frank Schulte
2,speaktr01,Most Valuable Player,1912,AL,,,1911,1914,4,Tris Speaker
3,doylela01,Most Valuable Player,1912,NL,,,1911,1913,3,Larry Doyle
4,johnswa01,Most Valuable Player,1913,AL,,,1911,1924,6,Walter Johnson
...,...,...,...,...,...,...,...,...,...,...
189,troutmi01,Most Valuable Player,2016,AL,,,2012,2016,5,Mike Trout
190,kershcl01,Most Valuable Player,2014,NL,,,2011,2015,5,Clayton Kershaw
191,donaljo02,Most Valuable Player,2015,AL,,,2013,2016,4,Josh Donaldson
192,harpebr03,Most Valuable Player,2015,NL,,,2012,2015,2,Bryce Harper


In [6]:
# Winners with exactly one appearance in the MVP vote data (i.e. only their winning season)
mvp_winners[mvp_winners['appearances'].eq(1)]

Unnamed: 0,player_id,award_id,year_id,lg_id,tie,notes,first_yr,last_yr,appearances,name
1,schulfr01,Most Valuable Player,1911,NL,,,1911,1911,1,Frank Schulte
9,sislege01,Most Valuable Player,1922,AL,,,1922,1922,1,George Sisler
16,ofarrbo01,Most Valuable Player,1926,NL,,,1926,1926,1,Bob O'Farrell
60,konstji01,Most Valuable Player,1950,NL,,,1950,1950,1,Jim Konstanty
131,hernawi01,Most Valuable Player,1984,AL,,,1984,1984,1,Willie Hernandez
134,mcgeewi01,Most Valuable Player,1985,NL,,,1985,1985,1,Willie McGee
158,caminke01,Most Valuable Player,1996,NL,,,1996,1996,1,Ken Caminiti


In [7]:
# Winners whose award year is also the first year they show up in the MVP vote data
is_first_appearance = mvp_winners['year_id'].eq(mvp_winners['first_yr'])
firsts = mvp_winners[is_first_appearance]
firsts

Unnamed: 0,player_id,award_id,year_id,lg_id,tie,notes,first_yr,last_yr,appearances,name
0,cobbty01,Most Valuable Player,1911,AL,,,1911,1914,4,Ty Cobb
1,schulfr01,Most Valuable Player,1911,NL,,,1911,1911,1,Frank Schulte
9,sislege01,Most Valuable Player,1922,AL,,,1922,1922,1,George Sisler
10,ruthba01,Most Valuable Player,1923,AL,,,1923,1932,3,Babe Ruth
11,vanceda01,Most Valuable Player,1924,NL,,,1924,1928,3,Dazzy Vance
16,ofarrbo01,Most Valuable Player,1926,NL,,,1926,1926,1,Bob O'Farrell
29,hubbeca01,Most Valuable Player,1933,NL,,,1933,1941,6,Carl Hubbell
41,waltebu01,Most Valuable Player,1939,NL,,,1939,1944,5,Bucky Walters
50,newhoha01,Most Valuable Player,1944,AL,,,1944,1948,4,Hal Newhouser
60,konstji01,Most Valuable Player,1950,NL,,,1950,1950,1,Jim Konstanty


In [8]:
# Compact view (name / year / league) of the first-appearance winners after 1947.
# NOTE(review): 1947 is the author's cutoff for "modern"; the rationale is not stated here - confirm.
firsts.loc[firsts['year_id'].gt(1947), ['name', 'year_id', 'lg_id']]

Unnamed: 0,name,year_id,lg_id
60,Jim Konstanty,1950,NL
70,Willie Mays,1954,NL
81,Roger Maris,1960,AL
102,Vida Blue,1971,AL
107,Jeff Burroughs,1974,AL
108,Steve Garvey,1974,NL
109,Fred Lynn,1975,AL
118,Keith Hernandez,1979,NL
131,Willie Hernandez,1984,AL
132,Ryne Sandberg,1984,NL
