In [1]:
from pybaseball import bwar_bat
from pybaseball import batting_stats_range
from pybaseball import playerid_reverse_lookup
import pandas as pd 
import warnings

## Create the DataFrame

##### Timeframe

In [2]:
start_year = 2021
end_year = 2021

### Player Awards
Up-To-Date data from [Sean Lahman](https://www.seanlahman.com/baseball-archive/statistics/)

In [3]:
awards = pd.read_csv('Resources/AwardsPlayers.csv').drop(columns=['tie','notes'])
mvp_awards = awards.sort_values('yearID').loc[
    (awards['yearID'] >= start_year) & (awards['awardID'] == 'Most Valuable Player')]
mvp_awards.head()

Unnamed: 0,playerID,awardID,yearID,lgID
6460,harpebr03,Most Valuable Player,2021,NL
6459,ohtansh01,Most Valuable Player,2021,AL


#### MVP names based on ID

In [4]:
def reverseLookup(mvp_awards):
    player_names = playerid_reverse_lookup(mvp_awards['playerID'], key_type='bbref')
    player_names['Name'] = player_names['name_first'].map(
        lambda x: x.title()) + ' ' + player_names['name_last'].map(lambda x: x.title())
    player_names = player_names[['Name', 'key_bbref','mlb_played_first', 'mlb_played_last']]
    return player_names.merge(mvp_awards, left_on= 'key_bbref', right_on='playerID').drop(['key_bbref'], axis=1)

mvp_awards = reverseLookup(mvp_awards)
all_awards = reverseLookup(awards)
mvp_awards.head()

Gathering player lookup table. This may take a moment.


Unnamed: 0,Name,mlb_played_first,mlb_played_last,playerID,awardID,yearID,lgID
0,Bryce Harper,2012.0,2022.0,harpebr03,Most Valuable Player,2021,NL
1,Shohei Ohtani,2018.0,2022.0,ohtansh01,Most Valuable Player,2021,AL


In [5]:
all_awards.head()

Unnamed: 0,Name,mlb_played_first,mlb_played_last,playerID,awardID,yearID,lgID
0,Hank Aaron,1954.0,1976.0,aaronha01,TSN All-Star,1956,ML
1,Hank Aaron,1954.0,1976.0,aaronha01,TSN Player of the Year,1956,NL
2,Hank Aaron,1954.0,1976.0,aaronha01,Most Valuable Player,1957,NL
3,Hank Aaron,1954.0,1976.0,aaronha01,Gold Glove,1958,NL
4,Hank Aaron,1954.0,1976.0,aaronha01,TSN All-Star,1958,ML


### Merge awards and players dataframes

In [6]:
warnings.simplefilter('ignore')


def Merger(start_year, end_year, mvp_awards, all_awards, league_type):
    war = bwar_bat()  

    for i in range(end_year-start_year +1):
        current_year = start_year + i
        if i < 1:
            data = getPlayersYearStats(current_year, war, league_type)
            mvpChecker(data, 'Name', mvp_awards, current_year, league_type)
            data = addAllAwards(data, all_awards, current_year)
        else:
            new_data = getPlayersYearStats(current_year, war, league_type)
            mvpChecker(new_data, 'Name', mvp_awards, current_year, league_type)
            new_data = addAllAwards(new_data, all_awards, current_year)
            data = pd.concat([data, new_data])
    return teamClean(data)



def getPlayersYearStats(current_year, war, league_type):
    war = war.loc[war['year_ID'] == current_year][['year_ID','name_common','WAR']]
    batting_stats = batting_stats_range(str(current_year)+'-04-07', str(current_year)+'-10-05').merge(
        war, how='left', left_on='Name', right_on='name_common').drop(['name_common', 'GDP','SH'], axis=1)
    batting_stats['Lev'] = batting_stats['Lev'].map(lambda x: x.split(',')[-1])
    return batting_stats.loc[batting_stats['Lev'] == league_type]

def mvpChecker(data, column, mvp_awards, current_year, league_type):
    for mvp in mvp_awards[column].loc[
        (mvp_awards['yearID'] == current_year) & (mvp_awards['lgID'] == league_type.split('-')[-1])]:
        data['MVP'] = data[column].map(lambda x: categorizer(mvp, x))
        
def addAllAwards(data, all_awards, current_year):
    data = data.merge(all_awards.loc[all_awards['yearID'] == current_year][
        ['Name','awardID']], how='left', on='Name')
    data['awardID'] = data['awardID'].fillna('No Award')
    data['WAR'] = data['WAR'].fillna(0)
#     data['wRC+'] = data['wRC+'].fillna(0)    
    return data

def teamClean(data):
    data['Tm'] = data['Tm'].map(lambda x: x.split(',')[-1])
    return data

def categorizer(mvp, player):
        if mvp == player:
            return 1
        else:
            return 0

## Add: WRC+
data_NL = Merger(start_year, end_year, mvp_awards, all_awards, 'Maj-NL')
data_AL = Merger(start_year, end_year, mvp_awards, all_awards, 'Maj-AL')

data_NL.head()

Unnamed: 0,Name,Age,#days,Lev,Tm,G,PA,AB,R,H,...,SB,CS,BA,OBP,SLG,OPS,year_ID,WAR,MVP,awardID
0,Cory Abbott,25,310,Maj-NL,Chicago,2,3,3,0,1,...,0,0,0.333,0.333,0.333,0.667,2021,0.03,0,No Award
1,Ronald Acuna Jr.,23,394,Maj-NL,Atlanta,78,342,280,70,80,...,17,6,0.286,0.401,0.593,0.994,2021,3.57,0,No Award
2,Willy Adames,25,309,Maj-NL,Tampa Bay,135,538,481,77,126,...,5,3,0.262,0.338,0.484,0.823,2021,0.68,0,No Award
3,Willy Adames,25,309,Maj-NL,Tampa Bay,135,538,481,77,126,...,5,3,0.262,0.338,0.484,0.823,2021,3.49,0,No Award
4,Matt Adams,32,380,Maj-NL,Colorado,22,40,36,3,6,...,0,0,0.167,0.25,0.194,0.444,2021,-0.26,0,No Award


In [7]:
data_AL.head()

Unnamed: 0,Name,Age,#days,Lev,Tm,G,PA,AB,R,H,...,SB,CS,BA,OBP,SLG,OPS,year_ID,WAR,MVP,awardID
0,José Abreu,34,310,Maj-AL,Chicago,146,630,542,82,143,...,1,0,0.264,0.352,0.482,0.834,2021,3.01,0,No Award
1,Jo Adell,22,331,Maj-AL,Los Angeles,35,140,130,17,32,...,2,1,0.246,0.295,0.408,0.703,2021,0.3,0,No Award
2,Keegan Akin,26,320,Maj-AL,Baltimore,1,2,2,0,0,...,0,0,0.0,0.0,0.0,0.0,2021,-0.02,0,No Award
3,Hanser Alberto,28,309,Maj-AL,Kansas City,99,242,228,23,61,...,3,0,0.268,0.29,0.395,0.685,2021,0.37,0,No Award
4,Tyler Alexander,26,339,Maj-AL,Detroit,1,2,2,0,0,...,0,0,0.0,0.0,0.0,0.0,2021,-0.02,0,No Award


### NL MVP Check


In [8]:
def mvpChecker(data):
    mvp_check = data.loc[data['MVP'] == 1]
    print('There are/is '+str(mvp_check['Name'].nunique())+' unique mvps')
    print(mvp_check['Name'].unique())
    return mvp_check
mvpChecker(data_NL)

There are/is 1 unique mvps
['Bryce Harper']


Unnamed: 0,Name,Age,#days,Lev,Tm,G,PA,AB,R,H,...,SB,CS,BA,OBP,SLG,OPS,year_ID,WAR,MVP,awardID
275,Bryce Harper,28,309,Maj-NL,Philadelphia,136,577,473,99,148,...,13,3,0.313,0.428,0.628,1.056,2021,5.86,1,Most Valuable Player
276,Bryce Harper,28,309,Maj-NL,Philadelphia,136,577,473,99,148,...,13,3,0.313,0.428,0.628,1.056,2021,5.86,1,Hank Aaron Award
277,Bryce Harper,28,309,Maj-NL,Philadelphia,136,577,473,99,148,...,13,3,0.313,0.428,0.628,1.056,2021,5.86,1,Silver Slugger
278,Bryce Harper,28,309,Maj-NL,Philadelphia,136,577,473,99,148,...,13,3,0.313,0.428,0.628,1.056,2021,5.86,1,TSN All-Star


### AL MVP Check 


In [9]:
mvpChecker(data_AL)

There are/is 1 unique mvps
['Shohei Ohtani']


Unnamed: 0,Name,Age,#days,Lev,Tm,G,PA,AB,R,H,...,SB,CS,BA,OBP,SLG,OPS,year_ID,WAR,MVP,awardID
310,Shohei Ohtani,26,309,Maj-AL,Los Angeles,149,618,517,97,132,...,24,10,0.255,0.374,0.588,0.962,2021,4.93,1,Most Valuable Player
311,Shohei Ohtani,26,309,Maj-AL,Los Angeles,149,618,517,97,132,...,24,10,0.255,0.374,0.588,0.962,2021,4.93,1,Silver Slugger
312,Shohei Ohtani,26,309,Maj-AL,Los Angeles,149,618,517,97,132,...,24,10,0.255,0.374,0.588,0.962,2021,4.93,1,Outstanding DH Award
313,Shohei Ohtani,26,309,Maj-AL,Los Angeles,149,618,517,97,132,...,24,10,0.255,0.374,0.588,0.962,2021,4.93,1,TSN All-Star


## Export to Postgre