In [1]:
## Let's try to figure out who the shortest, heaviest baseball players are.

## Here's our overall plan of attack

# Setting Things Up ✅
## Import CSVs ✅
### Separate CSVs --> DataFrames for People, Pitching Data, Batting Data ✅
## Squish everything into one mondo DF ✅
## Add Physical Data ✅
### Height ✅
### Weight ✅
## Calculate BMI ✅
### Convert Imperial to Metric ✅
### BMI-ify ✅
### Throw BMI back into df ✅
## Calculate mean fWAR/bWAR (mWAR)
## Assemble per-position lists sorted by BMI, then mWAR

# Knocking Things Down
## Find worst team that made playoffs in 2021
### Describe team fWAR/bWAR
### Describe individual fWAR/bWAR
## Pull from BMI lists per position until high BMI roster is full
### mWAR shall be higher on a team basis.
### mWAR shall be higher per position.
# If there's enough time:
## Repeat for:
### Tallness
### Shortness
### Heavy
### Light

# If there's enough time:
## Repeat for:
### Tallness
### Shortness
### Heavy
### Light

## Setting Things Up

### Import the necessaries

In [2]:
import pandas as pd
import pybaseball
from pybaseball import bwar_pitch
from pybaseball import bwar_bat
from pybaseball import cache
from pybaseball.lahman import *
from pybaseball import chadwick_register

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 100)


In [3]:
# Constants
KG_TO_LB = 0.453592
M_TO_IN = 0.0254

In [4]:
chadwick = chadwick_register(save=True)
chadwick = pd.DataFrame(chadwick)



In [5]:
download_lahman()

## Read infinity baseball data to DataFrames

In [6]:
# a table of all player biographical info and ids
people = pd.DataFrame(people())

# park id, name, alias, city, state, and country
parks = pd.DataFrame(parks())

# all star roster data: player, year, team, league, position
allstar = pd.DataFrame(all_star_full())

# each player's games played per position for each season
appearances = pd.DataFrame(appearances())

# batting stats by year, regular season
batting = pd.DataFrame(batting())

# batting stats by year, post season
batting_post = pd.DataFrame(batting_post())

# fielding stats by year 
fielding = pd.DataFrame(fielding())

# games played in left, center, right field 
fielding_of = pd.DataFrame(fielding_of())

# LF/CF/RF splits
fielding_of_split = pd.DataFrame(fielding_of_split())

# postseason fielding 
fielding_post = pd.DataFrame(fielding_post())

# home game attendance by park by year 
home_games = pd.DataFrame(home_games())

# historical player pitching stats
pitching = pd.DataFrame(pitching())

# postseason pitching stats
pitching_post = pd.DataFrame(pitching_post())

# playoff series winners and losers 
series_post = pd.DataFrame(series_post())

# data on teams by year: record, division, stadium, attendance, etc
teams = pd.DataFrame(teams())

# current and historical franchises, whether they're still active, and their ids
teams_franchises = pd.DataFrame(teams_franchises())

# split season data for teams
teams_half = pd.DataFrame(teams_half()) 

# fangraphs batting since 2008
fangraphs_batting = pd.DataFrame(pybaseball.batting_stats_range(
    start_dt="2008-01-01", end_dt="2021-12-31"))

# fangraphs pitching since 2008
fangraphs_pitching = pd.DataFrame(pybaseball.pitching_stats_range(
    start_dt="2008-01-01", end_dt="2021-12-31"))

# fangraphs team pitching since 2008
fangraphs_team_pitching = pd.DataFrame(
    pybaseball.team_pitching(start_season="2008", end_season="2021"))

# fangraphs team batting since 2008
fangraphs_team_batting = pd.DataFrame(
    pybaseball.team_batting(start_season="2008", end_season="2021"))

# fangraphs team fielding since 2008
fangraphs_team_fielding = pd.DataFrame(
    pybaseball.team_fielding(start_season="2008", end_season="2021"))

# bref pitching WAR
bwar_pitch = pd.DataFrame(bwar_pitch(return_all=True))

# bref batting WAR
bwar_bat = pd.DataFrame(bwar_bat(return_all=True))


  table = table.drop('', 1)


In [61]:
people['KG'] = people['weight'] * KG_TO_LB
people['meters'] = people['height'] * M_TO_IN
people['BMI'] = people['KG'] / people['meters'] ** 2
people['ratio'] = people['meters'] * people['BMI']


In [69]:
chadwick.columns

Index(['nameLast', 'nameFirst', 'key_mlbam', 'retroID', 'playerID',
       'key_fangraphs', 'mlb_played_first', 'mlb_played_last'],
      dtype='object')

It's joinin' time.

In [84]:
people.columns

Index(['playerID', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry',
       'birthState', 'birthCity', 'deathYear', 'deathMonth', 'deathDay',
       'deathCountry', 'deathState', 'deathCity', 'nameFirst', 'nameLast',
       'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame',
       'retroID', 'bbrefID', 'KG', 'meters', 'BMI', 'ratio'],
      dtype='object')

In [85]:
chadwick.columns

Index(['nameLast', 'nameFirst', 'key_mlbam', 'retroID', 'playerID',
       'key_fangraphs', 'mlb_played_first', 'mlb_played_last'],
      dtype='object')

In [88]:
df = pd.merge(
    people,
    chadwick,
    left_on=["playerID", "retroID", "nameFirst", "nameLast"],
    right_on=["playerID", "retroID", "nameFirst", "nameLast"],
)

In [91]:
df.columns

Index(['playerID', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry',
       'birthState', 'birthCity', 'deathYear', 'deathMonth', 'deathDay',
       'deathCountry', 'deathState', 'deathCity', 'nameFirst', 'nameLast',
       'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame',
       'retroID', 'bbrefID', 'KG', 'meters', 'BMI', 'ratio', 'key_mlbam',
       'key_fangraphs', 'mlb_played_first', 'mlb_played_last'],
      dtype='object')

In [90]:
bwar_bat.columns

Index(['name_common', 'age', 'mlb_ID', 'player_ID', 'year_ID', 'team_ID',
       'stint_ID', 'lg_ID', 'PA', 'G', 'Inn', 'runs_bat', 'runs_br', 'runs_dp',
       'runs_field', 'runs_infield', 'runs_outfield', 'runs_catcher',
       'runs_good_plays', 'runs_defense', 'runs_position', 'runs_position_p',
       'runs_replacement', 'runs_above_rep', 'runs_above_avg',
       'runs_above_avg_off', 'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def',
       'WAR', 'WAR_def', 'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG',
       'oppRpG', 'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent',
       'pyth_exponent_rep', 'waa_win_perc', 'waa_win_perc_off',
       'waa_win_perc_def', 'waa_win_perc_rep', 'OPS_plus', 'TOB_lg', 'TB_lg'],
      dtype='object')

In [92]:
df = df.merge(
    bwar_bat,
    left_on=["playerID", "key_mlbam"],
    right_on=["player_ID", "mlb_ID"]
)

In [96]:
df.columns

Index(['playerID', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry',
       'birthState', 'birthCity', 'deathYear', 'deathMonth', 'deathDay',
       'deathCountry', 'deathState', 'deathCity', 'nameFirst', 'nameLast',
       'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame',
       'retroID', 'bbrefID', 'KG', 'meters', 'BMI', 'ratio', 'key_mlbam',
       'key_fangraphs', 'mlb_played_first', 'mlb_played_last', 'name_common',
       'age', 'mlb_ID', 'player_ID', 'year_ID', 'team_ID', 'stint_ID', 'lg_ID',
       'PA', 'G', 'Inn', 'runs_bat', 'runs_br', 'runs_dp', 'runs_field',
       'runs_infield', 'runs_outfield', 'runs_catcher', 'runs_good_plays',
       'runs_defense', 'runs_position', 'runs_position_p', 'runs_replacement',
       'runs_above_rep', 'runs_above_avg', 'runs_above_avg_off',
       'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def', 'WAR', 'WAR_def',
       'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG', 'oppRpG',
       'oppRpPA_rep', 'oppR

In [97]:
bwar_bat.columns

Index(['name_common', 'age', 'mlb_ID', 'player_ID', 'year_ID', 'team_ID',
       'stint_ID', 'lg_ID', 'PA', 'G', 'Inn', 'runs_bat', 'runs_br', 'runs_dp',
       'runs_field', 'runs_infield', 'runs_outfield', 'runs_catcher',
       'runs_good_plays', 'runs_defense', 'runs_position', 'runs_position_p',
       'runs_replacement', 'runs_above_rep', 'runs_above_avg',
       'runs_above_avg_off', 'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def',
       'WAR', 'WAR_def', 'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG',
       'oppRpG', 'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent',
       'pyth_exponent_rep', 'waa_win_perc', 'waa_win_perc_off',
       'waa_win_perc_def', 'waa_win_perc_rep', 'OPS_plus', 'TOB_lg', 'TB_lg'],
      dtype='object')

In [98]:
df = df.merge(
    bwar_bat,
    left_on=[
        "G",
        "Inn",
        "OPS_plus",
        "PA",
        "TB_lg",
        "TOB_lg",
        "WAA",
        "WAA_def",
        "WAA_off",
        "WAR",
        "WAR_def",
        "WAR_off",
        "WAR_rep",
        "age",
        "lg_ID",
        "mlb_ID",
        "oppRpG",
        "oppRpG_rep",
        "oppRpPA_rep",
        "pitcher",
        "player_ID",
        "pyth_exponent",
        "pyth_exponent_rep",
        "runs_above_avg",
        "runs_above_avg_def",
        "runs_above_avg_off",
        "runs_above_rep",
        "runs_bat",
        "runs_br",
        "runs_catcher",
        "runs_defense",
        "runs_dp",
        "runs_field",
        "runs_good_plays",
        "runs_infield",
        "runs_outfield",
        "runs_position",
        "runs_position_p",
        "runs_replacement",
        "salary",
        "stint_ID",
        "teamRpG",
        "team_ID",
        "waa_win_perc",
        "waa_win_perc_def",
        "waa_win_perc_off",
        "waa_win_perc_rep",
        "year_ID",
    ],
    right_on=[
        "G",
        "Inn",
        "OPS_plus",
        "PA",
        "TB_lg",
        "TOB_lg",
        "WAA",
        "WAA_def",
        "WAA_off",
        "WAR",
        "WAR_def",
        "WAR_off",
        "WAR_rep",
        "age",
        "lg_ID",
        "mlb_ID",
        "oppRpG",
        "oppRpG_rep",
        "oppRpPA_rep",
        "pitcher",
        "player_ID",
        "pyth_exponent",
        "pyth_exponent_rep",
        "runs_above_avg",
        "runs_above_avg_def",
        "runs_above_avg_off",
        "runs_above_rep",
        "runs_bat",
        "runs_br",
        "runs_catcher",
        "runs_defense",
        "runs_dp",
        "runs_field",
        "runs_good_plays",
        "runs_infield",
        "runs_outfield",
        "runs_position",
        "runs_position_p",
        "runs_replacement",
        "salary",
        "stint_ID",
        "teamRpG",
        "team_ID",
        "waa_win_perc",
        "waa_win_perc_def",
        "waa_win_perc_off",
        "waa_win_perc_rep",
        "year_ID",
    ],
)

In [100]:
df.sample(25)

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,KG,meters,BMI,ratio,key_mlbam,key_fangraphs,mlb_played_first,mlb_played_last,name_common_x,age,mlb_ID,player_ID,year_ID,team_ID,stint_ID,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg,name_common_y
1816,alvarto01,1979.0,5.0,10.0,Venezuela,Distrito Federal,Caracas,,,,,,,Tony,Alvarez,Antonio Enrique,200.0,73.0,R,R,2002-09-04,2004-10-03,alvat002,alvarto01,90.7184,1.8542,26.386527,48.925898,407887,1587,2002.0,2004.0,Tony Alvarez,25.0,407887.0,alvarto01,2004,PIT,1,NL,45.0,24,86.0,-2.42,0.1,-0.03,-1.0,0.0,0.0,0.0,0.0,-1.0,-0.41,0.0,1.4,-2.4,-3.8,-2.8,-1.4,-0.39,-0.29,-0.15,-0.26,-0.15,-0.16,0.13,,N,4.55592,4.67092,0.08651,4.61247,1.884,1.887,0.4841,0.4883,0.4941,0.4941,63.639985,15.295,16.53,Tony Alvarez
57836,lindeji01,1962.0,1.0,10.0,USA,IL,Evanston,,,,,,,Jim,Lindeman,James William,200.0,73.0,R,R,1986-09-03,1994-08-11,lindj001,lindeji01,90.7184,1.8542,26.386527,48.925898,117781,1007618,1986.0,1994.0,Jim Lindeman,27.0,117781.0,lindeji01,1989,STL,1,NL,50.0,73,78.7,-5.51,0.23,-0.26,2.4,0.0,0.0,0.0,,2.4,-0.68,0.0,1.61,-2.2,-3.8,-6.2,1.7,-0.46,-0.73,0.16,-0.3,0.16,-0.57,0.16,,N,3.8352,3.92041,0.06954,3.89837,1.793,1.797,0.4939,0.4902,0.5027,0.4975,-14.985578,15.93,17.244,Jim Lindeman
36552,gonzaen01,1982.0,7.0,14.0,Venezuela,Bolivar,Ciudad Bolivar,,,,,,,Enrique,Gonzalez,Enrique Cesar,225.0,70.0,R,R,2006-05-28,2011-06-08,gonze002,gonzaen01,102.0582,1.778,32.283781,57.400562,434619,2568,2006.0,2011.0,Enrique Gonzalez,26.0,434619.0,gonzaen01,2009,BOS,1,AL,0.0,0,3.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,Y,,4.79788,0.08302,,,,,,,,,0.0,0.0,Enrique Gonzalez
64794,mcgratu01,1944.0,8.0,30.0,USA,CA,Martinez,2004.0,1.0,5.0,USA,TN,Brentwood,Tug,McGraw,Frank Edwin,170.0,72.0,R,L,1965-04-18,1984-09-25,mcgrt001,mcgratu01,77.11064,1.8288,23.055889,42.164611,118724,1008545,1965.0,1984.0,Tug McGraw,20.0,118724.0,mcgratu01,1965,NYM,1,NL,23.0,38,97.7,-2.88,0.02,-0.06,0.0,0.0,0.0,0.0,,0.0,0.01,2.64,0.01,-0.3,-0.3,-0.3,0.0,-0.03,-0.03,0.0,-0.03,0.0,-0.03,0.0,,Y,4.00933,4.01644,0.06668,3.99364,1.81,1.809,0.4992,0.4992,0.5,0.4974,-24.403834,7.247,8.772,Tug McGraw
101448,varnepe01,1949.0,4.0,10.0,USA,MA,Roxbury,,,,,,,Pete,Varney,Richard Fred,235.0,75.0,R,R,1973-08-26,1976-09-12,varnp101,varnepe01,106.59412,1.905,29.372661,55.954919,123663,1013372,1973.0,1976.0,Pete Varney,25.0,123663.0,varnepe01,1974,CHW,1,AL,30.0,9,67.0,-1.46,-0.3,0.0,0.0,0.0,0.0,1.0,,1.0,0.51,0.0,1.07,0.8,-0.3,-1.3,1.5,-0.03,-0.13,0.17,0.07,0.17,-0.03,0.1,,N,3.95986,4.09875,0.06918,3.97932,1.813,1.814,0.4968,0.4844,0.5187,0.4866,48.044288,9.801,10.539,Pete Varney
43839,hernaru01,1931.0,12.0,10.0,D.R.,Santiago,Santiago,,,,,,,Rudy,Hernandez,Rudolph Albert,185.0,75.0,R,R,1960-07-03,1961-05-04,hernr101,hernaru01,83.91452,1.905,23.123158,44.049617,115834,1005711,1960.0,1961.0,Rudy Hernandez,29.0,115834.0,hernaru01,1961,WSA,1,AL,0.0,7,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.01,0.0,-0.01,0.0,0.0,,Y,4.56197,4.56197,0.08485,4.56197,1.878,1.878,0.5,0.5,0.5,0.5,,0.0,0.0,Rudy Hernandez
85665,rowedo01,1936.0,4.0,3.0,USA,CA,Brawley,2005.0,10.0,15.0,USA,CA,Newport Beach,Don,Rowe,Donald Howard,180.0,72.0,L,L,1963-04-09,1963-07-18,rowed101,rowedo01,81.64656,1.8288,24.412118,44.644882,121503,1011253,1963.0,1963.0,Don Rowe,27.0,121503.0,rowedo01,1963,NYM,1,NL,13.0,26,54.7,-1.14,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.01,1.44,0.0,0.3,0.3,0.3,0.0,0.04,0.04,0.01,0.04,0.01,0.04,0.0,,Y,3.82016,3.80823,0.06198,3.7894,1.784,1.782,0.5014,0.5014,0.5,0.4978,33.78706,4.107,4.939,Don Rowe
39592,habyajo01,1964.0,1.0,29.0,USA,NY,Bay Shore,,,,,,,John,Habyan,John Gabriel,195.0,73.0,R,R,1985-09-29,1996-06-08,habyj001,habyajo01,88.45044,1.8542,25.726864,47.702751,115281,1005176,1985.0,1996.0,John Habyan,31.0,115281.0,habyajo01,1995,CAL,2,AL,0.0,0,32.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,Y,,5.10078,0.08892,,,,,,,,,0.0,0.0,John Habyan
103809,warstra01,1903.0,9.0,13.0,USA,OH,North Canton,1964.0,5.0,31.0,USA,OH,North Canton,Rabbit,Warstler,Harold Burton,150.0,67.0,R,R,1930-07-24,1940-09-29,warsr101,warstra01,68.0388,1.7018,23.493061,39.980491,123950,1013643,1930.0,1940.0,Rabbit Warstler,28.0,123950.0,warstra01,1932,BOS,1,AL,427.0,115,,-33.16,1.71,-0.1,-3.0,,,,,-3.0,7.18,0.0,16.01,-11.4,-27.4,-24.4,4.2,-2.68,-2.35,0.39,-1.12,0.39,-0.79,1.56,5000.0,N,5.02283,5.23474,0.09205,5.0955,1.942,1.945,0.4775,0.4799,0.5034,0.4869,42.168402,143.476,158.304,Rabbit Warstler
10188,boylebu01,1908.0,2.0,9.0,USA,OH,Cincinnati,1978.0,11.0,12.0,USA,OH,Cincinnati,Buzz,Boyle,Ralph Francis,170.0,71.0,L,L,1929-09-11,1935-09-29,boylb101,boylebu01,77.11064,1.8034,23.709925,42.758478,111328,1001285,1929.0,1935.0,Buzz Boyle,21.0,111328.0,boylebu01,1929,BSN,1,NL,66.0,17,,-1.91,0.05,0.0,0.0,,,,,0.0,-0.77,0.0,2.03,-0.6,-2.6,-2.6,-0.8,-0.24,-0.24,-0.07,-0.03,-0.07,-0.03,0.21,1950.0,N,5.26538,5.42008,0.10238,5.30039,1.964,1.966,0.4858,0.4858,0.4959,0.489,81.476199,22.737,24.687,Buzz Boyle


In [80]:
df = people.merge(chadwick, left_on='playerID', right_on='playerID', how='outer')

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst_x,nameLast_x,nameGiven,weight,height,bats,throws,debut,finalGame,retroID_x,bbrefID,KG,meters,BMI,ratio,nameLast_y,nameFirst_y,key_mlbam,retroID_y,key_fangraphs,mlb_played_first,mlb_played_last
21969,ashbyea01,,,,,,,,,,,,,,,,,,,,,,,,,,,,Ashby,Earl,-1.0,ashbe101,-1.0,1945.0,1948.0
6501,gausmke01,1991.0,1.0,6.0,USA,CO,Centennial,,,,,,,Kevin,Gausman,Kevin John,205.0,74.0,L,R,2013-05-23,2022-06-27,gausk001,gausmke01,92.98636,1.8796,26.320151,49.471356,Gausman,Kevin,592332.0,gausk001,14107.0,2013.0,2022.0
17964,stvraji01,1871.0,6.0,6.0,USA,MO,Ralls County,1937.0,6.0,12.0,USA,MT,Butte,Jim,St. Vrain,James Marcellin,175.0,69.0,R,L,1902-04-20,1902-06-14,stvrj101,st.vrji01,79.3786,1.7526,25.842696,45.291909,,,,,,,
22204,burrisa01,,,,,,,,,,,,,,,,,,,,,,,,,,,,Burris,Samuel,-1.0,,-1.0,1940.0,1940.0
24025,smithcl02,,,,,,,,,,,,,,,,,,,,,,,,,,,,Smith,Clarence,-1.0,,-1.0,1921.0,1933.0
15207,rebelar01,1914.0,3.0,4.0,USA,OH,Cincinnati,2004.0,7.0,10.0,USA,FL,Tampa,Art,Rebel,Arthur Anthony,180.0,68.0,L,L,1938-04-19,1945-09-30,rebea101,rebelar01,81.64656,1.7272,27.368603,47.271051,Rebel,Art,120973.0,rebea101,1010744.0,1938.0,1945.0
6084,frankry01,1973.0,3.0,5.0,USA,AR,Fort Smith,,,,,,,Ryan,Franklin,Ryan Ray,190.0,75.0,R,R,1999-05-15,2011-06-28,franr001,frankry01,86.18248,1.905,23.748109,45.240147,Franklin,Ryan,211041.0,franr001,1076.0,1999.0,2011.0
9403,judefr01,1884.0,11.0,11.0,USA,MN,Libby,1961.0,5.0,4.0,USA,TX,Brownsville,Frank,Jude,Frank,150.0,67.0,R,R,1906-07-09,1906-10-07,judef101,judefr01,68.0388,1.7018,23.493061,39.980491,Jude,Frank,116784.0,judef101,1006642.0,1906.0,1906.0
15048,raglato01,1946.0,6.0,16.0,USA,AL,Talladega,,,,,,,Tom,Ragland,Thomas,155.0,70.0,R,R,1971-04-05,1973-09-29,raglt101,raglato01,70.30676,1.778,22.239938,39.54261,Ragland,Tom,120888.0,raglt101,1010652.0,1971.0,1973.0
11472,marshja01,1983.0,2.0,25.0,USA,MO,St. Louis,,,,,,,Jay,Marshall,Jay William,205.0,77.0,L,L,2007-04-02,2009-09-01,marsj001,marshja01,92.98636,1.9558,24.309183,47.5439,Marshall,Jay,457444.0,marsj001,5546.0,2007.0,2009.0


In [73]:
bwar_bat.columns

Index(['name_common', 'age', 'mlb_ID', 'player_ID', 'year_ID', 'team_ID',
       'stint_ID', 'lg_ID', 'PA', 'G', 'Inn', 'runs_bat', 'runs_br', 'runs_dp',
       'runs_field', 'runs_infield', 'runs_outfield', 'runs_catcher',
       'runs_good_plays', 'runs_defense', 'runs_position', 'runs_position_p',
       'runs_replacement', 'runs_above_rep', 'runs_above_avg',
       'runs_above_avg_off', 'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def',
       'WAR', 'WAR_def', 'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG',
       'oppRpG', 'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent',
       'pyth_exponent_rep', 'waa_win_perc', 'waa_win_perc_off',
       'waa_win_perc_def', 'waa_win_perc_rep', 'OPS_plus', 'TOB_lg', 'TB_lg'],
      dtype='object')

In [74]:
df = df.merge(bwar_bat, left_on='playerID', right_on='player_ID', how='outer')

In [77]:
df = df.merge(bwar_pitch, left_on='playerID', right_on='player_ID', how='outer')

In [79]:
df.sample(25)

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst_x,nameLast_x,nameGiven,weight,height,bats,throws,debut,finalGame,retroID_x,bbrefID,KG,meters,BMI,ratio,nameLast_y,nameFirst_y,key_mlbam,retroID_y,key_fangraphs,mlb_played_first,mlb_played_last,name_common_x,age_x,mlb_ID_x,player_ID_x,year_ID_x,team_ID_x,stint_ID_x,lg_ID_x,PA,G_x,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep_x,runs_above_avg_x,runs_above_avg_off,runs_above_avg_def,WAA_x,WAA_off,WAA_def,WAR_x,WAR_def,WAR_off,WAR_rep_x,salary_x,pitcher,teamRpG_x,oppRpG_x,oppRpPA_rep,oppRpG_rep_x,pyth_exponent_x,pyth_exponent_rep_x,waa_win_perc_x,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep_x,OPS_plus,TOB_lg,TB_lg,name_common_y,age_y,mlb_ID_y,player_ID_y,year_ID_y,team_ID_y,stint_ID_y,lg_ID_y,G_y,GS,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg_y,runs_above_avg_adj,runs_above_rep_y,RpO_replacement,GR_leverage_index_avg,WAR_y,salary_y,teamRpG_y,oppRpG_y,pyth_exponent_y,waa_win_perc_y,WAA_y,WAA_adj,oppRpG_rep_y,pyth_exponent_rep_y,waa_win_perc_rep_y,WAR_rep_y,ERA_plus,ER_lg
246662,kershcl01,1988.0,3.0,19.0,USA,TX,Dallas,,,,,,,Clayton,Kershaw,Clayton Edward,225.0,76.0,L,L,2008-05-25,2022-06-28,kersc001,kershcl01,102.0582,1.9304,27.387557,52.868939,Kershaw,Clayton,477132.0,kersc001,2036.0,2008.0,2022.0,Clayton Kershaw,30.0,477132.0,kershcl01,2018.0,LAD,1.0,NL,57.0,25.0,161.3,-1.74,-0.53,0.38,0.0,0.0,0.0,0.0,0.0,0.0,0.11,8.44,0.07,6.7,6.7,6.7,0.1,0.67,0.69,-0.01,0.68,-0.01,0.7,0.01,35571429.0,Y,4.58977,4.32337,0.07868,4.24927,1.865,1.845,0.5279,0.5279,0.5005,0.492,73.132566,17.183,19.012,Clayton Kershaw,23.0,477132.0,kershcl01,2011.0,LAD,1.0,NL,33.0,33.0,700.0,700.0,0.0,66.0,107.014,3.96,,-1.262,96.0,95.441,107.119,607.0,0.1451,-8.7,41.119,40.685,59.393,0.181,1.0,6.75,500000.0,4.15524,2.92236,1.747,0.6491,4.9203,-0.1369,4.718,1.863,0.4411,1.964,161.420339,95.238
507942,whiteel04,1994.0,6.0,26.0,USA,SC,Greenville,,,,,,,Eli,White,Elijah Thomas,195.0,75.0,R,R,2020-09-01,2022-06-12,white006,whiteel04,88.45044,1.905,24.373059,46.430677,White,Eli,642201.0,white006,19346.0,2020.0,2022.0,Eli White,26.0,642201.0,whiteel04,2020.0,TEX,1.0,AL,52.0,19.0,137.0,-5.02,-0.83,0.0,1.0,0.0,1.0,0.0,0.0,2.0,-0.64,0.0,1.78,-2.7,-4.5,-6.5,1.4,-0.46,-0.66,0.13,-0.28,0.13,-0.48,0.18,,N,4.43398,4.77556,0.08767,4.68204,1.883,1.897,0.4757,0.4651,0.5072,0.4906,28.323317,16.494,19.795,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
536449,byrdbi01,,,,,,,,,,,,,,,,,,,,,,,,,,,,Byrd,Bill,-1.0,byrdb101,-1.0,1933.0,1948.0,Bill Byrd,38.0,,byrdbi01,1946.0,BEG,0.0,NN2,43.0,21.0,,-7.54,0.01,0.0,0.0,,,,,0.0,0.01,3.04,0.42,-4.1,-4.5,-4.5,0.0,-0.43,-0.43,0.0,-0.39,0.0,-0.39,0.04,,Y,4.97164,5.18498,0.09603,5.11501,1.936,1.944,0.4797,0.4797,0.5,0.4934,-20.17071,14.887,15.24,Bill Byrd,28.0,,byrdbi01,1936.0,WEG,0.0,NN2,16.0,10.0,288.0,,,51.0,61.284,0.0,,-3.598,100.0,,64.882,358.0,0.257,-14.0,13.882,15.304,25.825,0.263,1.0,2.16,,5.96508,5.00858,1.979,0.5856,1.3696,-0.1449,6.70758,2.062,0.4398,0.9367,143.319444,51.595
235794,johnto01,1943.0,5.0,22.0,USA,IN,Terre Haute,,,,,,,Tommy,John,Thomas Edward,180.0,75.0,R,L,1963-09-06,1989-05-25,johnt001,johnto01,81.64656,1.905,22.498208,42.859087,John,Tommy,116550.0,johnt001,1006515.0,1963.0,1989.0,Tommy John,34.0,116550.0,johnto01,1977.0,LAD,1.0,NL,89.0,31.0,220.3,-9.47,-1.23,0.17,0.0,0.0,0.0,0.0,,0.0,0.18,10.57,0.0,0.2,0.2,0.2,0.2,0.0,0.0,0.01,0.0,0.01,0.0,0.0,,Y,4.38346,4.37636,0.07911,4.284,1.856,1.85,0.5008,0.5008,0.5006,0.4901,16.807969,27.855,32.271,Tommy John,31.0,116550.0,johnto01,1974.0,LAD,1.0,NL,22.0,22.0,459.0,459.0,0.0,51.0,68.96,2.11,,4.076,94.0,94.89,63.571,492.0,0.1084,37.6,12.571,11.739,25.064,0.181,1.0,2.63,77000.0,4.13641,3.60282,1.792,0.5616,1.3552,-0.0727,4.70453,1.861,0.4404,1.3425,132.206818,58.171
501145,weathda01,1969.0,9.0,25.0,USA,TN,Lawrenceburg,,,,,,,David,Weathers,John David,205.0,75.0,R,R,1991-08-02,2009-10-03,weatd001,weathda01,92.98636,1.905,25.622959,48.811738,Weathers,David,124000.0,weatd001,902.0,1991.0,2009.0,David Weathers,34.0,124000.0,weathda01,2004.0,NYM,1.0,NL,0.0,29.0,33.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.01,0.0,-0.01,0.0,0.0,3933333.0,Y,4.67092,4.67092,0.08651,4.67092,1.89,1.89,0.5,0.5,0.5,0.5,,0.0,0.0,David Weathers,34.0,124000.0,weathda01,2004.0,NYM,1.0,NL,32.0,0.0,101.0,0.0,101.0,19.0,17.767,-1.309,,-0.864,99.0,99.628,17.258,114.0,0.0242,-35.7,-1.742,-1.867,1.222,0.203,1.5988,-0.02,3933333.0,4.67092,4.72926,1.894,0.4941,-0.1888,-0.126,4.76358,1.896,0.4907,0.2963,100.91875,16.147
368703,perezod01,1978.0,6.0,11.0,D.R.,San Juan,Las Matas de Farfan,2022.0,3.0,10.0,D.R.,Santo Domingo,Santo Domingo Este,Odalis,Perez,Odalis Amadol,225.0,72.0,L,L,1998-09-01,2008-09-28,pereo001,perezod01,102.0582,1.8288,30.515148,55.806102,Perez,Odalis,136602.0,pereo001,668.0,1998.0,2008.0,Odalis Perez,21.0,136602.0,perezod01,1999.0,ATL,1.0,NL,34.0,17.0,93.0,-6.02,-0.05,0.41,0.0,0.0,0.0,0.0,,0.0,0.06,5.02,0.0,-0.6,-0.6,-0.6,0.1,-0.07,-0.07,0.0,-0.07,0.0,-0.07,0.0,200000.0,Y,4.93969,4.97381,0.09274,4.91147,1.923,1.921,0.4967,0.4967,0.5003,0.4939,-31.692512,10.491,13.254,Odalis Perez,28.0,136602.0,perezod01,2006.0,LAD,1.0,NL,20.0,8.0,178.0,114.0,64.0,49.0,31.989,-0.053,,-1.162,100.0,98.56,32.621,227.0,0.0484,-24.0,-16.379,-16.128,-10.937,0.211,0.5425,-0.91,8750000.0,4.86675,5.67315,1.957,0.4255,-1.49,0.0615,5.13446,1.928,0.4742,0.5162,66.22,29.799
111809,detwiro01,1986.0,3.0,6.0,USA,MO,St. Louis,,,,,,,Ross,Detwiler,Ross Emery,210.0,77.0,R,L,2007-09-07,2022-06-30,detwr001,detwiro01,95.25432,1.9558,24.90209,48.703508,Detwiler,Ross,446321.0,detwr001,2859.0,2007.0,2022.0,Ross Detwiler,28.0,446321.0,detwiro01,2014.0,WSN,1.0,NL,4.0,44.0,63.0,-0.3,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.54,0.0,0.3,0.3,0.3,0.0,0.04,0.04,-0.04,0.04,-0.04,0.04,0.0,3000000.0,Y,3.99762,3.99035,0.07088,3.98739,1.808,1.807,0.5008,0.5008,0.5,0.4997,2.564103,0.975,0.805,Ross Detwiler,25.0,446321.0,detwiro01,2011.0,WSN,1.0,NL,15.0,10.0,198.0,168.0,30.0,26.0,30.853,0.616,,1.125,100.0,102.816,31.198,213.0,0.0467,24.1,5.198,5.075,10.367,0.181,1.09,1.09,,4.15524,3.81691,1.807,0.5383,0.5745,-0.0532,4.50544,1.85,0.4626,0.5667,128.095455,28.181
529139,zardojo01,1923.0,5.0,20.0,Cuba,La Habana,La Habana,2017.0,3.0,21.0,USA,FL,Tamarac,Jose,Zardon,Jose Antonio,150.0,72.0,R,R,1945-04-18,1945-09-16,zardj101,zardojo01,68.0388,1.8288,20.343432,37.204068,Zardon,Jose,124744.0,zardj101,1014419.0,1945.0,1945.0,Jose Zardon,22.0,124744.0,zardojo01,1945.0,WSH,1.0,AL,142.0,54.0,,1.42,-0.42,-0.51,0.0,,,,,0.0,-1.04,0.0,4.09,3.5,-0.6,-0.6,-1.0,-0.07,-0.08,-0.14,0.38,-0.14,0.37,0.45,,N,3.89973,3.90992,0.07109,3.8341,1.796,1.792,0.4988,0.4988,0.4978,0.4912,111.181638,44.339,44.671,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
319072,milletr02,1973.0,5.0,29.0,USA,KY,Louisville,,,,,,,Trever,Miller,Trever Douglas,200.0,75.0,R,L,1996-09-04,2011-09-24,millt002,milletr02,90.7184,1.905,24.998009,47.621207,Miller,Trever,119125.0,millt002,1672.0,1996.0,2011.0,Trever Miller,38.0,119125.0,milletr02,2011.0,STL,1.0,NL,0.0,35.0,15.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2000000.0,Y,4.15524,4.15524,0.07554,4.15524,1.828,1.828,0.5,0.5,0.5,0.5,,0.0,0.0,Trever Miller,27.0,119125.0,milletr02,2000.0,LAD,2.0,NL,2.0,0.0,7.0,0.0,7.0,6.0,1.378,-0.106,,0.018,93.0,99.0,1.241,13.0,0.0029,6.3,-4.759,-4.766,-4.537,0.219,0.36,-0.24,,5.05048,7.43347,2.053,0.3114,-0.3772,0.116,5.15985,1.939,0.4896,0.0206,21.466667,1.288
87187,cokeph01,1982.0,7.0,19.0,USA,CA,Sonora,,,,,,,Phil,Coke,Phillip Douglas,210.0,73.0,L,L,2008-09-01,2016-09-26,cokep001,cokeph01,95.25432,1.8542,27.705853,51.372193,Coke,Phil,457435.0,cokep001,5535.0,2008.0,2016.0,Phil Coke,29.0,457435.0,cokeph01,2012.0,DET,1.0,AL,0.0,3.0,54.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1100000.0,Y,4.41052,4.41052,0.07575,4.41052,1.86,1.86,0.5,0.5,0.5,0.5,,0.0,0.0,Phil Coke,25.0,457435.0,cokeph01,2008.0,NYY,1.0,AL,12.0,0.0,44.0,0.0,44.0,1.0,7.825,-0.581,,-0.299,101.0,103.731,7.824,36.0,0.008,-37.4,6.824,6.571,8.417,0.211,1.0375,0.82,,4.704,4.15642,1.862,0.5574,0.6888,-0.0297,4.83648,1.902,0.4868,0.1566,734.0,7.34


In [34]:
df_pitchers = df[df[]]

0    0.0
Name: WAR, dtype: float64

In [248]:
people = people.join(chadwick, how='left', rsuffix='_r')

In [249]:
people.head(3)

Unnamed: 0_level_0,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,nameLast_r,nameFirst_r,key_mlbam,retroID_r,key_fangraphs,mlb_played_first,mlb_played_last
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
aardsda01,1981.0,12.0,27.0,USA,CO,Denver,,,,,,,David,Aardsma,David Allan,215.0,75.0,R,R,2004-04-06,2015-08-23,aardd001,aardsda01,Aardsma,David,430911.0,aardd001,1902.0,2004.0,2015.0
aaronha01,1934.0,2.0,5.0,USA,AL,Mobile,2021.0,1.0,22.0,USA,GA,Atlanta,Hank,Aaron,Henry Louis,180.0,72.0,R,R,1954-04-13,1976-10-03,aaroh101,aaronha01,Aaron,Hank,110001.0,aaroh101,1000001.0,1954.0,1976.0
aaronto01,1939.0,8.0,5.0,USA,AL,Mobile,1984.0,8.0,16.0,USA,GA,Atlanta,Tommie,Aaron,Tommie Lee,190.0,75.0,R,R,1962-04-10,1971-09-26,aarot101,aaronto01,Aaron,Tommie,110002.0,aarot101,1000002.0,1962.0,1971.0


Now let's repeat the process with oodles of performance data from df.bwar_bat and df.bwar_pitch.

In [250]:
bwar_bat.set_index('player_ID')

Unnamed: 0_level_0,name_common,age,mlb_ID,year_ID,team_ID,stint_ID,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg
player_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1
aardsda01,David Aardsma,22.0,430911.0,2004,SFG,1,NL,0.0,11,10.7,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,-0.01,0.00,-0.01,0.00,0.00,300000.0,Y,4.67092,4.67092,0.08651,4.67092,1.890,1.890,0.5000,0.5000,0.5000,0.5000,,0.000,0.000
aardsda01,David Aardsma,24.0,430911.0,2006,CHC,1,NL,3.0,43,53.0,-0.90,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.46,0.00,-0.4,-0.4,-0.4,0.0,-0.04,-0.04,-0.01,-0.04,-0.01,-0.04,0.00,,Y,4.85675,4.86675,0.09085,4.86457,1.912,1.913,0.4990,0.4990,0.5000,0.4998,-100.000000,0.694,0.896
aardsda01,David Aardsma,25.0,430911.0,2007,CHW,1,AL,0.0,2,32.3,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,387500.0,Y,4.85895,4.85895,0.08422,4.85895,1.912,1.912,0.5000,0.5000,0.5000,0.5000,,0.000,0.000
aardsda01,David Aardsma,26.0,430911.0,2008,BOS,1,AL,1.0,5,48.7,-0.29,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.14,0.00,-0.2,-0.2,-0.2,0.0,-0.02,-0.02,0.00,-0.02,0.00,-0.02,0.00,403250.0,Y,4.67400,4.70400,0.08092,4.69650,1.893,1.894,0.4970,0.4970,0.5000,0.4992,-100.000000,0.345,0.434
aardsda01,David Aardsma,27.0,430911.0,2009,SEA,1,AL,0.0,3,71.3,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,419000.0,Y,4.79788,4.79788,0.08302,4.79788,1.905,1.905,0.5000,0.5000,0.5000,0.5000,,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zwilldu01,Dutch Zwilling,26.0,124791.0,1915,CHI,1,FL,636.0,150,,28.08,-0.52,0.0,4.0,,,,,4.0,-3.95,0.00,10.44,38.1,27.6,23.6,0.1,3.17,2.65,0.06,4.14,0.06,3.62,0.97,,N,4.02777,3.87037,0.08398,3.80077,1.802,1.787,0.5210,0.5180,0.5000,0.4919,142.103386,199.044,188.238
zwilldu01,Dutch Zwilling,27.0,124791.0,1916,CHC,1,NL,59.0,35,,-5.43,0.07,0.0,-1.0,,,,,-1.0,-0.34,0.00,1.82,-4.9,-6.7,-5.7,-1.3,-0.86,-0.73,-0.17,-0.67,-0.17,-0.54,0.19,3250.0,N,3.26340,3.42625,0.06014,3.37428,1.719,1.727,0.4755,0.4791,0.4952,0.4934,7.343668,18.514,18.751
zychto01,Tony Zych,24.0,543964.0,2015,SEA,1,AL,0.0,0,18.3,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,,,,,,,,,Y,,4.30400,0.07510,,,,,,,,,0.000,0.000
zychto01,Tony Zych,25.0,543964.0,2016,SEA,1,AL,0.0,0,13.7,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,,,,,,,,511000.0,Y,,4.50306,0.07941,,,,,,,,,0.000,0.000


In [266]:
people.shape

(20543, 30)

Unnamed: 0,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,nameLast_r,nameFirst_r,key_mlbam,retroID_r,key_fangraphs,mlb_played_first,mlb_played_last,name_common,age,mlb_ID,player_ID,year_ID,team_ID,stint_ID,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg
0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,David Aardsma,22.0,430911.0,aardsda01,2004,SFG,1,NL,0.0,11,10.7,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,-0.01,0.00,-0.01,0.00,0.00,300000.0,Y,4.67092,4.67092,0.08651,4.67092,1.890,1.890,0.5000,0.5000,0.5000,0.5000,,0.000,0.000
1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,David Aardsma,24.0,430911.0,aardsda01,2006,CHC,1,NL,3.0,43,53.0,-0.90,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.46,0.00,-0.4,-0.4,-0.4,0.0,-0.04,-0.04,-0.01,-0.04,-0.01,-0.04,0.00,,Y,4.85675,4.86675,0.09085,4.86457,1.912,1.913,0.4990,0.4990,0.5000,0.4998,-100.000000,0.694,0.896
2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,David Aardsma,25.0,430911.0,aardsda01,2007,CHW,1,AL,0.0,2,32.3,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,387500.0,Y,4.85895,4.85895,0.08422,4.85895,1.912,1.912,0.5000,0.5000,0.5000,0.5000,,0.000,0.000
3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,David Aardsma,26.0,430911.0,aardsda01,2008,BOS,1,AL,1.0,5,48.7,-0.29,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.14,0.00,-0.2,-0.2,-0.2,0.0,-0.02,-0.02,0.00,-0.02,0.00,-0.02,0.00,403250.0,Y,4.67400,4.70400,0.08092,4.69650,1.893,1.894,0.4970,0.4970,0.5000,0.4992,-100.000000,0.345,0.434
4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,David Aardsma,27.0,430911.0,aardsda01,2009,SEA,1,AL,0.0,3,71.3,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,419000.0,Y,4.79788,4.79788,0.08302,4.79788,1.905,1.905,0.5000,0.5000,0.5000,0.5000,,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119718,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Dutch Zwilling,26.0,124791.0,zwilldu01,1915,CHI,1,FL,636.0,150,,28.08,-0.52,0.0,4.0,,,,,4.0,-3.95,0.00,10.44,38.1,27.6,23.6,0.1,3.17,2.65,0.06,4.14,0.06,3.62,0.97,,N,4.02777,3.87037,0.08398,3.80077,1.802,1.787,0.5210,0.5180,0.5000,0.4919,142.103386,199.044,188.238
119719,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Dutch Zwilling,27.0,124791.0,zwilldu01,1916,CHC,1,NL,59.0,35,,-5.43,0.07,0.0,-1.0,,,,,-1.0,-0.34,0.00,1.82,-4.9,-6.7,-5.7,-1.3,-0.86,-0.73,-0.17,-0.67,-0.17,-0.54,0.19,3250.0,N,3.26340,3.42625,0.06014,3.37428,1.719,1.727,0.4755,0.4791,0.4952,0.4934,7.343668,18.514,18.751
119720,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Tony Zych,24.0,543964.0,zychto01,2015,SEA,1,AL,0.0,0,18.3,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,,,,,,,,,Y,,4.30400,0.07510,,,,,,,,,0.000,0.000
119721,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Tony Zych,25.0,543964.0,zychto01,2016,SEA,1,AL,0.0,0,13.7,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,,,,,,,,511000.0,Y,,4.50306,0.07941,,,,,,,,,0.000,0.000


In [264]:
df

Unnamed: 0,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,nameLast_r,nameFirst_r,key_mlbam,retroID_r,key_fangraphs,mlb_played_first,mlb_played_last,name_common,age,mlb_ID,player_ID,year_ID,team_ID,stint_ID,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg
0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,David Aardsma,22.0,430911.0,aardsda01,2004,SFG,1,NL,0.0,11,10.7,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,-0.01,0.00,-0.01,0.00,0.00,300000.0,Y,4.67092,4.67092,0.08651,4.67092,1.890,1.890,0.5000,0.5000,0.5000,0.5000,,0.000,0.000
1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,David Aardsma,24.0,430911.0,aardsda01,2006,CHC,1,NL,3.0,43,53.0,-0.90,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.46,0.00,-0.4,-0.4,-0.4,0.0,-0.04,-0.04,-0.01,-0.04,-0.01,-0.04,0.00,,Y,4.85675,4.86675,0.09085,4.86457,1.912,1.913,0.4990,0.4990,0.5000,0.4998,-100.000000,0.694,0.896
2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,David Aardsma,25.0,430911.0,aardsda01,2007,CHW,1,AL,0.0,2,32.3,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,387500.0,Y,4.85895,4.85895,0.08422,4.85895,1.912,1.912,0.5000,0.5000,0.5000,0.5000,,0.000,0.000
3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,David Aardsma,26.0,430911.0,aardsda01,2008,BOS,1,AL,1.0,5,48.7,-0.29,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.14,0.00,-0.2,-0.2,-0.2,0.0,-0.02,-0.02,0.00,-0.02,0.00,-0.02,0.00,403250.0,Y,4.67400,4.70400,0.08092,4.69650,1.893,1.894,0.4970,0.4970,0.5000,0.4992,-100.000000,0.345,0.434
4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,David Aardsma,27.0,430911.0,aardsda01,2009,SEA,1,AL,0.0,3,71.3,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,419000.0,Y,4.79788,4.79788,0.08302,4.79788,1.905,1.905,0.5000,0.5000,0.5000,0.5000,,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119718,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Dutch Zwilling,26.0,124791.0,zwilldu01,1915,CHI,1,FL,636.0,150,,28.08,-0.52,0.0,4.0,,,,,4.0,-3.95,0.00,10.44,38.1,27.6,23.6,0.1,3.17,2.65,0.06,4.14,0.06,3.62,0.97,,N,4.02777,3.87037,0.08398,3.80077,1.802,1.787,0.5210,0.5180,0.5000,0.4919,142.103386,199.044,188.238
119719,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Dutch Zwilling,27.0,124791.0,zwilldu01,1916,CHC,1,NL,59.0,35,,-5.43,0.07,0.0,-1.0,,,,,-1.0,-0.34,0.00,1.82,-4.9,-6.7,-5.7,-1.3,-0.86,-0.73,-0.17,-0.67,-0.17,-0.54,0.19,3250.0,N,3.26340,3.42625,0.06014,3.37428,1.719,1.727,0.4755,0.4791,0.4952,0.4934,7.343668,18.514,18.751
119720,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Tony Zych,24.0,543964.0,zychto01,2015,SEA,1,AL,0.0,0,18.3,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,,,,,,,,,Y,,4.30400,0.07510,,,,,,,,,0.000,0.000
119721,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Tony Zych,25.0,543964.0,zychto01,2016,SEA,1,AL,0.0,0,13.7,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,,,,,,,,511000.0,Y,,4.50306,0.07941,,,,,,,,,0.000,0.000


In [140]:
bwar_bat.rename(columns={'player_ID': 'playerID'})
bwar_bat.set_index('playerID')

KeyError: "None of ['playerID'] are in the columns"

In [54]:
df.sort_values('BMI').head(25)

Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,name_common_x,age_x,mlb_ID_x,player_ID_x,year_ID_x,team_ID_x,stint_ID_x,lg_ID_x,G_x,GS,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg_x,runs_above_avg_adj,runs_above_rep_x,RpO_replacement,GR_leverage_index_avg,WAR_x,salary_x,teamRpG_x,oppRpG_x,pyth_exponent_x,waa_win_perc_x,WAA_x,WAA_adj,oppRpG_rep_x,pyth_exponent_rep_x,waa_win_perc_rep_x,WAR_rep_x,ERA_plus,ER_lg,name_common_y,age_y,mlb_ID_y,player_ID_y,year_ID_y,team_ID_y,stint_ID_y,lg_ID_y,PA,G_y,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep_y,runs_above_avg_y,runs_above_avg_off,runs_above_avg_def,WAA_y,WAA_off,WAA_def,WAR_y,WAR_def,WAR_off,WAR_rep_y,salary_y,pitcher,teamRpG_y,oppRpG_y,oppRpPA_rep,oppRpG_rep_y,pyth_exponent_y,pyth_exponent_rep_y,waa_win_perc_y,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep_y,OPS_plus,TOB_lg,TB_lg,KG,meters,BMI
107848,Cummings,Candy,112939,cummc101,cummica01,1002872,1872.0,1877.0,cummica01,1848.0,10.0,18.0,USA,MA,Ware,1924.0,5.0,16.0,USA,OH,Toledo,Candy,Cummings,William Arthur,120.0,69.0,R,R,1872-04-22,1877-08-18,cummc101,cummica01,Candy Cummings,26.0,112939.0,cummica01,1875.0,HAR,1.0,,48.0,47.0,1248.0,,,184.0,281.387,0.0,,18.544,105.0,,275.985,1610.0,0.5454,34.0,91.985,96.854,142.063,0.255,1.0,12.33,,5.7722,3.75441,1.901,0.6937,9.2976,-0.7948,6.82312,2.059,0.4147,3.8296,146.216216,108.2,Candy Cummings,27.0,112939.0,cummica01,1876.0,HAR,1.0,NL,105.0,24.0,,-10.54,0.0,0.0,0.0,,,,,0.0,0.01,1.76,0.0,-8.8,-8.8,-8.8,0.0,-0.82,-0.79,-0.01,-0.82,-0.01,-0.79,0.0,,Y,5.31506,5.68047,0.11716,5.53098,1.98,1.991,0.4671,0.4671,0.5,0.4867,12.969261,30.334,35.133,54.43104,1.7526,17.720706
107827,Cummings,Candy,112939,cummc101,cummica01,1002872,1872.0,1877.0,cummica01,1848.0,10.0,18.0,USA,MA,Ware,1924.0,5.0,16.0,USA,OH,Toledo,Candy,Cummings,William Arthur,120.0,69.0,R,R,1872-04-22,1877-08-18,cummc101,cummica01,Candy Cummings,23.0,112939.0,cummica01,1872.0,NYU,1.0,,55.0,55.0,1491.0,,,347.0,510.712,0.0,,34.844,89.0,,423.523,2173.0,0.9679,36.0,76.523,79.557,170.833,0.402,1.0,8.48,,9.09893,7.65243,2.233,0.5955,5.2525,-1.2312,10.82713,2.346,0.3994,4.4601,108.358434,179.875,Candy Cummings,24.0,112939.0,cummica01,1873.0,BAL,1.0,,197.0,42.0,,-10.76,0.54,0.0,0.0,,,,,0.0,-0.01,12.79,0.0,2.6,2.6,2.6,0.0,0.14,0.16,-0.01,0.14,-0.01,0.16,0.0,1800.0,Y,8.86156,8.8006,0.18628,8.64033,2.267,2.259,0.5039,0.5039,0.5,0.4896,60.823375,61.444,71.078,54.43104,1.7526,17.720706
107828,Cummings,Candy,112939,cummc101,cummica01,1002872,1872.0,1877.0,cummica01,1848.0,10.0,18.0,USA,MA,Ware,1924.0,5.0,16.0,USA,OH,Toledo,Candy,Cummings,William Arthur,120.0,69.0,R,R,1872-04-22,1877-08-18,cummc101,cummica01,Candy Cummings,23.0,112939.0,cummica01,1872.0,NYU,1.0,,55.0,55.0,1491.0,,,347.0,510.712,0.0,,34.844,89.0,,423.523,2173.0,0.9679,36.0,76.523,79.557,170.833,0.402,1.0,8.48,,9.09893,7.65243,2.233,0.5955,5.2525,-1.2312,10.82713,2.346,0.3994,4.4601,108.358434,179.875,Candy Cummings,25.0,112939.0,cummica01,1874.0,PHI,1.0,,231.0,54.0,,-14.92,-0.29,0.0,0.0,,,,,0.0,-0.06,10.57,0.0,-4.7,-4.7,-4.7,-0.1,-0.38,-0.35,-0.02,-0.38,-0.02,-0.35,0.0,,Y,7.23084,7.31788,0.15575,7.1717,2.145,2.142,0.4936,0.4936,0.4999,0.4892,53.654794,66.69,79.279,54.43104,1.7526,17.720706
107829,Cummings,Candy,112939,cummc101,cummica01,1002872,1872.0,1877.0,cummica01,1848.0,10.0,18.0,USA,MA,Ware,1924.0,5.0,16.0,USA,OH,Toledo,Candy,Cummings,William Arthur,120.0,69.0,R,R,1872-04-22,1877-08-18,cummc101,cummica01,Candy Cummings,23.0,112939.0,cummica01,1872.0,NYU,1.0,,55.0,55.0,1491.0,,,347.0,510.712,0.0,,34.844,89.0,,423.523,2173.0,0.9679,36.0,76.523,79.557,170.833,0.402,1.0,8.48,,9.09893,7.65243,2.233,0.5955,5.2525,-1.2312,10.82713,2.346,0.3994,4.4601,108.358434,179.875,Candy Cummings,26.0,112939.0,cummica01,1875.0,HAR,1.0,,224.0,53.0,,-12.34,0.46,0.0,-1.0,,,,,-1.0,-0.32,6.0,0.72,-6.5,-7.2,-6.2,-1.3,-0.67,-0.53,-0.14,-0.61,-0.14,-0.47,0.06,,Y,5.65522,5.7722,0.12232,5.62779,2.002,2.001,0.4881,0.4898,0.4978,0.4873,53.700935,60.995,71.759,54.43104,1.7526,17.720706
107830,Cummings,Candy,112939,cummc101,cummica01,1002872,1872.0,1877.0,cummica01,1848.0,10.0,18.0,USA,MA,Ware,1924.0,5.0,16.0,USA,OH,Toledo,Candy,Cummings,William Arthur,120.0,69.0,R,R,1872-04-22,1877-08-18,cummc101,cummica01,Candy Cummings,23.0,112939.0,cummica01,1872.0,NYU,1.0,,55.0,55.0,1491.0,,,347.0,510.712,0.0,,34.844,89.0,,423.523,2173.0,0.9679,36.0,76.523,79.557,170.833,0.402,1.0,8.48,,9.09893,7.65243,2.233,0.5955,5.2525,-1.2312,10.82713,2.346,0.3994,4.4601,108.358434,179.875,Candy Cummings,27.0,112939.0,cummica01,1876.0,HAR,1.0,NL,105.0,24.0,,-10.54,0.0,0.0,0.0,,,,,0.0,0.01,1.76,0.0,-8.8,-8.8,-8.8,0.0,-0.82,-0.79,-0.01,-0.82,-0.01,-0.79,0.0,,Y,5.31506,5.68047,0.11716,5.53098,1.98,1.991,0.4671,0.4671,0.5,0.4867,12.969261,30.334,35.133,54.43104,1.7526,17.720706
107831,Cummings,Candy,112939,cummc101,cummica01,1002872,1872.0,1877.0,cummica01,1848.0,10.0,18.0,USA,MA,Ware,1924.0,5.0,16.0,USA,OH,Toledo,Candy,Cummings,William Arthur,120.0,69.0,R,R,1872-04-22,1877-08-18,cummc101,cummica01,Candy Cummings,23.0,112939.0,cummica01,1872.0,NYU,1.0,,55.0,55.0,1491.0,,,347.0,510.712,0.0,,34.844,89.0,,423.523,2173.0,0.9679,36.0,76.523,79.557,170.833,0.402,1.0,8.48,,9.09893,7.65243,2.233,0.5955,5.2525,-1.2312,10.82713,2.346,0.3994,4.4601,108.358434,179.875,Candy Cummings,28.0,112939.0,cummica01,1877.0,CIN,1.0,NL,74.0,19.0,,-1.86,0.0,0.0,0.0,,,,,0.0,-0.19,3.42,0.0,1.4,1.4,1.4,-0.2,0.12,0.13,-0.02,0.12,-0.02,0.13,0.0,,Y,5.40745,5.33535,0.10709,5.20226,1.967,1.957,0.5066,0.5066,0.4991,0.4876,70.934129,20.609,22.729,54.43104,1.7526,17.720706
107832,Cummings,Candy,112939,cummc101,cummica01,1002872,1872.0,1877.0,cummica01,1848.0,10.0,18.0,USA,MA,Ware,1924.0,5.0,16.0,USA,OH,Toledo,Candy,Cummings,William Arthur,120.0,69.0,R,R,1872-04-22,1877-08-18,cummc101,cummica01,Candy Cummings,24.0,112939.0,cummica01,1873.0,BAL,1.0,,42.0,42.0,1146.0,,,292.0,365.448,0.0,,11.594,102.0,,360.931,1697.0,0.7246,16.0,68.931,72.017,139.042,0.388,1.0,7.79,1800.0,8.8006,7.08591,2.199,0.6169,4.9098,-0.6558,10.46067,2.323,0.401,3.5397,123.727731,147.236,Candy Cummings,23.0,112939.0,cummica01,1872.0,NYU,1.0,,253.0,55.0,,-14.77,-0.08,0.0,0.0,,,,,0.0,-0.15,7.54,0.0,-7.5,-7.5,-7.5,-0.2,-0.51,-0.47,-0.02,-0.51,-0.02,-0.47,0.0,,Y,8.96329,9.09893,0.19447,8.94174,2.281,2.281,0.4914,0.4914,0.4998,0.4901,56.033704,73.117,84.336,54.43104,1.7526,17.720706
107833,Cummings,Candy,112939,cummc101,cummica01,1002872,1872.0,1877.0,cummica01,1848.0,10.0,18.0,USA,MA,Ware,1924.0,5.0,16.0,USA,OH,Toledo,Candy,Cummings,William Arthur,120.0,69.0,R,R,1872-04-22,1877-08-18,cummc101,cummica01,Candy Cummings,24.0,112939.0,cummica01,1873.0,BAL,1.0,,42.0,42.0,1146.0,,,292.0,365.448,0.0,,11.594,102.0,,360.931,1697.0,0.7246,16.0,68.931,72.017,139.042,0.388,1.0,7.79,1800.0,8.8006,7.08591,2.199,0.6169,4.9098,-0.6558,10.46067,2.323,0.401,3.5397,123.727731,147.236,Candy Cummings,24.0,112939.0,cummica01,1873.0,BAL,1.0,,197.0,42.0,,-10.76,0.54,0.0,0.0,,,,,0.0,-0.01,12.79,0.0,2.6,2.6,2.6,0.0,0.14,0.16,-0.01,0.14,-0.01,0.16,0.0,1800.0,Y,8.86156,8.8006,0.18628,8.64033,2.267,2.259,0.5039,0.5039,0.5,0.4896,60.823375,61.444,71.078,54.43104,1.7526,17.720706
107834,Cummings,Candy,112939,cummc101,cummica01,1002872,1872.0,1877.0,cummica01,1848.0,10.0,18.0,USA,MA,Ware,1924.0,5.0,16.0,USA,OH,Toledo,Candy,Cummings,William Arthur,120.0,69.0,R,R,1872-04-22,1877-08-18,cummc101,cummica01,Candy Cummings,24.0,112939.0,cummica01,1873.0,BAL,1.0,,42.0,42.0,1146.0,,,292.0,365.448,0.0,,11.594,102.0,,360.931,1697.0,0.7246,16.0,68.931,72.017,139.042,0.388,1.0,7.79,1800.0,8.8006,7.08591,2.199,0.6169,4.9098,-0.6558,10.46067,2.323,0.401,3.5397,123.727731,147.236,Candy Cummings,25.0,112939.0,cummica01,1874.0,PHI,1.0,,231.0,54.0,,-14.92,-0.29,0.0,0.0,,,,,0.0,-0.06,10.57,0.0,-4.7,-4.7,-4.7,-0.1,-0.38,-0.35,-0.02,-0.38,-0.02,-0.35,0.0,,Y,7.23084,7.31788,0.15575,7.1717,2.145,2.142,0.4936,0.4936,0.4999,0.4892,53.654794,66.69,79.279,54.43104,1.7526,17.720706
107835,Cummings,Candy,112939,cummc101,cummica01,1002872,1872.0,1877.0,cummica01,1848.0,10.0,18.0,USA,MA,Ware,1924.0,5.0,16.0,USA,OH,Toledo,Candy,Cummings,William Arthur,120.0,69.0,R,R,1872-04-22,1877-08-18,cummc101,cummica01,Candy Cummings,24.0,112939.0,cummica01,1873.0,BAL,1.0,,42.0,42.0,1146.0,,,292.0,365.448,0.0,,11.594,102.0,,360.931,1697.0,0.7246,16.0,68.931,72.017,139.042,0.388,1.0,7.79,1800.0,8.8006,7.08591,2.199,0.6169,4.9098,-0.6558,10.46067,2.323,0.401,3.5397,123.727731,147.236,Candy Cummings,26.0,112939.0,cummica01,1875.0,HAR,1.0,,224.0,53.0,,-12.34,0.46,0.0,-1.0,,,,,-1.0,-0.32,6.0,0.72,-6.5,-7.2,-6.2,-1.3,-0.67,-0.53,-0.14,-0.61,-0.14,-0.47,0.06,,Y,5.65522,5.7722,0.12232,5.62779,2.002,2.001,0.4881,0.4898,0.4978,0.4873,53.700935,60.995,71.759,54.43104,1.7526,17.720706


In [97]:
df = df[df['year_ID_y'] >= 1921]
# Throw out dead ball era

 7.48       1
-2.80       1
 7.66       1
-3.01       2
 8.90       2
         ... 
 0.02    2909
 0.04    3074
 0.01    3105
-0.02    3118
 0.03    3339
Name: WAR_x, Length: 1168, dtype: int64

### Set constants we'll need

### Squish everything into one mondo DF

In [121]:
df_pitchers = pd.concat([df_people, df_pitching], join='inner', axis=1)

In [129]:
frames = [df_batters, df_pitchers]

In [130]:
for player in frames:
    player['KG'] = player['weight'] * KG_TO_LB
    player['meters'] = player['height'] * M_TO_IN
    player['BMI'] = player['KG'] / player['meters'] ** 2

In [131]:
df_batters.dtypes

playerID         object
birthYear       float64
birthMonth      float64
birthDay        float64
birthCountry     object
birthState       object
birthCity        object
deathYear       float64
deathMonth      float64
deathDay        float64
deathCountry     object
deathState       object
deathCity        object
nameFirst        object
nameLast         object
nameGiven        object
weight          float64
height          float64
bats             object
throws           object
debut            object
finalGame        object
retroID          object
bbrefID          object
playerID         object
yearID            int64
stint             int64
teamID           object
lgID             object
G                 int64
AB                int64
R                 int64
H                 int64
2B                int64
3B                int64
HR                int64
RBI             float64
SB              float64
CS              float64
BB                int64
SO              float64
IBB             

In [132]:
df_huskiesBatters = df_batters.loc[df_batters.BMI >= 34.55]

In [133]:
df_huskiesBatters.sort_values('BMI').describe()

Unnamed: 0,birthYear,birthMonth,birthDay,deathYear,deathMonth,deathDay,weight,height,yearID,stint,...,BB,SO,IBB,HBP,SH,SF,GIDP,KG,meters,BMI
count,26.0,25.0,25.0,5.0,5.0,5.0,26.0,26.0,26.0,26.0,...,26.0,22.0,0.0,24.0,17.0,0.0,1.0,26.0,26.0,26.0
mean,1969.153846,5.8,17.52,1952.4,7.6,8.6,269.961538,72.115385,1901.730769,1.115385,...,9.884615,14.909091,,0.875,6.0,,0.0,122.452394,1.831731,36.408324
std,40.878055,3.316625,8.529947,49.45503,3.646917,7.602631,30.1801,4.348121,14.17761,0.325813,...,16.310308,13.606403,,1.650099,8.951257,,,13.689452,0.110442,1.728873
min,1853.0,1.0,1.0,1891.0,2.0,2.0,155.0,55.0,1872.0,1.0,...,0.0,0.0,,0.0,0.0,,0.0,70.30676,1.397,34.622243
25%,1977.0,4.0,11.0,1915.0,6.0,2.0,261.25,71.0,1890.25,1.0,...,1.0,3.25,,0.0,0.0,,0.0,118.50091,1.8034,34.891189
50%,1983.5,6.0,18.0,1966.0,9.0,6.0,270.0,72.0,1903.5,1.0,...,3.5,11.0,,0.0,2.0,,0.0,122.46984,1.8288,35.875979
75%,1989.0,8.0,24.0,1975.0,10.0,14.0,283.75,75.0,1912.0,1.0,...,9.25,25.25,,1.0,7.0,,0.0,128.70673,1.905,37.928366
max,1998.0,12.0,30.0,2015.0,11.0,19.0,320.0,78.0,1924.0,2.0,...,68.0,43.0,,6.0,33.0,,0.0,145.14944,1.9812,40.292666


In [134]:
df_huskiesPitchers = df[df_pitchers.BMI > 34.55]

NameError: name 'df' is not defined

In [None]:
df_huskiesPitchers.head()

In [None]:
df_huskies = pd.merge(df_huskiesBatters, df_huskiesPitchers, how='right', on='playerID')

In [None]:
df_huskies.columns

In [None]:
sns.jointplot(data=df_simple, x="height", y="weight", kind = "reg", truncate = False)

In [None]:
sns.choose_diverging_palette()

In [None]:

# Compute the correlation matrix
corr = df_huskiesBatters.corr(method="spearman")

# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=bool))

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(290, 10, n=40, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(
    corr,
    mask=mask,
    cmap=cmap,
    vmax=1,
    center=0,
    square=True,
    linewidths=0.25,
    cbar_kws={"shrink": .5},
)



In [None]:
corr_mat = df.corr().stack().reset_index(name="correlation")