In [72]:
import numpy as np
import pandas as pd
import pybaseball
import seaborn as sns
import matplotlib.pyplot as plot
from deepdiff import DeepDiff
from pybaseball import bwar_pitch
from pybaseball import bwar_bat
from pybaseball import cache
from pybaseball.lahman import *
from pybaseball import chadwick_register


In [73]:
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 100)

In [74]:
# Constants
KG_TO_LB = 0.453592
M_TO_IN = 0.0254


In [75]:
chadwick = chadwick_register(save=True)
chadwick = pd.DataFrame(chadwick)
download_lahman()
### Read infinity baseball data to DataFrames
# a table of all player biographical info and ids
people = pd.DataFrame(people())

# park id, name, alias, city, state, and country
parks = pd.DataFrame(parks())

# all star roster data: player, year, team, league, position
allstar = pd.DataFrame(all_star_full())

# each player's games played per position for each season
appearances = pd.DataFrame(appearances())

# batting stats by year, regular season
batting = pd.DataFrame(batting())

# batting stats by year, post season
batting_post = pd.DataFrame(batting_post())

# fielding stats by year
fielding = pd.DataFrame(fielding())

# games played in left, center, right field
fielding_of = pd.DataFrame(fielding_of())

# LF/CF/RF splits
fielding_of_split = pd.DataFrame(fielding_of_split())

# postseason fielding
fielding_post = pd.DataFrame(fielding_post())

# home game attendance by park by year
home_games = pd.DataFrame(home_games())

# historical player pitching stats
pitching = pd.DataFrame(pitching())

# postseason pitching stats
pitching_post = pd.DataFrame(pitching_post())

# playoff series winners and losers
series_post = pd.DataFrame(series_post())

# data on teams by year: record, division, stadium, attendance, etc
teams = pd.DataFrame(teams())

# current and historical franchises, whether they're still active, and their ids
teams_franchises = pd.DataFrame(teams_franchises())

# split season data for teams
teams_half = pd.DataFrame(teams_half())

# fangraphs batting since 2008
fangraphs_batting = pd.DataFrame(
    pybaseball.batting_stats_range(start_dt="2008-01-01", end_dt="2021-12-31")
)

# fangraphs pitching since 2008
fangraphs_pitching = pd.DataFrame(
    pybaseball.pitching_stats_range(start_dt="2008-01-01", end_dt="2021-12-31")
)

# fangraphs team pitching since 2008
fangraphs_team_pitching = pd.DataFrame(
    pybaseball.team_pitching(start_season="2008", end_season="2021")
)

# fangraphs team batting since 2008
fangraphs_team_batting = pd.DataFrame(
    pybaseball.team_batting(start_season="2008", end_season="2021")
)

# fangraphs team fielding since 2008
fangraphs_team_fielding = pd.DataFrame(
    pybaseball.team_fielding(start_season="2008", end_season="2021")
)

# bref pitching WAR
bwar_pitch = pd.DataFrame(bwar_pitch(return_all=True))

# bref batting WAR
bwar_bat = pd.DataFrame(bwar_bat(return_all=True))

  table = table.drop('', 1)


In [76]:
### Add BMI Calcs
# BMI Calculations
people["KG"] = people["weight"] * KG_TO_LB
people["meters"] = people["height"] * M_TO_IN
people["BMI"] = people["KG"] / people["meters"] ** 2
people["ratio"] = people["meters"] * people["BMI"]


## Assemble Monster DataFrame of Everything About Player Ever


In [77]:
# Let's change some of these column names to save ourselves some merging hassle
chadwick = chadwick.rename(
    columns={
        "name_last": "nameLast",
        "name_first": "nameFirst",
        "key_fangraphs": "fangraphsID",
        "key_bbref": "playerID",
        "key_retro": "retroID",
        "key_mlbam": "mlbID",
    }
)

# Merge chadwick into people
df = people.merge(
    chadwick,
    left_on=["playerID", "retroID", "nameLast", "nameFirst"],
    right_on=["playerID", "retroID", "nameLast", "nameFirst"],
    how="outer",
)

In [78]:
### Add BWAR Stats
#### Add BWAR Batting
##### BWAR Batting Prep
bwar_bat.columns
bwar_bat = bwar_bat.rename(columns={
    'player_ID': 'playerID',
    'mlb_ID': 'mlbID',
    'stint_ID': 'stint'
})

##### BWAR Batting Merge
df = df.merge(
    bwar_bat, left_on=["playerID", "mlbID"], right_on=["playerID", "mlbID"], how="outer"
)


In [79]:
#### Add BWAR Pitching
##### BWAR Pitching Prep

# merge bwar_pitch, check
bwar_pitch = bwar_pitch.rename(
    columns={"stint_ID": "stint", "mlb_ID": "mlbID", "player_ID": "playerID"}
)

##### BWAR Pitching Merge
print(df.columns.tolist())
df = df.merge(
    bwar_pitch,
    left_on=[
        "G",
        "WAA",
        "WAR",
        "WAR_rep",
        "age",
        "lg_ID",
        "mlbID",
        "oppRpG",
        "oppRpG_rep",
        "playerID",
        "pyth_exponent",
        "pyth_exponent_rep",
        "runs_above_avg",
        "runs_above_rep",
        "salary",
        "stint",
        "teamRpG",
        "team_ID",
        "waa_win_perc",
        "waa_win_perc_rep",
        "year_ID",
    ],
    right_on=[
        "G",
        "WAA",
        "WAR",
        "WAR_rep",
        "age",
        "lg_ID",
        "mlbID",
        "oppRpG",
        "oppRpG_rep",
        "playerID",
        "pyth_exponent",
        "pyth_exponent_rep",
        "runs_above_avg",
        "runs_above_rep",
        "salary",
        "stint",
        "teamRpG",
        "team_ID",
        "waa_win_perc",
        "waa_win_perc_rep",
        "year_ID",
    ],
    how="outer",
)

['playerID', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry', 'birthState', 'birthCity', 'deathYear', 'deathMonth', 'deathDay', 'deathCountry', 'deathState', 'deathCity', 'nameFirst', 'nameLast', 'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame', 'retroID', 'bbrefID', 'KG', 'meters', 'BMI', 'ratio', 'mlbID', 'fangraphsID', 'mlb_played_first', 'mlb_played_last', 'name_common', 'age', 'year_ID', 'team_ID', 'stint', 'lg_ID', 'PA', 'G', 'Inn', 'runs_bat', 'runs_br', 'runs_dp', 'runs_field', 'runs_infield', 'runs_outfield', 'runs_catcher', 'runs_good_plays', 'runs_defense', 'runs_position', 'runs_position_p', 'runs_replacement', 'runs_above_rep', 'runs_above_avg', 'runs_above_avg_off', 'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def', 'WAR', 'WAR_def', 'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG', 'oppRpG', 'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent', 'pyth_exponent_rep', 'waa_win_perc', 'waa_win_perc_off', 'waa_win_perc_def', 'waa_win_perc_rep', 'OPS_p

In [80]:
bwar_pitch.sample(10)

Unnamed: 0,name_common,age,mlbID,playerID,year_ID,team_ID,stint,lg_ID,G,GS,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg,runs_above_avg_adj,runs_above_rep,RpO_replacement,GR_leverage_index_avg,WAR,salary,teamRpG,oppRpG,pyth_exponent,waa_win_perc,WAA,WAA_adj,oppRpG_rep,pyth_exponent_rep,waa_win_perc_rep,WAR_rep,ERA_plus,ER_lg
23707,Syl Johnson,34.0,116630.0,johnssy01,1935,PHI,1,NL,37,18,524,436.0,88.0,79,93.51,0.0,,-7.113,113,113.054,113.758,602.0,0.1166,-61.0,34.758,33.535,50.201,0.206,1.9368,5.2,,4.74704,3.84069,1.846,0.5966,3.5742,0.1406,5.16525,1.923,0.4595,1.4824,127.972464,88.301
36920,Oliver Perez,22.0,424144.0,perezol01,2004,PIT,1,NL,30,30,588,588.0,0.0,71,104.221,4.044,,-2.275,99,99.327,109.796,476.0,0.1063,-21.4,38.796,38.071,56.051,0.203,1.0,5.72,321000.0,4.67092,3.40188,1.813,0.6399,4.197,-0.1356,5.24637,1.923,0.4444,1.6604,143.358462,93.183
33176,Yellow Horse Morris,27.0,,morriye01,1929,CAG,0,NNL,14,12,328,,,31,66.65,0.0,,,100,,66.65,,,,35.65,37.701,47.668,0.233,1.0,4.86,,5.27052,2.57759,1.799,0.7836,3.9704,-0.16,6.13901,2.001,0.4243,1.0487,217.3375,52.161
7569,Spoon Carter,33.0,,cartesp01,1936,PC,0,NN2,9,3,105,,,30,22.715,0.0,,0.111,100,,22.604,117.0,0.0618,1.8,-7.396,-6.878,-3.042,0.263,1.0,-0.26,,5.96508,6.7293,2.063,0.4381,-0.5571,-0.0528,6.44632,2.05,0.4603,0.3476,70.07037,18.919
39549,Alex Reyes,26.0,621052.0,reyesal02,2021,STL,1,NL,69,0,217,0.0,217.0,32,36.487,-2.379,4.352,2.93,92,92.233,32.77,168.0,0.0407,72.0,0.77,0.947,7.667,0.202,1.5593,0.64,900000.0,4.57192,4.5582,1.878,0.5014,0.0966,-0.1731,4.67269,1.885,0.4897,0.7123,121.553846,31.604
34107,Jeff Nelson,39.0,119704.0,nelsoje01,2006,CHW,1,AL,6,0,8,0.0,8.0,1,1.892,-0.141,,-0.057,104,101.067,1.827,8.0,0.0017,-33.7,0.827,0.805,1.13,0.221,1.12,0.11,,4.92028,4.78611,1.911,0.5132,0.0792,-0.0022,4.97107,1.922,0.4951,0.0286,157.6,1.576
1801,Homer Bailey,32.0,456701.0,baileho02,2018,CIN,1,NL,20,20,319,319.0,0.0,82,53.509,2.349,,-0.952,103,101.684,57.767,384.0,0.0865,-11.0,-24.233,-22.838,-15.158,0.189,1.0,-1.35,21000000.0,4.32337,5.46527,1.916,0.3896,-2.208,-0.083,4.77443,1.876,0.4536,0.9373,68.348611,49.211
41064,Randy Rosario,25.0,600968.0,rosarra01,2019,KCR,2,AL,6,0,11,0.0,11.0,1,2.348,-0.153,,-0.03,103,102.429,2.279,11.0,0.0025,-12.0,1.279,1.281,1.678,0.221,1.3433,0.18,,4.95606,4.74256,1.911,0.521,0.126,0.0126,5.02345,1.926,0.4935,0.0373,,2.107
39199,Addison Reed,25.0,592665.0,reedad01,2014,ARI,1,NL,62,0,178,0.0,178.0,31,26.726,-2.015,,0.728,102,102.254,24.524,167.0,0.0383,19.0,-6.476,-6.181,-1.802,0.175,1.6377,-0.51,538500.0,3.99035,4.09004,1.814,0.4888,-0.6944,-0.3619,4.06689,1.812,0.4914,0.5423,88.571429,24.8
29955,Brandon Maurer,23.0,543506.0,maurebr01,2014,SEA,1,AL,38,7,209,97.0,112.0,39,32.363,-0.721,,1.096,95,94.073,28.736,227.0,0.0548,20.0,-10.264,-11.115,-3.939,0.184,0.7919,-0.55,504600.0,4.13265,4.42515,1.844,0.4685,-1.197,-0.0526,4.29968,1.836,0.4818,0.7021,78.491667,28.257


In [81]:
#### Add Fielding_OF
##### Fielding_OF Prep
fielding_of.columns
fielding_of = fielding_of.rename(columns={"yearID": "year_ID"})
##### Fielding_OF Merge
df = df.merge(
    fielding_of,
    left_on=["playerID", "year_ID", "stint"],
    right_on=["playerID", "year_ID", "stint"],
    how="left",
)

In [82]:
#### Add Fielding
##### Fielding Check
fielding.columns
fielding = fielding.rename(columns={"yearID": "year_ID"})
##### Fielding Merge
df = df.merge(
    fielding,
    left_on=["playerID", "year_ID", "G", "stint"],
    right_on=["playerID", "year_ID", "G", "stint"],
    how="left",
)

In [34]:
### Meet the Monster
df.sample(25)  # works to here

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,KG,meters,BMI,ratio,mlbID,fangraphsID,mlb_played_first,mlb_played_last,name_common_x,age,year_ID,team_ID,stint,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg,name_common_y,GS_x,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg_adj,RpO_replacement,GR_leverage_index_avg,WAA_adj,ERA_plus,ER_lg,Glf,Gcf,Grf,teamID,lgID,POS,GS_y,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
54319,koplomi01,1976.0,8.0,30.0,USA,PA,Philadelphia,,,,,,,Mike,Koplove,Michael Paul,165.0,72.0,R,R,2001-09-06,2007-09-26,koplm001,koplomi01,74.84268,1.8288,22.377775,40.924475,407377.0,63.0,2001.0,2007.0,Mike Koplove,24.0,2001.0,ARI,1.0,NL,1.0,9.0,10.0,-0.3,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.14,0.0,-0.2,-0.2,-0.2,0.0,-0.02,-0.02,-0.01,-0.02,-0.01,-0.02,0.0,,Y,4.71582,4.73359,0.08856,4.73013,1.897,1.897,0.4982,0.4982,0.5,0.4997,-100.0,0.35,0.455,,,,,,,,,,,,,,,,,,,,,,,,,,ARI,NL,P,0.0,30.0,0.0,1.0,0.0,0.0,,,,,
154792,mcdanbo01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,28.0,1942.0,KCM,0.0,NAL,,10.0,,,,,,,,,,,,,,14.179,8.823,,,1.369,,,1.7,,,0.4758,,,4.96332,3.66212,,5.50299,1.848,1.953,0.6369,,,0.4498,,,,Booker McDaniel,4.0,177.0,,,18.0,26.823,0.0,,,100.0,,26.823,,,,13.012,0.215,1.0,-0.1415,159.5875,25.534,,,,,,,,,,,,,,,,,
2162,anderjo01,1873.0,12.0,14.0,Norway,,Sarpsborg,1949.0,7.0,23.0,USA,MA,Worcester,John,Anderson,John Joseph,180.0,74.0,B,R,1894-09-08,1908-10-02,andej101,anderjo01,81.64656,1.8796,23.110376,43.438263,110244.0,1000232.0,1894.0,1908.0,John Anderson,28.0,1902.0,SLB,1.0,AL,543.0,126.0,,-2.53,-0.72,0.0,-3.0,,,,,-3.0,-3.65,0.0,19.38,9.5,-9.9,-6.9,-6.7,-1.02,-0.6,-0.64,0.89,-0.64,1.31,1.91,3500.0,N,4.86686,4.92162,0.08903,4.76781,1.916,1.91,0.4923,0.4946,0.4949,0.4848,94.870143,185.443,198.858,,,,,,,,,,,,,,,,,,,,,,,2.0,0.0,1.0,SLA,AL,1B,,3351.0,1361.0,47.0,22.0,78.0,,,,,
129256,bresnro01,,,,,,,,,,,,,,,,,,,,,,,,,,,,111433.0,,,,,31.0,1910.0,STL,1.0,NL,,1.0,,,,,,,,,,,,,,0.611,0.36,,,0.0397,,,0.07,,,0.0287,,,4.04737,3.70337,,4.30277,1.793,1.831,0.5397,,,0.472,,,,Roger Bresnahan,0.0,10.0,0.0,10.0,1.0,1.199,0.0,,-0.189,98.0,98.0,1.36,15.0,0.0032,-59.0,0.344,0.176,1.0,-0.0027,,1.268,1.0,1.0,0.0,SLN,NL,P,0.0,9.0,0.0,3.0,0.0,1.0,,,,,
129353,briceau01,,,,,,,,,,,,,,,,,,,,,,,,,,,,592169.0,,,,,27.0,2019.0,MIA,1.0,NL,,36.0,,,,,,,,,,,,,,5.192,1.03,,,0.1692,,,0.47,,,0.4082,555000.0,,4.72014,4.67334,,4.8375,1.893,1.903,0.5047,,,0.4883,,,,Austin Brice,0.0,134.0,0.0,134.0,21.0,24.451,-1.658,,0.184,97.0,97.437,22.03,128.0,0.0307,6.0,1.685,0.207,0.88,-0.103,125.435294,21.324,,,,MIA,NL,P,0.0,134.0,2.0,2.0,0.0,0.0,,,,,
29608,faatzja01,1859.0,10.0,24.0,USA,NY,Weedsport,1923.0,4.0,10.0,USA,NY,Syracuse,Jay,Faatz,Jacob S.,196.0,76.0,R,R,1884-08-22,1890-09-10,faatj101,faatzja01,88.904032,1.9304,23.857605,46.05472,113964.0,1003888.0,1884.0,1890.0,Jay Faatz,30.0,1890.0,BUF,1.0,PL,128.0,32.0,,-6.28,-0.27,0.0,-3.0,,,,,-3.0,-0.03,0.0,3.07,-6.5,-9.6,-6.6,-3.0,-0.71,-0.5,-0.19,-0.42,-0.19,-0.21,0.29,,N,6.80505,7.01068,0.14514,6.91468,2.114,2.118,0.4772,0.4843,0.4929,0.4927,52.151971,44.57,41.858,,,,,,,,,,,,,,,,,,,,,,,,,,BFP,PL,1B,,765.0,312.0,7.0,6.0,18.0,,,,,
39850,hackst01,1909.0,12.0,6.0,USA,CA,Sacramento,1979.0,12.0,15.0,USA,IL,Dixon,Stan,Hack,Stanley Camfield,170.0,72.0,L,R,1932-04-12,1947-09-24,hacks101,hackst01,77.11064,1.8288,23.055889,42.164611,115283.0,1005183.0,1932.0,1947.0,Stan Hack,37.0,1947.0,CHC,1.0,NL,283.0,76.0,,0.73,-2.65,0.16,3.0,,,,,3.0,0.03,0.0,11.13,12.4,1.3,-1.7,3.0,0.11,-0.18,0.34,1.23,0.34,0.94,1.12,,N,4.60888,4.63164,0.07677,4.48519,1.885,1.877,0.5017,0.4977,0.5041,0.4849,93.327589,96.552,95.76,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
87426,saltzja01,1903.0,1.0,23.0,USA,IA,Croton,1978.0,2.0,1.0,USA,IA,Keokuk,Jack,Saltzgaver,Otto Hamlin,165.0,71.0,L,R,1932-04-12,1945-09-30,saltj101,saltzja01,74.84268,1.8034,23.012574,41.500876,121641.0,1011388.0,1932.0,1945.0,Jack Saltzgaver,42.0,1945.0,PIT,1.0,NL,125.0,52.0,,2.16,-0.09,0.2,-1.0,,,,,-1.0,1.46,0.0,3.98,6.7,2.7,3.7,0.5,0.28,0.39,0.07,0.68,0.07,0.79,0.4,,N,4.52368,4.45195,0.08053,4.37544,1.869,1.86,0.5055,0.5075,0.5009,0.4919,114.832433,43.538,44.881,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
37766,graperi01,1958.0,4.0,16.0,USA,IA,Linn Grove,,,,,,,Rick,Grapenthin,Richard Ray,205.0,74.0,R,R,1983-05-03,1985-06-29,grapr001,graperi01,92.98636,1.8796,26.320151,49.471356,115050.0,1004954.0,1983.0,1985.0,Rick Grapenthin,27.0,1985.0,MON,1.0,NL,1.0,5.0,7.0,0.45,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.11,0.0,0.6,0.6,0.6,0.0,0.06,0.06,0.0,0.06,0.0,0.06,0.0,,Y,4.16556,4.05356,0.07022,4.04673,1.823,1.815,0.5124,0.5124,0.5,0.4992,475.378722,0.321,0.379,,,,,,,,,,,,,,,,,,,,,,,,,,MON,NL,P,0.0,21.0,0.0,1.0,0.0,0.0,,,,,
82366,reiniza01,1993.0,1.0,28.0,USA,TX,San Antonio,,,,,,,Zac,Reininger,Zachary Ryan,190.0,75.0,B,R,2017-08-27,2019-09-26,reinz001,reiniza01,86.18248,1.905,23.748109,45.240147,643617.0,15285.0,2017.0,2019.0,Zac Reininger,26.0,2019.0,DET,1.0,AL,0.0,5.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,,Y,4.95606,4.95606,0.09007,4.95606,1.923,1.923,0.5,0.5,0.5,0.5,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,



# OPEN TRYOUTS ARE HERE


In [35]:
df_save = df

In [71]:
df_pitchers

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,KG,meters,BMI,ratio,mlbID,fangraphsID,mlb_played_first,mlb_played_last,name_common_x,age,year_ID,team_ID,stint,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg,name_common_y,GS_x,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg_adj,RpO_replacement,GR_leverage_index_avg,WAA_adj,ERA_plus,ER_lg,Glf,Gcf,Grf,teamID,lgID,POS,GS_y,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
0,aardsda01,1981.0,12.0,27.0,USA,CO,Denver,,,,,,,David,Aardsma,David Allan,215.0,75.0,R,R,2004-04-06,2015-08-23,aardd001,aardsda01,97.52228,1.905,26.87286,51.192798,430911.0,1902.0,2004.0,2015.0,David Aardsma,22.0,2004.0,SFG,1.0,NL,0.0,11.0,10.7,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.00,0.00,-0.01,0.00,-0.01,0.00,0.0,300000.0,Y,4.67092,4.67092,0.08651,4.67092,1.890,1.890,0.5000,0.5000,0.5,0.5000,,0.000,0.000,,,,,,,,,,,,,,,,,,,,,,,,,,SFN,NL,P,0.0,32.0,0.0,0.0,0.0,0.0,,,,,
1,aardsda01,1981.0,12.0,27.0,USA,CO,Denver,,,,,,,David,Aardsma,David Allan,215.0,75.0,R,R,2004-04-06,2015-08-23,aardd001,aardsda01,97.52228,1.905,26.87286,51.192798,430911.0,1902.0,2004.0,2015.0,David Aardsma,24.0,2006.0,CHC,1.0,NL,3.0,43.0,53.0,-0.90,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.46,0.0,-0.4,-0.4,-0.4,0.0,-0.04,-0.04,-0.01,-0.04,-0.01,-0.04,0.0,,Y,4.85675,4.86675,0.09085,4.86457,1.912,1.913,0.4990,0.4990,0.5,0.4998,-100.000000,0.694,0.896,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,aardsda01,1981.0,12.0,27.0,USA,CO,Denver,,,,,,,David,Aardsma,David Allan,215.0,75.0,R,R,2004-04-06,2015-08-23,aardd001,aardsda01,97.52228,1.905,26.87286,51.192798,430911.0,1902.0,2004.0,2015.0,David Aardsma,25.0,2007.0,CHW,1.0,AL,0.0,2.0,32.3,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.0,387500.0,Y,4.85895,4.85895,0.08422,4.85895,1.912,1.912,0.5000,0.5000,0.5,0.5000,,0.000,0.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,aardsda01,1981.0,12.0,27.0,USA,CO,Denver,,,,,,,David,Aardsma,David Allan,215.0,75.0,R,R,2004-04-06,2015-08-23,aardd001,aardsda01,97.52228,1.905,26.87286,51.192798,430911.0,1902.0,2004.0,2015.0,David Aardsma,26.0,2008.0,BOS,1.0,AL,1.0,5.0,48.7,-0.29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.14,0.0,-0.2,-0.2,-0.2,0.0,-0.02,-0.02,0.00,-0.02,0.00,-0.02,0.0,403250.0,Y,4.67400,4.70400,0.08092,4.69650,1.893,1.894,0.4970,0.4970,0.5,0.4992,-100.000000,0.345,0.434,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,aardsda01,1981.0,12.0,27.0,USA,CO,Denver,,,,,,,David,Aardsma,David Allan,215.0,75.0,R,R,2004-04-06,2015-08-23,aardd001,aardsda01,97.52228,1.905,26.87286,51.192798,430911.0,1902.0,2004.0,2015.0,David Aardsma,27.0,2009.0,SEA,1.0,AL,0.0,3.0,71.3,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.0,419000.0,Y,4.79788,4.79788,0.08302,4.79788,1.905,1.905,0.5000,0.5000,0.5,0.5000,,0.000,0.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124080,youngad01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,A.D. Young,,1925.0,ABC,0.0,NNL,1.0,1.0,,-0.29,0.0,0.0,0.0,,,,,0.0,0.00,0.10,0.0,-0.2,-0.2,-0.2,0.0,-0.02,-0.02,0.00,-0.02,0.00,-0.02,0.0,,Y,5.36055,5.55055,0.10647,5.51622,1.976,1.984,0.4828,0.4828,0.5,0.4969,-100.000000,0.342,0.405,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
124096,youngjo03,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,John Young,,1923.0,SLS,0.0,NNL,0.0,1.0,,0.00,0.0,0.0,0.0,,,,,0.0,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.0,,Y,5.70953,5.70953,0.10832,5.70953,2.002,2.002,0.5000,0.5000,0.5,0.5000,,0.000,0.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
124103,youngma04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Maurice Young,22.0,1927.0,KCM,0.0,NNL,26.0,12.0,,-2.49,0.0,0.0,0.0,,,,,0.0,0.00,2.57,0.0,0.1,0.1,0.1,0.0,0.01,0.01,0.00,0.01,0.00,0.01,0.0,,Y,4.92775,4.92109,0.09081,4.84705,1.919,1.915,0.5006,0.5006,0.5,0.4927,27.657386,8.884,9.980,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
124120,youngwi01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Willie Young,32.0,1945.0,BBB,0.0,NAL,1.0,1.0,,-0.24,0.0,0.0,0.0,,,,,0.0,0.00,0.09,0.0,-0.2,-0.2,-0.2,0.0,-0.01,-0.01,0.00,-0.01,0.00,-0.01,0.0,,Y,4.88299,5.03299,0.09904,5.00266,1.923,1.929,0.4855,0.4855,0.5,0.4971,-100.000000,0.321,0.356,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [36]:
df["BMI"] = round(df.BMI, 2)  # clean up our BMI decimals
df = df.dropna(subset=["BMI"])  # Drop anyone without a weight.
df_huskies_tryouts = df[df["weight"] >= 225]

df_huskies_tryouts = df_huskies_tryouts.sort_values("weight", ascending=False)

In [37]:
# weight of our heaviest starting lineup
df_huskies_tryouts.groupby("POS").weight.max().sum()

1892.0

In [70]:
df_huskies_tryouts

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,KG,meters,BMI,ratio,mlbID,fangraphsID,mlb_played_first,mlb_played_last,name_common_x,age,year_ID,team_ID,stint,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg,name_common_y,GS_x,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg_adj,RpO_replacement,GR_leverage_index_avg,WAA_adj,ERA_plus,ER_lg,Glf,Gcf,Grf,teamID,lgID,POS,GS_y,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
109928,youngwa01,1980.0,2.0,18.0,USA,MS,Hattiesburg,2015.0,9.0,19.0,USA,MS,Purvis,Walter,Young,Walter Ernest,320.0,77.0,L,R,2005-09-06,2005-10-02,younw001,youngwa01,145.14944,1.9558,37.95,74.214869,425488.0,2112.0,2005.0,2005.0,Walter Young,25.0,2005.0,BAL,1.0,AL,37.0,14.0,81.0,0.47,-0.12,0.19,0.0,0.0,0.0,0.0,0.0,0.0,-0.81,0.00,1.39,1.1,-0.3,-0.3,-0.8,-0.03,-0.03,-0.07,0.10,-0.07,0.10,0.13,,N,4.69790,4.71718,0.08197,4.61808,1.895,1.890,0.4981,0.4981,0.4942,0.4900,115.029797,12.192,13.972,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
25063,diazju03,1984.0,2.0,27.0,D.R.,La Romana,La Romana,,,,,,,Jumbo,Diaz,Jose Rafael,315.0,76.0,R,R,2014-06-20,2017-07-16,diazj005,diazju03,142.88148,1.9304,38.34,74.016515,471822.0,3397.0,2014.0,2017.0,Jumbo Diaz,30.0,2014.0,CIN,1.0,NL,0.0,34.0,34.7,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,-0.03,0.00,-0.03,0.00,0.00,,Y,3.99035,3.99035,0.07088,3.99035,1.808,1.808,0.5000,0.5000,0.5000,0.5000,,0.000,0.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
25064,diazju03,1984.0,2.0,27.0,D.R.,La Romana,La Romana,,,,,,,Jumbo,Diaz,Jose Rafael,315.0,76.0,R,R,2014-06-20,2017-07-16,diazj005,diazju03,142.88148,1.9304,38.34,74.016515,471822.0,3397.0,2014.0,2017.0,Jumbo Diaz,31.0,2015.0,CIN,1.0,NL,0.0,58.0,60.3,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,-0.01,0.00,-0.01,0.00,0.00,510000.0,Y,4.22222,4.22222,0.07613,4.22222,1.837,1.837,0.5000,0.5000,0.5000,0.5000,,0.000,0.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
25065,diazju03,1984.0,2.0,27.0,D.R.,La Romana,La Romana,,,,,,,Jumbo,Diaz,Jose Rafael,315.0,76.0,R,R,2014-06-20,2017-07-16,diazj005,diazju03,142.88148,1.9304,38.34,74.016515,471822.0,3397.0,2014.0,2017.0,Jumbo Diaz,32.0,2016.0,CIN,1.0,NL,1.0,45.0,43.0,-0.28,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.14,0.00,-0.1,-0.1,-0.1,0.0,-0.01,-0.01,-0.02,-0.01,-0.02,-0.01,0.00,525000.0,Y,4.49699,4.50010,0.08241,4.49938,1.870,1.870,0.4997,0.4997,0.5000,0.4999,-100.000000,0.332,0.427,,,,,,,,,,,,,,,,,,,,,,,,,,CIN,NL,P,0.0,129.0,1.0,3.0,2.0,0.0,,,,,
25066,diazju03,1984.0,2.0,27.0,D.R.,La Romana,La Romana,,,,,,,Jumbo,Diaz,Jose Rafael,315.0,76.0,R,R,2014-06-20,2017-07-16,diazj005,diazju03,142.88148,1.9304,38.34,74.016515,471822.0,3397.0,2014.0,2017.0,Jumbo Diaz,33.0,2017.0,TBR,1.0,AL,0.0,2.0,30.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,557500.0,Y,4.69540,4.69540,0.08396,4.69540,1.893,1.893,0.5000,0.5000,0.5000,0.5000,,0.000,0.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66089,meadoau01,1995.0,5.0,3.0,USA,GA,Atlanta,,,,,,,Austin,Meadows,Austin Wade,225.0,75.0,L,L,2018-05-18,2022-06-15,meada001,meadoau01,102.05820,1.9050,28.12,53.573858,640457.0,15672.0,2018.0,2022.0,Austin Meadows,23.0,2018.0,TBR,2.0,AL,26.0,10.0,62.7,-0.10,0.15,0.20,-2.0,0.0,0.0,0.0,0.0,-2.0,-0.52,0.00,0.93,-1.3,-2.3,-0.3,-2.5,-0.22,-0.02,-0.25,-0.13,-0.25,0.07,0.09,,N,4.55165,4.57865,0.08136,4.48549,1.878,1.874,0.4770,0.4972,0.4745,0.4904,99.118519,8.185,9.864,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
66088,meadoau01,1995.0,5.0,3.0,USA,GA,Atlanta,,,,,,,Austin,Meadows,Austin Wade,225.0,75.0,L,L,2018-05-18,2022-06-15,meada001,meadoau01,102.05820,1.9050,28.12,53.573858,640457.0,15672.0,2018.0,2022.0,Austin Meadows,23.0,2018.0,PIT,1.0,NL,165.0,49.0,314.7,2.77,-0.21,-0.01,-7.0,0.0,-1.0,0.0,0.0,-8.0,-0.61,0.00,5.36,-0.7,-6.1,1.9,-8.6,-0.71,0.18,-0.96,-0.18,-0.96,0.71,0.53,,N,4.36296,4.32337,0.07868,4.21393,1.852,1.843,0.4870,0.5042,0.4815,0.4882,115.966144,53.048,63.063,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
15560,carragi01,1968.0,3.0,4.0,Venezuela,Anzoategui,El Tigre,,,,,,,Giovanni,Carrara,Giovanni,225.0,74.0,R,R,1995-07-29,2006-09-23,carrg001,carragi01,102.05820,1.8796,28.89,54.297829,112030.0,643.0,1995.0,2006.0,Giovanni Carrara,28.0,1996.0,TOR,1.0,AL,0.0,0.0,15.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,,,,,,,,115000.0,Y,,5.39802,0.09509,,,,,,,,,0.000,0.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
15559,carragi01,1968.0,3.0,4.0,Venezuela,Anzoategui,El Tigre,,,,,,,Giovanni,Carrara,Giovanni,225.0,74.0,R,R,1995-07-29,2006-09-23,carrg001,carragi01,102.05820,1.8796,28.89,54.297829,112030.0,643.0,1995.0,2006.0,Giovanni Carrara,28.0,1996.0,CIN,2.0,NL,7.0,8.0,23.0,-1.25,-0.09,0.00,0.0,0.0,0.0,0.0,,0.0,0.01,0.93,0.00,-0.4,-0.4,-0.4,0.0,-0.04,-0.04,-0.01,-0.04,-0.01,-0.04,0.00,,Y,4.64486,4.69486,0.08793,4.66788,1.890,1.892,0.4949,0.4949,0.5001,0.4973,-100.000000,2.374,2.955,,,,,,,,,,,,,,,,,,,,,,,,,,CIN,NL,P,5.0,69.0,2.0,3.0,0.0,0.0,,,,,


In [38]:
# Probably easiest if we make ourselves a dataframe for every position, since we'll need to fill up a 26-man roster, which is parsed into several discrete roles that each require a particular number of players. 

# These role counts aren't codified, but teams have nearly always carried the same distribution of player-roles.

df_huskies_tryouts_P = df_huskies_tryouts[df_huskies_tryouts['POS'] == "P"]
df_huskies_tryouts_C = df_huskies_tryouts[df_huskies_tryouts['POS'] == "C"]
df_huskies_tryouts_1B = df_huskies_tryouts[df_huskies_tryouts['POS'] == "1B"]
df_huskies_tryouts_2B = df_huskies_tryouts[df_huskies_tryouts['POS'] == "2B"]
df_huskies_tryouts_3B = df_huskies_tryouts[df_huskies_tryouts['POS'] == "3B"]
df_huskies_tryouts_SS = df_huskies_tryouts[df_huskies_tryouts['POS'] == "SS"]
df_huskies_tryouts_OF = df_huskies_tryouts[df_huskies_tryouts['POS'] == "OF"]


In [69]:
df_huskies_tryouts_P

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,KG,meters,BMI,ratio,mlbID,fangraphsID,mlb_played_first,mlb_played_last,name_common_x,age,year_ID,team_ID,stint,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg,name_common_y,GS_x,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg_adj,RpO_replacement,GR_leverage_index_avg,WAA_adj,ERA_plus,ER_lg,Glf,Gcf,Grf,teamID,lgID,POS,GS_y,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
25065,diazju03,1984.0,2.0,27.0,D.R.,La Romana,La Romana,,,,,,,Jumbo,Diaz,Jose Rafael,315.0,76.0,R,R,2014-06-20,2017-07-16,diazj005,diazju03,142.88148,1.9304,38.34,74.016515,471822.0,3397.0,2014.0,2017.0,Jumbo Diaz,32.0,2016.0,CIN,1.0,NL,1.0,45.0,43.0,-0.28,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.14,0.0,-0.1,-0.1,-0.1,0.0,-0.01,-0.01,-0.02,-0.01,-0.02,-0.01,0.0,525000.0,Y,4.49699,4.50010,0.08241,4.49938,1.870,1.870,0.4997,0.4997,0.5000,0.4999,-100.000000,0.332,0.427,,,,,,,,,,,,,,,,,,,,,,,,,,CIN,NL,P,0.0,129.0,1.0,3.0,2.0,0.0,,,,,
11968,brownju01,1907.0,4.0,30.0,USA,RI,Greene,1966.0,10.0,2.0,USA,NY,Freeport,Jumbo,Brown,Walter George,295.0,76.0,R,R,1925-08-26,1941-08-27,browj110,brownju01,133.80964,1.9304,35.91,69.317053,111597.0,1001533.0,1925.0,1941.0,Jumbo Brown,31.0,1938.0,NYG,1.0,NL,18.0,43.0,,-1.13,-0.52,-0.24,0.0,,,,,0.0,0.00,2.07,0.0,0.2,0.2,0.2,0.0,0.01,0.02,-0.03,0.01,-0.03,0.02,0.0,,Y,4.42789,4.42371,0.07970,4.41010,1.862,1.861,0.5004,0.5004,0.5000,0.4986,45.477489,6.116,6.277,,,,,,,,,,,,,,,,,,,,,,,,,,NY1,NL,P,,,5.0,11.0,2.0,1.0,,,,,
11967,brownju01,1907.0,4.0,30.0,USA,RI,Greene,1966.0,10.0,2.0,USA,NY,Freeport,Jumbo,Brown,Walter George,295.0,76.0,R,R,1925-08-26,1941-08-27,browj110,brownju01,133.80964,1.9304,35.91,69.317053,111597.0,1001533.0,1925.0,1941.0,Jumbo Brown,30.0,1937.0,NYG,2.0,NL,0.0,4.0,,0.00,0.00,0.00,0.0,,,,,0.0,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.0,,Y,4.55686,4.55686,0.08283,4.55686,1.877,1.877,0.5000,0.5000,0.5000,0.5000,,0.000,0.000,,,,,,,,,,,,,,,,,,,,,,,,,,NY1,NL,P,,,0.0,5.0,0.0,0.0,,,,,
11969,brownju01,1907.0,4.0,30.0,USA,RI,Greene,1966.0,10.0,2.0,USA,NY,Freeport,Jumbo,Brown,Walter George,295.0,76.0,R,R,1925-08-26,1941-08-27,browj110,brownju01,133.80964,1.9304,35.91,69.317053,111597.0,1001533.0,1925.0,1941.0,Jumbo Brown,32.0,1939.0,NYG,1.0,NL,12.0,31.0,,0.28,-0.46,0.17,0.0,,,,,0.0,0.00,1.32,0.0,1.3,1.3,1.3,0.0,0.14,0.14,-0.02,0.14,-0.02,0.14,0.0,7500.0,Y,4.51438,4.47213,0.08040,4.45954,1.870,1.866,0.5044,0.5044,0.5000,0.4987,110.357093,4.168,4.425,,,,,,,,,,,,,,,,,,,,,,,,,,NY1,NL,P,,,0.0,12.0,0.0,1.0,,,,,
11970,brownju01,1907.0,4.0,30.0,USA,RI,Greene,1966.0,10.0,2.0,USA,NY,Freeport,Jumbo,Brown,Walter George,295.0,76.0,R,R,1925-08-26,1941-08-27,browj110,brownju01,133.80964,1.9304,35.91,69.317053,111597.0,1001533.0,1925.0,1941.0,Jumbo Brown,33.0,1940.0,NYG,1.0,NL,10.0,41.0,,-1.89,0.00,0.00,0.0,,,,,0.0,0.00,1.10,0.0,-0.8,-0.8,-0.8,0.0,-0.08,-0.08,-0.01,-0.08,-0.01,-0.08,0.0,,Y,4.36516,4.38443,0.07749,4.37610,1.856,1.856,0.4980,0.4980,0.5000,0.4991,-44.916701,3.377,3.926,,,,,,,,,,,,,,,,,,,,,,,,,,NY1,NL,P,,,0.0,7.0,0.0,0.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84965,rodrihe03,1987.0,2.0,25.0,Venezuela,Zulia,Santa Barbara,,,,,,,Henry,Rodriguez,Henry Alberto,225.0,73.0,R,R,2009-09-21,2014-05-12,rodrh002,rodrihe03,102.05820,1.8542,29.68,55.041635,469159.0,6371.0,2009.0,2014.0,Henry Rodriguez,27.0,2014.0,MIA,1.0,NL,0.0,2.0,1.7,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.0,,Y,3.99035,3.99035,0.07088,3.99035,1.808,1.808,0.5000,0.5000,0.5000,0.5000,,0.000,0.000,,,,,,,,,,,,,,,,,,,,,,,,,,MIA,NL,P,0.0,5.0,0.0,0.0,0.0,0.0,,,,,
66187,medicdo01,1948.0,12.0,9.0,USA,PA,Aliquippa,,,,,,,Doc,Medich,George Francis,225.0,77.0,R,R,1972-09-05,1982-10-02,medid101,medicdo01,102.05820,1.9558,26.68,52.182329,118900.0,1008714.0,1972.0,1982.0,Doc Medich,28.0,1977.0,NYM,3.0,NL,2.0,1.0,7.0,-0.53,0.00,0.00,0.0,0.0,0.0,0.0,,0.0,0.00,0.24,0.0,-0.3,-0.3,-0.3,0.0,-0.03,-0.03,0.00,-0.03,0.00,-0.03,0.0,,Y,4.08636,4.37636,0.07911,4.31202,1.838,1.852,0.4685,0.4685,0.5000,0.4931,-100.000000,0.657,0.799,,,,,,,,,,,,,,,,,,,,,,,,,,NYN,NL,P,1.0,21.0,1.0,0.0,0.0,0.0,,,,,
66186,medicdo01,1948.0,12.0,9.0,USA,PA,Aliquippa,,,,,,,Doc,Medich,George Francis,225.0,77.0,R,R,1972-09-05,1982-10-02,medid101,medicdo01,102.05820,1.9558,26.68,52.182329,118900.0,1008714.0,1972.0,1982.0,Doc Medich,27.0,1976.0,PIT,1.0,NL,64.0,29.0,179.3,-9.55,-0.32,0.03,0.0,0.0,0.0,0.0,,0.0,0.12,6.92,0.0,-2.8,-2.8,-2.8,0.1,-0.33,-0.33,0.01,-0.33,0.01,-0.33,0.0,,Y,3.87846,3.97501,0.06970,3.90329,1.799,1.801,0.4889,0.4889,0.5005,0.4918,-25.454301,18.424,19.458,,,,,,,,,,,,,,,,,,,,,,,,,,PIT,NL,P,26.0,538.0,6.0,36.0,2.0,2.0,,,,,
66182,medicdo01,1948.0,12.0,9.0,USA,PA,Aliquippa,,,,,,,Doc,Medich,George Francis,225.0,77.0,R,R,1972-09-05,1982-10-02,medid101,medicdo01,102.05820,1.9558,26.68,52.182329,118900.0,1008714.0,1972.0,1982.0,Doc Medich,23.0,1972.0,NYY,1.0,AL,1.0,1.0,0.0,0.00,-0.01,0.00,0.0,0.0,0.0,0.0,,0.0,0.00,0.10,0.0,0.1,0.1,0.1,0.0,0.01,0.01,0.00,0.01,0.00,0.01,0.0,,Y,3.55795,3.46795,0.05508,3.43212,1.743,1.734,0.5112,0.5112,0.5000,0.4955,,0.000,0.000,,,,,,,,,,,,,,,,,,,,,,,,,,NYA,AL,P,1.0,0.0,0.0,0.0,0.0,0.0,,,,,


We'll need:
- 5 starting pitchers
- 7 relievers
- 2 catchers
- 4 starting infielders
- 2 utility infielders
- 5 outfielders


In [39]:
huskies_P = (
    df_huskies_tryouts_P.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["playerID"])
    .nlargest(13, "weight")
)  # 13 heaviest P

In [40]:
huskies_C = (
    df_huskies_tryouts_C.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["playerID"])
    .nlargest(2, "weight")
)  # 2 heaviest catchers

In [41]:
huskies_1B = (
    df_huskies_tryouts_1B.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["playerID"])
    .nlargest(1, "weight")
)  # heaviest 1B

In [42]:
huskies_2B = (
    df_huskies_tryouts_2B.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["playerID"])
    .nlargest(2, "weight")
)  # 2 heaviest 2B

In [43]:
huskies_3B = (
    df_huskies_tryouts_3B.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["playerID"])
    .nlargest(1, "weight")
)  # 1 heaviest 3B

In [44]:
huskies_SS = (
    df_huskies_tryouts_SS.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["playerID"])
    .nlargest(2, "weight")
)  # 2 heaviest SS

In [45]:
huskies_OF = (
    df_huskies_tryouts_OF.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["playerID"])
    .nlargest(5, "weight")
)  # 5 heaviest OF

In [46]:
huskies = pd.concat(
    [huskies_P, huskies_C, huskies_1B, huskies_2B, huskies_3B, huskies_SS, huskies_OF],
    ignore_index=True,
)

In [49]:
# BEHOLD. THE WONKAVILLE HUSKIES.
huskies.POS.value_counts().sum()

26

In [64]:
df = huskies.dropna(axis=1)

In [65]:
print(list(df.columns))

['playerID', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry', 'birthCity', 'nameFirst', 'nameLast', 'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame', 'retroID', 'bbrefID', 'KG', 'meters', 'BMI', 'ratio', 'mlbID', 'fangraphsID', 'mlb_played_first', 'mlb_played_last', 'name_common_x', 'age', 'year_ID', 'team_ID', 'stint', 'lg_ID', 'PA', 'G', 'runs_bat', 'runs_br', 'runs_dp', 'runs_field', 'runs_defense', 'runs_position', 'runs_position_p', 'runs_replacement', 'runs_above_rep', 'runs_above_avg', 'runs_above_avg_off', 'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def', 'WAR', 'WAR_def', 'WAR_off', 'WAR_rep', 'pitcher', 'teamRpG', 'oppRpG', 'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent', 'pyth_exponent_rep', 'waa_win_perc', 'waa_win_perc_off', 'waa_win_perc_def', 'waa_win_perc_rep', 'TOB_lg', 'TB_lg', 'teamID', 'lgID', 'POS', 'PO', 'A', 'E', 'DP']


In [66]:
df.runs_position_p.value_counts()

0.00    18
0.14     2
2.57     1
5.86     1
0.27     1
0.15     1
0.16     1
0.29     1
Name: runs_position_p, dtype: int64

In [50]:
# check to make sure everyone's best season is in here
# make sure we've got a rotation and relievers
# clean up columns
huskies.weight.describe()


count     26.000000
mean     271.423077
std       18.465477
min      230.000000
25%      260.000000
50%      275.000000
75%      281.500000
max      315.000000
Name: weight, dtype: float64

In [None]:
roster_of = df_huskies_tryouts_OF.groupby("playerID", as_index=False)["WAR"].max()