In [193]:
import numpy as np
import pandas as pd
import pybaseball
import seaborn as sns
import matplotlib.pyplot as plot
from deepdiff import DeepDiff
from pybaseball import bwar_pitch
from pybaseball import bwar_bat
from pybaseball import cache
from pybaseball.lahman import *
from pybaseball import chadwick_register
from pybaseball import pitching_stats
from pybaseball import batting_stats
from time import sleep

cache.enable()
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 100)

# These are the people

In [194]:
people = pd.DataFrame(people())

sleep(1)  
# sleep calls to prevent the pybaseball scraper 
# from throwing errors when importing tons of stuff

people = people.convert_dtypes() # cleanup; thank you NumPy

print(people.shape)
print("---")
people.sample(3)

(20543, 24)
---


Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID
11431,marksma01,1990,8,25,USA,OH,Cincinnati,,,,,,,Matt,Marksberry,Matthew Gates,180,73,L,L,2015-07-31,2016-07-30,markm001,marksma01
6161,friedcy01,1897,7,23,USA,TX,San Antonio,1970.0,10.0,9.0,USA,TX,San Antonio,Cy,Fried,Arthur Edwin,150,71,L,L,1920-09-17,1920-09-23,friec101,friedcy01
11689,mattoji01,1896,12,17,USA,VA,Leesville,1973.0,10.0,12.0,USA,SC,Myrtle Beach,Jim,Mattox,James Powell,168,69,L,R,1922-04-30,1923-09-30,mattj101,mattoji01


We need to add more ID info about them so that we don't drown in merge errors.

In [195]:
rosetta = pd.DataFrame(chadwick_register()) 

sleep(1)

rosetta = rosetta.convert_dtypes()

print(rosetta.shape)
print('---')
rosetta.sample(3)

(24258, 8)
---


Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last
8667,Gutiérrez,José,-1,,gutiejo01,-1,1926,1926
16055,Nyce,Charlie,119873,nycec101,nycech01,1009666,1895,1895
4274,Connor,John,112587,connj105,connojo02,1002530,1884,1885


Merge people into rosetta

In [196]:
everyone = pd.merge(rosetta, people, left_on='key_bbref', right_on='playerID', how='left')

print(everyone.shape)
print('---')
everyone.sample(3)

(24258, 32)
---


Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID
16420,Ostrowski,Johnny,120104,ostrj102,ostrojo01,1009886,1943,1950,ostrojo01,1917,10,17,USA,IL,Chicago,1992.0,11.0,13.0,USA,IL,Chicago,Johnny,Ostrowski,John Thaddeus,170,70,R,R,1943-09-24,1950-10-01,ostrj102,ostrojo01
17046,Perzanowski,Stan,120449,perzs101,perzast01,1010218,1971,1978,perzast01,1950,8,25,USA,IN,East Chicago,,,,,,,Stan,Perzanowski,Stanley,170,74,B,R,1971-06-20,1978-09-27,perzs101,perzast01
9371,Hehl,Jake,115700,hehlj101,hehlja01,1005585,1918,1918,hehlja01,1899,12,10,USA,NY,Brooklyn,1961.0,7.0,4.0,USA,NY,Brooklyn,Jake,Hehl,Herman Charles,180,71,R,R,1918-06-20,1918-06-20,hehlj101,hehlja01


Add Fielding Data

In [197]:
# fielding stats by year 
fielding = pd.DataFrame(fielding()) # this is fine

sleep(1)

fielding = fielding.convert_dtypes()

print(fielding.shape)
print('---')
fielding.sample(3)

(147080, 18)
---


Unnamed: 0,playerID,yearID,stint,teamID,lgID,POS,G,GS,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
867,fergubo01,1874,1,BR2,,C,2,0.0,54.0,8,1,4,0,4.0,,0.0,0.0,
89361,robinro01,1988,1,CIN,NL,P,17,16.0,236.0,10,11,3,0,,,,,
50000,wyrosjo01,1951,1,CIN,NL,OF,139,,,255,8,8,2,,,,,


In [198]:
everyone = pd.merge(everyone, fielding, left_on='key_bbref', right_on='playerID', how='left')

print(everyone.shape)
print('---')
everyone.sample(3)

(150057, 50)
---


Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last,playerID_x,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,playerID_y,yearID,stint,teamID,lgID,POS,G,GS,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
78693,Lewis,Buddy,117738,lewib103,lewisbu01,1007578,1935,1949,lewisbu01,1916,8,10,USA,NC,Gaston County,2011.0,2.0,18.0,USA,NC,Gastonia,Buddy,Lewis,John Kelly,175,73,L,R,1935-09-16,1949-09-30,lewib103,lewisbu01,lewisbu01,1947,1,WS1,AL,OF,130,,,259,11,9,2,,,,,
43144,Fogarty,Jim,114275,fogaj102,fogarji01,1004191,1884,1890,fogarji01,1864,2,12,USA,CA,San Francisco,1891.0,5.0,20.0,USA,PA,Philadelphia,Jim,Fogarty,James G.,180,70,R,R,1884-05-01,1890-10-04,fogaj102,fogarji01,fogarji01,1889,1,PHI,NL,P,4,,12.0,0,1,1,0,,,,,
115330,Rodriguez,Alex,121347,rodra001,rodrial01,1274,1994,2016,rodrial01,1975,7,27,USA,NY,New York,,,,,,,Alex,Rodriguez,Alexander Enmanuel,230,75,R,R,1994-07-08,2016-08-12,rodra001,rodrial01,rodrial01,1997,1,SEA,AL,SS,140,140.0,3701.0,209,394,24,83,,,,,


Add Batting data

In [199]:
# batting_stats stats by year 
batting_stats = pd.DataFrame(batting_stats(start_season=1921, end_season=2021)) # this is fine

sleep(1)

batting_stats = batting_stats.convert_dtypes()

print(batting_stats.shape)
print('---')
batting_stats.sample(3)

(6443, 319)
---


Unnamed: 0,IDfg,Season,Name,Team,Age,G,AB,PA,H,1B,2B,3B,HR,R,RBI,BB,IBB,SO,HBP,SF,SH,GDP,SB,CS,AVG,GB,FB,LD,IFFB,Pitches,Balls,Strikes,IFH,BU,BUH,BB%,K%,BB/K,OBP,SLG,OPS,ISO,BABIP,GB/FB,LD%,GB%,FB%,IFFB%,HR/FB,IFH%,BUH%,wOBA,wRAA,wRC,Bat,Fld,Rep,Pos,RAR,WAR,Dol,Spd,wRC+,WPA,-WPA,+WPA,RE24,REW,pLI,phLI,PH,WPA/LI,Clutch,FB% (Pitch),FBv,SL%,SLv,CT%,CTv,CB%,CBv,CH%,CHv,SF%,SFv,KN%,KNv,XX%,PO%,wFB,wSL,wCT,wCB,wCH,wSF,wKN,wFB/C,wSL/C,wCT/C,wCB/C,wCH/C,wSF/C,wKN/C,O-Swing%,Z-Swing%,Swing%,O-Contact%,Z-Contact%,Contact%,Zone%,F-Strike%,SwStr%,BsR,FA% (sc),FT% (sc),FC% (sc),FS% (sc),FO% (sc),SI% (sc),SL% (sc),CU% (sc),KC% (sc),EP% (sc),CH% (sc),SC% (sc),KN% (sc),UN% (sc),vFA (sc),vFT (sc),vFC (sc),vFS (sc),vFO (sc),vSI (sc),vSL (sc),vCU (sc),vKC (sc),vEP (sc),vCH (sc),vSC (sc),vKN (sc),FA-X (sc),FT-X (sc),FC-X (sc),FS-X (sc),FO-X (sc),SI-X (sc),SL-X (sc),CU-X (sc),KC-X (sc),EP-X (sc),CH-X (sc),SC-X (sc),KN-X (sc),FA-Z (sc),FT-Z (sc),FC-Z (sc),FS-Z (sc),FO-Z (sc),SI-Z (sc),SL-Z (sc),CU-Z (sc),KC-Z (sc),EP-Z (sc),CH-Z (sc),SC-Z (sc),KN-Z (sc),wFA (sc),wFT (sc),wFC (sc),wFS (sc),wFO (sc),wSI (sc),wSL (sc),wCU (sc),wKC (sc),wEP (sc),wCH (sc),wSC (sc),wKN (sc),wFA/C (sc),wFT/C (sc),wFC/C (sc),wFS/C (sc),wFO/C (sc),wSI/C (sc),wSL/C (sc),wCU/C (sc),wKC/C (sc),wEP/C (sc),wCH/C (sc),wSC/C (sc),wKN/C (sc),O-Swing% (sc),Z-Swing% (sc),Swing% (sc),O-Contact% (sc),Z-Contact% (sc),Contact% (sc),Zone% (sc),Pace,Def,wSB,UBR,Age Rng,Off,Lg,wGDP,Pull%,Cent%,Oppo%,Soft%,Med%,Hard%,TTO%,CH% (pi),CS% (pi),CU% (pi),FA% (pi),FC% (pi),FS% (pi),KN% (pi),SB% (pi),SI% (pi),SL% (pi),XX% (pi),vCH (pi),vCS (pi),vCU (pi),vFA (pi),vFC (pi),vFS (pi),vKN (pi),vSB (pi),vSI (pi),vSL (pi),vXX (pi),CH-X (pi),CS-X (pi),CU-X (pi),FA-X (pi),FC-X (pi),FS-X (pi),KN-X (pi),SB-X (pi),SI-X (pi),SL-X (pi),XX-X (pi),CH-Z (pi),CS-Z (pi),CU-Z (pi),FA-Z (pi),FC-Z (pi),FS-Z (pi),KN-Z (pi),SB-Z (pi),SI-Z (pi),SL-Z (pi),XX-Z (pi),wCH (pi),wCS (pi),wCU (pi),wFA (pi),wFC (pi),wFS (pi),wKN (pi),wSB (pi),wSI (pi),wSL (pi),wXX (pi),wCH/C (pi),wCS/C (pi),wCU/C (pi),wFA/C (pi),wFC/C (pi),wFS/C (pi),wKN/C (pi),wSB/C (pi),wSI/C (pi),wSL/C (pi),wXX/C (pi),O-Swing% (pi),Z-Swing% (pi),Swing% (pi),O-Contact% (pi),Z-Contact% (pi),Contact% (pi),Zone% (pi),Pace (pi),FRM,AVG+,BB%+,K%+,OBP+,SLG+,ISO+,BABIP+,LD+%,GB%+,FB%+,HR/FB%+,Pull%+,Cent%+,Oppo%+,Soft%+,Med%+,Hard%+,EV,LA,Barrels,Barrel%,maxEV,HardHit,HardHit%,Events,CStr%,CSW%,xBA,xSLG,xwOBA
5408,1007409,1985,Vance Law,MON,28,147,519,621,138,92,30,6,10,75,52,86,0,96,2,6,8,11,6,5,0.266,,,,,,,,,,,0.138,0.155,0.9,0.369,0.405,0.773,0.139,0.305,,,,,,,,,0.35,15.6,86,16.9,-2.0,18.2,2.8,36.7,3.9,,4.4,124,1.86,-10.21,12.07,18.3,2.14,1.02,1.37,5.0,2.4,-0.58,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.8,-1.0,,28 - 28,15.9,1.8,,,,,,,,0.309,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,102,154,116,112,104,109,108,,,,,,,,,,,,,,,,,,0,,,,,
1627,1000189,1962,George Altman,CHC,29,147,534,603,170,116,27,5,22,74,74,62,14,89,5,2,0,8,19,7,0.318,,,,,,,,,,,0.103,0.148,0.7,0.393,0.511,0.904,0.193,0.348,,,,,,,,,0.393,34.2,104,26.2,-1.0,17.9,-7.6,39.1,4.0,,5.1,135,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-8.6,1.0,,29 - 29,27.3,2.5,,,,,,,,0.287,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,118,117,112,117,125,138,120,,,,,,,,,,,,,,,,,,0,,,,,
4172,1006931,1996,Jeff King,PIT,31,155,591,672,160,90,36,4,30,91,111,70,3,95,2,8,1,17,15,1,0.271,,,,,,,,,,,0.104,0.141,0.74,0.346,0.497,0.843,0.227,0.274,,,,,,,,,0.361,14.8,101,14.5,6.0,21.4,-3.8,39.9,3.8,,5.5,117,1.33,-11.86,13.18,24.31,2.31,1.0,1.57,6.0,2.49,-1.16,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.2,2.4,,31 - 31,16.9,-0.5,,,,,,,,0.29,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,100,117,86,102,118,149,91,,,,,,,,,,,,,,,,,,0,,,,,


In [200]:
batting_stats = batting_stats.rename(columns={
    "IDfg": "key_fangraphs"
})

In [201]:
a = np.intersect1d(everyone.columns, batting_stats.columns)
print(list(a))

['CS', 'G', 'SB', 'key_fangraphs']


In [202]:
everyone = pd.merge(everyone, batting_stats, left_on='key_fangraphs', right_on='key_fangraphs', how='left')

print(everyone.shape)
print('---')
everyone.sample(3)

(251554, 368)
---


Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last,playerID_x,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,playerID_y,yearID,stint,teamID,lgID,POS,G_x,GS,InnOuts,PO,A,E,DP,PB,WP,SB_x,CS_x,ZR,Season,Name,Team,Age,G_y,AB,PA,H,1B,2B,3B,HR,R,RBI,BB,IBB,SO,HBP,SF,SH,GDP,SB_y,CS_y,AVG,GB,FB,LD,IFFB,Pitches,Balls,Strikes,IFH,BU,BUH,BB%,K%,BB/K,OBP,SLG,OPS,ISO,BABIP,GB/FB,LD%,GB%,FB%,IFFB%,HR/FB,IFH%,BUH%,wOBA,wRAA,wRC,Bat,Fld,Rep,Pos,RAR,WAR,Dol,Spd,wRC+,WPA,-WPA,+WPA,RE24,REW,pLI,phLI,PH,WPA/LI,Clutch,FB% (Pitch),FBv,SL%,SLv,CT%,CTv,CB%,CBv,CH%,CHv,SF%,SFv,KN%,KNv,XX%,PO%,wFB,wSL,wCT,wCB,wCH,wSF,wKN,wFB/C,wSL/C,wCT/C,wCB/C,wCH/C,wSF/C,wKN/C,O-Swing%,Z-Swing%,Swing%,O-Contact%,Z-Contact%,Contact%,Zone%,F-Strike%,SwStr%,BsR,FA% (sc),FT% (sc),FC% (sc),FS% (sc),FO% (sc),SI% (sc),SL% (sc),CU% (sc),KC% (sc),EP% (sc),CH% (sc),SC% (sc),KN% (sc),UN% (sc),vFA (sc),vFT (sc),vFC (sc),vFS (sc),vFO (sc),vSI (sc),vSL (sc),vCU (sc),vKC (sc),vEP (sc),vCH (sc),vSC (sc),vKN (sc),FA-X (sc),FT-X (sc),FC-X (sc),FS-X (sc),FO-X (sc),SI-X (sc),SL-X (sc),CU-X (sc),KC-X (sc),EP-X (sc),CH-X (sc),SC-X (sc),KN-X (sc),FA-Z (sc),FT-Z (sc),FC-Z (sc),FS-Z (sc),FO-Z (sc),SI-Z (sc),SL-Z (sc),CU-Z (sc),KC-Z (sc),EP-Z (sc),CH-Z (sc),SC-Z (sc),KN-Z (sc),wFA (sc),wFT (sc),wFC (sc),wFS (sc),wFO (sc),wSI (sc),wSL (sc),wCU (sc),wKC (sc),wEP (sc),wCH (sc),wSC (sc),wKN (sc),wFA/C (sc),wFT/C (sc),wFC/C (sc),wFS/C (sc),wFO/C (sc),wSI/C (sc),wSL/C (sc),wCU/C (sc),wKC/C (sc),wEP/C (sc),wCH/C (sc),wSC/C (sc),wKN/C (sc),O-Swing% (sc),Z-Swing% (sc),Swing% (sc),O-Contact% (sc),Z-Contact% (sc),Contact% (sc),Zone% (sc),Pace,Def,wSB,UBR,Age Rng,Off,Lg,wGDP,Pull%,Cent%,Oppo%,Soft%,Med%,Hard%,TTO%,CH% (pi),CS% (pi),CU% (pi),FA% (pi),FC% (pi),FS% (pi),KN% (pi),SB% (pi),SI% (pi),SL% (pi),XX% (pi),vCH (pi),vCS (pi),vCU (pi),vFA (pi),vFC (pi),vFS (pi),vKN (pi),vSB (pi),vSI (pi),vSL (pi),vXX (pi),CH-X (pi),CS-X (pi),CU-X (pi),FA-X (pi),FC-X (pi),FS-X (pi),KN-X (pi),SB-X (pi),SI-X (pi),SL-X (pi),XX-X (pi),CH-Z (pi),CS-Z (pi),CU-Z (pi),FA-Z (pi),FC-Z (pi),FS-Z (pi),KN-Z (pi),SB-Z (pi),SI-Z (pi),SL-Z (pi),XX-Z (pi),wCH (pi),wCS (pi),wCU (pi),wFA (pi),wFC (pi),wFS (pi),wKN (pi),wSB (pi),wSI (pi),wSL (pi),wXX (pi),wCH/C (pi),wCS/C (pi),wCU/C (pi),wFA/C (pi),wFC/C (pi),wFS/C (pi),wKN/C (pi),wSB/C (pi),wSI/C (pi),wSL/C (pi),wXX/C (pi),O-Swing% (pi),Z-Swing% (pi),Swing% (pi),O-Contact% (pi),Z-Contact% (pi),Contact% (pi),Zone% (pi),Pace (pi),FRM,AVG+,BB%+,K%+,OBP+,SLG+,ISO+,BABIP+,LD+%,GB%+,FB%+,HR/FB%+,Pull%+,Cent%+,Oppo%+,Soft%+,Med%+,Hard%+,EV,LA,Barrels,Barrel%,maxEV,HardHit,HardHit%,Events,CStr%,CSW%,xBA,xSLG,xwOBA
14352,Bell,Jay,110826,bellj001,bellja01,48,1986,2003,bellja01,1965,12,11,USA,FL,Pensacola,,,,,,,Jay,Bell,Jay Stuart,180,73,R,R,1986-09-29,2003-09-28,bellj001,bellja01,bellja01,1997,1,KCA,AL,SS,149,144.0,3813,227,443,10,102,,,,,,1993.0,Jay Bell,PIT,27.0,154.0,604.0,701.0,187.0,137.0,32.0,9.0,9.0,102.0,51.0,77.0,6.0,122.0,6.0,1.0,13.0,16.0,16.0,10.0,0.31,,,,,,,,,,,0.11,0.174,0.63,0.392,0.437,0.83,0.127,0.376,,,,,,,,,0.371,25.2,109.0,22.1,11.0,21.3,8.3,65.3,6.6,,6.0,125.0,0.26,-11.73,11.99,12.3,1.08,1.0,,0.0,1.66,-1.39,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-0.8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,19.3,-0.8,,27 - 27,21.3,3.4,,,,,,,,0.297,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,114.0,129.0,120.0,117.0,106.0,90.0,126.0,,,,,,,,,,,,,,,,,,0.0,,,,,
222600,Terry,Adonis,123202,terra101,terryad01,1012926,1884,1897,terryad01,1864,8,7,USA,MA,Westfield,1915.0,2.0,24.0,USA,WI,Milwaukee,Adonis,Terry,William H.,168,71,R,R,1884-05-01,1897-04-27,terra101,terryad01,terryad01,1884,1,BR3,AA,OF,13,,366,24,4,3,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
196769,Ruf,Darin,573131,ruf-d001,rufda01,9929,2012,2022,rufda01,1986,7,28,USA,NE,Omaha,,,,,,,Darin,Ruf,Darin Cortland,232,74,R,R,2012-09-14,2022-07-01,ruf-d001,rufda01,rufda01,2016,1,PHI,NL,OF,13,11.0,258,16,0,1,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [203]:
# bref_batting stats by year 
bref_bat = pd.DataFrame(bwar_bat()) # this is fine

sleep(1)

bref_bat = bref_bat.convert_dtypes()

print(bref_bat.shape)
print('---')
bref_bat.sample(3)

(119731, 17)
---


Unnamed: 0,name_common,mlb_ID,player_ID,year_ID,team_ID,stint_ID,lg_ID,pitcher,G,PA,salary,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAR_rep,WAA,WAR
51671,Jae-gyun Hwang,666561,hwangja01,2017,SFG,1,NL,N,18,57,,-4.0,-5.0,1.1,0.18,-0.43,-0.25
37615,Ned Garver,114640,garvene01,1956,DET,1,AL,Y,6,6,,-0.5,-0.5,0.0,0.0,-0.05,-0.05
68519,Lee May,118474,mayle01,1977,BAL,1,AL,N,150,629,130000.0,-17.0,-12.2,-14.4,2.16,-1.65,0.51


In [204]:
bref_bat = bref_bat.rename(columns={
    "mlb_ID": "key_mlabm",
    "player_ID": "key_bbref",
    "year_ID": "Season"
})

In [205]:
a = np.intersect1d(everyone.columns, bref_bat.columns)
print(list(a))

['PA', 'Season', 'WAR', 'key_bbref']


In [206]:
everyone = pd.merge(everyone, bref_bat, left_on='key_bbref', right_on='key_bbref', how='left')

print(everyone.shape)
print('---')
everyone.sample(3)

(3244786, 384)
---


Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last,playerID_x,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,playerID_y,yearID,stint,teamID,lgID,POS,G_x,GS,InnOuts,PO,A,E,DP,PB,WP,SB_x,CS_x,ZR,Season_x,Name,Team,Age,G_y,AB,PA_x,H,1B,2B,3B,HR,R,RBI,BB,IBB,SO,HBP,SF,SH,GDP,SB_y,CS_y,AVG,GB,FB,LD,IFFB,Pitches,Balls,Strikes,IFH,BU,BUH,BB%,K%,BB/K,OBP,SLG,OPS,ISO,BABIP,GB/FB,LD%,GB%,FB%,IFFB%,HR/FB,IFH%,BUH%,wOBA,wRAA,wRC,Bat,Fld,Rep,Pos,RAR,WAR_x,Dol,Spd,wRC+,WPA,-WPA,+WPA,RE24,REW,pLI,phLI,PH,WPA/LI,Clutch,FB% (Pitch),FBv,SL%,SLv,CT%,CTv,CB%,CBv,CH%,CHv,SF%,SFv,KN%,KNv,XX%,PO%,wFB,wSL,wCT,wCB,wCH,wSF,wKN,wFB/C,wSL/C,wCT/C,wCB/C,wCH/C,wSF/C,wKN/C,O-Swing%,Z-Swing%,Swing%,O-Contact%,Z-Contact%,Contact%,Zone%,F-Strike%,SwStr%,BsR,FA% (sc),FT% (sc),FC% (sc),FS% (sc),FO% (sc),SI% (sc),SL% (sc),CU% (sc),KC% (sc),EP% (sc),CH% (sc),SC% (sc),KN% (sc),UN% (sc),vFA (sc),vFT (sc),vFC (sc),vFS (sc),vFO (sc),vSI (sc),vSL (sc),vCU (sc),vKC (sc),vEP (sc),vCH (sc),vSC (sc),vKN (sc),FA-X (sc),FT-X (sc),FC-X (sc),FS-X (sc),FO-X (sc),SI-X (sc),SL-X (sc),CU-X (sc),KC-X (sc),EP-X (sc),CH-X (sc),SC-X (sc),KN-X (sc),FA-Z (sc),FT-Z (sc),FC-Z (sc),FS-Z (sc),FO-Z (sc),SI-Z (sc),SL-Z (sc),CU-Z (sc),KC-Z (sc),EP-Z (sc),CH-Z (sc),SC-Z (sc),KN-Z (sc),wFA (sc),wFT (sc),wFC (sc),wFS (sc),wFO (sc),wSI (sc),wSL (sc),wCU (sc),wKC (sc),wEP (sc),wCH (sc),wSC (sc),wKN (sc),wFA/C (sc),wFT/C (sc),wFC/C (sc),wFS/C (sc),wFO/C (sc),wSI/C (sc),wSL/C (sc),wCU/C (sc),wKC/C (sc),wEP/C (sc),wCH/C (sc),wSC/C (sc),wKN/C (sc),O-Swing% (sc),Z-Swing% (sc),Swing% (sc),O-Contact% (sc),Z-Contact% (sc),Contact% (sc),Zone% (sc),Pace,Def,wSB,UBR,Age Rng,Off,Lg,wGDP,Pull%,Cent%,Oppo%,Soft%,Med%,Hard%,TTO%,CH% (pi),CS% (pi),CU% (pi),FA% (pi),FC% (pi),FS% (pi),KN% (pi),SB% (pi),SI% (pi),SL% (pi),XX% (pi),vCH (pi),vCS (pi),vCU (pi),vFA (pi),vFC (pi),vFS (pi),vKN (pi),vSB (pi),vSI (pi),vSL (pi),vXX (pi),CH-X (pi),CS-X (pi),CU-X (pi),FA-X (pi),FC-X (pi),FS-X (pi),KN-X (pi),SB-X (pi),SI-X (pi),SL-X (pi),XX-X (pi),CH-Z (pi),CS-Z (pi),CU-Z (pi),FA-Z (pi),FC-Z (pi),FS-Z (pi),KN-Z (pi),SB-Z (pi),SI-Z (pi),SL-Z (pi),XX-Z (pi),wCH (pi),wCS (pi),wCU (pi),wFA (pi),wFC (pi),wFS (pi),wKN (pi),wSB (pi),wSI (pi),wSL (pi),wXX (pi),wCH/C (pi),wCS/C (pi),wCU/C (pi),wFA/C (pi),wFC/C (pi),wFS/C (pi),wKN/C (pi),wSB/C (pi),wSI/C (pi),wSL/C (pi),wXX/C (pi),O-Swing% (pi),Z-Swing% (pi),Swing% (pi),O-Contact% (pi),Z-Contact% (pi),Contact% (pi),Zone% (pi),Pace (pi),FRM,AVG+,BB%+,K%+,OBP+,SLG+,ISO+,BABIP+,LD+%,GB%+,FB%+,HR/FB%+,Pull%+,Cent%+,Oppo%+,Soft%+,Med%+,Hard%+,EV,LA,Barrels,Barrel%,maxEV,HardHit,HardHit%,Events,CStr%,CSW%,xBA,xSLG,xwOBA,name_common,key_mlabm,Season_y,team_ID,stint_ID,lg_ID,pitcher,G,PA_y,salary,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAR_rep,WAA,WAR_y
364261,Brown,Kevin,111567,browk004,brownke04,178,1996,2002,brownke04,1973,4,21,USA,IN,Valparaiso,,,,,,,Kevin,Brown,Kevin Lee,200,74,R,R,1996-09-12,2002-09-22,browk004,brownke04,brownke04,2002,1,BOS,AL,C,2,0.0,9.0,1,0,0,0,0.0,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Kevin Brown,111567,1999,TOR,1,AL,N,2,9,,1.5,1.5,0.1,0.03,0.13,0.16
775381,Duncan,Mariano,113620,duncm001,duncama01,1003554,1985,1997,duncama01,1963,3,13,D.R.,San Pedro de Macoris,San Pedro de Macoris,,,,,,,Mariano,Duncan,Mariano,160,72,R,R,1985-04-09,1997-09-17,duncm001,duncama01,duncama01,1997,2,TOR,AL,2B,39,39.0,997.0,74,111,3,24,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Mariano Duncan,113620,1991,CIN,1,NL,N,100,356,925000.0,-12.7,-0.8,-8.3,1.14,-1.45,-0.31
1819394,Martin,Pepper,118342,martp103,martipe01,1008189,1928,1944,martipe01,1904,2,29,USA,OK,Temple,1965.0,3.0,5.0,USA,OK,McAlester,Pepper,Martin,Johnny Leonard Roosevelt,170,68,R,R,1928-04-16,1944-10-01,martp103,martipe01,martipe01,1940,1,SLN,NL,OF,63,,,103,8,3,0,,,,,,1934.0,Pepper Martin,STL,30.0,110.0,454.0,488.0,131.0,90.0,25.0,11.0,5.0,74.0,49.0,32.0,,41.0,1.0,,1.0,1.0,23.0,,0.289,,,,,,,,,,,0.066,0.084,0.78,0.337,0.425,0.762,0.137,0.309,,,,,,,,,0.347,2.3,64.0,-0.2,0.0,15.3,2.0,22.4,2.1,,7.5,100.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,4.4,,30 - 30,4.2,0.9,,,,,,,,0.16,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,101.0,91.0,107.0,98.0,104.0,113.0,102.0,,,,,,,,,,,,,,,,,,0.0,,,,,,Pepper Martin,118342,1934,STL,1,NL,N,110,489,9000.0,4.3,4.3,2.1,1.49,0.37,1.86


In [207]:
# Constants
KG_TO_LB = 0.453592
M_TO_IN = 0.0254
# BMI Calculations
everyone["KG"] = everyone["weight"] * KG_TO_LB
everyone["meters"] = everyone["height"] * M_TO_IN
everyone["BMI"] = everyone["KG"] / everyone["meters"] ** 2
everyone["ratio"] = everyone["meters"] * everyone["BMI"]

In [53]:
everyone.shape

(3232861, 384)

In [209]:
everyone['dupe_delete'] = everyone['key_fangraphs'].astype(str) + everyone['yearID'].astype(str)

In [210]:
everyone = everyone.drop_duplicates(subset=['dupe_delete'])

# It's time to weigh everyone

In [212]:
df_save = everyone
df = df_save

In [213]:
df = df.dropna(subset=['BMI']) # Anyone without a BMI doesn't make the first round of cuts.

## Let's separate the meat from the chaff

In [188]:
df.shape

(98838, 389)

In [192]:
df.sample(20)

Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last,playerID_x,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,playerID_y,yearID,stint,teamID,lgID,POS,G_x,GS,InnOuts,PO,A,E,DP,PB,WP,SB_x,CS_x,ZR,Season_x,Name,Team,Age,G_y,AB,PA_x,H,1B,2B,3B,HR,R,RBI,BB,IBB,SO,HBP,SF,SH,GDP,SB_y,CS_y,AVG,GB,FB,LD,IFFB,Pitches,Balls,Strikes,IFH,BU,BUH,BB%,K%,BB/K,OBP,SLG,OPS,ISO,BABIP,GB/FB,LD%,GB%,FB%,IFFB%,HR/FB,IFH%,BUH%,wOBA,wRAA,wRC,Bat,Fld,Rep,Pos,RAR,WAR_x,Dol,Spd,wRC+,WPA,-WPA,+WPA,RE24,REW,pLI,phLI,PH,WPA/LI,Clutch,FB% (Pitch),FBv,SL%,SLv,CT%,CTv,CB%,CBv,CH%,CHv,SF%,SFv,KN%,KNv,XX%,PO%,wFB,wSL,wCT,wCB,wCH,wSF,wKN,wFB/C,wSL/C,wCT/C,wCB/C,wCH/C,wSF/C,wKN/C,O-Swing%,Z-Swing%,Swing%,O-Contact%,Z-Contact%,Contact%,Zone%,F-Strike%,SwStr%,BsR,FA% (sc),FT% (sc),FC% (sc),FS% (sc),FO% (sc),SI% (sc),SL% (sc),CU% (sc),KC% (sc),EP% (sc),CH% (sc),SC% (sc),KN% (sc),UN% (sc),vFA (sc),vFT (sc),vFC (sc),vFS (sc),vFO (sc),vSI (sc),vSL (sc),vCU (sc),vKC (sc),vEP (sc),vCH (sc),vSC (sc),vKN (sc),FA-X (sc),FT-X (sc),FC-X (sc),FS-X (sc),FO-X (sc),SI-X (sc),SL-X (sc),CU-X (sc),KC-X (sc),EP-X (sc),CH-X (sc),SC-X (sc),KN-X (sc),FA-Z (sc),FT-Z (sc),FC-Z (sc),FS-Z (sc),FO-Z (sc),SI-Z (sc),SL-Z (sc),CU-Z (sc),KC-Z (sc),EP-Z (sc),CH-Z (sc),SC-Z (sc),KN-Z (sc),wFA (sc),wFT (sc),wFC (sc),wFS (sc),wFO (sc),wSI (sc),wSL (sc),wCU (sc),wKC (sc),wEP (sc),wCH (sc),wSC (sc),wKN (sc),wFA/C (sc),wFT/C (sc),wFC/C (sc),wFS/C (sc),wFO/C (sc),wSI/C (sc),wSL/C (sc),wCU/C (sc),wKC/C (sc),wEP/C (sc),wCH/C (sc),wSC/C (sc),wKN/C (sc),O-Swing% (sc),Z-Swing% (sc),Swing% (sc),O-Contact% (sc),Z-Contact% (sc),Contact% (sc),Zone% (sc),Pace,Def,wSB,UBR,Age Rng,Off,Lg,wGDP,Pull%,Cent%,Oppo%,Soft%,Med%,Hard%,TTO%,CH% (pi),CS% (pi),CU% (pi),FA% (pi),FC% (pi),FS% (pi),KN% (pi),SB% (pi),SI% (pi),SL% (pi),XX% (pi),vCH (pi),vCS (pi),vCU (pi),vFA (pi),vFC (pi),vFS (pi),vKN (pi),vSB (pi),vSI (pi),vSL (pi),vXX (pi),CH-X (pi),CS-X (pi),CU-X (pi),FA-X (pi),FC-X (pi),FS-X (pi),KN-X (pi),SB-X (pi),SI-X (pi),SL-X (pi),XX-X (pi),CH-Z (pi),CS-Z (pi),CU-Z (pi),FA-Z (pi),FC-Z (pi),FS-Z (pi),KN-Z (pi),SB-Z (pi),SI-Z (pi),SL-Z (pi),XX-Z (pi),wCH (pi),wCS (pi),wCU (pi),wFA (pi),wFC (pi),wFS (pi),wKN (pi),wSB (pi),wSI (pi),wSL (pi),wXX (pi),wCH/C (pi),wCS/C (pi),wCU/C (pi),wFA/C (pi),wFC/C (pi),wFS/C (pi),wKN/C (pi),wSB/C (pi),wSI/C (pi),wSL/C (pi),wXX/C (pi),O-Swing% (pi),Z-Swing% (pi),Swing% (pi),O-Contact% (pi),Z-Contact% (pi),Contact% (pi),Zone% (pi),Pace (pi),FRM,AVG+,BB%+,K%+,OBP+,SLG+,ISO+,BABIP+,LD+%,GB%+,FB%+,HR/FB%+,Pull%+,Cent%+,Oppo%+,Soft%+,Med%+,Hard%+,EV,LA,Barrels,Barrel%,maxEV,HardHit,HardHit%,Events,CStr%,CSW%,xBA,xSLG,xwOBA,name_common,key_mlabm,Season_y,team_ID,stint_ID,lg_ID,pitcher,G,PA_y,salary,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAR_rep,WAA,WAR_y,dupe_delete,KG,meters,BMI,ratio
1017834,Garcia,Karim,114588,garck002,garcika01,1537,1995,2004,garcika01,1975,10,29,Mexico,Sonora,Ciudad Obregon,,,,,,,Karim,Garcia,Gustavo Karim,210,72,L,L,1995-09-02,2004-08-24,garck002,garcika01,garcika01,2001,1,CLE,AL,1B,2,0.0,9.0,3,0,0,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Karim Garcia,114588,1995,LAD,1,NL,N,13,20,109000.0,-3.8,-2.8,-1.2,0.06,-0.38,-0.32,15372001,95.25432,1.8288,28.480805,52.085696
2059324,Moses,Jerry,119436,mosej101,mosesje01,1009242,1965,1975,mosesje01,1946,8,9,USA,MS,Yazoo City,2018.0,3.0,26.0,USA,MA,Haverhill,Jerry,Moses,Gerald Braheen,210,75,R,R,1965-05-09,1975-08-09,mosej101,mosesje01,mosesje01,1971,1,CAL,AL,C,63,47.0,1359.0,299,38,8,3,10.0,,29.0,22.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jerry Moses,119436,1965,BOS,1,AL,,4,4,,0.6,0.6,0.0,,0.07,,10092421971,95.25432,1.905,26.24791,50.002268
172613,Belisle,Matt,279571,belim001,belisma01,1837,2003,2018,belisma01,1980,6,6,USA,TX,Austin,,,,,,,Matt,Belisle,Matthew Thomas,230,75,R,R,2003-09-07,2018-09-30,belim001,belisma01,belisma01,2006,1,CIN,NL,P,30,2.0,120.0,1,5,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Matt Belisle,279571,2003,CIN,1,NL,Y,6,1,,-0.1,-0.1,0.0,0.0,-0.01,-0.01,18372006,104.32616,1.905,28.74771,54.764388
2594208,Schang,Bobby,121770,schab101,schanbo01,1011519,1914,1927,schanbo01,1886,12,7,USA,NY,Wales Center,1966.0,8.0,29.0,USA,CA,Sacramento,Bobby,Schang,Robert Martin,165,67,R,R,1914-09-23,1927-05-28,schab101,schanbo01,schanbo01,1927,1,SLN,NL,C,3,,,3,1,0,0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Bobby Schang,121770,1914,PIT,1,NL,N,11,36,,-1.2,-1.2,0.6,0.12,-0.15,-0.03,10115191927,74.84268,1.7018,25.842367,43.97854
1128249,Grebeck,Craig,115078,grebc001,grebecr01,1004982,1990,2001,grebecr01,1964,12,29,USA,PA,Johnstown,,,,,,,Craig,Grebeck,Craig Allen,160,68,R,R,1990-04-13,2001-06-02,grebc001,grebecr01,grebecr01,1998,1,TOR,AL,2B,91,77.0,2169.0,145,250,10,38,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Craig Grebeck,115078,1990,CHW,1,AL,N,59,135,100000.0,-0.1,-9.0,10.4,0.46,-0.01,0.45,10049821998,72.57472,1.7272,24.327647,42.018712
618066,Covington,Wes,112747,coviw101,covinwe01,1002687,1956,1966,covinwe01,1932,3,27,USA,NC,Laurinburg,2011.0,7.0,4.0,CAN,AB,Edmonton,Wes,Covington,John Wesley,205,73,L,R,1956-04-19,1966-10-02,coviw101,covinwe01,covinwe01,1965,1,PHI,NL,OF,64,60.0,1389.0,88,2,3,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Wes Covington,112747,1956,MLN,1,NL,N,75,157,,1.9,2.8,-2.2,0.62,0.1,0.72,10026871965,92.98636,1.8542,27.04619,50.149045
733974,Diekman,Jake,518617,diekj001,diekmja01,5003,2012,2022,diekmja01,1987,1,21,USA,NE,Wymore,,,,,,,Jake,Diekman,Jacob Tanner,195,76,R,L,2012-05-15,2022-07-01,diekj001,diekmja01,diekmja01,2014,1,PHI,NL,P,73,0.0,213.0,3,9,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jake Diekman,518617,2012,PHI,1,NL,Y,29,0,,0.0,0.0,0.0,0.0,0.0,0.0,50032014,88.45044,1.9304,23.735882,45.819747
3063403,Waugh,Jim,123994,waugj101,waughji01,1013687,1952,1953,waughji01,1933,11,25,USA,OH,Lancaster,2010.0,2.0,16.0,USA,SC,Rock Hill,Jim,Waugh,James Elden,185,75,R,R,1952-04-19,1953-09-26,waugj101,waughji01,waughji01,1952,1,PIT,NL,P,17,,,0,11,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jim Waugh,123994,1952,PIT,1,NL,Y,17,13,,-0.2,-0.2,0.0,0.0,-0.02,-0.02,10136871952,83.91452,1.905,23.123158,44.049617
2302562,Pino,Yohan,464416,pinoy001,pinoyo01,6955,2014,2015,pinoyo01,1983,12,26,Venezuela,Aragua,Turmero,,,,,,,Yohan,Pino,Yohan Jose,190,74,R,R,2014-06-19,2015-06-19,pinoy001,pinoyo01,pinoyo01,2015,1,KCA,AL,P,7,1.0,58.0,0,1,2,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Yohan Pino,464416,2014,MIN,1,AL,Y,0,0,,0.0,0.0,0.0,,,,69552015,86.18248,1.8796,24.394286,45.8515
1467316,John,Tommy,116550,johnt001,johnto01,1006515,1963,1989,johnto01,1943,5,22,USA,IN,Terre Haute,,,,,,,Tommy,John,Thomas Edward,180,75,R,L,1963-09-06,1989-05-25,johnt001,johnto01,johnto01,1964,1,CLE,AL,P,25,14.0,283.0,5,16,0,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Tommy John,116550,1963,CLE,1,AL,Y,6,6,,-1.1,-1.1,0.0,0.0,-0.12,-0.12,10065151964,81.64656,1.905,22.498208,42.859087


In [153]:
pool = df[(df.weight > df.weight.quantile(0.97)) & (df.height < df.height.quantile(0.50))]

In [154]:
pool.POS.value_counts()

P     106
1B     70
C      34
OF     19
3B     13
2B      8
SS      5
Name: POS, dtype: Int64

In [160]:
print(list(pool.columns))

['name_last', 'name_first', 'key_mlbam', 'key_retro', 'key_bbref', 'key_fangraphs', 'mlb_played_first', 'mlb_played_last', 'playerID_x', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry', 'birthState', 'birthCity', 'deathYear', 'deathMonth', 'deathDay', 'deathCountry', 'deathState', 'deathCity', 'nameFirst', 'nameLast', 'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame', 'retroID', 'bbrefID', 'playerID_y', 'yearID', 'stint', 'teamID', 'lgID', 'POS', 'G_x', 'GS', 'InnOuts', 'PO', 'A', 'E', 'DP', 'PB', 'WP', 'SB_x', 'CS_x', 'ZR', 'Season_x', 'Name', 'Team', 'Age', 'G_y', 'AB', 'PA_x', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH', 'GDP', 'SB_y', 'CS_y', 'AVG', 'GB', 'FB', 'LD', 'IFFB', 'Pitches', 'Balls', 'Strikes', 'IFH', 'BU', 'BUH', 'BB%', 'K%', 'BB/K', 'OBP', 'SLG', 'OPS', 'ISO', 'BABIP', 'GB/FB', 'LD%', 'GB%', 'FB%', 'IFFB%', 'HR/FB', 'IFH%', 'BUH%', 'wOBA', 'wRAA', 'wRC', 'Bat', 'Fld', 'Rep', 'Pos', 'RAR', 'WAR_x', 'Dol', 'Sp

In [185]:
# Probably easiest if we make ourselves a dataframe for every position, since we'll need to fill up a 26-man roster, which is parsed into several discrete roles that each require a particular number of players. 

# These role counts aren't codified, but teams have nearly always carried the same distribution of player-roles.

pool_P = pool[pool['POS'] == "P"]
pool_C = pool[pool['POS'] == "C"]
pool_1B = pool[pool['POS'] == "1B"]
pool_2B = pool[pool['POS'] == "2B"]
pool_3B = pool[pool['POS'] == "3B"]
pool_SS = pool[pool['POS'] == "SS"]
pool_OF = pool[pool['POS'] == "OF"]

### Pitchers

In [189]:
pool_P.sample(10)

Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last,playerID_x,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,playerID_y,yearID,stint,teamID,lgID,POS,G_x,GS,InnOuts,PO,A,E,DP,PB,WP,SB_x,CS_x,ZR,Season_x,Name,Team,Age,G_y,AB,PA_x,H,1B,2B,3B,HR,R,RBI,BB,IBB,SO,HBP,SF,SH,GDP,SB_y,CS_y,AVG,GB,FB,LD,IFFB,Pitches,Balls,Strikes,IFH,BU,BUH,BB%,K%,BB/K,OBP,SLG,OPS,ISO,BABIP,GB/FB,LD%,GB%,FB%,IFFB%,HR/FB,IFH%,BUH%,wOBA,wRAA,wRC,Bat,Fld,Rep,Pos,RAR,WAR_x,Dol,Spd,wRC+,WPA,-WPA,+WPA,RE24,REW,pLI,phLI,PH,WPA/LI,Clutch,FB% (Pitch),FBv,SL%,SLv,CT%,CTv,CB%,CBv,CH%,CHv,SF%,SFv,KN%,KNv,XX%,PO%,wFB,wSL,wCT,wCB,wCH,wSF,wKN,wFB/C,wSL/C,wCT/C,wCB/C,wCH/C,wSF/C,wKN/C,O-Swing%,Z-Swing%,Swing%,O-Contact%,Z-Contact%,Contact%,Zone%,F-Strike%,SwStr%,BsR,FA% (sc),FT% (sc),FC% (sc),FS% (sc),FO% (sc),SI% (sc),SL% (sc),CU% (sc),KC% (sc),EP% (sc),CH% (sc),SC% (sc),KN% (sc),UN% (sc),vFA (sc),vFT (sc),vFC (sc),vFS (sc),vFO (sc),vSI (sc),vSL (sc),vCU (sc),vKC (sc),vEP (sc),vCH (sc),vSC (sc),vKN (sc),FA-X (sc),FT-X (sc),FC-X (sc),FS-X (sc),FO-X (sc),SI-X (sc),SL-X (sc),CU-X (sc),KC-X (sc),EP-X (sc),CH-X (sc),SC-X (sc),KN-X (sc),FA-Z (sc),FT-Z (sc),FC-Z (sc),FS-Z (sc),FO-Z (sc),SI-Z (sc),SL-Z (sc),CU-Z (sc),KC-Z (sc),EP-Z (sc),CH-Z (sc),SC-Z (sc),KN-Z (sc),wFA (sc),wFT (sc),wFC (sc),wFS (sc),wFO (sc),wSI (sc),wSL (sc),wCU (sc),wKC (sc),wEP (sc),wCH (sc),wSC (sc),wKN (sc),wFA/C (sc),wFT/C (sc),wFC/C (sc),wFS/C (sc),wFO/C (sc),wSI/C (sc),wSL/C (sc),wCU/C (sc),wKC/C (sc),wEP/C (sc),wCH/C (sc),wSC/C (sc),wKN/C (sc),O-Swing% (sc),Z-Swing% (sc),Swing% (sc),O-Contact% (sc),Z-Contact% (sc),Contact% (sc),Zone% (sc),Pace,Def,wSB,UBR,Age Rng,Off,Lg,wGDP,Pull%,Cent%,Oppo%,Soft%,Med%,Hard%,TTO%,CH% (pi),CS% (pi),CU% (pi),FA% (pi),FC% (pi),FS% (pi),KN% (pi),SB% (pi),SI% (pi),SL% (pi),XX% (pi),vCH (pi),vCS (pi),vCU (pi),vFA (pi),vFC (pi),vFS (pi),vKN (pi),vSB (pi),vSI (pi),vSL (pi),vXX (pi),CH-X (pi),CS-X (pi),CU-X (pi),FA-X (pi),FC-X (pi),FS-X (pi),KN-X (pi),SB-X (pi),SI-X (pi),SL-X (pi),XX-X (pi),CH-Z (pi),CS-Z (pi),CU-Z (pi),FA-Z (pi),FC-Z (pi),FS-Z (pi),KN-Z (pi),SB-Z (pi),SI-Z (pi),SL-Z (pi),XX-Z (pi),wCH (pi),wCS (pi),wCU (pi),wFA (pi),wFC (pi),wFS (pi),wKN (pi),wSB (pi),wSI (pi),wSL (pi),wXX (pi),wCH/C (pi),wCS/C (pi),wCU/C (pi),wFA/C (pi),wFC/C (pi),wFS/C (pi),wKN/C (pi),wSB/C (pi),wSI/C (pi),wSL/C (pi),wXX/C (pi),O-Swing% (pi),Z-Swing% (pi),Swing% (pi),O-Contact% (pi),Z-Contact% (pi),Contact% (pi),Zone% (pi),Pace (pi),FRM,AVG+,BB%+,K%+,OBP+,SLG+,ISO+,BABIP+,LD+%,GB%+,FB%+,HR/FB%+,Pull%+,Cent%+,Oppo%+,Soft%+,Med%+,Hard%+,EV,LA,Barrels,Barrel%,maxEV,HardHit,HardHit%,Events,CStr%,CSW%,xBA,xSLG,xwOBA,name_common,key_mlabm,Season_y,team_ID,stint_ID,lg_ID,pitcher,G,PA_y,salary,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAR_rep,WAA,WAR_y,dupe_delete,KG,meters,BMI,ratio
1064549,Givens,Mychal,571710,givem001,givenmy01,10430,2015,2022,givenmy01,1990,5,13,USA,FL,Tampa,,,,,,,Mychal,Givens,Mychal Antonio,239,72,R,R,2015-06-24,2022-06-29,givem001,givenmy01,givenmy01,2018,1,BAL,AL,P,69,0,230,4,4,1,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Mychal Givens,571710,2015,BAL,1,AL,Y,1,0,,0.0,0.0,0.0,0.0,0.0,0.0,104302018,108.408488,1.8288,32.413868,59.278482
591287,Colon,Bartolo,112526,colob001,colonba01,375,1997,2018,colonba01,1973,5,24,D.R.,Puerto Plata,Altamira,,,,,,,Bartolo,Colon,Bartolo,285,71,R,R,1997-04-04,2018-09-22,colob001,colonba01,colonba01,2014,1,NYN,NL,P,31,31,607,11,27,5,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Bartolo Colon,112526,1997,CLE,1,AL,Y,1,1,150000.0,-0.2,-0.2,0.0,0.0,-0.01,-0.01,3752014,129.27372,1.8034,39.748992,71.683331
3225106,Zagurski,Mike,489448,zagum001,zagurmi01,9683,2007,2018,zagurmi01,1983,1,27,USA,NE,Omaha,,,,,,,Mike,Zagurski,Michael Justin,240,72,L,L,2007-05-25,2018-07-07,zagum001,zagurmi01,zagurmi01,2012,1,ARI,NL,P,45,0,112,0,6,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Mike Zagurski,489448,2007,PHI,1,NL,Y,23,0,,0.0,0.0,0.0,0.0,0.0,0.0,96832012,108.86208,1.8288,32.549491,59.526509
1064558,Givens,Mychal,571710,givem001,givenmy01,10430,2015,2022,givenmy01,1990,5,13,USA,FL,Tampa,,,,,,,Mychal,Givens,Mychal Antonio,239,72,R,R,2015-06-24,2022-06-29,givem001,givenmy01,givenmy01,2019,1,BAL,AL,P,58,0,189,2,4,2,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Mychal Givens,571710,2015,BAL,1,AL,Y,1,0,,0.0,0.0,0.0,0.0,0.0,0.0,104302019,108.408488,1.8288,32.413868,59.278482
2998410,Vizcaino,Arodys,527055,vizca001,vizcaar01,5498,2011,2022,vizcaar01,1990,11,13,D.R.,San Cristobal,Yaguate,,,,,,,Arodys,Vizcaino,Arodys,245,72,R,R,2011-08-10,2022-06-13,vizca001,vizcaar01,vizcaar01,2014,1,CHN,NL,P,5,0,15,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Arodys Vizcaino,527055,2011,ATL,1,NL,Y,17,0,,0.0,0.0,0.0,0.0,0.0,0.0,54982014,111.13004,1.8288,33.227605,60.766645
590942,Colon,Bartolo,112526,colob001,colonba01,375,1997,2018,colonba01,1973,5,24,D.R.,Puerto Plata,Altamira,,,,,,,Bartolo,Colon,Bartolo,285,71,R,R,1997-04-04,2018-09-22,colob001,colonba01,colonba01,1999,1,CLE,AL,P,32,32,615,20,31,2,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Bartolo Colon,112526,1997,CLE,1,AL,Y,1,1,150000.0,-0.2,-0.2,0.0,0.0,-0.01,-0.01,3751999,129.27372,1.8034,39.748992,71.683331
1016408,Garces,Rich,114581,garcr001,garceri01,189,1990,2002,garceri01,1971,5,18,Venezuela,Aragua,Maracay,,,,,,,Rich,Garces,Richard Aron,250,72,R,R,1990-09-18,2002-07-20,garcr001,garceri01,garceri01,1998,1,BOS,AL,P,30,0,138,2,5,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Rich Garces,114581,1990,MIN,1,AL,Y,0,0,,0.0,0.0,0.0,,,,1891998,113.398,1.8288,33.90572,62.00678
2958218,Valdez,Framber,664285,valdf001,valdefr01,17295,2018,2022,valdefr01,1993,11,19,D.R.,San Cristobal,Sabana Grande de Palenque,,,,,,,Framber,Valdez,Framber,239,71,R,L,2018-08-21,2022-06-28,valdf001,valdefr01,valdefr01,2019,1,HOU,AL,P,26,8,212,3,11,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Framber Valdez,664285,2018,HOU,1,AL,Y,0,0,,0.0,0.0,0.0,,,,172952019,108.408488,1.8034,33.333365,60.11339
1971757,Mijares,Jose,467726,mijaj001,mijarjo01,4140,2008,2013,mijarjo01,1984,10,29,Venezuela,Distrito Federal,Caracas,,,,,,,Jose,Mijares,Jose Manuel,265,71,L,L,2008-09-13,2013-09-28,mijaj001,mijarjo01,mijarjo01,2013,1,SFN,NL,P,60,0,147,0,4,2,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jose Mijares,467726,2008,MIN,1,AL,Y,0,0,,0.0,0.0,0.0,,,,41402013,120.20188,1.8034,36.959589,66.652922
2475198,Rodney,Fernando,407845,rodnf001,rodnefe01,494,2002,2019,rodnefe01,1977,3,18,D.R.,Distrito Nacional,Santo Domingo,,,,,,,Fernando,Rodney,Fernando,240,71,R,R,2002-05-04,2019-09-28,rodnf001,rodnefe01,rodnefe01,2002,1,DET,AL,P,20,0,54,1,1,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Fernando Rodney,407845,2002,DET,1,AL,Y,0,0,,0.0,0.0,0.0,,,,4942002,108.86208,1.8034,33.472835,60.364911


In [190]:
huskies_P = (
    pool_P.sort_values(by=["weight", "WAR_y"], ascending=[False, False])
    .drop_duplicates(subset=["key_fangraphs"])
    .nlargest(13, "weight")
)  # 13 heaviest P
huskies_P

Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last,playerID_x,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,playerID_y,yearID,stint,teamID,lgID,POS,G_x,GS,InnOuts,PO,A,E,DP,PB,WP,SB_x,CS_x,ZR,Season_x,Name,Team,Age,G_y,AB,PA_x,H,1B,2B,3B,HR,R,RBI,BB,IBB,SO,HBP,SF,SH,GDP,SB_y,CS_y,AVG,GB,FB,LD,IFFB,Pitches,Balls,Strikes,IFH,BU,BUH,BB%,K%,BB/K,OBP,SLG,OPS,ISO,BABIP,GB/FB,LD%,GB%,FB%,IFFB%,HR/FB,IFH%,BUH%,wOBA,wRAA,wRC,Bat,Fld,Rep,Pos,RAR,WAR_x,Dol,Spd,wRC+,WPA,-WPA,+WPA,RE24,REW,pLI,phLI,PH,WPA/LI,Clutch,FB% (Pitch),FBv,SL%,SLv,CT%,CTv,CB%,CBv,CH%,CHv,SF%,SFv,KN%,KNv,XX%,PO%,wFB,wSL,wCT,wCB,wCH,wSF,wKN,wFB/C,wSL/C,wCT/C,wCB/C,wCH/C,wSF/C,wKN/C,O-Swing%,Z-Swing%,Swing%,O-Contact%,Z-Contact%,Contact%,Zone%,F-Strike%,SwStr%,BsR,FA% (sc),FT% (sc),FC% (sc),FS% (sc),FO% (sc),SI% (sc),SL% (sc),CU% (sc),KC% (sc),EP% (sc),CH% (sc),SC% (sc),KN% (sc),UN% (sc),vFA (sc),vFT (sc),vFC (sc),vFS (sc),vFO (sc),vSI (sc),vSL (sc),vCU (sc),vKC (sc),vEP (sc),vCH (sc),vSC (sc),vKN (sc),FA-X (sc),FT-X (sc),FC-X (sc),FS-X (sc),FO-X (sc),SI-X (sc),SL-X (sc),CU-X (sc),KC-X (sc),EP-X (sc),CH-X (sc),SC-X (sc),KN-X (sc),FA-Z (sc),FT-Z (sc),FC-Z (sc),FS-Z (sc),FO-Z (sc),SI-Z (sc),SL-Z (sc),CU-Z (sc),KC-Z (sc),EP-Z (sc),CH-Z (sc),SC-Z (sc),KN-Z (sc),wFA (sc),wFT (sc),wFC (sc),wFS (sc),wFO (sc),wSI (sc),wSL (sc),wCU (sc),wKC (sc),wEP (sc),wCH (sc),wSC (sc),wKN (sc),wFA/C (sc),wFT/C (sc),wFC/C (sc),wFS/C (sc),wFO/C (sc),wSI/C (sc),wSL/C (sc),wCU/C (sc),wKC/C (sc),wEP/C (sc),wCH/C (sc),wSC/C (sc),wKN/C (sc),O-Swing% (sc),Z-Swing% (sc),Swing% (sc),O-Contact% (sc),Z-Contact% (sc),Contact% (sc),Zone% (sc),Pace,Def,wSB,UBR,Age Rng,Off,Lg,wGDP,Pull%,Cent%,Oppo%,Soft%,Med%,Hard%,TTO%,CH% (pi),CS% (pi),CU% (pi),FA% (pi),FC% (pi),FS% (pi),KN% (pi),SB% (pi),SI% (pi),SL% (pi),XX% (pi),vCH (pi),vCS (pi),vCU (pi),vFA (pi),vFC (pi),vFS (pi),vKN (pi),vSB (pi),vSI (pi),vSL (pi),vXX (pi),CH-X (pi),CS-X (pi),CU-X (pi),FA-X (pi),FC-X (pi),FS-X (pi),KN-X (pi),SB-X (pi),SI-X (pi),SL-X (pi),XX-X (pi),CH-Z (pi),CS-Z (pi),CU-Z (pi),FA-Z (pi),FC-Z (pi),FS-Z (pi),KN-Z (pi),SB-Z (pi),SI-Z (pi),SL-Z (pi),XX-Z (pi),wCH (pi),wCS (pi),wCU (pi),wFA (pi),wFC (pi),wFS (pi),wKN (pi),wSB (pi),wSI (pi),wSL (pi),wXX (pi),wCH/C (pi),wCS/C (pi),wCU/C (pi),wFA/C (pi),wFC/C (pi),wFS/C (pi),wKN/C (pi),wSB/C (pi),wSI/C (pi),wSL/C (pi),wXX/C (pi),O-Swing% (pi),Z-Swing% (pi),Swing% (pi),O-Contact% (pi),Z-Contact% (pi),Contact% (pi),Zone% (pi),Pace (pi),FRM,AVG+,BB%+,K%+,OBP+,SLG+,ISO+,BABIP+,LD+%,GB%+,FB%+,HR/FB%+,Pull%+,Cent%+,Oppo%+,Soft%+,Med%+,Hard%+,EV,LA,Barrels,Barrel%,maxEV,HardHit,HardHit%,Events,CStr%,CSW%,xBA,xSLG,xwOBA,name_common,key_mlabm,Season_y,team_ID,stint_ID,lg_ID,pitcher,G,PA_y,salary,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAR_rep,WAA,WAR_y,dupe_delete,KG,meters,BMI,ratio
590896,Colon,Bartolo,112526,colob001,colonba01,375,1997,2018,colonba01,1973,5,24,D.R.,Puerto Plata,Altamira,,,,,,,Bartolo,Colon,Bartolo,285,71,R,R,1997-04-04,2018-09-22,colob001,colonba01,colonba01,1997,1,CLE,AL,P,19,17.0,282.0,6,17,5,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Bartolo Colon,112526,1997,CLE,1,AL,Y,1,1,150000.0,-0.2,-0.2,0.0,0.0,-0.01,-0.01,3751997,129.27372,1.8034,39.748992,71.683331
2053415,Moronta,Reyes,606625,moror001,moronre01,14966,2017,2022,moronre01,1993,1,6,D.R.,Santiago,Santiago,,,,,,,Reyes,Moronta,Reyes Armando,265,70,R,R,2017-09-05,2022-06-29,moror001,moronre01,moronre01,2017,1,SFN,NL,P,7,0.0,20.0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Reyes Moronta,606625,2017,SFG,1,NL,Y,6,0,,0.0,0.0,0.0,0.0,0.0,0.0,149662017,120.20188,1.778,38.02312,67.605107
1971715,Mijares,Jose,467726,mijaj001,mijarjo01,4140,2008,2013,mijarjo01,1984,10,29,Venezuela,Distrito Federal,Caracas,,,,,,,Jose,Mijares,Jose Manuel,265,71,L,L,2008-09-13,2013-09-28,mijaj001,mijarjo01,mijarjo01,2008,1,MIN,AL,P,10,0.0,31.0,0,2,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jose Mijares,467726,2008,MIN,1,AL,Y,0,0,,0.0,0.0,0.0,,,,41402008,120.20188,1.8034,36.959589,66.652922
375861,Buckeye,Garland,111657,buckg101,buckega01,1001618,1918,1928,buckega01,1897,10,16,USA,MN,Heron Lake,1975.0,11.0,14.0,USA,WI,Sand Lake,Garland,Buckeye,Garland Maires,260,72,B,L,1918-06-19,1928-07-12,buckg101,buckega01,buckega01,1918,1,WS1,AL,P,1,,,1,1,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Garland Buckeye,111657,1918,WSH,1,AL,Y,1,0,,0.0,0.0,0.0,0.0,0.0,0.0,10016181918,117.93392,1.8288,35.261949,64.487052
1777452,Machi,Jean,466948,machj002,machije01,3220,2012,2017,machije01,1982,2,1,Venezuela,Anzoategui,El Tigre,,,,,,,Jean,Machi,Jean Manuel,257,71,R,R,2012-09-03,2017-05-12,machj002,machije01,machije01,2012,1,SFN,NL,P,8,0.0,20.0,0,2,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jean Machi,466948,2012,SFG,1,NL,Y,8,0,,0.0,0.0,0.0,0.0,0.0,0.0,32202012,116.573144,1.8034,35.843828,64.640759
730324,Diaz,Jairo,545064,diazj006,diazja01,12774,2014,2020,diazja01,1991,5,27,Venezuela,Anzoategui,Puerto La Cruz,,,,,,,Jairo,Diaz,Jairo Jose,254,72,R,R,2014-09-08,2020-09-26,diazj006,diazja01,diazja01,2014,1,LAA,AL,P,5,0.0,17.0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jairo Diaz,545064,2014,LAA,1,AL,Y,0,0,,0.0,0.0,0.0,,,,127742014,115.212368,1.8288,34.448211,62.998889
1016342,Garces,Rich,114581,garcr001,garceri01,189,1990,2002,garceri01,1971,5,18,Venezuela,Aragua,Maracay,,,,,,,Rich,Garces,Richard Aron,250,72,R,R,1990-09-18,2002-07-20,garcr001,garceri01,garceri01,1990,1,MIN,AL,P,5,0.0,17.0,0,1,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Rich Garces,114581,1990,MIN,1,AL,Y,0,0,,0.0,0.0,0.0,,,,1891990,113.398,1.8288,33.90572,62.00678
1816434,Martes,Francis,642564,martf003,martefr01,17303,2017,2017,martefr01,1995,11,24,D.R.,Sanchez Ramirez,Cotui,,,,,,,Francis,Martes,Francis Euclides,249,72,R,R,2017-06-09,2017-10-01,martf003,martefr01,martefr01,2017,1,HOU,AL,P,32,4.0,163.0,4,4,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Francis Martes,642564,2017,HOU,1,AL,Y,2,0,,0.0,0.0,0.0,0.0,0.0,0.0,173032017,112.944408,1.8288,33.770097,61.758753
2998403,Vizcaino,Arodys,527055,vizca001,vizcaar01,5498,2011,2022,vizcaar01,1990,11,13,D.R.,San Cristobal,Yaguate,,,,,,,Arodys,Vizcaino,Arodys,245,72,R,R,2011-08-10,2022-06-13,vizca001,vizcaar01,vizcaar01,2011,1,ATL,NL,P,17,0.0,52.0,2,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Arodys Vizcaino,527055,2011,ATL,1,NL,Y,17,0,,0.0,0.0,0.0,0.0,0.0,0.0,54982011,111.13004,1.8288,33.227605,60.766645
496057,Castro,Angel,502162,casta004,castran01,3243,2015,2015,castran01,1982,11,14,D.R.,Duarte,Pimentel,,,,,,,Angel,Castro,Angel M.,240,71,R,R,2015-05-09,2015-07-24,casta004,castran01,castran01,2015,1,OAK,AL,P,5,0.0,12.0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Angel Castro,502162,2015,OAK,1,AL,Y,1,0,,0.0,0.0,0.0,0.0,0.0,0.0,32432015,108.86208,1.8034,33.472835,60.364911


In [191]:
huskies_C = (
    pool_C.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["key_fangraphs"])
    .nlargest(2, "weight")
)  # 2 heaviest C
huskies_C

Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last,playerID_x,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,playerID_y,yearID,stint,teamID,lgID,POS,G_x,GS,InnOuts,PO,A,E,DP,PB,WP,SB_x,CS_x,ZR,Season_x,Name,Team,Age,G_y,AB,PA_x,H,1B,2B,3B,HR,R,RBI,BB,IBB,SO,HBP,SF,SH,GDP,SB_y,CS_y,AVG,GB,FB,LD,IFFB,Pitches,Balls,Strikes,IFH,BU,BUH,BB%,K%,BB/K,OBP,SLG,OPS,ISO,BABIP,GB/FB,LD%,GB%,FB%,IFFB%,HR/FB,IFH%,BUH%,wOBA,wRAA,wRC,Bat,Fld,Rep,Pos,RAR,WAR_x,Dol,Spd,wRC+,WPA,-WPA,+WPA,RE24,REW,pLI,phLI,PH,WPA/LI,Clutch,FB% (Pitch),FBv,SL%,SLv,CT%,CTv,CB%,CBv,CH%,CHv,SF%,SFv,KN%,KNv,XX%,PO%,wFB,wSL,wCT,wCB,wCH,wSF,wKN,wFB/C,wSL/C,wCT/C,wCB/C,wCH/C,wSF/C,wKN/C,O-Swing%,Z-Swing%,Swing%,O-Contact%,Z-Contact%,Contact%,Zone%,F-Strike%,SwStr%,BsR,FA% (sc),FT% (sc),FC% (sc),FS% (sc),FO% (sc),SI% (sc),SL% (sc),CU% (sc),KC% (sc),EP% (sc),CH% (sc),SC% (sc),KN% (sc),UN% (sc),vFA (sc),vFT (sc),vFC (sc),vFS (sc),vFO (sc),vSI (sc),vSL (sc),vCU (sc),vKC (sc),vEP (sc),vCH (sc),vSC (sc),vKN (sc),FA-X (sc),FT-X (sc),FC-X (sc),FS-X (sc),FO-X (sc),SI-X (sc),SL-X (sc),CU-X (sc),KC-X (sc),EP-X (sc),CH-X (sc),SC-X (sc),KN-X (sc),FA-Z (sc),FT-Z (sc),FC-Z (sc),FS-Z (sc),FO-Z (sc),SI-Z (sc),SL-Z (sc),CU-Z (sc),KC-Z (sc),EP-Z (sc),CH-Z (sc),SC-Z (sc),KN-Z (sc),wFA (sc),wFT (sc),wFC (sc),wFS (sc),wFO (sc),wSI (sc),wSL (sc),wCU (sc),wKC (sc),wEP (sc),wCH (sc),wSC (sc),wKN (sc),wFA/C (sc),wFT/C (sc),wFC/C (sc),wFS/C (sc),wFO/C (sc),wSI/C (sc),wSL/C (sc),wCU/C (sc),wKC/C (sc),wEP/C (sc),wCH/C (sc),wSC/C (sc),wKN/C (sc),O-Swing% (sc),Z-Swing% (sc),Swing% (sc),O-Contact% (sc),Z-Contact% (sc),Contact% (sc),Zone% (sc),Pace,Def,wSB,UBR,Age Rng,Off,Lg,wGDP,Pull%,Cent%,Oppo%,Soft%,Med%,Hard%,TTO%,CH% (pi),CS% (pi),CU% (pi),FA% (pi),FC% (pi),FS% (pi),KN% (pi),SB% (pi),SI% (pi),SL% (pi),XX% (pi),vCH (pi),vCS (pi),vCU (pi),vFA (pi),vFC (pi),vFS (pi),vKN (pi),vSB (pi),vSI (pi),vSL (pi),vXX (pi),CH-X (pi),CS-X (pi),CU-X (pi),FA-X (pi),FC-X (pi),FS-X (pi),KN-X (pi),SB-X (pi),SI-X (pi),SL-X (pi),XX-X (pi),CH-Z (pi),CS-Z (pi),CU-Z (pi),FA-Z (pi),FC-Z (pi),FS-Z (pi),KN-Z (pi),SB-Z (pi),SI-Z (pi),SL-Z (pi),XX-Z (pi),wCH (pi),wCS (pi),wCU (pi),wFA (pi),wFC (pi),wFS (pi),wKN (pi),wSB (pi),wSI (pi),wSL (pi),wXX (pi),wCH/C (pi),wCS/C (pi),wCU/C (pi),wFA/C (pi),wFC/C (pi),wFS/C (pi),wKN/C (pi),wSB/C (pi),wSI/C (pi),wSL/C (pi),wXX/C (pi),O-Swing% (pi),Z-Swing% (pi),Swing% (pi),O-Contact% (pi),Z-Contact% (pi),Contact% (pi),Zone% (pi),Pace (pi),FRM,AVG+,BB%+,K%+,OBP+,SLG+,ISO+,BABIP+,LD+%,GB%+,FB%+,HR/FB%+,Pull%+,Cent%+,Oppo%+,Soft%+,Med%+,Hard%+,EV,LA,Barrels,Barrel%,maxEV,HardHit,HardHit%,Events,CStr%,CSW%,xBA,xSLG,xwOBA,name_common,key_mlabm,Season_y,team_ID,stint_ID,lg_ID,pitcher,G,PA_y,salary,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAR_rep,WAA,WAR_y,dupe_delete,KG,meters,BMI,ratio
2005336,Molina,Jose,150040,molij001,molinjo01,25,1999,2014,molinjo01,1975,6,3,P.R.,,Bayamon,,,,,,,Jose,Molina,Jose Benjamin,250,72,R,R,1999-09-06,2014-09-28,molij001,molinjo01,molinjo01,2013,1,TBA,AL,C,96,87,2248,674,34,4,5,8,,56,23,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jose Molina,150040,1999,CHC,1,NL,N,10,21,200000,-0.3,-0.3,0.4,0.06,-0.04,0.02,252013,113.398,1.8288,33.90572,62.00678
533718,Chavez,Raul,112223,chavr001,chavera01,553,1996,2009,chavera01,1973,3,17,Venezuela,Carabobo,Valencia,,,,,,,Raul,Chavez,Raul Alexander,245,71,R,R,1996-08-30,2009-09-24,chavr001,chavera01,chavera01,2008,1,PIT,NL,C,35,31,834,188,23,1,2,3,,13,12,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Raul Chavez,112223,1996,MON,1,NL,N,4,6,109000,-0.4,-0.4,0.1,0.02,-0.04,-0.02,5532008,111.13004,1.8034,34.170186,61.622513


In [178]:
huskies_1B = (
    pool_1B.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["key_fangraphs"])
    .nlargest(1, "weight")
)  # 1 heaviest 1B
huskies_1B

Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last,playerID_x,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,playerID_y,yearID,stint,teamID,lgID,POS,G_x,GS,InnOuts,PO,A,E,DP,PB,WP,SB_x,CS_x,ZR,Season_x,Name,Team,Age,G_y,AB,PA_x,H,1B,2B,3B,HR,R,RBI,BB,IBB,SO,HBP,SF,SH,GDP,SB_y,CS_y,AVG,GB,FB,LD,IFFB,Pitches,Balls,Strikes,IFH,BU,BUH,BB%,K%,BB/K,OBP,SLG,OPS,ISO,BABIP,GB/FB,LD%,GB%,FB%,IFFB%,HR/FB,IFH%,BUH%,wOBA,wRAA,wRC,Bat,Fld,Rep,Pos,RAR,WAR_x,Dol,Spd,wRC+,WPA,-WPA,+WPA,RE24,REW,pLI,phLI,PH,WPA/LI,Clutch,FB% (Pitch),FBv,SL%,SLv,CT%,CTv,CB%,CBv,CH%,CHv,SF%,SFv,KN%,KNv,XX%,PO%,wFB,wSL,wCT,wCB,wCH,wSF,wKN,wFB/C,wSL/C,wCT/C,wCB/C,wCH/C,wSF/C,wKN/C,O-Swing%,Z-Swing%,Swing%,O-Contact%,Z-Contact%,Contact%,Zone%,F-Strike%,SwStr%,BsR,FA% (sc),FT% (sc),FC% (sc),FS% (sc),FO% (sc),SI% (sc),SL% (sc),CU% (sc),KC% (sc),EP% (sc),CH% (sc),SC% (sc),KN% (sc),UN% (sc),vFA (sc),vFT (sc),vFC (sc),vFS (sc),vFO (sc),vSI (sc),vSL (sc),vCU (sc),vKC (sc),vEP (sc),vCH (sc),vSC (sc),vKN (sc),FA-X (sc),FT-X (sc),FC-X (sc),FS-X (sc),FO-X (sc),SI-X (sc),SL-X (sc),CU-X (sc),KC-X (sc),EP-X (sc),CH-X (sc),SC-X (sc),KN-X (sc),FA-Z (sc),FT-Z (sc),FC-Z (sc),FS-Z (sc),FO-Z (sc),SI-Z (sc),SL-Z (sc),CU-Z (sc),KC-Z (sc),EP-Z (sc),CH-Z (sc),SC-Z (sc),KN-Z (sc),wFA (sc),wFT (sc),wFC (sc),wFS (sc),wFO (sc),wSI (sc),wSL (sc),wCU (sc),wKC (sc),wEP (sc),wCH (sc),wSC (sc),wKN (sc),wFA/C (sc),wFT/C (sc),wFC/C (sc),wFS/C (sc),wFO/C (sc),wSI/C (sc),wSL/C (sc),wCU/C (sc),wKC/C (sc),wEP/C (sc),wCH/C (sc),wSC/C (sc),wKN/C (sc),O-Swing% (sc),Z-Swing% (sc),Swing% (sc),O-Contact% (sc),Z-Contact% (sc),Contact% (sc),Zone% (sc),Pace,Def,wSB,UBR,Age Rng,Off,Lg,wGDP,Pull%,Cent%,Oppo%,Soft%,Med%,Hard%,TTO%,CH% (pi),CS% (pi),CU% (pi),FA% (pi),FC% (pi),FS% (pi),KN% (pi),SB% (pi),SI% (pi),SL% (pi),XX% (pi),vCH (pi),vCS (pi),vCU (pi),vFA (pi),vFC (pi),vFS (pi),vKN (pi),vSB (pi),vSI (pi),vSL (pi),vXX (pi),CH-X (pi),CS-X (pi),CU-X (pi),FA-X (pi),FC-X (pi),FS-X (pi),KN-X (pi),SB-X (pi),SI-X (pi),SL-X (pi),XX-X (pi),CH-Z (pi),CS-Z (pi),CU-Z (pi),FA-Z (pi),FC-Z (pi),FS-Z (pi),KN-Z (pi),SB-Z (pi),SI-Z (pi),SL-Z (pi),XX-Z (pi),wCH (pi),wCS (pi),wCU (pi),wFA (pi),wFC (pi),wFS (pi),wKN (pi),wSB (pi),wSI (pi),wSL (pi),wXX (pi),wCH/C (pi),wCS/C (pi),wCU/C (pi),wFA/C (pi),wFC/C (pi),wFS/C (pi),wKN/C (pi),wSB/C (pi),wSI/C (pi),wSL/C (pi),wXX/C (pi),O-Swing% (pi),Z-Swing% (pi),Swing% (pi),O-Contact% (pi),Z-Contact% (pi),Contact% (pi),Zone% (pi),Pace (pi),FRM,AVG+,BB%+,K%+,OBP+,SLG+,ISO+,BABIP+,LD+%,GB%+,FB%+,HR/FB%+,Pull%+,Cent%+,Oppo%+,Soft%+,Med%+,Hard%+,EV,LA,Barrels,Barrel%,maxEV,HardHit,HardHit%,Events,CStr%,CSW%,xBA,xSLG,xwOBA,name_common,key_mlabm,Season_y,team_ID,stint_ID,lg_ID,pitcher,G,PA_y,salary,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAR_rep,WAA,WAR_y,dupe_delete,KG,meters,BMI,ratio
893023,Fielder,Prince,425902,fielp001,fieldpr01,4613,2005,2016,fieldpr01,1984,5,9,USA,CA,Ontario,,,,,,,Prince,Fielder,Prince Semien,275,71,L,R,2005-06-13,2016-07-18,fielp001,fieldpr01,fieldpr01,2013,1,DET,AL,1B,151,151,3971,1152,96,6,119,,,,,,2009,Prince Fielder,MIL,25,162,591,719,177,93,35,3,46,103,141,110,21,138,9,9,0,14,2,3,0.299,188,199,75,11,2805,1186,1619,5,0,0,0.153,0.192,0.8,0.412,0.602,1.014,0.303,0.315,0.0094,0.162,0.407,0.431,0.055,0.231,0.027,0.0,0.422,55.5,142,54.1,0.2,21.9,-12.3,58.9,5.9,$37.7,2.5,161,7.97,-10.81,18.78,71.03,7.33,1.04,,0,6.49,1.15,0.556,91.6,0.161,82.8,0.031,86.0,0.098,76.4,0.125,82.9,0.029,85.3,0.0,78.0,0.037,,35.2,4.1,4.7,4.4,5.1,2.1,-0.2,2.34,0.95,5.53,1.65,1.5,2.7,-18.5,0.252,0.688,0.445,0.588,0.854,0.769,0.441,0.53,0.099,-5.7,0.324,0.099,0.035,0.026,0.001,0.136,0.161,0.094,0.004,,0.119,0.001,0.0,,92.0,91.4,86.4,84.5,82.3,90.0,83.0,76.0,77.8,,82.9,66.4,78.0,-1.8,-3.4,0.1,-7.0,-5.4,-5.4,0.1,3.4,1.9,,-5.6,9.7,4.6,9.0,6.9,4.8,1.7,5.6,5.6,0.9,-6.6,-4.5,,5.1,-7.0,-5.2,19.0,6.1,1.2,2.2,0.2,7.7,4.9,4.6,1.0,,7.6,-0.1,-0.2,2.18,2.29,1.26,3.09,5.62,2.1,1.13,1.82,10.23,,2.35,-4.86,-18.5,0.3,0.64,0.442,0.65,0.862,0.778,0.417,22.2,-12.1,-1.4,-4.6,25 - 25,48.5,0.6,0.3,0.377,0.368,0.255,0.119,0.452,0.429,0.409,0.112,0.001,0.102,0.326,0.041,0.035,0.0,0.001,0.218,0.147,0.0,83.4,77.3,76.7,92.5,86.7,85.2,78.7,66.5,91.0,83.3,0.0,-5.5,10.2,3.9,-1.9,0.1,-5.5,5.2,6.3,-3.6,0.2,0.0,3.5,-10.5,-8.0,8.0,3.5,1.1,-6.2,-11.3,4.7,-0.5,0.0,2.3,0.0,0.7,0.0,-0.2,0.1,0.0,0.1,1.4,-0.3,0.0,0.75,-0.61,0.26,-0.01,-0.21,0.12,0.0,2.07,0.23,-0.08,0.0,0.285,0.642,0.441,0.638,0.858,0.778,0.438,22.2,,113,165,110,121,143,194,104,0.84,94,115,235,93,107,101,77,80,155,,,0,,,0,,0,0.148,0.247,,,,Prince Fielder,425902,2005,MIL,1,NL,N,39,62,,-1.7,-1.7,-0.8,0.19,-0.19,0.0,46132013,124.7378,1.8034,38.35429,69.168127
