In [122]:
## Let's try to figure out who the shortest, heaviest baseball players are.

## Here's our overall plan of attack

# Setting Things Up ✅
## Import CSVs ✅
### Separate CSVs --> DataFrames for People, Pitching Data, Batting Data ✅
## Squish everything into one mondo DF ✅
## Add Physical Data ✅
### Height ✅
### Weight ✅
## Calculate BMI ✅
### Convert Imperial to Metric ✅
### BMI-ify ✅
### Throw BMI back into df ✅
## Assemble per-position lists sorted by BMI, then mWAR

# Knocking Things Down
## Find worst team that made playoffs in 2021
### Describe team fWAR/bWAR
### Describe individual fWAR/bWAR
## Pull from BMI lists per position until high BMI roster is full
### mWAR shall be higher on a team basis.
### mWAR shall be higher per position.
# If there's enough time:
## Repeat for:
### Tallness
### Shortness
### Heavy
### Light

# If there's enough time:
## Repeat for:
### Tallness
### Shortness
### Heavy
### Light

## Setting Things Up

### Import the necessaries

In [123]:
import numpy as np
import pandas as pd
import pybaseball
import seaborn as sns
import matplotlib.pyplot as plot
from deepdiff import DeepDiff
from pybaseball import bwar_pitch
from pybaseball import bwar_bat
from pybaseball import cache
from pybaseball.lahman import *
from pybaseball import chadwick_register

pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 100)


In [124]:
# Constants
KG_TO_LB = 0.453592
M_TO_IN = 0.0254

In [125]:
chadwick = chadwick_register(save=True)
chadwick = pd.DataFrame(chadwick)

In [126]:
download_lahman()

### Read infinity baseball data to DataFrames

In [127]:
# a table of all player biographical info and ids
people = pd.DataFrame(people())

# park id, name, alias, city, state, and country
parks = pd.DataFrame(parks())

# all star roster data: player, year, team, league, position
allstar = pd.DataFrame(all_star_full())

# each player's games played per position for each season
appearances = pd.DataFrame(appearances())

# batting stats by year, regular season
batting = pd.DataFrame(batting())

# batting stats by year, post season
batting_post = pd.DataFrame(batting_post())

# fielding stats by year 
fielding = pd.DataFrame(fielding())

# games played in left, center, right field 
fielding_of = pd.DataFrame(fielding_of())

# LF/CF/RF splits
fielding_of_split = pd.DataFrame(fielding_of_split())

# postseason fielding 
fielding_post = pd.DataFrame(fielding_post())

# home game attendance by park by year 
home_games = pd.DataFrame(home_games())

# historical player pitching stats
pitching = pd.DataFrame(pitching())

# postseason pitching stats
pitching_post = pd.DataFrame(pitching_post())

# playoff series winners and losers 
series_post = pd.DataFrame(series_post())

# data on teams by year: record, division, stadium, attendance, etc
teams = pd.DataFrame(teams())

# current and historical franchises, whether they're still active, and their ids
teams_franchises = pd.DataFrame(teams_franchises())

# split season data for teams
teams_half = pd.DataFrame(teams_half()) 

# fangraphs batting since 2008
fangraphs_batting = pd.DataFrame(pybaseball.batting_stats_range(
    start_dt="2008-01-01", end_dt="2021-12-31"))

# fangraphs pitching since 2008
fangraphs_pitching = pd.DataFrame(pybaseball.pitching_stats_range(
    start_dt="2008-01-01", end_dt="2021-12-31"))

# fangraphs team pitching since 2008
fangraphs_team_pitching = pd.DataFrame(
    pybaseball.team_pitching(start_season="2008", end_season="2021"))

# fangraphs team batting since 2008
fangraphs_team_batting = pd.DataFrame(
    pybaseball.team_batting(start_season="2008", end_season="2021"))

# fangraphs team fielding since 2008
fangraphs_team_fielding = pd.DataFrame(
    pybaseball.team_fielding(start_season="2008", end_season="2021"))

# bref pitching WAR
bwar_pitch = pd.DataFrame(bwar_pitch(return_all=True))

# bref batting WAR
bwar_bat = pd.DataFrame(bwar_bat(return_all=True))


  table = table.drop('', 1)


----

### Add BMI Calcs

In [128]:
# BMI Calculations
people['KG'] = people['weight'] * KG_TO_LB
people['meters'] = people['height'] * M_TO_IN
people['BMI'] = people['KG'] / people['meters'] ** 2
people['ratio'] = people['meters'] * people['BMI']


----

## Assemble Monster DataFrame of Everything About Player Ever

### Hello Everybody

In [129]:
people.shape

(20543, 28)

In [130]:
people.columns

Index(['playerID', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry',
       'birthState', 'birthCity', 'deathYear', 'deathMonth', 'deathDay',
       'deathCountry', 'deathState', 'deathCity', 'nameFirst', 'nameLast',
       'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame',
       'retroID', 'bbrefID', 'KG', 'meters', 'BMI', 'ratio'],
      dtype='object')

In [131]:
chadwick.shape

(24258, 8)

In [132]:
chadwick.columns

Index(['name_last', 'name_first', 'key_mlbam', 'key_retro', 'key_bbref',
       'key_fangraphs', 'mlb_played_first', 'mlb_played_last'],
      dtype='object')

In [133]:
# Let's change some of these column names to save ourselves some merging hassle

In [134]:
chadwick = chadwick.rename(columns={
    "name_last": "nameLast",
    "name_first": "nameFirst",
    "key_fangraphs": "fangraphsID",
    "key_bbref": "playerID",
    "key_retro": "retroID",
    "key_mlbam": "mlbID"
})

In [135]:
chadwick.columns

Index(['nameLast', 'nameFirst', 'mlbID', 'retroID', 'playerID', 'fangraphsID',
       'mlb_played_first', 'mlb_played_last'],
      dtype='object')

Merge Chadwick, check.

In [136]:
# Merge chadwick into people
df = people.merge(
    chadwick,
    left_on=["playerID", "retroID", "nameLast", "nameFirst"],
    right_on=["playerID", "retroID", "nameLast", "nameFirst"], how='outer'
)

In [137]:
df.columns

Index(['playerID', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry',
       'birthState', 'birthCity', 'deathYear', 'deathMonth', 'deathDay',
       'deathCountry', 'deathState', 'deathCity', 'nameFirst', 'nameLast',
       'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame',
       'retroID', 'bbrefID', 'KG', 'meters', 'BMI', 'ratio', 'mlbID',
       'fangraphsID', 'mlb_played_first', 'mlb_played_last'],
      dtype='object')

In [138]:
df.shape

(24587, 32)

----

### Add BWAR Stats 

#### Add BWAR Batting

##### BWAR Batting Prep

In [139]:
bwar_bat.columns

Index(['name_common', 'age', 'mlb_ID', 'player_ID', 'year_ID', 'team_ID',
       'stint_ID', 'lg_ID', 'PA', 'G', 'Inn', 'runs_bat', 'runs_br', 'runs_dp',
       'runs_field', 'runs_infield', 'runs_outfield', 'runs_catcher',
       'runs_good_plays', 'runs_defense', 'runs_position', 'runs_position_p',
       'runs_replacement', 'runs_above_rep', 'runs_above_avg',
       'runs_above_avg_off', 'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def',
       'WAR', 'WAR_def', 'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG',
       'oppRpG', 'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent',
       'pyth_exponent_rep', 'waa_win_perc', 'waa_win_perc_off',
       'waa_win_perc_def', 'waa_win_perc_rep', 'OPS_plus', 'TOB_lg', 'TB_lg'],
      dtype='object')

In [140]:
bwar_bat = bwar_bat.rename(columns={
    'player_ID': 'playerID',
    'mlb_ID': 'mlbID',
    'stint_ID': 'stint'
})

In [141]:
bwar_bat.columns

Index(['name_common', 'age', 'mlbID', 'playerID', 'year_ID', 'team_ID',
       'stint', 'lg_ID', 'PA', 'G', 'Inn', 'runs_bat', 'runs_br', 'runs_dp',
       'runs_field', 'runs_infield', 'runs_outfield', 'runs_catcher',
       'runs_good_plays', 'runs_defense', 'runs_position', 'runs_position_p',
       'runs_replacement', 'runs_above_rep', 'runs_above_avg',
       'runs_above_avg_off', 'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def',
       'WAR', 'WAR_def', 'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG',
       'oppRpG', 'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent',
       'pyth_exponent_rep', 'waa_win_perc', 'waa_win_perc_off',
       'waa_win_perc_def', 'waa_win_perc_rep', 'OPS_plus', 'TOB_lg', 'TB_lg'],
      dtype='object')

In [142]:
bwar_bat.columns

Index(['name_common', 'age', 'mlbID', 'playerID', 'year_ID', 'team_ID',
       'stint', 'lg_ID', 'PA', 'G', 'Inn', 'runs_bat', 'runs_br', 'runs_dp',
       'runs_field', 'runs_infield', 'runs_outfield', 'runs_catcher',
       'runs_good_plays', 'runs_defense', 'runs_position', 'runs_position_p',
       'runs_replacement', 'runs_above_rep', 'runs_above_avg',
       'runs_above_avg_off', 'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def',
       'WAR', 'WAR_def', 'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG',
       'oppRpG', 'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent',
       'pyth_exponent_rep', 'waa_win_perc', 'waa_win_perc_off',
       'waa_win_perc_def', 'waa_win_perc_rep', 'OPS_plus', 'TOB_lg', 'TB_lg'],
      dtype='object')

----

##### BWAR Batting Merge

In [143]:
df = df.merge(
    bwar_bat, left_on=["playerID", "mlbID"], right_on=["playerID", "mlbID"], how="outer"
)

In [144]:
df.columns

Index(['playerID', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry',
       'birthState', 'birthCity', 'deathYear', 'deathMonth', 'deathDay',
       'deathCountry', 'deathState', 'deathCity', 'nameFirst', 'nameLast',
       'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame',
       'retroID', 'bbrefID', 'KG', 'meters', 'BMI', 'ratio', 'mlbID',
       'fangraphsID', 'mlb_played_first', 'mlb_played_last', 'name_common',
       'age', 'year_ID', 'team_ID', 'stint', 'lg_ID', 'PA', 'G', 'Inn',
       'runs_bat', 'runs_br', 'runs_dp', 'runs_field', 'runs_infield',
       'runs_outfield', 'runs_catcher', 'runs_good_plays', 'runs_defense',
       'runs_position', 'runs_position_p', 'runs_replacement',
       'runs_above_rep', 'runs_above_avg', 'runs_above_avg_off',
       'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def', 'WAR', 'WAR_def',
       'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG', 'oppRpG',
       'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent', 'pyth_

#### Add BWAR Pitching

##### BWAR Pitching Prep

In [145]:
bwar_pitch.columns

Index(['name_common', 'age', 'mlb_ID', 'player_ID', 'year_ID', 'team_ID',
       'stint_ID', 'lg_ID', 'G', 'GS', 'IPouts', 'IPouts_start',
       'IPouts_relief', 'RA', 'xRA', 'xRA_sprp_adj', 'xRA_extras_adj',
       'xRA_def_pitcher', 'PPF', 'PPF_custom', 'xRA_final', 'BIP', 'BIP_perc',
       'RS_def_total', 'runs_above_avg', 'runs_above_avg_adj',
       'runs_above_rep', 'RpO_replacement', 'GR_leverage_index_avg', 'WAR',
       'salary', 'teamRpG', 'oppRpG', 'pyth_exponent', 'waa_win_perc', 'WAA',
       'WAA_adj', 'oppRpG_rep', 'pyth_exponent_rep', 'waa_win_perc_rep',
       'WAR_rep', 'ERA_plus', 'ER_lg'],
      dtype='object')

----

merge bwar_pitch, check

In [146]:
bwar_pitch = bwar_pitch.rename(columns={
    "stint_ID": "stint",
    "mlb_ID": "mlbID",
    "player_ID": "playerID"
})

In [147]:
bwar_pitch.stint

0        1
1        1
2        1
3        1
4        1
        ..
53654    1
53655    1
53656    1
53657    1
53658    1
Name: stint, Length: 53659, dtype: int64

##### BWAR Pitching Merge

In [148]:
print(df.columns.tolist())

['playerID', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry', 'birthState', 'birthCity', 'deathYear', 'deathMonth', 'deathDay', 'deathCountry', 'deathState', 'deathCity', 'nameFirst', 'nameLast', 'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame', 'retroID', 'bbrefID', 'KG', 'meters', 'BMI', 'ratio', 'mlbID', 'fangraphsID', 'mlb_played_first', 'mlb_played_last', 'name_common', 'age', 'year_ID', 'team_ID', 'stint', 'lg_ID', 'PA', 'G', 'Inn', 'runs_bat', 'runs_br', 'runs_dp', 'runs_field', 'runs_infield', 'runs_outfield', 'runs_catcher', 'runs_good_plays', 'runs_defense', 'runs_position', 'runs_position_p', 'runs_replacement', 'runs_above_rep', 'runs_above_avg', 'runs_above_avg_off', 'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def', 'WAR', 'WAR_def', 'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG', 'oppRpG', 'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent', 'pyth_exponent_rep', 'waa_win_perc', 'waa_win_perc_off', 'waa_win_perc_def', 'waa_win_perc_rep', 'OPS_p

In [149]:
df = df.merge(
    bwar_pitch,
    left_on=[
        'G',
        'WAA',
        'WAR',
        'WAR_rep',
        'age',
        'lg_ID',
        'mlbID',
        'oppRpG',
        'oppRpG_rep',
        'playerID',
        'pyth_exponent',
        'pyth_exponent_rep',
        'runs_above_avg',
        'runs_above_rep',
        'salary',
        'stint',
        'teamRpG',
        'team_ID',
        'waa_win_perc',
        'waa_win_perc_rep',
        'year_ID'
    ],
    right_on=[
        'G',
        'WAA',
        'WAR',
        'WAR_rep',
        'age',
        'lg_ID',
        'mlbID',
        'oppRpG',
        'oppRpG_rep',
        'playerID',
        'pyth_exponent',
        'pyth_exponent_rep',
        'runs_above_avg',
        'runs_above_rep',
        'salary',
        'stint',
        'teamRpG',
        'team_ID',
        'waa_win_perc',
        'waa_win_perc_rep',
        'year_ID'
    ], how='outer'
)

In [150]:
print(df.columns.tolist())

['playerID', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry', 'birthState', 'birthCity', 'deathYear', 'deathMonth', 'deathDay', 'deathCountry', 'deathState', 'deathCity', 'nameFirst', 'nameLast', 'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame', 'retroID', 'bbrefID', 'KG', 'meters', 'BMI', 'ratio', 'mlbID', 'fangraphsID', 'mlb_played_first', 'mlb_played_last', 'name_common_x', 'age', 'year_ID', 'team_ID', 'stint', 'lg_ID', 'PA', 'G', 'Inn', 'runs_bat', 'runs_br', 'runs_dp', 'runs_field', 'runs_infield', 'runs_outfield', 'runs_catcher', 'runs_good_plays', 'runs_defense', 'runs_position', 'runs_position_p', 'runs_replacement', 'runs_above_rep', 'runs_above_avg', 'runs_above_avg_off', 'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def', 'WAR', 'WAR_def', 'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG', 'oppRpG', 'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent', 'pyth_exponent_rep', 'waa_win_perc', 'waa_win_perc_off', 'waa_win_perc_def', 'waa_win_perc_rep', 'OPS

----

#### Add Fielding_OF

##### Fielding_OF Prep

In [151]:
fielding_of.columns

Index(['playerID', 'yearID', 'stint', 'Glf', 'Gcf', 'Grf'], dtype='object')

In [152]:
fielding_of = fielding_of.rename(columns={
    "yearID": "year_ID"
})

##### Fielding_OF Merge

In [153]:
df = df.merge(
    fielding_of,
    left_on=["playerID", "year_ID", "stint"],
    right_on=["playerID", "year_ID", "stint"], how='left'
)


In [154]:
df.shape

(177731, 104)

----

#### Add Fielding

##### Fielding Check

In [155]:
fielding.columns

Index(['playerID', 'yearID', 'stint', 'teamID', 'lgID', 'POS', 'G', 'GS',
       'InnOuts', 'PO', 'A', 'E', 'DP', 'PB', 'WP', 'SB', 'CS', 'ZR'],
      dtype='object')

In [156]:
fielding = fielding.rename(columns={
    "yearID": "year_ID"
})

##### Fielding Merge

In [157]:
df = df.merge(
    fielding,
    left_on=["playerID", "year_ID", "G", "stint"],
    right_on=["playerID", "year_ID", "G", "stint"], how='left'
)


----

### Meet the Monster

In [158]:
df.sample(25) ### works to here

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,KG,meters,BMI,ratio,mlbID,fangraphsID,mlb_played_first,mlb_played_last,name_common_x,age,year_ID,team_ID,stint,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg,name_common_y,GS_x,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg_adj,RpO_replacement,GR_leverage_index_avg,WAA_adj,ERA_plus,ER_lg,Glf,Gcf,Grf,teamID,lgID,POS,GS_y,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
171846,thompri01,,,,,,,,,,,,,,,,,,,,,,,,,,,,123299.0,,,,,31.0,1990.0,MON,1.0,NL,,1.0,,,,,,,,,,,,,,0.59,0.508,,,0.0555,,,0.04,,,0.0091,104500.0,,4.21496,3.72496,,4.29689,1.805,1.841,0.5555,,,0.4911,,,,Rich Thompson,0.0,3.0,0.0,3.0,0.0,0.542,-0.042,,-0.018,96.0,98.0,0.508,4.0,0.0009,-19.7,0.49,0.184,0.02,-0.0291,,0.575,,,,MON,NL,P,0.0,3.0,0.0,0.0,0.0,0.0,,,,,
149568,kleinph01,,,,,,,,,,,,,,,,,,,,,,,,,,,,607309.0,,,,,27.0,2016.0,TEX,1.0,AL,,8.0,,,,,,,,,,,,,,-0.037,-0.894,,,-0.092,,,-0.01,,,0.0883,509500.0,,4.50306,4.61431,,4.60901,1.877,1.877,0.4885,,,0.4891,,,,Phil Klein,0.0,26.0,0.0,26.0,5.0,4.438,-0.32,,0.024,108.0,100.286,4.106,21.0,0.0047,5.0,-0.89,0.2,0.7163,-0.0075,90.92,4.546,,,,TEX,AL,P,0.0,26.0,0.0,0.0,0.0,0.0,,,,,
119110,hayesjo01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Johnny Hayes,32.0,1942.0,NBY,0.0,NN2,120.0,33.0,,3.52,-0.17,0.0,0.0,,,,,0.0,1.01,0.0,3.64,8.0,4.4,4.4,1.0,0.43,0.43,0.1,0.75,0.1,0.75,0.32,,N,5.16,5.02788,0.09853,4.91759,1.938,1.925,0.5126,0.5126,0.5029,0.4893,119.253176,38.715,34.848,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
59330,lopezra01,1933.0,5.0,26.0,Cuba,,Las Villas,1982.0,9.0,4.0,USA,FL,Miami,Ramon,Lopez,Jose Ramon,175.0,72.0,R,R,1966-08-21,1966-09-22,loper101,lopezra01,79.3786,1.8288,23.734004,43.404746,117920.0,1007758.0,1966.0,1966.0,Ramon Lopez,33.0,1966.0,CAL,1.0,AL,0.0,5.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.01,0.0,-0.01,0.0,0.0,7000.0,Y,3.88302,3.88302,0.06917,3.88302,1.794,1.794,0.5,0.5,0.5,0.5,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
122380,shivege01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,George Shively,31.0,1924.0,AC,2.0,ECL,203.0,44.0,,-5.02,-0.48,0.0,0.0,,,,,0.0,-1.72,0.0,6.59,-0.6,-7.2,-7.2,-1.7,-0.66,-0.69,-0.16,-0.03,-0.16,-0.06,0.63,,N,5.19762,5.36171,0.10246,5.21193,1.958,1.958,0.4848,0.4848,0.4964,0.4861,77.579358,66.741,71.083,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
31701,flynnge01,1871.0,5.0,24.0,USA,IL,Chicago,1901.0,12.0,28.0,USA,IL,Chicago,George,Flynn,George Albert,150.0,69.0,,,1896-04-17,1896-05-23,flyng101,flynnge01,68.0388,1.7526,22.150882,38.821636,114266.0,1004184.0,1896.0,1896.0,George Flynn,25.0,1896.0,CHC,1.0,NL,122.0,29.0,,-5.64,0.99,0.0,0.0,,,,,0.0,-1.95,0.0,4.17,-2.4,-6.6,-6.6,-2.0,-0.56,-0.55,-0.16,-0.14,-0.16,-0.13,0.42,,N,5.99331,6.22089,0.11769,6.07714,2.041,2.045,0.481,0.481,0.4944,0.488,66.121217,43.792,42.792,,,,,,,,,,,,,,,,,,,,,,,29.0,0.0,0.0,CHN,NL,OF,,855.0,66.0,6.0,10.0,3.0,,,,,
127284,bendech01,,,,,,,,,,,,,,,,,,,,,,,,,,,,110850.0,,,,,27.0,1911.0,PHA,1.0,AL,,31.0,,,,,,,,,,,,,,59.691,36.501,,,3.8781,,,5.92,,,2.2219,1200.0,,4.62115,3.48705,,5.37922,1.816,1.928,0.6251,,,0.4273,,,,Charles Bender,24.0,649.0,591.0,58.0,66.0,107.588,0.0,,0.305,94.0,95.543,102.501,674.0,0.1527,2.0,35.157,0.208,1.0,-0.1804,145.407692,75.612,,,,PHA,AL,P,,,11.0,58.0,0.0,4.0,,,,,
69190,mooremi01,1959.0,11.0,26.0,USA,OK,Carnegie,,,,,,,Mike,Moore,Michael Wayne,205.0,76.0,R,R,1982-04-11,1995-08-31,moorm001,mooremi01,92.98636,1.9304,24.953107,48.169478,119313.0,1009122.0,1982.0,1995.0,Mike Moore,28.0,1988.0,SEA,1.0,AL,0.0,0.0,228.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,481950.0,Y,,4.37905,0.07752,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
22770,danieka01,1963.0,8.0,20.0,USA,GA,Vienna,,,,,,,Kal,Daniels,Kalvoski,195.0,71.0,L,R,1986-04-09,1992-09-22,danik001,danieka01,88.45044,1.8034,27.196678,49.04649,113039.0,1002973.0,1986.0,1992.0,Kal Daniels,22.0,1986.0,CIN,1.0,NL,207.0,74.0,350.7,13.29,2.95,-0.53,0.8,0.0,-1.0,0.0,,-0.2,-1.34,0.0,7.07,21.2,14.2,14.4,-1.5,1.51,1.52,-0.19,2.22,-0.19,2.23,0.71,60000.0,N,4.34701,4.15282,0.07241,4.05724,1.84,1.822,0.5207,0.521,0.4977,0.4894,147.698454,69.381,72.581,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
140093,fuentbr01,,,,,,,,,,,,,,,,,,,,,,,,,,,,150118.0,,,,,27.0,2003.0,COL,1.0,NL,,75.0,,,,,,,,,,,,,,22.392,15.827,,,1.605,,,2.13,,,0.6668,300000.0,,4.62384,4.41759,,4.71156,1.873,1.89,0.5214,,,0.4911,,,,Brian Fuentes,0.0,226.0,0.0,226.0,24.0,37.428,-2.792,,0.334,116.0,116.106,39.827,198.0,0.0407,8.2,15.469,0.201,1.014,-0.1404,181.682609,41.787,,,,COL,NL,P,0.0,226.0,3.0,9.0,1.0,1.0,,,,,


----

# OPEN TRYOUTS ARE HERE

In [159]:
df_save = df

In [160]:
df_pitchers = df[df['pitcher'] == "Y"]

In [161]:
df_fielders = df[df['pitcher'] == "N"]

In [162]:
df['BMI'] = round(df.BMI, 2)  # clean up our BMI decimals

In [163]:
df = df.dropna(subset=['BMI'])  # Drop anyone without a weight.

In [182]:
df_huskies_tryouts = df[df['weight'] >= 225]  # Drop anyone with a weight under 225

In [184]:
df_huskies_tryouts = df_huskies_tryouts.sort_values('weight', ascending=False)

In [185]:
df_huskies_tryouts.groupby('POS').weight.max().sum() # weight of our heaviest starting lineup

1892.0

Looks like our highest possible team WAR for a 9-man lineup is a 13 WAR season. Which ain't bad. But let's keep going. Probably easiest if we make ourselves a dataframe for every position, since we'll need to fill up a 26-man roster.|

In [186]:
df_huskies_tryouts_P = df_huskies_tryouts[df_huskies_tryouts['POS'] == "P"]
df_huskies_tryouts_C = df_huskies_tryouts[df_huskies_tryouts['POS'] == "C"]
df_huskies_tryouts_1B = df_huskies_tryouts[df_huskies_tryouts['POS'] == "1B"]
df_huskies_tryouts_2B = df_huskies_tryouts[df_huskies_tryouts['POS'] == "2B"]
df_huskies_tryouts_3B = df_huskies_tryouts[df_huskies_tryouts['POS'] == "3B"]
df_huskies_tryouts_SS = df_huskies_tryouts[df_huskies_tryouts['POS'] == "SS"]
df_huskies_tryouts_OF = df_huskies_tryouts[df_huskies_tryouts['POS'] == "OF"]

We'll need:
- 5 starting pitchers
- 7 relievers
- 2 catchers
- 4 starting infielders
- 2 utility infielders
- 5 outfielders

In [206]:
huskies_P = (
    df_huskies_tryouts_P.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["playerID"])
    .nlargest(13, "weight")
)  # 13 heaviest P
huskies_P

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,KG,meters,BMI,ratio,mlbID,fangraphsID,mlb_played_first,mlb_played_last,name_common_x,age,year_ID,team_ID,stint,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg,name_common_y,GS_x,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg_adj,RpO_replacement,GR_leverage_index_avg,WAA_adj,ERA_plus,ER_lg,Glf,Gcf,Grf,teamID,lgID,POS,GS_y,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
25065,diazju03,1984.0,2.0,27.0,D.R.,La Romana,La Romana,,,,,,,Jumbo,Diaz,Jose Rafael,315.0,76.0,R,R,2014-06-20,2017-07-16,diazj005,diazju03,142.88148,1.9304,38.34,74.016515,471822.0,3397.0,2014.0,2017.0,Jumbo Diaz,32.0,2016.0,CIN,1.0,NL,1.0,45.0,43.0,-0.28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.14,0.0,-0.1,-0.1,-0.1,0.0,-0.01,-0.01,-0.02,-0.01,-0.02,-0.01,0.0,525000.0,Y,4.49699,4.5001,0.08241,4.49938,1.87,1.87,0.4997,0.4997,0.5,0.4999,-100.0,0.332,0.427,,,,,,,,,,,,,,,,,,,,,,,,,,CIN,NL,P,0.0,129.0,1.0,3.0,2.0,0.0,,,,,
11961,brownju01,1907.0,4.0,30.0,USA,RI,Greene,1966.0,10.0,2.0,USA,NY,Freeport,Jumbo,Brown,Walter George,295.0,76.0,R,R,1925-08-26,1941-08-27,browj110,brownju01,133.80964,1.9304,35.91,69.317053,111597.0,1001533.0,1925.0,1941.0,Jumbo Brown,21.0,1928.0,CLE,1.0,AL,3.0,5.0,,1.17,0.0,0.0,0.0,,,,,0.0,0.0,0.32,0.0,1.5,1.5,1.5,0.0,0.15,0.15,0.0,0.15,0.0,0.15,0.0,,Y,5.0819,4.7839,0.08249,4.7614,1.92,1.902,0.529,0.529,0.5,0.4978,411.680067,1.065,1.235,,,,,,,,,,,,,,,,,,,,,,,,,,CLE,AL,P,,,1.0,1.0,0.0,0.0,,,,,
81653,rauchjo01,1978.0,9.0,27.0,USA,KY,Louisville,,,,,,,Jon,Rauch,Jon Erich,290.0,83.0,R,R,2002-04-02,2013-05-17,raucj001,rauchjo01,131.54168,2.1082,29.6,62.395257,400010.0,1475.0,2002.0,2013.0,Jon Rauch,34.0,2013.0,MIA,1.0,NL,1.0,15.0,16.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.13,0.0,0.1,0.1,0.1,0.0,0.02,0.02,-0.01,0.02,-0.01,0.02,0.0,1000000.0,Y,4.03667,4.02801,0.07154,4.02584,1.813,1.812,0.501,0.501,0.5,0.4998,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,MIA,NL,P,0.0,50.0,1.0,4.0,0.0,0.0,,,,,
12250,broxtjo01,1984.0,6.0,16.0,USA,GA,Augusta,,,,,,,Jonathan,Broxton,Jonathan Roy,285.0,76.0,R,R,2005-07-29,2017-05-30,broxj001,broxtjo01,129.27372,1.9304,34.69,66.967323,455009.0,4759.0,2005.0,2017.0,Jonathan Broxton,28.0,2012.0,CIN,2.0,NL,0.0,25.0,22.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,,Y,4.28947,4.28947,0.07895,4.28947,1.845,1.845,0.5,0.5,0.5,0.5,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,CIN,NL,P,0.0,67.0,1.0,4.0,0.0,0.0,,,,,
19460,colonba01,1973.0,5.0,24.0,D.R.,Puerto Plata,Altamira,,,,,,,Bartolo,Colon,Bartolo,285.0,71.0,R,R,1997-04-04,2018-09-22,colob001,colonba01,129.27372,1.8034,39.75,71.683331,112526.0,375.0,1997.0,2018.0,Bartolo Colon,29.0,2002.0,MON,2.0,NL,43.0,17.0,117.0,-7.75,-0.2,0.0,0.0,0.0,0.0,0.0,,0.0,0.08,5.86,0.0,-2.0,-2.0,-2.0,0.1,-0.22,-0.22,0.0,-0.22,0.0,-0.22,0.0,,Y,4.36277,4.481,0.08222,4.40216,1.861,1.864,0.4876,0.4876,0.5005,0.4917,-27.516112,14.096,16.712,,,,,,,,,,,,,,,,,,,,,,,,,,MON,NL,P,17.0,351.0,6.0,15.0,1.0,1.0,,,,,
75964,paronch01,1975.0,7.0,28.0,USA,NH,Woodsville,,,,,,,Chad,Paronto,Chad Michael,285.0,77.0,R,R,2001-04-18,2009-09-29,paroc001,paronch01,129.27372,1.9558,33.8,66.097617,406301.0,399.0,2001.0,2009.0,Chad Paronto,33.0,2009.0,HOU,1.0,NL,0.0,6.0,6.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Y,4.50898,4.50898,0.08274,4.50898,1.872,1.872,0.5,0.5,0.5,0.5,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,HOU,NL,P,0.0,20.0,0.0,0.0,0.0,0.0,,,,,
64262,mccluse01,1981.0,2.0,7.0,USA,WV,Lewisburg,,,,,,,Seth,McClung,Michael Seth,280.0,78.0,L,R,2003-03-31,2009-10-04,mccls002,mccluse01,127.00576,1.9812,32.36,64.105471,425528.0,1657.0,2003.0,2009.0,Seth McClung,26.0,2007.0,MIL,1.0,NL,0.0,14.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.01,0.0,-0.01,0.0,0.0,,Y,4.77852,4.77852,0.08882,4.77852,1.903,1.903,0.5,0.5,0.5,0.5,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,MIL,NL,P,0.0,36.0,1.0,0.0,0.0,0.0,,,,,
55015,kuhnejo01,1995.0,2.0,19.0,USA,NC,Goldsboro,,,,,,,Joel,Kuhnel,Joel Kenneth,280.0,76.0,R,R,2019-08-16,2022-07-01,kuhnj001,kuhnejo01,127.00576,1.9304,34.08,65.792458,669270.0,19995.0,2019.0,2022.0,Joel Kuhnel,24.0,2019.0,CIN,1.0,NL,1.0,11.0,9.7,-0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.16,0.0,-0.1,-0.1,-0.1,0.0,-0.01,-0.01,-0.01,-0.01,-0.01,-0.01,0.0,,Y,4.70742,4.72014,0.08805,4.71719,1.895,1.896,0.4987,0.4987,0.5,0.4997,-100.0,0.339,0.457,,,,,,,,,,,,,,,,,,,,,,,,,,CIN,NL,P,0.0,29.0,0.0,0.0,0.0,0.0,,,,,
16595,cedajo01,1987.0,1.0,28.0,D.R.,Distrito Nacional,Santo Domingo,,,,,,,Jose,Ceda,Jose Marti,280.0,77.0,R,R,2010-09-06,2011-09-24,cedaj001,cedajo01,127.00576,1.9558,33.2,64.93801,469167.0,2377.0,2010.0,2011.0,Jose Ceda,23.0,2010.0,FLA,1.0,NL,2.0,8.0,8.7,-0.55,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.27,0.0,-0.3,-0.3,-0.3,0.0,-0.03,-0.03,0.0,-0.03,0.0,-0.03,0.0,,Y,4.3462,4.3812,0.08031,4.37341,1.854,1.856,0.4963,0.4963,0.5,0.4992,-100.0,0.672,0.836,,,,,,,,,,,,,,,,,,,,,,,,,,FLO,NL,P,0.0,26.0,0.0,0.0,0.0,0.0,,,,,
86706,russead01,1983.0,4.0,14.0,USA,OH,North Olmsted,,,,,,,Adam,Russell,Adam William,280.0,80.0,R,R,2008-06-17,2011-07-17,russa001,russead01,127.00576,2.032,30.76,62.502835,452240.0,7334.0,2008.0,2011.0,Adam Russell,26.0,2009.0,SDP,1.0,NL,1.0,15.0,12.3,-0.27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.14,0.0,-0.1,-0.1,-0.1,0.0,-0.01,-0.01,0.0,-0.01,0.0,-0.01,0.0,405000.0,Y,4.50031,4.50898,0.08274,4.5069,1.871,1.871,0.4991,0.4991,0.5,0.4998,-100.0,0.325,0.403,,,,,,,,,,,,,,,,,,,,,,,,,,SDN,NL,P,0.0,37.0,0.0,2.0,0.0,0.0,,,,,


In [207]:
huskies_C = (
    df_huskies_tryouts_C.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["playerID"])
    .nlargest(2, "weight")
)  # 2 heaviest catchers
huskies_C

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,KG,meters,BMI,ratio,mlbID,fangraphsID,mlb_played_first,mlb_played_last,name_common_x,age,year_ID,team_ID,stint,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg,name_common_y,GS_x,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg_adj,RpO_replacement,GR_leverage_index_avg,WAA_adj,ERA_plus,ER_lg,Glf,Gcf,Grf,teamID,lgID,POS,GS_y,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
31630,flowety01,1986.0,1.0,24.0,USA,GA,Roswell,,,,,,,Tyler,Flowers,Cole Tyler,260.0,76.0,R,R,2009-09-03,2020-09-27,flowt001,flowety01,117.93392,1.9304,31.65,61.092996,452095.0,9134.0,2009.0,2020.0,Tyler Flowers,34.0,2020.0,ATL,1.0,NL,80.0,22.0,184.0,-2.32,0.0,-0.23,0.0,0.0,0.0,2.0,0.0,2.0,1.52,0.0,2.73,3.7,1.0,-1.0,3.5,0.11,-0.09,0.36,0.37,0.36,0.17,0.26,4000000.0,N,4.83284,4.87966,0.08989,4.75541,1.912,1.907,0.5044,0.4954,0.5157,0.4877,78.114864,26.624,29.829,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,C,22.0,552.0,181.0,5.0,1.0,0.0,1.0,,15.0,2.0,
61149,maldoca03,1979.0,1.0,3.0,Venezuela,Zulia,Maracaibo,,,,,,,Carlos,Maldonado,Carlos Luis,260.0,74.0,R,R,2006-09-08,2012-05-29,maldc003,maldoca03,117.93392,1.8796,33.38,62.744158,430961.0,2034.0,2006.0,2012.0,Carlos Maldonado,27.0,2006.0,PIT,1.0,NL,20.0,8.0,54.0,-4.27,0.18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.39,0.0,0.62,-3.1,-3.7,-3.7,0.4,-0.38,-0.38,0.04,-0.32,0.04,-0.32,0.06,,N,4.40425,4.86675,0.09085,4.78882,1.886,1.908,0.4531,0.4531,0.5048,0.4923,-31.900107,6.806,8.326,,,,,,,,,,,,,,,,,,,,,,,,,,PIT,NL,C,6.0,162.0,26.0,4.0,1.0,0.0,0.0,,6.0,1.0,


In [208]:
huskies_1B = (
    df_huskies_tryouts_1B.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["playerID"])
    .nlargest(1, "weight")
)  # heaviest 1B
huskies_1B

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,KG,meters,BMI,ratio,mlbID,fangraphsID,mlb_played_first,mlb_played_last,name_common_x,age,year_ID,team_ID,stint,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg,name_common_y,GS_x,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg_adj,RpO_replacement,GR_leverage_index_avg,WAA_adj,ERA_plus,ER_lg,Glf,Gcf,Grf,teamID,lgID,POS,GS_y,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
82002,reedaj01,1993.0,5.0,10.0,USA,IN,Terre Haute,,,,,,,A. J.,Reed,Andrew Joseph,275.0,76.0,L,L,2016-06-25,2019-08-01,reeda002,reedaj01,124.7378,1.9304,33.47,64.617592,607223.0,16246.0,2016.0,2019.0,AJ Reed,25.0,2018.0,HOU,1.0,AL,3.0,1.0,8.0,-0.81,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.05,0.0,0.11,-0.8,-0.9,-0.9,-0.1,-0.09,-0.09,-0.01,-0.08,-0.01,-0.08,0.01,,N,3.71865,4.57865,0.08136,4.47116,1.828,1.873,0.4061,0.4061,0.4948,0.4889,-100.0,0.952,1.244,,,,,,,,,,,,,,,,,,,,,,,,,,HOU,AL,1B,1.0,24.0,6.0,0.0,0.0,1.0,,,,,


In [209]:
huskies_2B = (
    df_huskies_tryouts_2B.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["playerID"])
    .nlargest(2, "weight")
)  # 2 heaviest 2B
huskies_2B

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,KG,meters,BMI,ratio,mlbID,fangraphsID,mlb_played_first,mlb_played_last,name_common_x,age,year_ID,team_ID,stint,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg,name_common_y,GS_x,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg_adj,RpO_replacement,GR_leverage_index_avg,WAA_adj,ERA_plus,ER_lg,Glf,Gcf,Grf,teamID,lgID,POS,GS_y,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
89128,schoojo01,1991.0,10.0,16.0,Curacao,,Willemstad,,,,,,,Jonathan,Schoop,Jonathan Rufino Jezus,247.0,73.0,R,R,2013-09-25,2022-07-01,schoj001,schoojo01,112.037224,1.8542,32.59,60.423484,570731.0,11265.0,2013.0,2022.0,Jonathan Schoop,26.0,2018.0,BAL,1.0,AL,367.0,85.0,755.0,-3.49,-1.58,0.52,12.0,0.0,0.0,0.0,0.0,12.0,2.45,0.0,12.96,22.9,9.9,-2.1,14.5,1.19,-0.14,1.62,2.42,1.62,1.09,1.23,8500000.0,N,4.55394,4.57865,0.08136,4.42616,1.878,1.871,0.5121,0.4975,0.5176,0.4842,94.665417,115.729,144.102,,,,,,,,,,,,,,,,,,,,,,,,,,BAL,AL,2B,84.0,2256.0,147.0,260.0,8.0,69.0,,,,,
8457,blanktr01,1996.0,8.0,3.0,USA,PA,Pottsville,,,,,,,Travis,Blankenhorn,Travis Allan,230.0,74.0,L,R,2020-09-15,2021-08-21,blant002,blanktr01,104.32616,1.8796,29.53,55.504448,663905.0,18395.0,2020.0,2021.0,Travis Blankenhorn,24.0,2021.0,MIN,1.0,AL,0.0,1.0,0.7,0.0,-0.01,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,-1.0,-1.0,0.0,-1.0,-0.09,0.0,-0.09,-0.09,-0.09,0.0,0.0,,N,4.67464,4.68464,0.08598,4.68464,1.891,1.892,0.4059,0.499,0.4069,0.5,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,MIN,AL,2B,0.0,2.0,0.0,0.0,1.0,0.0,,,,,


In [210]:
huskies_3B = (
    df_huskies_tryouts_3B.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["playerID"])
    .nlargest(1, "weight")
)  # 1 heaviest 3B
huskies_3B

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,KG,meters,BMI,ratio,mlbID,fangraphsID,mlb_played_first,mlb_played_last,name_common_x,age,year_ID,team_ID,stint,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg,name_common_y,GS_x,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg_adj,RpO_replacement,GR_leverage_index_avg,WAA_adj,ERA_plus,ER_lg,Glf,Gcf,Grf,teamID,lgID,POS,GS_y,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
87857,sandopa01,1986.0,8.0,11.0,Venezuela,Carabobo,Puerto Cabello,,,,,,,Pablo,Sandoval,Pablo Emilio,268.0,70.0,B,R,2008-08-14,2021-07-29,sandp001,sandopa01,121.562656,1.778,38.45,68.370448,467055.0,5409.0,2008.0,2021.0,Pablo Sandoval,33.0,2020.0,ATL,2.0,NL,4.0,1.0,9.0,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.14,0.3,0.1,0.1,0.0,0.01,0.01,0.0,0.02,0.0,0.02,0.01,,N,4.99966,4.87966,0.08989,4.74298,1.921,1.907,0.5117,0.5117,0.502,0.4865,50.26296,1.331,0.865,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,3B,1.0,27.0,0.0,3.0,0.0,0.0,,,,,


In [212]:
huskies_SS = (
    df_huskies_tryouts_SS.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["playerID"])
    .nlargest(2, "weight")
)  # 2 heaviest SS
huskies_SS

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,KG,meters,BMI,ratio,mlbID,fangraphsID,mlb_played_first,mlb_played_last,name_common_x,age,year_ID,team_ID,stint,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg,name_common_y,GS_x,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg_adj,RpO_replacement,GR_leverage_index_avg,WAA_adj,ERA_plus,ER_lg,Glf,Gcf,Grf,teamID,lgID,POS,GS_y,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
101423,uribeju01,1979.0,3.0,22.0,D.R.,San Cristobal,Sabana Grande de Palenque,,,,,,,Juan,Uribe,Juan C.,245.0,72.0,R,R,2001-04-08,2016-07-30,uribj002,uribeju01,111.13004,1.8288,33.23,60.766645,346874.0,454.0,2001.0,2016.0,Juan Uribe,23.0,2002.0,COL,1.0,NL,618.0,155.0,1316.0,-39.55,3.23,-1.29,12.8,-1.0,0.0,0.0,,11.8,8.4,0.0,19.26,1.9,-17.4,-29.2,20.2,-1.98,-3.16,2.04,-0.08,2.04,-1.26,1.9,222000.0,N,4.29255,4.481,0.08222,4.35672,1.857,1.861,0.488,0.4801,0.5136,0.4869,55.607568,219.349,254.53,,,,,,,,,,,,,,,,,,,,,,,,,,COL,NL,SS,150.0,3948.0,261.0,504.0,27.0,118.0,,,,,
1482,almoner01,1978.0,2.0,1.0,D.R.,Distrito Nacional,Santo Domingo,,,,,,,Erick,Almonte,Erick R.,245.0,74.0,R,R,2001-09-04,2011-04-25,almoe001,almoner01,111.13004,1.8796,31.46,59.124303,400095.0,808.0,2001.0,2011.0,Erick Almonte,25.0,2003.0,NYY,1.0,AL,111.0,31.0,252.3,-2.54,0.33,0.14,-7.0,-1.0,0.0,0.0,0.0,-8.0,1.51,0.0,4.16,-4.4,-8.6,-0.6,-6.5,-0.81,-0.05,-0.6,-0.42,-0.6,0.34,0.39,,N,4.88581,4.90387,0.08597,4.7696,1.916,1.909,0.4735,0.4982,0.48,0.4868,79.312405,36.112,42.48,,,,,,,,,,,,,,,,,,,,,,,,,,NYA,AL,SS,29.0,757.0,49.0,67.0,12.0,13.0,,,,,


In [211]:
huskies_OF = (
    df_huskies_tryouts_OF.sort_values("weight", ascending=False)
    .drop_duplicates(subset=["playerID"])
    .nlargest(5, "weight")
)  # 5 heaviest OF
huskies_OF

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,KG,meters,BMI,ratio,mlbID,fangraphsID,mlb_played_first,mlb_played_last,name_common_x,age,year_ID,team_ID,stint,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg,name_common_y,GS_x,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg_adj,RpO_replacement,GR_leverage_index_avg,WAA_adj,ERA_plus,ER_lg,Glf,Gcf,Grf,teamID,lgID,POS,GS_y,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
51060,judgeaa01,1992.0,4.0,26.0,USA,CA,Linden,,,,,,,Aaron,Judge,Aaron James,282.0,79.0,R,R,2016-08-13,2022-06-30,judga001,judgeaa01,127.912944,2.0066,31.77,63.74611,592450.0,15640.0,2016.0,2022.0,Aaron Judge,24.0,2016.0,NYY,1.0,AL,95.0,27.0,216.0,-4.33,-0.57,-0.21,-1.0,0.0,1.0,0.0,0.0,0.0,-0.94,0.0,3.4,-2.7,-6.1,-6.1,-0.9,-0.62,-0.62,-0.08,-0.29,-0.08,-0.29,0.33,,N,4.27899,4.50306,0.07941,4.37699,1.857,1.863,0.4763,0.4763,0.4964,0.4868,61.221673,30.932,36.07,,,,,,,,,,,,,,,,,,,,,,,,,,NYA,AL,OF,24.0,648.0,35.0,2.0,1.0,0.0,,,,,
56821,leeca01,1976.0,6.0,20.0,Panama,Cocle,Aguadulce,,,,,,,Carlos,Lee,Carlos,270.0,74.0,R,R,1999-05-07,2012-10-03,lee-c001,leeca01,122.46984,1.8796,34.67,65.157395,150324.0,243.0,1999.0,2012.0,Carlos Lee,29.0,2005.0,MIL,1.0,NL,688.0,162.0,1404.0,6.78,-2.53,0.5,-5.0,0.0,-2.0,0.0,-1.0,-8.0,-6.11,0.0,21.44,12.1,-9.4,-1.4,-14.1,-1.09,-0.24,-1.53,1.01,-1.53,1.86,2.1,8000000.0,N,4.54497,4.55336,0.08379,4.42099,1.876,1.869,0.4941,0.4991,0.491,0.4862,109.547108,233.094,264.319,,,,,,,,,,,,,,,,,,,,,,,,,,MIL,NL,OF,161.0,4212.0,308.0,8.0,6.0,3.0,,,,,
76775,pegueca01,1987.0,2.0,22.0,D.R.,Elias Pina,Hondo Valle,,,,,,,Carlos,Peguero,Carlos Angel,260.0,77.0,L,L,2011-04-19,2015-06-03,peguc001,pegueca01,117.93392,1.9558,30.83,60.299581,451713.0,8760.0,2011.0,2015.0,Carlos Peguero,28.0,2015.0,BOS,2.0,AL,6.0,4.0,17.0,-0.34,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.08,0.0,0.21,-0.2,-0.4,-0.4,-0.1,-0.04,-0.04,-0.01,-0.02,-0.01,-0.02,0.02,,N,4.2065,4.304,0.0751,4.25026,1.841,1.844,0.4895,0.4895,0.4979,0.4942,48.855279,1.963,2.129,,,,,,,,,,,,,,,,,,,,,,,,,,BOS,AL,OF,1.0,51.0,5.0,0.0,0.0,0.0,,,,,
46534,howarfr01,1936.0,8.0,8.0,USA,OH,Columbus,,,,,,,Frank,Howard,Frank Oliver,255.0,79.0,R,R,1958-09-10,1973-09-30,howaf102,howarfr01,115.66596,2.0066,28.73,57.642759,116205.0,1006080.0,1958.0,1973.0,Frank Howard,21.0,1958.0,LAD,1.0,NL,30.0,8.0,55.0,-0.87,-0.59,-0.22,0.0,0.0,0.0,0.0,,0.0,-0.37,0.0,1.13,-0.9,-2.1,-2.1,-0.4,-0.22,-0.22,-0.04,-0.11,-0.04,-0.11,0.11,,N,4.14755,4.4038,0.07419,4.26254,1.843,1.85,0.4724,0.4724,0.4951,0.4849,66.292119,10.287,12.426,,,,,,,,,,,,,,,,,,,,,,,,,,LAN,NL,OF,8.0,165.0,12.0,1.0,0.0,0.0,,,,,
98066,taylomi01,1985.0,12.0,19.0,USA,MD,Cheverly,,,,,,,Michael,Taylor,Michael David,255.0,77.0,R,R,2011-09-02,2014-09-28,taylm001,taylomi01,115.66596,1.9558,30.24,59.139973,446345.0,2591.0,2011.0,2014.0,Michael Taylor,26.0,2012.0,OAK,1.0,AL,21.0,6.0,47.0,-3.0,-0.01,-0.21,3.0,0.0,0.0,0.0,0.0,3.0,-0.24,0.0,0.79,0.3,-0.5,-3.5,2.8,-0.05,-0.38,0.3,0.02,0.3,-0.31,0.07,,N,3.83386,4.41052,0.07575,4.27927,1.824,1.852,0.4911,0.4365,0.5506,0.486,-7.203378,6.592,8.459,,,,,,,,,,,,,,,,,,,,,,,,,,OAK,AL,OF,5.0,141.0,14.0,0.0,0.0,0.0,,,,,


In [213]:
huskies = pd.concat([
    huskies_P,
    huskies_C,
    huskies_1B,
    huskies_2B,
    huskies_3B,
    huskies_SS,
    huskies_OF], ignore_index=True
)

In [215]:
huskies.POS.value_counts().sum()

26

# BEHOLD. THE WONKAVILLE HUSKIES.

In [None]:
# check to make sure everyone's best season is in here
# make sure we've got a rotation and relievers
# clean up columns


In [222]:
huskies.to_csv('huskies.csv')

In [226]:
huskies.weight.describe()

count     26.000000
mean     271.423077
std       18.465477
min      230.000000
25%      260.000000
50%      275.000000
75%      281.500000
max      315.000000
Name: weight, dtype: float64

In [None]:
roster_of = df_huskies_tryouts_OF.groupby('playerID', as_index=False)['WAR'].max()



In [None]:
roster_of.nlargest(5, 'WAR')

In [None]:
husky_P = df_huskies_tryouts_P[df_huskies_tryouts_P['WAR'] == df_huskies_tryouts_P.WAR.max()]
husky_C = df_huskies_tryouts_C[df_huskies_tryouts_C['WAR'] == df_huskies_tryouts_C.WAR.max()]
husky_1B = df_huskies_tryouts_1B[df_huskies_tryouts_1B['WAR'] == df_huskies_tryouts_1B.WAR.max()]
husky_2B = df_huskies_tryouts_2B[df_huskies_tryouts_2B['WAR'] == df_huskies_tryouts_2B.WAR.max()]
husky_3B = df_huskies_tryouts_3B[df_huskies_tryouts_3B['WAR'] == df_huskies_tryouts_3B.WAR.max()]
husky_SS = df_huskies_tryouts_SS[df_huskies_tryouts_SS['WAR'] == df_huskies_tryouts_SS.WAR.max()]
husky_OF = df_huskies_tryouts_OF[df_huskies_tryouts_OF['WAR'] == df_huskies_tryouts_OF.WAR.max()]

In [None]:
huskies = pd.concat([
    husky_P,
    husky_C,
    husky_1B,
    husky_2B,
    husky_3B,
    husky_SS,
    husky_OF
], ignore_index=True)
huskies.reset_index()
huskies.WAR

----

# Historical Team Histories

Let's get our team data together so we can figure out:
1. The worst team each year
2. The best team each year
3. The best team that didn't make the playoffs each year

### Team Prep

In [None]:
teams.sample(10)

Let's work off of copies and leave our source DataFrames in one place so that we have a home to which we may tearfully return.

In [None]:
series_post_sorted = series_post

In [None]:
series_post_sorted.columns

In [None]:
df_teams = teams

#### Add Differentials, Which Should've Been Here in the First Place

Let's add some differential metrics that might be useful. In Pythonglish, this is what we're doing:
```python
for Runs, Strikeouts, Walks, Home Runs, Hits in team_stats:
    df_teams['StatDiff'] = df_teams['Team_Stat'] - df_teams['Opponent_stat']
```

In [None]:
df_teams['RDiff'] = df_teams['R'] - df_teams['RA']
df_teams['SODiff'] = df_teams['SO'] - df_teams['SOA']
df_teams['BBDiff'] = df_teams['BB'] - df_teams['BBA']
df_teams['HRDiff'] = df_teams['HR'] - df_teams['HRA']
df_teams['HDiff'] = df_teams['H'] - df_teams['HA']

#### Add Winning Percentage

Let's also give ourselves a winning percentage column, because baseball seasons haven't always been the same length.

In [None]:
df_teams['WP'] = df_teams['W'] / (df_teams['L'] + df_teams['W'])

## Locating the First Losers

Now let's find the best team in each year that didn't make the playoffs and add it to a 'first_losers' DataFrame

First, let's make two dicts — one for playoff winners and one for playoff losers — which serves to identify any team that made the playoffs. After some cleaning, the inverse of our by-year dict will comprise the teams that didn't make the playoffs.

We want to end up with something that looks like this:

```python
history = {
    1871: ['playoff teams'],
    1872: ['playoff teams'],
    [...]
    2021: ['playoff teams']
}
```

In [None]:
all_teams = {k: list(v) for k, v in teams.groupby('yearID')['teamID']}
playoff_winners = {k: list(v) for k, v in series_post_sorted.groupby('yearID')['teamIDwinner']}
playoff_losers = {k: list(v) for k, v in series_post_sorted.groupby('yearID')['teamIDloser']}


Let's make a little algo to jury-rig ourselves a little .unique() function

In [None]:
def unique(playoff_teams):
    x = np.array(playoff_teams)
    return np.unique(x)


### It's Loopin' Time
Now let's make a loop that zooms over our entire table to generate each year's list of playoff teams

In [None]:
history = {}
for year in range(1871, 2022):
    try:
        playoff_teams = playoff_winners[year] + playoff_losers[year]
        playoff_teams = unique(playoff_teams)
        year = {
            year: list(playoff_teams)
        }
        print('---')
        print(year)
        history.update(year)
    except:
        print('---')
        print(year)
        print("There weren't any playoffs this year.")

## Next we're going to add a bool column to our monster team dataframe where TRUE = made playoffs and FALSE = missed playoffs.

In [None]:
df_teams = teams

In [None]:
df_teams.groupby(['yearID']).teamID.unique()

I want to get the teamID from each row in df_teams and check whether it's in df_teams.groupby(['yearID']).teamID.unique()

In [None]:
for team in df_teams.groupby(['yearID']):
    season = df_teams.yearID
    df_teams['playoff_teams'] = dict(df_teams[df_teams["yearID"] == season].teamID)


In [None]:
for team in df_teams.groupby(['yearID']):
    season = df_teams.yearID
    df_teams.insert(5, "playoff_teams", list(
        df_teams[df_teams["yearID"] == season].teamID), True)


In [None]:
for team in range(len(df_teams)):
    team_season = df_teams.yearID
    try:
        if df_teams.loc[team]['teamID'] in list(df_teams[df_teams["yearID"] == team_season].teamID):
            df_teams['playoffs'] == 1
            print('- - -')
            print(f'{df_teams.iloc[team].yearID} : {df_teams.iloc[team].teamID}')
            print('Pass')
        else:
            df_teams['playoffs'] == 0
            print(f'{df_teams.iloc[team].yearID} : {df_teams.iloc[team].teamID}')
            print("- - -")
            print("Pass")
    except:
        print("- - -")
        print(f'{df_teams.iloc[team].yearID} : {df_teams.iloc[team].teamID}')
        print("Exception")


In [None]:
df_teams.sample(1)

In [None]:
history.items()

In [None]:
conditions = [
    df_teams.loc[0].teamID
    in list(df_teams[df_teams["yearID"] == 1871].teamID)  # ! DING DING DING
]
values = [True]

df_teams['made_playoffs']

In [None]:
df_teams.loc[df_teams['teamID'] in list(df_teams[df_teams['yearID'] == 1871].teamID), 'made_playoffs'] == True

In [None]:
df_teams.loc[0].teamID in list(df_teams[df_teams['yearID'] == 1871].teamID) # ! DING DING DING

In [None]:
if df_teams.loc[2980].teamID in list(history[2021]):# ! DING DING DING

In [None]:
df_teams

In [None]:
season = 1871
for team in range(len(df_teams)):
    try:
        if df_teams.iloc[team].teamID in list(df_teams[df_teams['yearID'] == season].teamID):
            print('- - -')
            print(f'{df_teams.iloc[team].yearID} + {df_teams.iloc[team].yearID}')
            print('Pass')
            df_teams['playoffs'] == 1
        else:
            df_teams['playoffs'] == 0
            print("- - -")
            print("Pass")
    except:
        print("- - -")
        print("Exception")
    season += 1


In [None]:
season = 1871
for team in range(1871, 2022):
    try:
        if df_teams[df_teams[season]].teamID in df_teams.groupby(['yearID']).teamID.unique():
            df_teams['playoffs'] == True
        else:
            df_teams['playoffs'] == False
            print("- - -")
            print(season)
            print("Pass")
    except:
        print("- - -")
        print(season)
        print("Exception")
season += 1


In [None]:
season = 1871
for team in df_teams:
    try:
        if df_teams['teamID'] in history[season]:
            df_teams['playoffs'] == True
        else: df_teams['playoffs'] == False
        print("- - -")
        print(season)
        print("Pass")
    except:
        print("- - -")
        print(season)
        print("Exception")
    season += 1

In [None]:
df_teams['playoffs']

----

### Erafying Things. For Posterity.

Let's split our teams into temporal eras. There are four major eras in baseball history. Or rather, two gigantic ones — the latter of which has three distinct sub-areas. 

In [None]:
deadball_era = df_teams.mask(df_teams['yearID'] <= 1920)
liveball_era = df_teams.mask(df_teams['yearID'] > 1920)

In [None]:
liveball = deadball_era.dropna(axis=0, how='all')
liveball

In [None]:
deadball = liveball_era.dropna(axis=0, how='all')
deadball

Now let's split the live-ball era into groups of its three main playoff structures: No divisions, Divisions, and Divisions with a Wildcard round.

In [None]:
league_era = liveball[(liveball['LgWin'].notna()) & (liveball['DivWin'].isna())]
division_era = liveball[(liveball['LgWin'].notna()) & (liveball['DivWin'].notna()) & (liveball['WCWin'].isna())]
wildcard_era = liveball[(liveball['LgWin'].notna()) & (liveball['DivWin'].notna()) & (liveball['WCWin'].notna())]


----

### Meet the First Losers. And Their Friends.

In [None]:
league_era['yearID']

In [None]:
history[2012]  #EVERYTHING WORKS TO HERE

In [None]:
teams.sample(20)

In [None]:
losers = {}
season = 2012
for year in league_era['yearID']:
    try:
        playoff_teams = history[season]
        print('---')
        print(season)
        print('in')
        print(playoff_teams)
        league_era[league_era["yearID"] == season]
        playoff_missers = {
            season: league_era[~league_era["teamID"].isin(playoff_teams)]
        }
        print('out')
        print(playoff_missers.values())
        # losers.update(playoff_missers)
    except:
        pass
    season += 1

In [None]:
losers.keys()

In [None]:
league_era.merge(pd.DataFrame([{'yearID': k, 'year': i} for k, v in history.items() for i in v]))

In [None]:
print(list(df.columns))

In [None]:
wildcard_era_first_losers = wildcard_era.merge(series_post_sorted, left_on='teamIDBR', right_on='teamIDloser')
wildcard_era_first_losers = wildcard_era_first_losers.merge(series_post_sorted, left_on='teamIDBR', right_on='teamIDwinner')

In [None]:
df_teams = df_teams.set_index(['yearID', 'lgID', 'divID'])

### Meet the Wonkaville Huskies

In [None]:
df_huskies = pd.merge(df_huskiesBatters, df_huskiesPitchers, how='right', on='playerID')

In [None]:
df_huskies.columns

In [None]:
sns.jointplot(data=df_simple, x="height", y="weight", kind = "reg", truncate = False)

In [None]:
sns.choose_diverging_palette()

In [None]:

# Compute the correlation matrix
corr = df_huskiesBatters.corr(method="spearman")

# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=bool))

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(290, 10, n=40, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(
    corr,
    mask=mask,
    cmap=cmap,
    vmax=1,
    center=0,
    square=True,
    linewidths=0.25,
    cbar_kws={"shrink": .5},
)



In [None]:
corr_mat = df.corr().stack().reset_index(name="correlation")

##### Old