In [49]:
## Let's try to figure out who the shortest, heaviest baseball players are.

## Here's our overall plan of attack

# Setting Things Up ✅
## Import CSVs ✅
### Separate CSVs --> DataFrames for People, Pitching Data, Batting Data ✅
## Squish everything into one mondo DF ✅
## Add Physical Data ✅
### Height ✅
### Weight ✅
## Calculate BMI ✅
### Convert Imperial to Metric ✅
### BMI-ify ✅
### Throw BMI back into df ✅
## Assemble per-position lists sorted by BMI, then mWAR

# Knocking Things Down
## Find worst team that made playoffs in 2021
### Describe team fWAR/bWAR
### Describe individual fWAR/bWAR
## Pull from BMI lists per position until high BMI roster is full
### mWAR shall be higher on a team basis.
### mWAR shall be higher per position.
# If there's enough time:
## Repeat for:
### Tallness
### Shortness
### Heavy
### Light

# If there's enough time:
## Repeat for:
### Tallness
### Shortness
### Heavy
### Light

## Setting Things Up

### Import the necessaries

In [127]:
import numpy as np
import pandas as pd
import pybaseball
from deepdiff import DeepDiff
from pybaseball import bwar_pitch
from pybaseball import bwar_bat
from pybaseball import cache
from pybaseball.lahman import *
from pybaseball import chadwick_register

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 100)


In [51]:
# Constants
KG_TO_LB = 0.453592
M_TO_IN = 0.0254

In [52]:
chadwick = chadwick_register(save=True)
chadwick = pd.DataFrame(chadwick)

In [53]:
download_lahman()

### Read infinity baseball data to DataFrames

In [54]:
# a table of all player biographical info and ids
people = pd.DataFrame(people())

# park id, name, alias, city, state, and country
parks = pd.DataFrame(parks())

# all star roster data: player, year, team, league, position
allstar = pd.DataFrame(all_star_full())

# each player's games played per position for each season
appearances = pd.DataFrame(appearances())

# batting stats by year, regular season
batting = pd.DataFrame(batting())

# batting stats by year, post season
batting_post = pd.DataFrame(batting_post())

# fielding stats by year 
fielding = pd.DataFrame(fielding())

# games played in left, center, right field 
fielding_of = pd.DataFrame(fielding_of())

# LF/CF/RF splits
fielding_of_split = pd.DataFrame(fielding_of_split())

# postseason fielding 
fielding_post = pd.DataFrame(fielding_post())

# home game attendance by park by year 
home_games = pd.DataFrame(home_games())

# historical player pitching stats
pitching = pd.DataFrame(pitching())

# postseason pitching stats
pitching_post = pd.DataFrame(pitching_post())

# playoff series winners and losers 
series_post = pd.DataFrame(series_post())

# data on teams by year: record, division, stadium, attendance, etc
teams = pd.DataFrame(teams())

# current and historical franchises, whether they're still active, and their ids
teams_franchises = pd.DataFrame(teams_franchises())

# split season data for teams
teams_half = pd.DataFrame(teams_half()) 

# fangraphs batting since 2008
fangraphs_batting = pd.DataFrame(pybaseball.batting_stats_range(
    start_dt="2008-01-01", end_dt="2021-12-31"))

# fangraphs pitching since 2008
fangraphs_pitching = pd.DataFrame(pybaseball.pitching_stats_range(
    start_dt="2008-01-01", end_dt="2021-12-31"))

# fangraphs team pitching since 2008
fangraphs_team_pitching = pd.DataFrame(
    pybaseball.team_pitching(start_season="2008", end_season="2021"))

# fangraphs team batting since 2008
fangraphs_team_batting = pd.DataFrame(
    pybaseball.team_batting(start_season="2008", end_season="2021"))

# fangraphs team fielding since 2008
fangraphs_team_fielding = pd.DataFrame(
    pybaseball.team_fielding(start_season="2008", end_season="2021"))

# bref pitching WAR
bwar_pitch = pd.DataFrame(bwar_pitch(return_all=True))

# bref batting WAR
bwar_bat = pd.DataFrame(bwar_bat(return_all=True))


  table = table.drop('', 1)


----

### Add BMI Calcs

In [55]:
# BMI Calculations
people['KG'] = people['weight'] * KG_TO_LB
people['meters'] = people['height'] * M_TO_IN
people['BMI'] = people['KG'] / people['meters'] ** 2
people['ratio'] = people['meters'] * people['BMI']


----

## Assemble Monster DataFrame of Everything About Player Ever

### Hello Everybody

In [56]:
people.shape

(20543, 28)

In [57]:
people.columns

Index(['playerID', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry',
       'birthState', 'birthCity', 'deathYear', 'deathMonth', 'deathDay',
       'deathCountry', 'deathState', 'deathCity', 'nameFirst', 'nameLast',
       'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame',
       'retroID', 'bbrefID', 'KG', 'meters', 'BMI', 'ratio'],
      dtype='object')

In [58]:
chadwick.shape

(24258, 8)

In [59]:
chadwick.columns

Index(['name_last', 'name_first', 'key_mlbam', 'key_retro', 'key_bbref',
       'key_fangraphs', 'mlb_played_first', 'mlb_played_last'],
      dtype='object')

In [60]:
# Let's change some of these column names to save ourselves some merging hassle

In [61]:
chadwick = chadwick.rename(columns={
    "name_last": "nameLast",
    "name_first": "nameFirst",
    "key_fangraphs": "fangraphsID",
    "key_bbref": "playerID",
    "key_retro": "retroID",
    "key_mlbam": "mlbID"
})

In [62]:
chadwick.columns

Index(['nameLast', 'nameFirst', 'mlbID', 'retroID', 'playerID', 'fangraphsID',
       'mlb_played_first', 'mlb_played_last'],
      dtype='object')

Merge Chadwick, check.

In [63]:
# Merge chadwick into people
df = people.merge(
    chadwick,
    left_on=["playerID", "retroID", "nameLast", "nameFirst"],
    right_on=["playerID", "retroID", "nameLast", "nameFirst"], how='outer'
)

In [64]:
df.columns

Index(['playerID', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry',
       'birthState', 'birthCity', 'deathYear', 'deathMonth', 'deathDay',
       'deathCountry', 'deathState', 'deathCity', 'nameFirst', 'nameLast',
       'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame',
       'retroID', 'bbrefID', 'KG', 'meters', 'BMI', 'ratio', 'mlbID',
       'fangraphsID', 'mlb_played_first', 'mlb_played_last'],
      dtype='object')

In [65]:
df.shape

(24587, 32)

----

### Add BWAR Stats 

#### Add BWAR Batting

##### BWAR Batting Prep

In [66]:
bwar_bat.columns

Index(['name_common', 'age', 'mlb_ID', 'player_ID', 'year_ID', 'team_ID',
       'stint_ID', 'lg_ID', 'PA', 'G', 'Inn', 'runs_bat', 'runs_br', 'runs_dp',
       'runs_field', 'runs_infield', 'runs_outfield', 'runs_catcher',
       'runs_good_plays', 'runs_defense', 'runs_position', 'runs_position_p',
       'runs_replacement', 'runs_above_rep', 'runs_above_avg',
       'runs_above_avg_off', 'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def',
       'WAR', 'WAR_def', 'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG',
       'oppRpG', 'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent',
       'pyth_exponent_rep', 'waa_win_perc', 'waa_win_perc_off',
       'waa_win_perc_def', 'waa_win_perc_rep', 'OPS_plus', 'TOB_lg', 'TB_lg'],
      dtype='object')

In [67]:
bwar_bat = bwar_bat.rename(columns={
    'player_ID': 'playerID',
    'mlb_ID': 'mlbID',
    'stint_ID': 'stint'
})

In [68]:
bwar_bat.columns

Index(['name_common', 'age', 'mlbID', 'playerID', 'year_ID', 'team_ID',
       'stint', 'lg_ID', 'PA', 'G', 'Inn', 'runs_bat', 'runs_br', 'runs_dp',
       'runs_field', 'runs_infield', 'runs_outfield', 'runs_catcher',
       'runs_good_plays', 'runs_defense', 'runs_position', 'runs_position_p',
       'runs_replacement', 'runs_above_rep', 'runs_above_avg',
       'runs_above_avg_off', 'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def',
       'WAR', 'WAR_def', 'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG',
       'oppRpG', 'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent',
       'pyth_exponent_rep', 'waa_win_perc', 'waa_win_perc_off',
       'waa_win_perc_def', 'waa_win_perc_rep', 'OPS_plus', 'TOB_lg', 'TB_lg'],
      dtype='object')

In [69]:
bwar_bat.columns

Index(['name_common', 'age', 'mlbID', 'playerID', 'year_ID', 'team_ID',
       'stint', 'lg_ID', 'PA', 'G', 'Inn', 'runs_bat', 'runs_br', 'runs_dp',
       'runs_field', 'runs_infield', 'runs_outfield', 'runs_catcher',
       'runs_good_plays', 'runs_defense', 'runs_position', 'runs_position_p',
       'runs_replacement', 'runs_above_rep', 'runs_above_avg',
       'runs_above_avg_off', 'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def',
       'WAR', 'WAR_def', 'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG',
       'oppRpG', 'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent',
       'pyth_exponent_rep', 'waa_win_perc', 'waa_win_perc_off',
       'waa_win_perc_def', 'waa_win_perc_rep', 'OPS_plus', 'TOB_lg', 'TB_lg'],
      dtype='object')

----

##### BWAR Batting Merge

In [70]:
df = df.merge(
    bwar_bat, left_on=["playerID", "mlbID"], right_on=["playerID", "mlbID"], how="outer"
)

In [71]:
df.columns

Index(['playerID', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry',
       'birthState', 'birthCity', 'deathYear', 'deathMonth', 'deathDay',
       'deathCountry', 'deathState', 'deathCity', 'nameFirst', 'nameLast',
       'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame',
       'retroID', 'bbrefID', 'KG', 'meters', 'BMI', 'ratio', 'mlbID',
       'fangraphsID', 'mlb_played_first', 'mlb_played_last', 'name_common',
       'age', 'year_ID', 'team_ID', 'stint', 'lg_ID', 'PA', 'G', 'Inn',
       'runs_bat', 'runs_br', 'runs_dp', 'runs_field', 'runs_infield',
       'runs_outfield', 'runs_catcher', 'runs_good_plays', 'runs_defense',
       'runs_position', 'runs_position_p', 'runs_replacement',
       'runs_above_rep', 'runs_above_avg', 'runs_above_avg_off',
       'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def', 'WAR', 'WAR_def',
       'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG', 'oppRpG',
       'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent', 'pyth_

#### Add BWAR Pitching

##### BWAR Pitching Prep

In [72]:
bwar_pitch.columns

Index(['name_common', 'age', 'mlb_ID', 'player_ID', 'year_ID', 'team_ID',
       'stint_ID', 'lg_ID', 'G', 'GS', 'IPouts', 'IPouts_start',
       'IPouts_relief', 'RA', 'xRA', 'xRA_sprp_adj', 'xRA_extras_adj',
       'xRA_def_pitcher', 'PPF', 'PPF_custom', 'xRA_final', 'BIP', 'BIP_perc',
       'RS_def_total', 'runs_above_avg', 'runs_above_avg_adj',
       'runs_above_rep', 'RpO_replacement', 'GR_leverage_index_avg', 'WAR',
       'salary', 'teamRpG', 'oppRpG', 'pyth_exponent', 'waa_win_perc', 'WAA',
       'WAA_adj', 'oppRpG_rep', 'pyth_exponent_rep', 'waa_win_perc_rep',
       'WAR_rep', 'ERA_plus', 'ER_lg'],
      dtype='object')

----

merge bwar_pitch, check

In [73]:
bwar_pitch = bwar_pitch.rename(columns={
    "stint_ID": "stint",
    "mlb_ID": "mlbID",
    "player_ID": "playerID"
})

In [74]:
bwar_pitch.stint

0        1
1        1
2        1
3        1
4        1
        ..
53652    1
53653    1
53654    1
53655    1
53656    1
Name: stint, Length: 53657, dtype: int64

##### BWAR Pitching Merge

In [75]:
print(df.columns.tolist())

['playerID', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry', 'birthState', 'birthCity', 'deathYear', 'deathMonth', 'deathDay', 'deathCountry', 'deathState', 'deathCity', 'nameFirst', 'nameLast', 'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame', 'retroID', 'bbrefID', 'KG', 'meters', 'BMI', 'ratio', 'mlbID', 'fangraphsID', 'mlb_played_first', 'mlb_played_last', 'name_common', 'age', 'year_ID', 'team_ID', 'stint', 'lg_ID', 'PA', 'G', 'Inn', 'runs_bat', 'runs_br', 'runs_dp', 'runs_field', 'runs_infield', 'runs_outfield', 'runs_catcher', 'runs_good_plays', 'runs_defense', 'runs_position', 'runs_position_p', 'runs_replacement', 'runs_above_rep', 'runs_above_avg', 'runs_above_avg_off', 'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def', 'WAR', 'WAR_def', 'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG', 'oppRpG', 'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent', 'pyth_exponent_rep', 'waa_win_perc', 'waa_win_perc_off', 'waa_win_perc_def', 'waa_win_perc_rep', 'OPS_p

In [76]:
df = df.merge(
    bwar_pitch,
    left_on=[
        'G',
        'WAA',
        'WAR',
        'WAR_rep',
        'age',
        'lg_ID',
        'mlbID',
        'oppRpG',
        'oppRpG_rep',
        'playerID',
        'pyth_exponent',
        'pyth_exponent_rep',
        'runs_above_avg',
        'runs_above_rep',
        'salary',
        'stint',
        'teamRpG',
        'team_ID',
        'waa_win_perc',
        'waa_win_perc_rep',
        'year_ID'
    ],
    right_on=[
        'G',
        'WAA',
        'WAR',
        'WAR_rep',
        'age',
        'lg_ID',
        'mlbID',
        'oppRpG',
        'oppRpG_rep',
        'playerID',
        'pyth_exponent',
        'pyth_exponent_rep',
        'runs_above_avg',
        'runs_above_rep',
        'salary',
        'stint',
        'teamRpG',
        'team_ID',
        'waa_win_perc',
        'waa_win_perc_rep',
        'year_ID'
    ], how='outer'
)

In [77]:
print(df.columns.tolist())

['playerID', 'birthYear', 'birthMonth', 'birthDay', 'birthCountry', 'birthState', 'birthCity', 'deathYear', 'deathMonth', 'deathDay', 'deathCountry', 'deathState', 'deathCity', 'nameFirst', 'nameLast', 'nameGiven', 'weight', 'height', 'bats', 'throws', 'debut', 'finalGame', 'retroID', 'bbrefID', 'KG', 'meters', 'BMI', 'ratio', 'mlbID', 'fangraphsID', 'mlb_played_first', 'mlb_played_last', 'name_common_x', 'age', 'year_ID', 'team_ID', 'stint', 'lg_ID', 'PA', 'G', 'Inn', 'runs_bat', 'runs_br', 'runs_dp', 'runs_field', 'runs_infield', 'runs_outfield', 'runs_catcher', 'runs_good_plays', 'runs_defense', 'runs_position', 'runs_position_p', 'runs_replacement', 'runs_above_rep', 'runs_above_avg', 'runs_above_avg_off', 'runs_above_avg_def', 'WAA', 'WAA_off', 'WAA_def', 'WAR', 'WAR_def', 'WAR_off', 'WAR_rep', 'salary', 'pitcher', 'teamRpG', 'oppRpG', 'oppRpPA_rep', 'oppRpG_rep', 'pyth_exponent', 'pyth_exponent_rep', 'waa_win_perc', 'waa_win_perc_off', 'waa_win_perc_def', 'waa_win_perc_rep', 'OPS

----

#### Add Fielding_OF

##### Fielding_OF Prep

In [78]:
fielding_of.columns

Index(['playerID', 'yearID', 'stint', 'Glf', 'Gcf', 'Grf'], dtype='object')

In [79]:
fielding_of = fielding_of.rename(columns={
    "yearID": "year_ID"
})

##### Fielding_OF Merge

In [80]:
df = df.merge(
    fielding_of,
    left_on=["playerID", "year_ID", "stint"],
    right_on=["playerID", "year_ID", "stint"], how='left'
)


In [81]:
df.shape

(177729, 104)

----

#### Add Fielding

##### Fielding Check

In [82]:
fielding.columns

Index(['playerID', 'yearID', 'stint', 'teamID', 'lgID', 'POS', 'G', 'GS',
       'InnOuts', 'PO', 'A', 'E', 'DP', 'PB', 'WP', 'SB', 'CS', 'ZR'],
      dtype='object')

In [83]:
fielding = fielding.rename(columns={
    "yearID": "year_ID"
})

##### Fielding Merge

In [84]:
df = df.merge(
    fielding,
    left_on=["playerID", "year_ID", "G", "stint"],
    right_on=["playerID", "year_ID", "G", "stint"], how='left'
)


----

### Meet the Monster

In [85]:
df.sample(25) ### works to here

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID,KG,meters,BMI,ratio,mlbID,fangraphsID,mlb_played_first,mlb_played_last,name_common_x,age,year_ID,team_ID,stint,lg_ID,PA,G,Inn,runs_bat,runs_br,runs_dp,runs_field,runs_infield,runs_outfield,runs_catcher,runs_good_plays,runs_defense,runs_position,runs_position_p,runs_replacement,runs_above_rep,runs_above_avg,runs_above_avg_off,runs_above_avg_def,WAA,WAA_off,WAA_def,WAR,WAR_def,WAR_off,WAR_rep,salary,pitcher,teamRpG,oppRpG,oppRpPA_rep,oppRpG_rep,pyth_exponent,pyth_exponent_rep,waa_win_perc,waa_win_perc_off,waa_win_perc_def,waa_win_perc_rep,OPS_plus,TOB_lg,TB_lg,name_common_y,GS_x,IPouts,IPouts_start,IPouts_relief,RA,xRA,xRA_sprp_adj,xRA_extras_adj,xRA_def_pitcher,PPF,PPF_custom,xRA_final,BIP,BIP_perc,RS_def_total,runs_above_avg_adj,RpO_replacement,GR_leverage_index_avg,WAA_adj,ERA_plus,ER_lg,Glf,Gcf,Grf,teamID,lgID,POS,GS_y,InnOuts,PO,A,E,DP,PB,WP,SB,CS,ZR
147724,johnsdo03,,,,,,,,,,,,,,,,,,,,,,,,,,,,116580.0,,,,,23.0,1950.0,SLB,2.0,AL,,25.0,,,,,,,,,,,,,,3.122,-5.967,,,-0.61,,,0.17,,,0.8263,,,5.11423,5.37591,,5.48149,1.954,1.96,0.4756,,,0.4661,,,,Don Johnson,12.0,288.0,237.0,51.0,72.0,57.458,0.0,,-6.178,108.0,103.767,66.033,355.0,0.0702,-88.0,-6.542,0.222,0.7308,-0.0484,81.375385,52.894,,,,SLA,AL,P,,,2.0,17.0,3.0,0.0,,,,,
175864,willsfr01,,,,,,,,,,,,,,,,,,,,,,,,,,,,124370.0,,,,,32.0,1991.0,TOR,1.0,AL,,4.0,,,,,,,,,,,,,,-5.439,-5.865,,,-0.5388,,,-0.46,,,0.044,435000.0,,4.47535,5.9416,,4.5814,1.95,1.874,0.3653,,,0.489,,,,Frank Wills,0.0,13.0,0.0,13.0,8.0,2.358,-0.181,,0.12,103.0,103.778,2.135,19.0,0.0041,29.2,-5.865,0.199,0.8475,0.0314,27.55,2.204,,,,TOR,AL,P,0.0,13.0,0.0,2.0,0.0,0.0,,,,,
11156,bridgto01,1906.0,12.0,28.0,USA,TN,Gordonsville,1968.0,4.0,19.0,USA,TN,Nashville,Tommy,Bridges,Thomas Jefferson Davis,155.0,70.0,R,R,1930-08-13,1946-07-20,bridt101,bridgto01,70.30676,1.778,22.239938,39.54261,111458.0,1001421.0,1930.0,1946.0,Tommy Bridges,27.0,1934.0,DET,1.0,AL,114.0,36.0,,-18.93,0.17,0.13,0.0,,,,,0.0,0.01,12.83,0.0,-5.8,-5.8,-5.8,0.0,-0.56,-0.54,-0.01,-0.56,-0.01,-0.54,0.0,,Y,5.0236,5.18444,0.09004,5.06569,1.939,1.941,0.4847,0.4847,0.5,0.4888,-15.598209,38.01,40.68,,,,,,,,,,,,,,,,,,,,,,,,,,DET,AL,P,,,6.0,42.0,2.0,2.0,,,,,
6522,bellje01,1947.0,10.0,6.0,USA,TN,Madison,,,,,,,Jerry,Bell,Jerry Houston,190.0,76.0,B,R,1971-09-06,1974-05-15,bellj101,bellje01,86.18248,1.9304,23.12727,44.644882,110827.0,1000812.0,1971.0,1974.0,Jerry Bell,25.0,1973.0,MIL,1.0,AL,0.0,0.0,183.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,Y,,4.28517,0.07295,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
58678,lockmwh01,1926.0,7.0,25.0,USA,NC,Lowell,2009.0,3.0,17.0,USA,AZ,Scottsdale,Whitey,Lockman,Carroll Walter,175.0,73.0,L,R,1945-07-05,1960-06-24,lockw101,lockmwh01,79.3786,1.8542,23.088211,42.810161,117857.0,1007694.0,1945.0,1960.0,Whitey Lockman,32.0,1959.0,CIN,2.0,NL,89.0,52.0,154.0,-2.45,-0.69,0.28,-0.7,0.0,0.0,0.0,,-0.7,-0.79,0.0,3.35,-1.0,-4.4,-3.7,-1.5,-0.46,-0.37,-0.15,-0.12,-0.15,-0.03,0.34,,N,4.36131,4.43151,0.07523,4.36703,1.858,1.858,0.4912,0.4926,0.497,0.4932,67.422953,30.287,35.549,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
98176,teagata01,1983.0,12.0,21.0,USA,TX,Dallas,,,,,,,Taylor,Teagarden,Taylor Hill,210.0,72.0,R,R,2008-07-18,2015-07-26,teagt001,teagata01,95.25432,1.8288,28.480805,52.085696,460003.0,5199.0,2008.0,2015.0,Taylor Teagarden,24.0,2008.0,TEX,1.0,AL,53.0,16.0,136.7,6.71,0.35,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.41,0.0,1.99,10.5,8.5,7.5,1.4,0.85,0.73,0.15,1.04,0.15,0.92,0.19,,N,5.17088,4.704,0.08092,4.57978,1.921,1.887,0.5516,0.5453,0.5089,0.4874,208.660361,17.882,19.872,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
177453,ceasedy01,,,,,,,,,,,,,,,,,,,,,,,,,,,,656302.0,,,,,26.0,2022.0,CHW,1.0,AL,,16.0,,,,,,,,,,,,,,20.369,12.819,,,1.3056,,,2.03,,,0.809,,,4.20931,3.50219,,4.68532,1.79,1.864,0.5816,,,0.4502,,,,Dylan Cease,16.0,258.0,258.0,0.0,34.0,43.328,2.028,0.0,-0.617,101.0,101.841,46.819,197.0,0.1028,-6.0,11.314,0.186,1.0,-0.0879,155.129167,37.231,,,,,,,,,,,,,,,,,
155449,mcmuljo01,,,,,,,,,,,,,,,,,,,,,,,,,,,,118836.0,,,,,24.0,1873.0,ATH,1.0,,,1.0,,,,,,,,,,,,,,4.263,2.795,,,0.1998,,,0.26,,,0.0751,,,8.8006,5.9406,,10.26076,2.153,2.317,0.6998,,,0.412,,,,John McMullin,1.0,24.0,,,5.0,7.98,0.0,,0.485,104.0,,7.795,31.0,0.0147,33.0,2.86,0.388,1.0,-0.0137,165.5,3.31,51.0,0.0,0.0,PH1,,P,,24.0,0.0,3.0,1.0,0.0,,,,,
84625,robinje02,1961.0,12.0,14.0,USA,CA,Ventura,2014.0,10.0,26.0,USA,MO,Overland Park,Jeff,Robinson,Jeffrey Mark,210.0,78.0,R,R,1987-04-12,1992-07-20,robij002,robinje02,95.25432,1.9812,24.267668,48.079104,121316.0,1011073.0,1987.0,1992.0,Jeff Robinson,28.0,1990.0,DET,1.0,AL,0.0,0.0,145.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,336000.0,Y,,4.32902,0.07417,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
64701,mcdougi01,1928.0,5.0,19.0,USA,CA,San Francisco,2010.0,11.0,28.0,USA,NJ,Monmouth County,Gil,McDougald,Gilbert James,175.0,72.0,R,R,1951-04-20,1960-10-02,mcdog101,mcdougi01,79.3786,1.8288,23.734004,43.404746,118655.0,1008471.0,1951.0,1960.0,Gil McDougald,26.0,1954.0,NYY,1.0,AL,474.0,126.0,957.6,9.38,0.14,0.19,6.3,3.0,0.0,0.0,,9.3,3.94,0.0,14.54,37.5,23.0,13.7,13.2,2.44,1.43,1.43,3.94,1.43,2.93,1.5,,N,4.30958,4.20124,0.0757,4.08586,1.841,1.827,0.5198,0.5117,0.5115,0.4873,116.907553,156.275,150.784,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


----

## Historical Team Histories

Let's get our team data together so we can figure out:
1. The worst team each year
2. The best team each year
3. The best team that didn't make the playoffs each year

### Team Prep

In [86]:
teams.sample(10)

Unnamed: 0,yearID,lgID,teamID,franchID,divID,Rank,G,Ghome,W,L,DivWin,WCWin,LgWin,WSWin,R,AB,H,2B,3B,HR,BB,SO,SB,CS,HBP,SF,RA,ER,ERA,CG,SHO,SV,IPouts,HA,HRA,BBA,SOA,E,DP,FP,name,park,attendance,BPF,PPF,teamIDBR,teamIDlahman45,teamIDretro
1723,1977,NL,MON,WSN,E,5,162,81.0,75,87,N,,N,N,665,5675,1474,294,50,138,478.0,877.0,88.0,50.0,21.0,39.0,736,660,4.01,31,11,33,4443,1426,135,579,856,129,128,0.98,Montreal Expos,Stade Olympique,1433757.0,97,97,MON,MON,MON
141,1884,UA,MLU,MLU,,2,12,,8,4,,,N,,53,395,88,25,0,0,20.0,70.0,,,,,34,26,2.25,12,3,0,312,49,1,13,139,53,4,0.892,Milwaukee Brewers,,,60,60,MIL,MLU,MLU
2946,2020,NL,PIT,PIT,C,5,60,32.0,19,41,N,N,N,N,219,1932,425,76,6,59,167.0,521.0,16.0,11.0,11.0,15.0,298,267,4.68,1,0,6,1539,451,80,249,536,47,53,0.978,Pittsburgh Pirates,PNC Park,0.0,97,99,PIT,PIT,PIT
1149,1947,NL,SLN,STL,,2,156,77.0,89,65,,,N,N,780,5422,1462,235,65,115,612.0,511.0,28.0,,,,634,548,3.53,65,12,20,4191,1417,106,495,642,127,169,0.979,St. Louis Cardinals,Sportsman's Park IV,1247913.0,104,101,STL,SLN,SLN
731,1921,NL,PIT,PIT,,2,154,76.0,90,63,,,N,N,692,5379,1533,231,104,37,341.0,371.0,134.0,93.0,,,595,498,3.17,88,10,10,4245,1448,37,322,500,171,129,0.973,Pittsburgh Pirates,Forbes Field,701567.0,102,101,PIT,PIT,PIT
491,1907,NL,PIT,PIT,,2,157,77.0,91,63,,,N,N,634,4957,1261,133,78,19,469.0,456.0,264.0,,46.0,,510,348,2.3,111,24,5,4089,1207,12,368,497,256,75,0.959,Pittsburgh Pirates,Exposition Park,319506.0,102,99,PIT,PIT,PIT
1041,1941,NL,BSN,ATL,,7,156,76.0,62,92,,,N,N,592,5414,1357,231,38,48,471.0,608.0,61.0,,,,720,608,3.95,62,10,9,4155,1440,75,554,446,190,174,0.97,Boston Braves,Braves Field,263680.0,94,97,BSN,BSN,BSN
972,1936,AL,SLA,BAL,,7,155,77.0,57,95,,,N,N,804,5391,1502,299,66,79,625.0,627.0,62.0,20.0,,,1064,935,6.24,54,3,13,4044,1776,115,609,399,188,143,0.969,St. Louis Browns,Sportsman's Park IV,93267.0,102,106,SLB,SLA,SLA
1969,1987,NL,ATL,ATL,W,5,161,81.0,69,92,N,,N,N,747,5428,1401,284,24,152,641.0,834.0,135.0,68.0,38.0,34.0,829,734,4.63,16,4,32,4283,1529,163,587,837,116,170,0.982,Atlanta Braves,Atlanta-Fulton County Stadium,1217402.0,104,106,ATL,ATL,ATL
1520,1969,AL,CAL,ANA,W,3,163,81.0,71,91,N,,N,N,528,5316,1221,151,29,88,516.0,929.0,54.0,39.0,,,652,566,3.54,25,9,39,4314,1294,126,517,885,135,164,0.978,California Angels,Anaheim Stadium,758388.0,94,95,CAL,CAL,CAL


Let's work off of copies and leave our source DataFrames in one place so that we have a home to which we may tearfully return.

In [87]:
series_post_sorted = series_post

In [88]:
series_post_sorted.columns

Index(['yearID', 'round', 'teamIDwinner', 'lgIDwinner', 'teamIDloser',
       'lgIDloser', 'wins', 'losses', 'ties'],
      dtype='object')

In [89]:
df_teams = teams

#### Add Differentials, Which Should've Been Here in the First Place

Let's add some differential metrics that might be useful. In Pythonglish, this is what we're doing:
```python
for Runs, Strikeouts, Walks, Home Runs, Hits in team_stats:
    df_teams['StatDiff'] = df_teams['Team_Stat'] - df_teams['Opponent_stat']
```

In [90]:
df_teams['RDiff'] = df_teams['R'] - df_teams['RA']
df_teams['SODiff'] = df_teams['SO'] - df_teams['SOA']
df_teams['BBDiff'] = df_teams['BB'] - df_teams['BBA']
df_teams['HRDiff'] = df_teams['HR'] - df_teams['HRA']
df_teams['HDiff'] = df_teams['H'] - df_teams['HA']

#### Add Winning Percentage

Let's also give ourselves a winning percentage column, because baseball seasons haven't always been the same length.

In [91]:
df_teams['WP'] = df_teams['W'] / (df_teams['L'] + df_teams['W'])

## Locating the First Losers

Now let's find the best team in each year that didn't make the playoffs and add it to a 'first_losers' DataFrame

First, let's make two dicts — one for playoff winners and one for playoff losers — which serves to identify any team that made the playoffs. After some cleaning, the inverse of our by-year dict will comprise the teams that didn't make the playoffs.

We want to end up with something that looks like this:

```python
history = {
    1871: ['playoff teams'],
    1872: ['playoff teams'],
    [...]
    2021: ['playoff teams']
}
```

In [117]:
all_teams = {k: list(v) for k, v in teams.groupby('yearID')['teamID']}
playoff_winners = {k: list(v) for k, v in series_post_sorted.groupby('yearID')['teamIDwinner']}
playoff_losers = {k: list(v) for k, v in series_post_sorted.groupby('yearID')['teamIDloser']}




Let's make a little algo to jury-rig ourselves a little .unique() function

In [93]:
def unique(playoff_teams):
    x = np.array(playoff_teams)
    return np.unique(x)


### It's Loopin' Time
Now let's make a loop that zooms over our entire table to generate each year's list of playoff teams

In [94]:
history = {}
for year in range(1884, 2022):
    try:
        playoff_teams = playoff_winners[year] + playoff_losers[year]
        playoff_teams = unique(playoff_teams)
        year = {
            year: list(playoff_teams)
        }
        print('---')
        print(year)
        history.update(year)
    except:
        print('---')
        print(year)
        print("There weren't any playoffs this year.")

---
{1884: ['NY4', 'PRO']}
---
{1885: ['CHN', 'SL4']}
---
{1886: ['CHN', 'SL4']}
---
{1887: ['DTN', 'SL4']}
---
{1888: ['NY1', 'SL4']}
---
{1889: ['BR3', 'NY1']}
---
{1890: ['BRO', 'LS2']}
---
1891
There weren't any playoffs this year.
---
{1892: ['BSN', 'CL4']}
---
1893
There weren't any playoffs this year.
---
1894
There weren't any playoffs this year.
---
1895
There weren't any playoffs this year.
---
1896
There weren't any playoffs this year.
---
1897
There weren't any playoffs this year.
---
1898
There weren't any playoffs this year.
---
1899
There weren't any playoffs this year.
---
1900
There weren't any playoffs this year.
---
1901
There weren't any playoffs this year.
---
1902
There weren't any playoffs this year.
---
{1903: ['BOS', 'PIT']}
---
1904
There weren't any playoffs this year.
---
{1905: ['NY1', 'PHA']}
---
{1906: ['CHA', 'CHN']}
---
{1907: ['CHN', 'DET']}
---
{1908: ['CHN', 'DET']}
---
{1909: ['DET', 'PIT']}
---
{1910: ['CHN', 'PHA']}
---
{1911: ['NY1', 'PHA']}
---


# Next we're going to add a bool column to our monster team dataframe where TRUE = made playoffs and FALSE = missed playoffs.

----

### Erafying Things. For Posterity.

Let's split our teams into temporal eras. There are four major eras in baseball history. Or rather, two gigantic ones — the latter of which has three distinct sub-areas. 

In [50]:
deadball_era = df_teams.mask(df_teams['yearID'] <= 1920)
liveball_era = df_teams.mask(df_teams['yearID'] > 1920)

In [51]:
liveball = deadball_era.dropna(axis=0, how='all')
liveball

Unnamed: 0,yearID,lgID,teamID,franchID,divID,Rank,G,Ghome,W,L,DivWin,WCWin,LgWin,WSWin,R,AB,H,2B,3B,HR,BB,SO,SB,CS,HBP,SF,RA,ER,ERA,CG,SHO,SV,IPouts,HA,HRA,BBA,SOA,E,DP,FP,name,park,attendance,BPF,PPF,teamIDBR,teamIDlahman45,teamIDretro,RDiff,SODiff,BBDiff,HRDiff,HDiff,WP
719,1921.0,AL,BOS,BOS,,5.0,154.0,77.0,75.0,79.0,,,N,N,668.0,5206.0,1440.0,248.0,69.0,17.0,428.0,344.0,83.0,65.0,,,696.0,603.0,3.98,88.0,9.0,5.0,4092.0,1521.0,53.0,452.0,446.0,157.0,151.0,0.975,Boston Red Sox,Fenway Park I,279273.0,97.0,99.0,BOS,BOS,BOS,-28.0,-102.0,-24.0,-36.0,-81.0,0.487013
720,1921.0,NL,BRO,LAD,,5.0,152.0,78.0,77.0,75.0,,,N,N,667.0,5263.0,1476.0,209.0,85.0,59.0,325.0,400.0,91.0,73.0,,,681.0,560.0,3.70,82.0,8.0,12.0,4089.0,1556.0,46.0,361.0,471.0,232.0,142.0,0.964,Brooklyn Robins,Ebbets Field,613245.0,105.0,104.0,BRO,BRO,BRO,-14.0,-71.0,-36.0,13.0,-80.0,0.506579
721,1921.0,NL,BSN,ATL,,4.0,153.0,74.0,79.0,74.0,,,N,N,721.0,5385.0,1561.0,209.0,100.0,61.0,377.0,470.0,94.0,100.0,,,697.0,600.0,3.90,74.0,11.0,12.0,4155.0,1488.0,54.0,420.0,382.0,199.0,122.0,0.969,Boston Braves,Braves Field,318627.0,94.0,96.0,BSN,BSN,BSN,24.0,88.0,-43.0,7.0,73.0,0.516340
722,1921.0,AL,CHA,CHW,,7.0,154.0,77.0,62.0,92.0,,,N,N,683.0,5329.0,1509.0,242.0,82.0,35.0,445.0,474.0,97.0,93.0,,,858.0,749.0,4.94,84.0,7.0,9.0,4095.0,1603.0,52.0,549.0,392.0,199.0,155.0,0.969,Chicago White Sox,Comiskey Park,543650.0,98.0,98.0,CHW,CHA,CHA,-175.0,82.0,-104.0,-17.0,-94.0,0.402597
723,1921.0,NL,CHN,CHC,,7.0,153.0,76.0,64.0,89.0,,,N,N,668.0,5321.0,1553.0,234.0,56.0,37.0,343.0,374.0,70.0,97.0,,,773.0,665.0,4.39,73.0,7.0,7.0,4089.0,1605.0,67.0,409.0,441.0,166.0,129.0,0.974,Chicago Cubs,Wrigley Field,410107.0,100.0,101.0,CHC,CHN,CHN,-105.0,-67.0,-66.0,-30.0,-52.0,0.418301
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2980,2021.0,NL,SLN,STL,C,2.0,162.0,81.0,90.0,72.0,N,Y,N,N,706.0,5351.0,1303.0,261.0,22.0,198.0,478.0,1341.0,89.0,22.0,86.0,44.0,672.0,626.0,3.98,3.0,15.0,50.0,4251.0,1234.0,152.0,608.0,1225.0,84.0,137.0,0.986,St. Louis Cardinals,Busch Stadium III,2102530.0,92.0,92.0,STL,SLN,SLN,34.0,116.0,-130.0,46.0,69.0,0.555556
2981,2021.0,AL,TBA,TBD,E,1.0,162.0,81.0,100.0,62.0,Y,N,N,N,857.0,5507.0,1336.0,288.0,36.0,222.0,585.0,1542.0,88.0,42.0,72.0,41.0,651.0,593.0,3.67,1.0,13.0,42.0,4367.0,1264.0,184.0,436.0,1478.0,80.0,130.0,0.986,Tampa Bay Rays,Tropicana Field,761072.0,92.0,91.0,TBR,TBA,TBA,206.0,64.0,149.0,38.0,72.0,0.617284
2982,2021.0,AL,TEX,TEX,W,5.0,162.0,81.0,60.0,102.0,N,N,N,N,625.0,5405.0,1254.0,225.0,24.0,167.0,433.0,1381.0,106.0,29.0,58.0,31.0,815.0,758.0,4.79,0.0,3.0,31.0,4273.0,1402.0,232.0,513.0,1239.0,83.0,146.0,0.986,Texas Rangers,Globe Life Field,2110258.0,99.0,101.0,TEX,TEX,TEX,-190.0,142.0,-80.0,-65.0,-148.0,0.370370
2983,2021.0,AL,TOR,TOR,E,4.0,162.0,80.0,91.0,71.0,N,N,N,N,846.0,5476.0,1455.0,285.0,13.0,262.0,496.0,1218.0,81.0,20.0,51.0,35.0,663.0,610.0,3.91,1.0,14.0,34.0,4216.0,1257.0,209.0,473.0,1468.0,90.0,122.0,0.984,Toronto Blue Jays,Sahlen Field,805901.0,102.0,101.0,TOR,TOR,TOR,183.0,-250.0,23.0,53.0,198.0,0.561728


In [52]:
deadball = liveball_era.dropna(axis=0, how='all')
deadball

Unnamed: 0,yearID,lgID,teamID,franchID,divID,Rank,G,Ghome,W,L,DivWin,WCWin,LgWin,WSWin,R,AB,H,2B,3B,HR,BB,SO,SB,CS,HBP,SF,RA,ER,ERA,CG,SHO,SV,IPouts,HA,HRA,BBA,SOA,E,DP,FP,name,park,attendance,BPF,PPF,teamIDBR,teamIDlahman45,teamIDretro,RDiff,SODiff,BBDiff,HRDiff,HDiff,WP
0,1871.0,,BS1,BNA,,3.0,31.0,,20.0,10.0,,,N,,401.0,1372.0,426.0,70.0,37.0,3.0,60.0,19.0,73.0,16.0,,,303.0,109.0,3.55,22.0,1.0,3.0,828.0,367.0,2.0,42.0,23.0,243.0,24.0,0.834,Boston Red Stockings,South End Grounds I,,103.0,98.0,BOS,BS1,BS1,98.0,-4.0,18.0,1.0,59.0,0.666667
1,1871.0,,CH1,CNA,,2.0,28.0,,19.0,9.0,,,N,,302.0,1196.0,323.0,52.0,21.0,10.0,60.0,22.0,69.0,21.0,,,241.0,77.0,2.76,25.0,0.0,1.0,753.0,308.0,6.0,28.0,22.0,229.0,16.0,0.829,Chicago White Stockings,Union Base-Ball Grounds,,104.0,102.0,CHI,CH1,CH1,61.0,0.0,32.0,4.0,15.0,0.678571
2,1871.0,,CL1,CFC,,8.0,29.0,,10.0,19.0,,,N,,249.0,1186.0,328.0,35.0,40.0,7.0,26.0,25.0,18.0,8.0,,,341.0,116.0,4.11,23.0,0.0,0.0,762.0,346.0,13.0,53.0,34.0,234.0,15.0,0.818,Cleveland Forest Citys,National Association Grounds,,96.0,100.0,CLE,CL1,CL1,-92.0,-9.0,-27.0,-6.0,-18.0,0.344828
3,1871.0,,FW1,KEK,,7.0,19.0,,7.0,12.0,,,N,,137.0,746.0,178.0,19.0,8.0,2.0,33.0,9.0,16.0,4.0,,,243.0,97.0,5.17,19.0,1.0,0.0,507.0,261.0,5.0,21.0,17.0,163.0,8.0,0.803,Fort Wayne Kekiongas,Hamilton Field,,101.0,107.0,KEK,FW1,FW1,-106.0,-8.0,12.0,-3.0,-83.0,0.368421
4,1871.0,,NY2,NNA,,5.0,33.0,,16.0,17.0,,,N,,302.0,1404.0,403.0,43.0,21.0,1.0,33.0,15.0,46.0,15.0,,,313.0,121.0,3.72,32.0,1.0,0.0,879.0,373.0,7.0,42.0,22.0,235.0,14.0,0.840,New York Mutuals,Union Grounds (Brooklyn),,90.0,88.0,NYU,NY2,NY2,-11.0,-7.0,-9.0,-6.0,30.0,0.484848
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
714,1920.0,NL,PHI,PHI,,8.0,153.0,77.0,62.0,91.0,,,N,N,565.0,5264.0,1385.0,229.0,54.0,64.0,283.0,531.0,100.0,83.0,,,714.0,557.0,3.63,77.0,8.0,11.0,4140.0,1480.0,35.0,444.0,419.0,232.0,135.0,0.964,Philadelphia Phillies,Baker Bowl,330998.0,104.0,109.0,PHI,PHI,PHI,-149.0,112.0,-161.0,29.0,-95.0,0.405229
715,1920.0,NL,PIT,PIT,,4.0,155.0,78.0,79.0,75.0,,,N,N,530.0,5219.0,1342.0,162.0,90.0,16.0,374.0,405.0,181.0,117.0,,,552.0,454.0,2.89,92.0,17.0,10.0,4245.0,1389.0,25.0,280.0,444.0,186.0,119.0,0.971,Pittsburgh Pirates,Forbes Field,429037.0,103.0,103.0,PIT,PIT,PIT,-22.0,-39.0,94.0,-9.0,-47.0,0.512987
716,1920.0,AL,SLA,BAL,,4.0,154.0,78.0,76.0,77.0,,,N,N,797.0,5358.0,1651.0,279.0,83.0,50.0,427.0,339.0,118.0,79.0,,,766.0,617.0,4.03,84.0,9.0,14.0,4134.0,1481.0,53.0,578.0,444.0,231.0,119.0,0.963,St. Louis Browns,Sportsman's Park IV,419311.0,103.0,103.0,SLB,SLA,SLA,31.0,-105.0,-151.0,-3.0,170.0,0.496732
717,1920.0,NL,SLN,STL,,5.0,155.0,76.0,75.0,79.0,,,N,N,675.0,5495.0,1589.0,238.0,96.0,32.0,373.0,484.0,126.0,114.0,,,682.0,543.0,3.43,72.0,9.0,12.0,4278.0,1488.0,30.0,479.0,529.0,256.0,136.0,0.961,St. Louis Cardinals,Robison Field/Sportsman's Park IV,326836.0,98.0,98.0,STL,SLN,SLN,-7.0,-45.0,-106.0,2.0,101.0,0.487013


Now let's split the live-ball era into groups of its three main playoff structures: No divisions, Divisions, and Divisions with a Wildcard round.

In [53]:
league_era = liveball[(liveball['LgWin'].notna()) & (liveball['DivWin'].isna())]
division_era = liveball[(liveball['LgWin'].notna()) & (liveball['DivWin'].notna()) & (liveball['WCWin'].isna())]
wildcard_era = liveball[(liveball['LgWin'].notna()) & (liveball['DivWin'].notna()) & (liveball['WCWin'].notna())]


----

### Meet the First Losers. And Their Friends.

In [85]:
league_era['yearID']

719     1921.0
720     1921.0
721     1921.0
722     1921.0
723     1921.0
         ...  
1512    1968.0
1513    1968.0
1514    1968.0
1515    1968.0
1516    1968.0
Name: yearID, Length: 798, dtype: float64

In [93]:
history[2012]

['ATL', 'BAL', 'CIN', 'DET', 'NYA', 'OAK', 'SFN', 'SLN', 'TEX', 'WAS']

In [98]:
losers = {}
season = 2012
for year in league_era['yearID']:
    try:
        playoff_teams = history[season]
        print('---')
        print(season)
        print('in')
        print(playoff_teams)
        league_era[league_era["yearID"] == season]
        playoff_missers = {
            season: league_era[~league_era["teamID"].isin(playoff_teams)]
        }
        print('out')
        print(playoff_missers.values())
        # losers.update(playoff_missers)
    except:
        pass
    season += 1

---
2012
in
['ATL', 'BAL', 'CIN', 'DET', 'NYA', 'OAK', 'SFN', 'SLN', 'TEX', 'WAS']
out
dict_values([      yearID lgID teamID franchID divID  Rank      G  Ghome     W     L  \
719   1921.0   AL    BOS      BOS   NaN   5.0  154.0   77.0  75.0  79.0   
720   1921.0   NL    BRO      LAD   NaN   5.0  152.0   78.0  77.0  75.0   
721   1921.0   NL    BSN      ATL   NaN   4.0  153.0   74.0  79.0  74.0   
722   1921.0   AL    CHA      CHW   NaN   7.0  154.0   77.0  62.0  92.0   
723   1921.0   NL    CHN      CHC   NaN   7.0  153.0   76.0  64.0  89.0   
...      ...  ...    ...      ...   ...   ...    ...    ...   ...   ...   
1508  1968.0   AL    MIN      MIN   NaN   7.0  162.0   81.0  79.0  83.0   
1510  1968.0   NL    NYN      NYM   NaN   9.0  163.0   82.0  73.0  89.0   
1512  1968.0   NL    PHI      PHI   NaN   7.0  162.0   81.0  76.0  86.0   
1513  1968.0   NL    PIT      PIT   NaN   6.0  163.0   81.0  80.0  82.0   
1516  1968.0   AL    WS2      TEX   NaN  10.0  161.0   81.0  65.0  96.0   


In [91]:
losers.keys()

dict_keys([])

In [66]:
league_era.merge(pd.DataFrame([{'yearID': k, 'year': i} for k, v in history.items() for i in v]))

Unnamed: 0,yearID,lgID,teamID,franchID,divID,Rank,G,Ghome,W,L,DivWin,WCWin,LgWin,WSWin,R,AB,H,2B,3B,HR,BB,SO,SB,CS,HBP,SF,RA,ER,ERA,CG,SHO,SV,IPouts,HA,HRA,BBA,SOA,E,DP,FP,name,park,attendance,BPF,PPF,teamIDBR,teamIDlahman45,teamIDretro,RDiff,SODiff,BBDiff,HRDiff,HDiff,WP,year
0,1921.0,AL,BOS,BOS,,5.0,154.0,77.0,75.0,79.0,,,N,N,668.0,5206.0,1440.0,248.0,69.0,17.0,428.0,344.0,83.0,65.0,,,696.0,603.0,3.98,88.0,9.0,5.0,4092.0,1521.0,53.0,452.0,446.0,157.0,151.0,0.975,Boston Red Sox,Fenway Park I,279273.0,97.0,99.0,BOS,BOS,BOS,-28.0,-102.0,-24.0,-36.0,-81.0,0.487013,NY1
1,1921.0,AL,BOS,BOS,,5.0,154.0,77.0,75.0,79.0,,,N,N,668.0,5206.0,1440.0,248.0,69.0,17.0,428.0,344.0,83.0,65.0,,,696.0,603.0,3.98,88.0,9.0,5.0,4092.0,1521.0,53.0,452.0,446.0,157.0,151.0,0.975,Boston Red Sox,Fenway Park I,279273.0,97.0,99.0,BOS,BOS,BOS,-28.0,-102.0,-24.0,-36.0,-81.0,0.487013,NYA
2,1921.0,NL,BRO,LAD,,5.0,152.0,78.0,77.0,75.0,,,N,N,667.0,5263.0,1476.0,209.0,85.0,59.0,325.0,400.0,91.0,73.0,,,681.0,560.0,3.70,82.0,8.0,12.0,4089.0,1556.0,46.0,361.0,471.0,232.0,142.0,0.964,Brooklyn Robins,Ebbets Field,613245.0,105.0,104.0,BRO,BRO,BRO,-14.0,-71.0,-36.0,13.0,-80.0,0.506579,NY1
3,1921.0,NL,BRO,LAD,,5.0,152.0,78.0,77.0,75.0,,,N,N,667.0,5263.0,1476.0,209.0,85.0,59.0,325.0,400.0,91.0,73.0,,,681.0,560.0,3.70,82.0,8.0,12.0,4089.0,1556.0,46.0,361.0,471.0,232.0,142.0,0.964,Brooklyn Robins,Ebbets Field,613245.0,105.0,104.0,BRO,BRO,BRO,-14.0,-71.0,-36.0,13.0,-80.0,0.506579,NYA
4,1921.0,NL,BSN,ATL,,4.0,153.0,74.0,79.0,74.0,,,N,N,721.0,5385.0,1561.0,209.0,100.0,61.0,377.0,470.0,94.0,100.0,,,697.0,600.0,3.90,74.0,11.0,12.0,4155.0,1488.0,54.0,420.0,382.0,199.0,122.0,0.969,Boston Braves,Braves Field,318627.0,94.0,96.0,BSN,BSN,BSN,24.0,88.0,-43.0,7.0,73.0,0.516340,NY1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1591,1968.0,NL,SFN,SFG,,2.0,163.0,81.0,88.0,74.0,,,N,N,599.0,5441.0,1301.0,162.0,33.0,108.0,508.0,904.0,50.0,37.0,,,529.0,442.0,2.71,77.0,20.0,16.0,4407.0,1302.0,86.0,344.0,942.0,162.0,125.0,0.975,San Francisco Giants,Candlestick Park,837220.0,101.0,100.0,SFG,SFN,SFN,70.0,-38.0,164.0,22.0,-1.0,0.543210,SLN
1592,1968.0,NL,SLN,STL,,1.0,162.0,81.0,97.0,65.0,,,Y,N,583.0,5561.0,1383.0,227.0,48.0,73.0,378.0,897.0,110.0,45.0,,,472.0,409.0,2.49,63.0,30.0,32.0,4437.0,1282.0,82.0,375.0,971.0,140.0,135.0,0.978,St. Louis Cardinals,Busch Stadium II,2011167.0,99.0,97.0,STL,SLN,SLN,111.0,-74.0,3.0,-9.0,101.0,0.598765,DET
1593,1968.0,NL,SLN,STL,,1.0,162.0,81.0,97.0,65.0,,,Y,N,583.0,5561.0,1383.0,227.0,48.0,73.0,378.0,897.0,110.0,45.0,,,472.0,409.0,2.49,63.0,30.0,32.0,4437.0,1282.0,82.0,375.0,971.0,140.0,135.0,0.978,St. Louis Cardinals,Busch Stadium II,2011167.0,99.0,97.0,STL,SLN,SLN,111.0,-74.0,3.0,-9.0,101.0,0.598765,SLN
1594,1968.0,AL,WS2,TEX,,10.0,161.0,81.0,65.0,96.0,,,N,N,524.0,5400.0,1208.0,160.0,37.0,124.0,454.0,960.0,29.0,19.0,,,665.0,582.0,3.64,26.0,11.0,28.0,4317.0,1402.0,118.0,517.0,826.0,148.0,144.0,0.976,Washington Senators,R.F.K. Stadium,546661.0,97.0,98.0,WSA,WS2,WS2,-141.0,134.0,-63.0,6.0,-194.0,0.403727,DET


##### Old

In [54]:
wildcard_era_first_losers = wildcard_era.merge(series_post_sorted, left_on='teamIDBR', right_on='teamIDloser')
wildcard_era_first_losers = wildcard_era_first_losers.merge(series_post_sorted, left_on='teamIDBR', right_on='teamIDwinner')

In [55]:
wildcard_era_first_losers

Unnamed: 0,yearID_x,lgID,teamID,franchID,divID,Rank,G,Ghome,W,L,DivWin,WCWin,LgWin,WSWin,R,AB,H,2B,3B,HR,BB,SO,SB,CS,HBP,SF,RA,ER,ERA,CG,SHO,SV,IPouts,HA,HRA,BBA,SOA,E,DP,FP,name,park,attendance,BPF,PPF,teamIDBR,teamIDlahman45,teamIDretro,RDiff,SODiff,BBDiff,HRDiff,HDiff,WP,yearID_y,round_x,teamIDwinner_x,lgIDwinner_x,teamIDloser_x,lgIDloser_x,wins_x,losses_x,ties_x,yearID,round_y,teamIDwinner_y,lgIDwinner_y,teamIDloser_y,lgIDloser_y,wins_y,losses_y,ties_y
0,1995.0,NL,ATL,ATL,E,1.0,144.0,72.0,90.0,54.0,Y,N,Y,Y,645.0,4814.0,1202.0,210.0,27.0,168.0,520.0,933.0,73.0,43.0,40.0,34.0,540.0,494.0,3.44,18.0,11.0,34.0,3875.0,1184.0,107.0,436.0,1087.0,100.0,113.0,0.982,Atlanta Braves,Atlanta-Fulton County Stadium,2561831.0,103.0,102.0,ATL,ATL,ATL,105.0,-154.0,84.0,61.0,18.0,0.625000,1969,NLCS,NYN,NL,ATL,NL,3,0,0,1991,NLCS,ATL,NL,PIT,NL,4,3,0
1,1995.0,NL,ATL,ATL,E,1.0,144.0,72.0,90.0,54.0,Y,N,Y,Y,645.0,4814.0,1202.0,210.0,27.0,168.0,520.0,933.0,73.0,43.0,40.0,34.0,540.0,494.0,3.44,18.0,11.0,34.0,3875.0,1184.0,107.0,436.0,1087.0,100.0,113.0,0.982,Atlanta Braves,Atlanta-Fulton County Stadium,2561831.0,103.0,102.0,ATL,ATL,ATL,105.0,-154.0,84.0,61.0,18.0,0.625000,1969,NLCS,NYN,NL,ATL,NL,3,0,0,1992,NLCS,ATL,NL,PIT,NL,4,3,0
2,1995.0,NL,ATL,ATL,E,1.0,144.0,72.0,90.0,54.0,Y,N,Y,Y,645.0,4814.0,1202.0,210.0,27.0,168.0,520.0,933.0,73.0,43.0,40.0,34.0,540.0,494.0,3.44,18.0,11.0,34.0,3875.0,1184.0,107.0,436.0,1087.0,100.0,113.0,0.982,Atlanta Braves,Atlanta-Fulton County Stadium,2561831.0,103.0,102.0,ATL,ATL,ATL,105.0,-154.0,84.0,61.0,18.0,0.625000,1969,NLCS,NYN,NL,ATL,NL,3,0,0,1995,NLCS,ATL,NL,CIN,NL,4,0,0
3,1995.0,NL,ATL,ATL,E,1.0,144.0,72.0,90.0,54.0,Y,N,Y,Y,645.0,4814.0,1202.0,210.0,27.0,168.0,520.0,933.0,73.0,43.0,40.0,34.0,540.0,494.0,3.44,18.0,11.0,34.0,3875.0,1184.0,107.0,436.0,1087.0,100.0,113.0,0.982,Atlanta Braves,Atlanta-Fulton County Stadium,2561831.0,103.0,102.0,ATL,ATL,ATL,105.0,-154.0,84.0,61.0,18.0,0.625000,1969,NLCS,NYN,NL,ATL,NL,3,0,0,1995,NLDS1,ATL,NL,COL,NL,3,1,0
4,1995.0,NL,ATL,ATL,E,1.0,144.0,72.0,90.0,54.0,Y,N,Y,Y,645.0,4814.0,1202.0,210.0,27.0,168.0,520.0,933.0,73.0,43.0,40.0,34.0,540.0,494.0,3.44,18.0,11.0,34.0,3875.0,1184.0,107.0,436.0,1087.0,100.0,113.0,0.982,Atlanta Braves,Atlanta-Fulton County Stadium,2561831.0,103.0,102.0,ATL,ATL,ATL,105.0,-154.0,84.0,61.0,18.0,0.625000,1969,NLCS,NYN,NL,ATL,NL,3,0,0,1995,WS,ATL,NL,CLE,AL,4,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56159,2017.0,NL,MIA,FLA,E,2.0,162.0,78.0,77.0,85.0,N,N,N,N,778.0,5602.0,1497.0,271.0,31.0,194.0,486.0,1282.0,91.0,30.0,67.0,41.0,822.0,772.0,4.82,1.0,7.0,34.0,4328.0,1450.0,193.0,627.0,1202.0,73.0,156.0,0.988,Miami Marlins,Marlins Park,1583014.0,93.0,93.0,MIA,FLO,MIA,-44.0,80.0,-141.0,1.0,47.0,0.475309,2020,NLDS2,ATL,NL,MIA,NL,3,0,0,2020,NLWC3,MIA,NL,CHN,NL,2,0,0
56160,2018.0,NL,MIA,FLA,E,5.0,161.0,81.0,63.0,98.0,N,N,N,N,589.0,5488.0,1303.0,222.0,24.0,128.0,455.0,1384.0,45.0,31.0,73.0,31.0,809.0,762.0,4.76,1.0,12.0,30.0,4326.0,1388.0,192.0,605.0,1249.0,83.0,133.0,0.986,Miami Marlins,Marlins Park,811104.0,89.0,90.0,MIA,FLO,MIA,-220.0,135.0,-150.0,-64.0,-85.0,0.391304,2020,NLDS2,ATL,NL,MIA,NL,3,0,0,2020,NLWC3,MIA,NL,CHN,NL,2,0,0
56161,2019.0,NL,MIA,FLA,E,5.0,162.0,81.0,57.0,105.0,N,N,N,N,615.0,5512.0,1326.0,265.0,18.0,146.0,395.0,1469.0,55.0,30.0,73.0,33.0,808.0,760.0,4.74,2.0,8.0,27.0,4333.0,1340.0,236.0,615.0,1378.0,94.0,135.0,0.984,Miami Marlins,Marlins Park,811302.0,94.0,96.0,MIA,FLO,MIA,-193.0,91.0,-220.0,-90.0,-14.0,0.351852,2020,NLDS2,ATL,NL,MIA,NL,3,0,0,2020,NLWC3,MIA,NL,CHN,NL,2,0,0
56162,2020.0,NL,MIA,FLA,E,2.0,60.0,26.0,31.0,29.0,N,Y,N,N,263.0,1935.0,472.0,82.0,5.0,60.0,191.0,537.0,51.0,14.0,25.0,9.0,304.0,272.0,4.86,1.0,0.0,18.0,1512.0,506.0,82.0,226.0,451.0,39.0,60.0,0.981,Miami Marlins,Marlins Park,0.0,97.0,99.0,MIA,FLO,MIA,-41.0,86.0,-35.0,-22.0,-34.0,0.516667,2020,NLDS2,ATL,NL,MIA,NL,3,0,0,2020,NLWC3,MIA,NL,CHN,NL,2,0,0


In [406]:
first_losers.tail()

Unnamed: 0,yearID,lgID,teamID,franchID,divID,Rank,G,Ghome,W,L,DivWin,WCWin,LgWin,WSWin,R,AB,H,2B,3B,HR,BB,SO,SB,CS,HBP,SF,RA,ER,ERA,CG,SHO,SV,IPouts,HA,HRA,BBA,SOA,E,DP,FP,name,park,attendance,BPF,PPF,teamIDBR,teamIDlahman45,teamIDretro,RDiff,WP
2302,1999,NL,CIN,CIN,C,2,163,82.0,96,67,N,N,N,N,865,5649,1536,312,37,209,569.0,1125.0,164.0,54.0,45.0,44.0,711,647,3.98,6,11,55,4386,1309,190,636,1081,105,139,0.983,Cincinnati Reds,Cinergy Field,2061222.0,103,103,CIN,CIN,CIN,154,0.588957
2409,2002,AL,SEA,SEA,W,3,162,81.0,93,69,N,N,N,N,814,5569,1531,285,31,152,629.0,1003.0,137.0,58.0,51.0,72.0,699,654,4.07,8,12,43,4336,1422,178,441,1063,88,134,0.985,Seattle Mariners,Safeco Field,3542938.0,97,95,SEA,SEA,SEA,115,0.574074
2389,2002,AL,BOS,BOS,E,2,162,81.0,93,69,N,N,N,N,859,5640,1560,348,33,177,545.0,944.0,80.0,28.0,72.0,53.0,665,603,3.75,5,17,51,4338,1339,146,430,1157,104,140,0.983,Boston Red Sox,Fenway Park II,2650862.0,103,102,BOS,BOS,BOS,194,0.574074
2902,2019,AL,CLE,CLE,C,2,162,81.0,93,69,N,N,N,N,769,5425,1354,286,18,223,563.0,1332.0,103.0,35.0,50.0,46.0,657,601,3.76,6,16,42,4313,1308,207,450,1508,83,110,0.985,Cleveland Indians,Progressive Field,1738642.0,104,102,CLE,CLE,CLE,112,0.574074
2482,2005,AL,CLE,CLE,C,2,162,81.0,93,69,N,N,N,N,790,5609,1522,337,30,207,503.0,1093.0,62.0,36.0,54.0,50.0,642,582,3.61,6,10,51,4358,1363,157,413,1050,106,156,0.983,Cleveland Indians,Jacobs Field,2013763.0,96,96,CLE,CLE,CLE,148,0.574074
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2725,2013,AL,HOU,HOU,W,5,162,81.0,51,111,N,N,N,N,610,5457,1307,266,16,148,426.0,1535.0,110.0,61.0,52.0,38.0,848,766,4.79,2,5,32,4320,1530,191,616,1084,125,168,0.979,Houston Astros,Minute Maid Park,1651883.0,99,101,HOU,HOU,HOU,-238,0.314815
2446,2004,NL,ARI,ARI,W,5,162,81.0,51,111,N,N,N,N,615,5544,1401,295,38,135,441.0,1022.0,53.0,32.0,35.0,37.0,899,794,4.98,5,6,33,4308,1480,197,668,1153,139,144,0.977,Arizona Diamondbacks,Bank One Ballpark,2519560.0,105,107,ARI,ARI,ARI,-284,0.314815
2904,2019,AL,DET,DET,C,5,161,81.0,47,114,N,N,N,N,582,5549,1333,292,41,149,391.0,1595.0,57.0,20.0,48.0,42.0,915,835,5.24,0,3,31,4299,1555,250,536,1368,110,127,0.981,Detroit Tigers,Comerica Park,1501430.0,102,104,DET,DET,DET,-333,0.291925
2867,2018,AL,BAL,BAL,E,5,162,81.0,47,115,N,N,N,N,622,5507,1317,242,15,188,422.0,1412.0,81.0,22.0,57.0,35.0,892,824,5.18,2,7,28,4293,1552,234,589,1203,104,159,0.982,Baltimore Orioles,Oriole Park at Camden Yards,1564192.0,96,97,BAL,BAL,BAL,-270,0.290123


In [395]:
df_teams[(df_teams['yearID'] == 1989) & (df_teams['DivWin'] == 'Y')]
df_teams[(df_teams['yearID'] == 1989) & (df_teams['DivWin'] == 'Y')]
df_teams[(df_teams['yearID'] == 1989) & (df_teams['DivWin'] == 'Y')]

Unnamed: 0,yearID,lgID,teamID,franchID,divID,Rank,G,Ghome,W,L,DivWin,WCWin,LgWin,WSWin,R,AB,H,2B,3B,HR,BB,SO,SB,CS,HBP,SF,RA,ER,ERA,CG,SHO,SV,IPouts,HA,HRA,BBA,SOA,E,DP,FP,name,park,attendance,BPF,PPF,teamIDBR,teamIDlahman45,teamIDretro,RDiff
2026,1989,NL,CHN,CHC,E,1,162,81.0,93,69,Y,,N,N,702,5513,1438,235,45,124,472.0,921.0,136.0,57.0,26.0,50.0,623,556,3.43,18,10,55,4381,1369,106,532,918,124,130,0.98,Chicago Cubs,Wrigley Field,2491942.0,108,108,CHC,CHN,CHN,79
2038,1989,AL,OAK,OAK,W,1,162,81.0,99,63,Y,,Y,Y,712,5416,1414,220,25,127,562.0,855.0,157.0,55.0,34.0,62.0,576,497,3.09,17,20,57,4345,1287,103,510,930,129,159,0.979,Oakland Athletics,Oakland Coliseum,2667225.0,97,95,OAK,OAK,OAK,136
2043,1989,NL,SFN,SFG,W,1,162,81.0,92,70,Y,,Y,N,699,5469,1365,241,52,141,508.0,1071.0,87.0,54.0,40.0,39.0,600,535,3.3,12,16,47,4371,1320,120,471,802,114,135,0.982,San Francisco Giants,Candlestick Park,2059701.0,97,96,SFG,SFN,SFN,99
2046,1989,AL,TOR,TOR,E,1,162,81.0,89,73,Y,,N,N,731,5581,1449,265,40,142,521.0,923.0,144.0,58.0,31.0,53.0,651,584,3.58,12,12,38,4401,1408,99,478,849,127,164,0.98,Toronto Blue Jays,Exhibition Stadium /Skydome,3375883.0,94,94,TOR,TOR,TOR,80


KeyError: "['NL', 'SDN', 'SDP', 6, 162, 81.0, 65, 97, 'N', nan, 668, 5456, 1419, 209, 48, 113, 577.0, 992.0, 198.0, 91.0, 27.0, 36.0, 763, 680, 4.27, 14, 10, 33, 4300, 1402, 175, 602, 897, 147, 135, 0.976, 'San Diego Padres', 'Jack Murphy Stadium', 1454061.0, 96] not in index"

In [376]:
df_teams = df_teams.set_index(['yearID', 'lgID', 'divID'])

KeyError: 'PIT'

### Meet the Wonkaville Huskies

In [132]:
df_huskiesBatters = df_batters.loc[df_batters.BMI >= 34.55]

In [133]:
df_huskiesBatters.sort_values('BMI').describe()

Unnamed: 0,birthYear,birthMonth,birthDay,deathYear,deathMonth,deathDay,weight,height,yearID,stint,...,BB,SO,IBB,HBP,SH,SF,GIDP,KG,meters,BMI
count,26.0,25.0,25.0,5.0,5.0,5.0,26.0,26.0,26.0,26.0,...,26.0,22.0,0.0,24.0,17.0,0.0,1.0,26.0,26.0,26.0
mean,1969.153846,5.8,17.52,1952.4,7.6,8.6,269.961538,72.115385,1901.730769,1.115385,...,9.884615,14.909091,,0.875,6.0,,0.0,122.452394,1.831731,36.408324
std,40.878055,3.316625,8.529947,49.45503,3.646917,7.602631,30.1801,4.348121,14.17761,0.325813,...,16.310308,13.606403,,1.650099,8.951257,,,13.689452,0.110442,1.728873
min,1853.0,1.0,1.0,1891.0,2.0,2.0,155.0,55.0,1872.0,1.0,...,0.0,0.0,,0.0,0.0,,0.0,70.30676,1.397,34.622243
25%,1977.0,4.0,11.0,1915.0,6.0,2.0,261.25,71.0,1890.25,1.0,...,1.0,3.25,,0.0,0.0,,0.0,118.50091,1.8034,34.891189
50%,1983.5,6.0,18.0,1966.0,9.0,6.0,270.0,72.0,1903.5,1.0,...,3.5,11.0,,0.0,2.0,,0.0,122.46984,1.8288,35.875979
75%,1989.0,8.0,24.0,1975.0,10.0,14.0,283.75,75.0,1912.0,1.0,...,9.25,25.25,,1.0,7.0,,0.0,128.70673,1.905,37.928366
max,1998.0,12.0,30.0,2015.0,11.0,19.0,320.0,78.0,1924.0,2.0,...,68.0,43.0,,6.0,33.0,,0.0,145.14944,1.9812,40.292666


In [134]:
df_huskiesPitchers = df[df_pitchers.BMI > 34.55]

NameError: name 'df' is not defined

In [None]:
df_huskiesPitchers.head()

In [None]:
df_huskies = pd.merge(df_huskiesBatters, df_huskiesPitchers, how='right', on='playerID')

In [None]:
df_huskies.columns

In [None]:
sns.jointplot(data=df_simple, x="height", y="weight", kind = "reg", truncate = False)

In [None]:
sns.choose_diverging_palette()

In [None]:

# Compute the correlation matrix
corr = df_huskiesBatters.corr(method="spearman")

# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=bool))

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(290, 10, n=40, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(
    corr,
    mask=mask,
    cmap=cmap,
    vmax=1,
    center=0,
    square=True,
    linewidths=0.25,
    cbar_kws={"shrink": .5},
)



In [None]:
corr_mat = df.corr().stack().reset_index(name="correlation")