To determine the best players, position players had to play a minimum of 50 games at the position they were selected for.  Pitchers had to have at least 10 plate appearances to be selected.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
my_90s_team = {"Player": ["Allen Watson", "Michael Barrett", "Jeff Bagwell", "Chuck Knoblauch", "Corey Koskie", "Derek Jeter", "Barry Bonds", "Preston Wilson", "Bobby Higginson"], "Position": ["P", "C", "1B", "2B", "3B", "SS", "LF", "CF", "RF"], "Year": ["1995", "1999", "1994", "1992", "1999", "1996", "1990", "1999", "1996"], "Salary": [155000, 200500, 2525000, 325000, 200000, 130000, 850000, 201000, 170000], "OBP": [0.447, 0.345, 0.451, 0.384, 0.387, 0.370, 0.406, 0.350, 0.404]}
my_00s_team = {"Player": ["Wes Obermueller", "Brian McCann", "Ryan Howard", "Brian Roberts", "Garett Atkins", "Hanley Ramirez", "Denard Span", "B.J. Upton", "Nick Markakis"], "Position": ["P", "C", "1B", "2B", "3B", "SS", "LF", "CF", "RF"], "Year": ["2004", "2006", "2006", "2005", "2006", "2008", "2009", "2007", "2008"], "Salary": [314000, 333500, 355000, 390000, 335000, 439000, 435000, 386900, 455000], "OBP": [0.400, 0.388, 0.425, 0.387, 0.409, 0.400, 0.392, 0.386, 0.406]}

In [3]:
batting = pd.read_csv("data/Batting.csv")
salaries = pd.read_csv("data/Salaries.csv")
master = pd.read_csv("data/Master.csv")
appearances = pd.read_csv("data/Appearances.csv")

In [4]:
master = master.drop(["birthYear", "birthMonth", "birthDay", "birthCountry", "birthState", "birthCity", "deathYear", "deathMonth", "deathDay", "weight", "height", "bats", "throws", "debut", "finalGame", "retroID", "bbrefID"], axis=1)
master = master.drop(["deathCountry", "deathState", "deathCity", "nameGiven"], axis=1)

In [5]:
batting_90s = batting[batting.yearID.isin([1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999])]
salaries_90s = salaries[salaries.yearID.isin([1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999])]
appearances_90s = appearances[appearances.yearID.isin([1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999])]
everything_90s = pd.merge(batting_90s, salaries_90s, how="inner", left_on=["yearID", "playerID"], right_on=["yearID", "playerID"])
everything_90s = pd.merge(everything_90s, appearances_90s, how="inner", left_on=["yearID", "playerID"], right_on=["yearID", "playerID"])

In [6]:
everything_90s = everything_90s.set_index(["playerID"])
everything_90s = everything_90s.drop(["stint", "teamID_x", "lgID_x", "lgID_y"], axis=1)
everything_90s = everything_90s.drop(["teamID", "lgID", "G_all", "GS", "G_batting", "G_defense", "G_pr", "G_ph", "G_dh"], axis=1)
everything_90s = everything_90s.drop(["R", "2B", "3B", "HR", "RBI", "SB", "CS", "SO", "SH", "GIDP", "IBB"], axis=1)
everything_90s = everything_90s.sort(axis=0)
everything_90s["OBP"] = (everything_90s.H + everything_90s.HBP + everything_90s.BB) / (everything_90s.AB + everything_90s.BB + everything_90s.HBP +everything_90s.SF)
everything_90s = everything_90s[pd.notnull(everything_90s['OBP'])]
everything_90s = everything_90s.drop_duplicates(["yearID", "G", "AB"])
everything_90s = everything_90s.drop(["H", "BB", "HBP", "SF"], axis=1)

In [7]:
everything_hitters_90s = everything_90s.drop(everything_90s[everything_90s.G < 100].index)
everything_hitters_90s = pd.merge(everything_hitters_90s, master, how="inner", left_index=True, right_on="playerID")
everything_hitters_90s = everything_hitters_90s.set_index(["playerID"])
everything_hitters_90s = everything_hitters_90s.sort(columns='OBP', axis=0, ascending=False)
everything_hitters_90s = everything_hitters_90s.drop(["teamID_y", "G_p", "G_of"], axis=1)

In [8]:
hit_90s_c = everything_hitters_90s[["yearID", "G", "AB", "G_c", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_1b = everything_hitters_90s[["yearID", "G", "AB", "G_1b", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_2b = everything_hitters_90s[["yearID", "G", "AB", "G_2b", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_3b = everything_hitters_90s[["yearID", "G", "AB", "G_3b", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_ss = everything_hitters_90s[["yearID", "G", "AB", "G_ss", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_lf = everything_hitters_90s[["yearID", "G", "AB", "G_lf", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_cf = everything_hitters_90s[["yearID", "G", "AB", "G_cf", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_rf = everything_hitters_90s[["yearID", "G", "AB", "G_rf", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_c = hit_90s_c.drop(hit_90s_c[hit_90s_c.G_c < 50].index)
hit_90s_1b = hit_90s_1b.drop(hit_90s_1b[hit_90s_1b.G_1b < 50].index)
hit_90s_2b = hit_90s_2b.drop(hit_90s_2b[hit_90s_2b.G_2b < 50].index)
hit_90s_3b = hit_90s_3b.drop(hit_90s_3b[hit_90s_3b.G_3b < 50].index)
hit_90s_ss = hit_90s_ss.drop(hit_90s_ss[hit_90s_ss.G_ss < 50].index)
hit_90s_lf = hit_90s_lf.drop(hit_90s_lf[hit_90s_lf.G_lf < 50].index)
hit_90s_cf = hit_90s_cf.drop(hit_90s_cf[hit_90s_cf.G_cf < 50].index)
hit_90s_rf = hit_90s_rf.drop(hit_90s_rf[hit_90s_rf.G_rf < 50].index)

In [9]:
pitchers_90s = everything_90s.drop(everything_90s[everything_90s.G_p <= 1].index)
pitchers_90s = pitchers_90s.drop(["G_c", "G_1b", "G_c", "G_2b", "G_3b", "G_ss", "G_lf", "G_cf", "G_rf", "G_of"], axis=1)
pitchers_90s = pd.merge(pitchers_90s, master, how="inner", left_index=True, right_on="playerID")
pitchers_90s = pitchers_90s.set_index(["playerID"])
pitchers_90s = pitchers_90s.sort(columns="OBP", axis=0, ascending=False)
pitchers_90s = pitchers_90s.drop(pitchers_90s[pitchers_90s.AB < 10].index)

In [10]:
with pd.option_context('display.max_rows', 999, 'display.max_columns', 20):
    print(hit_90s_rf)

           yearID    G   AB  G_rf nameFirst   nameLast   salary       OBP
playerID                                                                 
oneilpa01    1994  103  368    90      Paul    O'Neill  3858334  0.460497
gwynnto01    1994  110  419   105      Tony      Gwynn  3633333  0.453586
oneilpa01    1996  150  546   146      Paul    O'Neill  5300000  0.410606
gwynnto01    1997  149  592   143      Tony      Gwynn  4575000  0.409231
higgibo02    1996  130  440    57     Bobby  Higginson   170000  0.404297
gwynnto01    1995  135  535   133      Tony      Gwynn  4658334  0.403813
gwynnto01    1996  116  451   111      Tony      Gwynn  4300000  0.400402
oneilpa01    1997  149  553   146      Paul    O'Neill  5500000  0.398744
gwynnto01    1993  122  489   121      Tony      Gwynn  4333333  0.397749
oneilpa01    1995  127  460   107      Paul    O'Neill  2850000  0.386740
gwynnto01    1999  111  411   104      Tony      Gwynn  4400000  0.381166
higgibo02    1997  146  546    56     

In [11]:
batting_00s = batting[batting.yearID.isin([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009])]
salaries_00s = salaries[salaries.yearID.isin([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009])]
appearances_00s = appearances[appearances.yearID.isin([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009])]
everything_00s = pd.merge(batting_00s, salaries_00s, how="inner", left_on=["yearID", "playerID"], right_on=["yearID", "playerID"])
everything_00s = pd.merge(everything_00s, appearances_00s, how="inner", left_on=["yearID", "playerID"], right_on=["yearID", "playerID"])

In [12]:
everything_00s = everything_00s.set_index(["playerID"])
everything_00s = everything_00s.drop(["stint", "teamID_x", "lgID_x", "lgID_y"], axis=1)
everything_00s = everything_00s.drop(["teamID", "lgID", "G_all", "GS", "G_batting", "G_defense", "G_pr", "G_ph", "G_dh"], axis=1)
everything_00s = everything_00s.drop(["R", "2B", "3B", "HR", "RBI", "SB", "CS", "SO", "SH", "GIDP", "IBB"], axis=1)
everything_00s["OBP"] = (everything_00s.H + everything_00s.HBP + everything_00s.BB) / (everything_00s.AB + everything_00s.BB + everything_00s.HBP +everything_00s.SF)
everything_00s = everything_00s.sort(axis=0)
everything_00s = everything_00s[pd.notnull(everything_00s['OBP'])]
everything_00s = everything_00s.drop_duplicates(["yearID", "G", "AB"])
everything_00s = everything_00s.drop(["H", "BB", "HBP", "SF"], axis=1)

In [13]:
everything_hitters_00s = everything_00s.drop(everything_00s[everything_00s.G < 100].index)
everything_hitters_00s = pd.merge(everything_hitters_00s, master, how="inner", left_index=True, right_on="playerID")
everything_hitters_00s = everything_hitters_00s.set_index(["playerID"])
everything_hitters_00s = everything_hitters_00s.sort(columns='OBP', axis=0, ascending=False)
everything_hitters_00s = everything_hitters_00s.drop(["teamID_y", "G_p", "G_of"], axis=1)

In [14]:
hit_00s_c = everything_hitters_00s[["yearID", "G", "AB", "G_c", "nameFirst", "nameLast", "salary", "OBP"]]
hit_00s_1b = everything_hitters_00s[["yearID", "G", "AB", "G_1b", "nameFirst", "nameLast", "salary", "OBP"]]
hit_00s_2b = everything_hitters_00s[["yearID", "G", "AB", "G_2b", "nameFirst", "nameLast", "salary", "OBP"]]
hit_00s_3b = everything_hitters_00s[["yearID", "G", "AB", "G_3b", "nameFirst", "nameLast", "salary", "OBP"]]
hit_00s_ss = everything_hitters_00s[["yearID", "G", "AB", "G_ss", "nameFirst", "nameLast", "salary", "OBP"]]
hit_00s_lf = everything_hitters_00s[["yearID", "G", "AB", "G_lf", "nameFirst", "nameLast", "salary", "OBP"]]
hit_00s_cf = everything_hitters_00s[["yearID", "G", "AB", "G_cf", "nameFirst", "nameLast", "salary", "OBP"]]
hit_00s_rf = everything_hitters_00s[["yearID", "G", "AB", "G_rf", "nameFirst", "nameLast", "salary", "OBP"]]
hit_00s_c = hit_00s_c.drop(hit_00s_c[hit_00s_c.G_c < 50].index)
hit_00s_1b = hit_00s_1b.drop(hit_00s_1b[hit_00s_1b.G_1b < 50].index)
hit_00s_2b = hit_00s_2b.drop(hit_00s_2b[hit_00s_2b.G_2b < 50].index)
hit_00s_3b = hit_00s_3b.drop(hit_00s_3b[hit_00s_3b.G_3b < 50].index)
hit_00s_ss = hit_00s_ss.drop(hit_00s_ss[hit_00s_ss.G_ss < 50].index)
hit_00s_lf = hit_00s_lf.drop(hit_00s_lf[hit_00s_lf.G_lf < 50].index)
hit_00s_cf = hit_00s_cf.drop(hit_00s_cf[hit_00s_cf.G_cf < 50].index)
hit_00s_rf = hit_00s_rf.drop(hit_00s_rf[hit_00s_rf.G_rf < 50].index)

In [15]:
pitchers_00s = everything_00s.drop(everything_00s[everything_00s.G_p <= 1].index)
pitchers_00s = pitchers_00s.drop(["G_c", "G_1b", "G_c", "G_2b", "G_3b", "G_ss", "G_lf", "G_cf", "G_rf", "G_of"], axis=1)
pitchers_00s = pd.merge(pitchers_00s, master, how="inner", left_index=True, right_on="playerID")
pitchers_00s = pitchers_00s.set_index(["playerID"])
pitchers_00s = pitchers_00s.sort(columns="OBP", axis=0, ascending=False)
pitchers_00s = pitchers_00s.drop(pitchers_00s[pitchers_00s.AB < 10].index)

In [29]:
with pd.option_context('display.max_rows', 999, 'display.max_columns', 20):
    print(hit_00s_rf)

           yearID    G   AB  G_rf nameFirst     nameLast    salary       OBP
playerID                                                                    
markani01    2008  157  595   156      Nick     Markakis    455000  0.406026
hawpebr01    2007  152  516   142      Brad        Hawpe    403000  0.386777
hawpebr01    2009  145  501   141      Brad        Hawpe   5500000  0.384354
hawpebr01    2008  138  488   133      Brad        Hawpe   3925000  0.381371
ethiean01    2008  141  525   109     Andre       Ethier    424500  0.374790
uptonju01    2009  138  526   136    Justin        Upton    412000  0.366269
darrmi02     2001  105  289    69      Mike         Darr    215000  0.362538
markani01    2007  161  637   161      Nick     Markakis    400000  0.362482
ethiean01    2009  160  596   158     Andre       Ethier   3100000  0.360584
uptonju01    2008  108  356   101    Justin        Upton    393000  0.352518
markani01    2006  147  491   127      Nick     Markakis    327000  0.350649

In [17]:
pitchers_00s[0:100]

Unnamed: 0_level_0,yearID,G,AB,teamID_y,salary,G_p,OBP,nameFirst,nameLast
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
obermwe01,2004,26,39,MIL,314000,25,0.400000,Wes,Obermueller
daalom01,2000,12,18,ARI,5625000,12,0.380952,Omar,Daal
karlsc01,2000,17,14,COL,3700000,6,0.375000,Scott,Karl
hudsolu01,2005,21,25,CIN,318000,19,0.370370,Luke,Hudson
hamptmi01,2002,36,64,COL,9503543,30,0.353846,Mike,Hampton
vargaja01,2006,12,16,FLO,327000,12,0.352941,Jason,Vargas
owingmi01,2008,36,52,ARI,402000,22,0.350877,Micah,Owings
hamptmi01,2009,21,37,HOU,2000000,21,0.350000,Mike,Hampton
hamptmi01,2005,12,25,ATL,15125000,12,0.346154,Mike,Hampton
bohanbr01,2001,21,31,COL,4000000,20,0.343750,Brian,Bohanon
