To determine the best players, position players had to play a minimum of 50 games at the position they were selected for.  Pitchers had to have at least 10 plate appearances to be selected.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [39]:
my_90s_team = {"Player": ["Allen Watson", "Michael Barrett", "Jeff Bagwell", "Chuck Knoblauch", "Corey Koskie", "Derek Jeter", "Barry Bonds", "Preston Wilson", "Bobby Higginson"], "Position": ["P", "C", "1B", "2B", "3B", "SS", "LF", "CF", "RF"], "Year": ["1995", "1999", "1994", "1992", "1999", "1996", "1990", "1999", "1996"], "Salary": [155000, 200500, 2525000, 325000, 200000, 130000, 850000, 201000, 170000], "OBP": [0.447, 0.345, 0.451, 0.384, 0.387, 0.370, 0.406, 0.350, 0.404]}

In [2]:
batting = pd.read_csv("data/Batting.csv")
salaries = pd.read_csv("data/Salaries.csv")
master = pd.read_csv("data/Master.csv")
appearances = pd.read_csv("data/Appearances.csv")

In [3]:
master = master.drop(["birthYear", "birthMonth", "birthDay", "birthCountry", "birthState", "birthCity", "deathYear", "deathMonth", "deathDay", "weight", "height", "bats", "throws", "debut", "finalGame", "retroID", "bbrefID"], axis=1)
master = master.drop(["deathCountry", "deathState", "deathCity", "nameGiven"], axis=1)

In [4]:
batting_90s = batting[batting.yearID.isin([1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999])]
salaries_90s = salaries[salaries.yearID.isin([1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999])]
appearances_90s = appearances[appearances.yearID.isin([1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999])]
everything_90s = pd.merge(batting_90s, salaries_90s, how="inner", left_on=["yearID", "playerID"], right_on=["yearID", "playerID"])
everything_90s = pd.merge(everything_90s, appearances_90s, how="inner", left_on=["yearID", "playerID"], right_on=["yearID", "playerID"])

In [5]:
everything_90s = everything_90s.set_index(["playerID"])
everything_90s = everything_90s.drop(["stint", "teamID_x", "lgID_x", "lgID_y"], axis=1)
everything_90s = everything_90s.drop(["teamID", "lgID", "G_all", "GS", "G_batting", "G_defense", "G_pr", "G_ph", "G_dh"], axis=1)
everything_90s = everything_90s.drop(["R", "2B", "3B", "HR", "RBI", "SB", "CS", "SO", "SH", "GIDP", "IBB"], axis=1)

In [7]:
everything_90s = everything_90s.sort(axis=0)
everything_90s["OBP"] = (everything_90s.H + everything_90s.HBP + everything_90s.BB) / (everything_90s.AB + everything_90s.BB + everything_90s.HBP +everything_90s.SF)
everything_90s = everything_90s[pd.notnull(everything_90s['OBP'])]
everything_90s = everything_90s.drop_duplicates(["yearID", "G", "AB"])
everything_90s = everything_90s.drop(["H", "BB", "HBP", "SF"], axis=1)

In [8]:
everything_hitters_90s = everything_90s.drop(everything_90s[everything_90s.G < 100].index)
everything_hitters_90s = pd.merge(everything_hitters_90s, master, how="inner", left_index=True, right_on="playerID")
everything_hitters_90s = everything_hitters_90s.set_index(["playerID"])
everything_hitters_90s = everything_hitters_90s.sort(columns='OBP', axis=0, ascending=False)
everything_hitters_90s = everything_hitters_90s.drop(["teamID_y", "G_p", "G_of"], axis=1)

In [12]:
everything_hitters_90s.head()

Unnamed: 0_level_0,yearID,G,AB,salary,G_c,G_1b,G_2b,G_3b,G_ss,G_lf,G_cf,G_rf,OBP,nameFirst,nameLast
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
thomafr04,1994,113,399,2755000,0,99,0,0,0,0,0,0,0.487427,Frank,Thomas
olerujo01,1993,158,551,1562500,0,137,0,0,0,0,0,0,0.472754,John,Olerud
bondsba01,1996,158,517,8416667,0,0,0,0,0,149,6,0,0.460741,Barry,Bonds
oneilpa01,1994,103,368,3858334,0,0,0,0,0,12,0,90,0.460497,Paul,O'Neill
thomafr04,1996,141,527,7150000,0,139,0,0,0,0,0,0,0.459168,Frank,Thomas


In [17]:
hit_90s_c = everything_hitters_90s[["yearID", "G", "AB", "G_c", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_1b = everything_hitters_90s[["yearID", "G", "AB", "G_1b", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_2b = everything_hitters_90s[["yearID", "G", "AB", "G_2b", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_3b = everything_hitters_90s[["yearID", "G", "AB", "G_3b", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_ss = everything_hitters_90s[["yearID", "G", "AB", "G_ss", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_lf = everything_hitters_90s[["yearID", "G", "AB", "G_lf", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_cf = everything_hitters_90s[["yearID", "G", "AB", "G_cf", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_rf = everything_hitters_90s[["yearID", "G", "AB", "G_rf", "nameFirst", "nameLast", "salary", "OBP"]]
hit_90s_c = hit_90s_c.drop(hit_90s_c[hit_90s_c.G_c < 50].index)
hit_90s_1b = hit_90s_1b.drop(hit_90s_1b[hit_90s_1b.G_1b < 50].index)
hit_90s_2b = hit_90s_2b.drop(hit_90s_2b[hit_90s_2b.G_2b < 50].index)
hit_90s_3b = hit_90s_3b.drop(hit_90s_3b[hit_90s_3b.G_3b < 50].index)
hit_90s_ss = hit_90s_ss.drop(hit_90s_ss[hit_90s_ss.G_ss < 50].index)
hit_90s_lf = hit_90s_lf.drop(hit_90s_lf[hit_90s_lf.G_lf < 50].index)
hit_90s_cf = hit_90s_cf.drop(hit_90s_cf[hit_90s_cf.G_cf < 50].index)
hit_90s_rf = hit_90s_rf.drop(hit_90s_rf[hit_90s_rf.G_rf < 50].index)

In [20]:
hit_90s_c.head()

Unnamed: 0_level_0,yearID,G,AB,G_c,nameFirst,nameLast,salary,OBP
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
steinte01,1999,101,338,96,Terry,Steinbach,1000000,0.357895
sciosmi01,1991,119,345,115,Mike,Scioscia,2183333,0.353383
sciosmi01,1990,135,435,132,Mike,Scioscia,1233333,0.348089
barremi01,1999,126,433,59,Michael,Barrett,200500,0.345416
steinte01,1996,145,514,137,Terry,Steinbach,4200000,0.341506


In [25]:
pitchers_90s = everything_90s.drop(everything_90s[everything_90s.G_p <= 1].index)
pitchers_90s = pitchers_90s.drop(["G_c", "G_1b", "G_c", "G_2b", "G_3b", "G_ss", "G_lf", "G_cf", "G_rf", "G_of"], axis=1)
pitchers_90s = pd.merge(pitchers_90s, master, how="inner", left_index=True, right_on="playerID")
pitchers_90s = pitchers_90s.set_index(["playerID"])
pitchers_90s = pitchers_90s.sort(columns="OBP", axis=0, ascending=False)

In [27]:
pitchers_90s = pitchers_90s.drop(pitchers_90s[pitchers_90s.AB < 10].index)

In [38]:
with pd.option_context('display.max_rows', 999, 'display.max_columns', 20):
    print(hit_90s_rf)

           yearID    G   AB  G_rf nameFirst   nameLast   salary       OBP
playerID                                                                 
oneilpa01    1994  103  368    90      Paul    O'Neill  3858334  0.460497
gwynnto01    1994  110  419   105      Tony      Gwynn  3633333  0.453586
oneilpa01    1996  150  546   146      Paul    O'Neill  5300000  0.410606
gwynnto01    1997  149  592   143      Tony      Gwynn  4575000  0.409231
higgibo02    1996  130  440    57     Bobby  Higginson   170000  0.404297
gwynnto01    1995  135  535   133      Tony      Gwynn  4658334  0.403813
gwynnto01    1996  116  451   111      Tony      Gwynn  4300000  0.400402
oneilpa01    1997  149  553   146      Paul    O'Neill  5500000  0.398744
gwynnto01    1993  122  489   121      Tony      Gwynn  4333333  0.397749
oneilpa01    1995  127  460   107      Paul    O'Neill  2850000  0.386740
gwynnto01    1999  111  411   104      Tony      Gwynn  4400000  0.381166
higgibo02    1997  146  546    56     