In [None]:
import pandas as pd
import numpy as np

# Widen pandas' display limits so wide stat tables are shown without wrapping.
for option_name, option_value in (('display.width', 5000), ('display.max_columns', 40)):
    pd.set_option(option_name, option_value)

# Batting table read from the local data directory.
batting_df = pd.read_csv('./data/Batting.csv')

In [None]:
# first five rows of the batting table
batting_df.head(5)

In [None]:
# Keep only the first 17 columns: the statistics after them were not recorded
# until sometime in the 1900s.
batting_df = batting_df.drop(batting_df.columns[17:], axis=1)

# Replace every missing value with 0.
batting_df = batting_df.fillna(0)
batting_df.head(5)

In [None]:
# score of a player for one season
def score(row):
    """Return the weighted batting score of ``row``.

    ``row`` is anything indexable by stat name (a row Series, a dict, or a whole
    DataFrame for a column-wise result). The score is
    R + H + 2B + 2*3B + 3*HR + RBI + SB.
    """
    weights = (('R', 1), ('H', 1), ('2B', 1), ('3B', 2), ('HR', 3), ('RBI', 1), ('SB', 1))
    return sum(weight * row[stat] for stat, weight in weights)

In [None]:
# Calculate the score for every row in one vectorised pass. score() only does
# column arithmetic, so it accepts the whole frame; applying it row by row
# builds a Series per row and is far slower on a table this size.
batting_df['score'] = score(batting_df)
batting_df[:5]
# to rank rows by score:
# batting_df.sort_values('score', ascending=False)

In [None]:
# WE ARE CONSIDERING ALL DATA FOR NOW

# makes a copy of these columns
# teams_relevant = teams_df[['playerID', 'yearID', 'teamID', 'R', 'H', '2B', '3B', 'HR', 'RBI', 'SB']]
# teams_relevant[:10]
# teams_relevant['score'] = teams_relevant.apply(lambda row: score(row),axis=1)
# teams_relevant[:10]

In [None]:
def create_player_year_id(df):
    """Build the "<playerID>_<yearID>" key for one row.

    ``df`` is a single row (or any mapping) with 'playerID' and 'yearID' entries.
    A whole-number float year (e.g. 1985.0, which appears when a numeric column
    is upcast) is rendered without the ".0", so the key always ends in the bare
    year. Other cells rely on that when they strip the suffix or join on the key.
    """
    year = df['yearID']
    if isinstance(year, float) and year.is_integer():
        year = int(year)
    return df['playerID'] + "_" + str(year)

In [None]:
# Key every row by "<playerID>_<yearID>": a player is treated as a distinct
# entity in each season, and rows can then be looked up by that key.
batting_index = batting_df.apply(create_player_year_id, axis=1)
batting_df = batting_df.set_index(batting_index)
batting_df.head(10)

In [None]:
# Sum all rows that share a player-season key.
agg_batting_df = batting_df.groupby(level=0).sum()
agg_batting_df = agg_batting_df.drop(['yearID', 'stint'], axis=1)
# Recover playerID by cutting the 5-character "_YYYY" suffix off the key.
agg_batting_df['playerID'] = [key[:-5] for key in agg_batting_df.index]
agg_batting_df

In [None]:
# Biographical columns kept from the master table.
master_columns = ['playerID', 'birthYear', 'nameFirst', 'nameLast', 'weight', 'height']
master_df = pd.read_csv('./data/Master.csv')[master_columns]
master_df.head(10)

In [None]:
# Attach birth year, name, weight and height to every batting row.
batters2 = batting_df.merge(master_df, how='left', on='playerID')
# Option not currently used: fill missing values with the column means.
# batters2.fillna(batters2.mean(), inplace=True)
batters2.head(10)

In [None]:
# Re-key the merged frame by "<playerID>_<yearID>".
batting_index = batters2.apply(create_player_year_id, axis=1)
batters2 = batters2.set_index(batting_index)

# Keep only seasons from 1985 onward.
batters2 = batters2.loc[batters2['yearID'] >= 1985]
batters2.head(10)

In [None]:
# Combine rows that share a player-season key (different stints) into one row:
# counting stats are summed, identifying and biographical fields take the group max.
stint_aggregations = {'playerID': 'max', 'yearID': 'max'}
for stat in ['G', 'AB', 'R', 'H', '2B', '3B', 'HR', 'RBI', 'SB', 'CS', 'BB', 'SO', 'score']:
    stint_aggregations[stat] = 'sum'
for bio_field in ['birthYear', 'nameFirst', 'nameLast', 'weight', 'height']:
    stint_aggregations[bio_field] = 'max'
bats2_grouped = batters2.groupby(level=0).agg(stint_aggregations)

In [None]:
# Highest-scoring player-seasons first. DataFrame.sort() no longer exists in
# pandas; sort_values() is its replacement.
bats2_grouped = bats2_grouped.sort_values('score', ascending=False)
bats2_grouped[:10]

In [None]:
# df = fielding.groupby(level=0, group_keys=False).apply(lambda x: x.ix[x.G.idxmax()])
# # get most common fielding positions
# fielding_positions = df[['playerID', 'yearID', 'POS', 'G']]
# fielding_positions[:10]

# # given a row from the fielding_positions df, replaces POS=OF with the most played outfield position in fieldingOF
# def getOFPosition(row):
#     if row['POS'] == 'OF':
#         print row.name
#         return fieldingOF.loc[row.name].Glf
# #         return 'OF'
#     else:
#         return row['POS']

# positions = fielding_positions.apply(getOFPosition, axis=1)

#fielding_positions.iloc[0].name

#fieldingOF.loc['allisar01_1871'][['Glf', 'Gcf', 'Grf']].idxmax()

# fieldingOF.loc[fielding_positions.loc['aaronha01_1974'].name]

In [None]:
# Appearances table; later cells use its per-position games columns (G_c, G_1b, ...).
appearances = pd.read_csv('./data/Appearances.csv')

In [None]:
# Keep the identifying columns plus games played at each position, for seasons
# from 1985 onward.
position_game_columns = ['G_c', 'G_1b', 'G_2b', 'G_3b', 'G_ss', 'G_lf', 'G_cf', 'G_rf', 'G_dh']
appearances = appearances.loc[appearances['yearID'] >= 1985,
                              ['yearID', 'playerID'] + position_game_columns]
appearances.head(10)

In [None]:
# Key appearances by "<playerID>_<yearID>", then drop the two id columns so only
# the per-position game counts remain (needed to take a max across positions).
appearances_index = appearances.apply(create_player_year_id, axis=1)
appearances = appearances.set_index(appearances_index)
appearances = appearances.drop(['playerID', 'yearID'], axis=1)
# Rows that are all zeros are not removed here; getPlayerPosition labels them 'NA'.
appearances.head(10)


In [None]:
# Collapse duplicate player-season keys by summing their game counts.
appearances = appearances.groupby(level=0).sum()

In [None]:
def getPlayerPosition(row):
    """Return the position column with the most games for this row's key.

    Looks up ``row.name`` in the module-level ``appearances`` frame. Returns the
    column label holding the largest value, or 'NA' when the largest value is 0
    (probably a pitcher, to be removed later).
    """
    games_by_position = appearances.loc[row.name]
    if games_by_position.max() == 0:
        return 'NA'
    return games_by_position.idxmax()

# def getPlayerPosition(row):
#     return appearances.loc[row.name].max()

In [None]:
# batters2 = batters2[batters2.index.isin(appearances.index)]
# batters2

# Keep only player-seasons that also have an appearances row.
has_appearances = bats2_grouped.index.isin(appearances.index)
bats2_grouped = bats2_grouped[has_appearances]
bats2_grouped

In [None]:
# One position label (or 'NA') per player-season, looked up row by row.
positions = bats2_grouped.apply(getPlayerPosition, axis=1)

In [None]:
bats2_grouped['position'] = positions

# Rows labelled 'NA' had no games at any tracked position (pitchers): drop them.
is_pitcher = bats2_grouped['position'] == 'NA'
bats2_grouped = bats2_grouped[~is_pitcher]
bats2_grouped.head(10)

In [None]:
# Salary table read from the local data directory.
salaries = pd.read_csv('./data/Salaries.csv')
salaries.head(10)

In [None]:
# Key salaries by "<playerID>_<yearID>" so they can be joined on the index.
salary_index = salaries.apply(create_player_year_id, axis=1)
salaries = salaries.set_index(salary_index)

In [None]:
# The key now carries player and year, so drop the id columns.
salaries = salaries.drop(['yearID', 'teamID', 'lgID', 'playerID'], axis=1)
salaries.head(10)

In [None]:
# Left-join salaries onto the batters by player-season key.
# NOTE(review): if salaries holds more than one row for a key, this join repeats
# the batter row once per salary row - confirm the keys are unique.
bats2_final = bats2_grouped.merge(salaries, how='left', left_index=True, right_index=True)

In [None]:
# Discard every row that still has a missing value in any column.
bats2_final = bats2_final.dropna(axis=0, how='any')
bats2_final.head(10)

In [None]:
# Pitching table read from the local data directory.
pitchers = pd.read_csv('./data/Pitching.csv')

In [None]:
# Keep only seasons from 1985 onward.
pitchers = pitchers.loc[pitchers['yearID'] >= 1985]
pitchers.head(10)

In [None]:
# Keep the columns used downstream, then key rows by "<playerID>_<yearID>".
pitcher_columns = ['playerID', 'yearID', 'teamID', 'W', 'L', 'G', 'SHO', 'SV', 'IPouts',
                   'H', 'ER', 'HR', 'BB', 'SO', 'BAOpp', 'R']
pitchers = pitchers[pitcher_columns]
pitchers_index = pitchers.apply(create_player_year_id, axis=1)
pitchers = pitchers.set_index(pitchers_index)
pitchers.head(10)

In [None]:
# Merge repeated player-season keys (stints) into one row: counting stats are
# summed, BAOpp is a plain (unweighted) mean, and the id fields take the group
# max. For teamID that is the alphabetically greatest team code in the group.
pitcher_aggregations = {'playerID': 'max', 'yearID': 'max', 'teamID': 'max'}
for stat in ['W', 'L', 'G', 'SHO', 'SV', 'IPouts', 'H', 'ER', 'HR', 'BB', 'SO']:
    pitcher_aggregations[stat] = 'sum'
pitcher_aggregations['BAOpp'] = 'mean'
pitcher_aggregations['R'] = 'sum'
pitchers_grouped = pitchers.groupby(level=0).agg(pitcher_aggregations)
pitchers_grouped.head(10)

In [None]:
def pitcher_ERA(row):
    """Return 27 * ER / IPouts for ``row``, or 15 when IPouts is 0.

    ``row`` is indexable by 'ER' and 'IPouts'. The fixed value 15 stands in for
    the undefined ratio when no outs were recorded. The 27.0 literal forces
    floating-point division: with two integers, ``ER / IPouts`` truncates under
    Python 2 and would floor nearly every result.
    """
    outs = row['IPouts']
    if outs == 0:
        return 15
    return 27.0 * row['ER'] / outs
    
def pitcher_score(row):
    """Return the weighted pitching score of ``row``.

    The score is IPouts - 3*ER - H - BB + SO + 5*W, with ``row`` indexable by
    those stat names.
    """
    weights = (('IPouts', 1), ('ER', -3), ('H', -1), ('BB', -1), ('SO', 1), ('W', 5))
    return sum(weight * row[stat] for stat, weight in weights)

In [None]:
# Derived columns: ERA and the pitching score, both computed per row.
pitchers_grouped['ERA'] = pitchers_grouped.apply(pitcher_ERA, axis=1)

pitchers_grouped['score'] = pitchers_grouped.apply(pitcher_score, axis=1)

# Highest score first. DataFrame.sort() no longer exists in pandas;
# sort_values() is its replacement.
pitchers_grouped = pitchers_grouped.sort_values('score', ascending=False)

pitchers_grouped[:10]

In [None]:
# Attach the biographical columns to every pitcher row.
pitchers2 = pitchers_grouped.merge(master_df, how='left', on='playerID')

In [None]:
# Rebuild the "<playerID>_<yearID>" key as the index of the merged frame.
pitchers2_index = pitchers2.apply(create_player_year_id, axis=1)
pitchers2 = pitchers2.set_index(pitchers2_index)
pitchers2.head(10)


In [None]:
# Left-join salaries onto the pitchers by player-season key.
pitchers_complete = pitchers2.merge(salaries, how='left', left_index=True, right_index=True)
pitchers_complete.head(10)

In [None]:
# Discard every row that still has a missing value in any column.
pitchers_complete = pitchers_complete.dropna(axis=0, how='any')
pitchers_complete.head(10)

In [None]:
# STUFF WE DID FOR PROPOSAL

In [None]:
#arranged = bats2_final[(bats2_final['yearID'] == 2013) & (bats2_final['salary'] < 3900000)].sort('score', ascending=False)
#arranged[arranged['position'] == 'G_dh']

In [None]:
# pitcher_baseline = pitchers_complete[pitchers_complete['yearID'] == 2013].groupby('teamID')[['salary', 'score']].sum()
# pitcher_baseline = pitcher_baseline[pitcher_baseline['salary'] < 15000000].sort('score', ascending=False)
# pitcher_baseline


In [None]:
# summary statistics of the pitcher table
pitchers_complete.describe()

In [None]:
# Predictors come from each pitcher's 2010 row; the target is the same
# pitcher's 2011 score.
pitchers_2010 = pitchers_complete.loc[pitchers_complete["yearID"] == 2010].drop('score', axis=1)

pitchers_2011 = pitchers_complete.loc[pitchers_complete["yearID"] == 2011]
pitchers_2011_rel = pitchers_2011[["playerID", "score"]]
# The default inner join keeps only pitchers present in both seasons.
pitchers_test = pitchers_2010.merge(pitchers_2011_rel, on='playerID')

In [None]:
# Same construction for batters: 2010 predictors paired with the 2011 score.
batters_2010 = bats2_final.loc[bats2_final["yearID"] == 2010].drop('score', axis=1)

batters_2011 = bats2_final.loc[bats2_final["yearID"] == 2011, ["playerID", "score"]]

# The default inner join keeps only batters present in both seasons.
batters_test = batters_2010.merge(batters_2011, on='playerID')

In [None]:
# Import the linear regression class
from sklearn.linear_model import LinearRegression
# Sklearn also has a helper that makes it easy to do cross validation
from sklearn.cross_validation import KFold
import numpy as np

# The columns we'll use to predict the target
pitcher_predictors = ["BB", "G", "H", "IPouts", "L", "HR", "BAOpp", "SO", 'W', "SV", "R", "ER", "SHO", "ERA", "weight", "height", "salary"]

# Initialize our algorithm class
alg = LinearRegression()

kf = KFold(pitchers_test.shape[0], n_folds=3, random_state=1)
pitcher_predictions = []
for train, test in kf:
    # The predictors we're using the train the algorithm.  Note how we only take the rows in the train folds.
    train_predictors = (pitchers_test[pitcher_predictors].iloc[train,:])
    # The target we're using to train the algorithm.
    train_target = pitchers_test["score"].iloc[train]
    # Training the algorithm using the predictors and target.
    alg.fit(train_predictors, train_target)
    # We can now make predictions on the test fold
    test_predictions = alg.predict(pitchers_test[pitcher_predictors].iloc[test,:])
    pitcher_predictions.append(test_predictions)
                       
pitcher_predictions = np.concatenate(pitcher_predictions, axis=0)

In [None]:
prediction_diff = [abs(pitcher_predictions[i] - pitchers_test["score"][i]) for i in range(len(pitcher_predictions))]
sum(prediction_diff) / len(prediction_diff)

In [None]:
pitcher_predictions[:10]

In [None]:
batter_predictors = ["RBI", "H", "BB", "weight", "height", "HR", "R", "SO", "2B", "SB", "CS", "3B", "salary"]

kf2 = KFold(batters_test.shape[0], n_folds=3, random_state=1)
batter_predictions = []
for train, test in kf2:
    train_predictors = (batters_test[batter_predictors].iloc[train,:])
    train_target = batters_test["score"].iloc[train]
    alg.fit(train_predictors, train_target)
    test_predictions = alg.predict(batters_test[batter_predictors].iloc[test,:])
    batter_predictions.append(test_predictions)
    
batter_predictions = np.concatenate(batter_predictions, axis=0)

In [None]:
prediction_diff = [abs(batter_predictions[i] - batters_test["score"][i]) for i in range(len(batter_predictions))]
sum(prediction_diff) / len(prediction_diff)

In [None]:
# Store the out-of-fold predictions next to the rows they belong to.
pitchers_test["predicted_score"] = pitcher_predictions
pitchers_test.head(10)

In [None]:
# Store the out-of-fold predictions next to the rows they belong to.
batters_test["predicted_score"] = batter_predictions
batters_test.head(10)

In [None]:
def batter_to_tuple(row):
    """Return (playerID, salary, predicted_score, position) taken from ``row``."""
    fields = ('playerID', 'salary', 'predicted_score', 'position')
    return tuple(row[field] for field in fields)
def pitcher_team_to_tuple(row):
    """Return (teamID, salary, predicted_score, 'pstaff') taken from ``row``."""
    fields = ('teamID', 'salary', 'predicted_score')
    return tuple(row[field] for field in fields) + ('pstaff',)

In [None]:
# one (playerID, salary, predicted_score, position) tuple per batter
batters_data = batters_test.apply(batter_to_tuple, axis=1)

In [None]:
# Replace each pitcher's 2010 team with the 2011 one: drop the old teamID
# column, then join the 2011 teamID back on playerID.
pitchers_2011_teams = pitchers_2011[["playerID", "teamID"]]
pitchers_test = pitchers_test.drop('teamID', axis=1).merge(pitchers_2011_teams, on='playerID')

In [None]:
# group the pitchers by team so each staff can be totalled
team_group = pitchers_test.groupby("teamID")

In [None]:

# per-team column totals; later cells read 'salary' and 'predicted_score'
team_group = team_group.sum()

In [None]:
# re-add index as column: after the groupby the team code is the index, and
# pitcher_team_to_tuple reads it as row['teamID']
team_group['teamID'] = team_group.index

In [None]:
# one (teamID, salary, predicted_score, 'pstaff') tuple per team
pitchers_data = team_group.apply(pitcher_team_to_tuple, axis=1)

In [None]:
# display the per-team tuples
pitchers_data

In [None]:
# All candidates in one list: the per-team pitching tuples first, then the
# batter tuples. The last element of each tuple names the roster slot.
masterlist = np.array(pitchers_data).tolist() + np.array(batters_data).tolist()

In [None]:
# display the combined candidate list
masterlist

In [None]:
import copy
# masterlist = [('ARI', 14366500.0, 910.9371240368031, 'pstaff'), ('ATL', 36412500.0, 1388.9218978966571, 'pstaff'), ('BAL', 8625000.0, 776.4856986396633, 'pstaff'), ('BOS', 70271833.0, 2125.840506229697, 'pstaff'), ('CHA', 41200000.0, 1222.4210172676098, 'pstaff'), ('CHN', 55827000.0, 1878.720510980691, 'pstaff'), ('CIN', 26493000.0, 945.6670579918168, 'pstaff'), ('CLE', 10093800.0, 792.3801894562247, 'pstaff'), ('COL', 32632000.0, 1567.3965013009336, 'pstaff'), ('DET', 26489552.0, 1163.53546790145, 'pstaff'), ('FLO', 24677300.0, 1841.0638631064674, 'pstaff'), ('HOU', 13221000.0, 1044.2697135189546, 'pstaff'), ('KCA', 14075000.0, 769.3449678719517, 'pstaff'), ('LAA', 45848000.0, 1821.8821538346342, 'pstaff'), ('LAN', 52150333.0, 2067.7618133416613, 'pstaff'), ('MIL', 25601776.0, 1846.9702176509727, 'pstaff'), ('MIN', 17568000.0, 1538.440251328942, 'pstaff'), ('NYA', 66247339.0, 1356.5802238733766, 'pstaff'), ('NYN', 23773666.0, 785.8256831092024, 'pstaff'), ('OAK', 23070000.0, 1520.804958643793, 'pstaff'), ('PHI', 75768333.0, 2697.507315892099, 'pstaff'), ('PIT', 10328000.0, 771.2522274030191, 'pstaff'), ('SDN', 23810800.0, 1373.0594231685877, 'pstaff'), ('SEA', 11815900.0, 1037.120063794589, 'pstaff'), ('SFN', 48180333.0, 1913.2363061153706, 'pstaff'), ('SLN', 30475500.0, 1244.122420901785, 'pstaff'), ('TBA', 14339771.0, 1328.8420282479756, 'pstaff'), ('TEX', 31284640.0, 1860.1385589263705, 'pstaff'), ('TOR', 20911800.0, 1675.408736287713, 'pstaff'), ('WAS', 12659002.0, 788.0272924525746, 'pstaff'), ('abreubo01', 9000000.0, 371.99653631403953, 'G_rf'), ('andruel01', 418420.0, 309.61369249331347, 'G_ss'), ('ankieri01', 2750000.0, 137.61217993905967, 'G_cf'), ('avilaal01', 404900.0, 144.25527881477947, 'G_c'), ('avilemi01', 429000.0, 247.29799534887465, 'G_2b'), ('aybarer01', 2050000.0, 269.07743537993736, 'G_ss'), ('bakerje03', 975000.0, 129.22593337132056, 'G_3b'), ('bakerjo01', 415000.0, 71.99373161071308, 'G_c'), ('barajro01', 500000.0, 186.0381340219757, 
'G_c'), ('barmecl01', 3325000.0, 197.51565425729353, 'G_2b'), ('bartlja01', 4000000.0, 256.6505999472788, 'G_ss'), ('bartoda02', 410000.0, 342.9887638286865, 'G_1b'), ('bautijo02', 2400000.0, 591.6649829082061, 'G_rf'), ('bayja01', 8625000.0, 221.77153310339426, 'G_lf'), ('beckhgo01', 445000.0, 240.3832762690299, 'G_2b'), ('beltrad01', 9000000.0, 377.49250289164377, 'G_3b'), ('beltrca01', 19401569.0, 168.2886918229422, 'G_cf'), ('berkmla01', 14500000.0, 257.1149887053001, 'G_1b'), ('betanyu01', 3300000.0, 276.72355633858535, 'G_ss'), ('blakeca01', 6250000.0, 247.5024836477074, 'G_3b'), ('blancan01', 510000.0, 102.85240788320331, 'G_2b'), ('blanche01', 750000.0, 71.65415520657466, 'G_c'), ('blankky01', 410600.0, 74.72328727057138, 'G_lf'), ('bloomwi01', 1700000.0, 152.10250066531515, 'G_rf'), ('blumge01', 1500000.0, 116.47677899623062, 'G_ss'), ('bonifem01', 420000.0, 126.93416158509774, 'G_cf'), ('borboju01', 600000.0, 240.61120123528823, 'G_cf'), ('bournmi01', 2400000.0, 332.70528584777946, 'G_cf'), ('bowkejo01', 410000.0, 115.0454606928333, 'G_rf'), ('bradlmi01', 11000000.0, 147.49642211898598, 'G_lf'), ('brantmi02', 400800.0, 184.87755924707056, 'G_cf'), ('branyru01', 1500000.0, 238.33745330626354, 'G_dh'), ('braunry02', 1287500.0, 416.25632729497295, 'G_lf'), ('brignre01', 403700.0, 174.53119460583991, 'G_2b'), ('bruceja01', 440000.0, 360.91824185680923, 'G_rf'), ('buckjo01', 2000000.0, 210.33710162948563, 'G_c'), ('bucktr01', 410000.0, 58.50672497834263, 'G_lf'), ('burrepa01', 9000000.0, 252.3518983923971, 'G_lf'), ('buterdr01', 400000.0, 93.80948174620352, 'G_c'), ('butlebi03', 470000.0, 300.3310764779268, 'G_1b'), ('byrdma01', 3000000.0, 255.94481848983762, 'G_cf'), ('cabreas01', 444600.0, 165.82951285097104, 'G_ss'), ('cabreme01', 3100000.0, 201.59530859986896, 'G_lf'), ('cabremi01', 20000000.0, 507.5318385028463, 'G_1b'), ('cabreor01', 2020000.0, 218.13660815131303, 'G_ss'), ('callaal01', 460000.0, 248.78249220282768, 'G_3b'), ('camermi01', 7750000.0, 
113.53020314516047, 'G_cf'), ('canoro01', 9000000.0, 450.6508879659392, 'G_2b'), ('cantujo01', 6000000.0, 209.22511573860868, 'G_3b'), ('carroja01', 1535677.0, 196.05006391032796, 'G_ss'), ('casilal01', 437500.0, 158.36946155232098, 'G_ss'), ('castrra01', 800000.0, 117.4543041530917, 'G_c'), ('cedenro02', 1125000.0, 190.6940379097983, 'G_ss'), ('cervefr01', 410800.0, 153.9118146486402, 'G_c'), ('chaveer01', 12500000.0, 69.75650593112236, 'G_dh'), ('choosh01', 461100.0, 392.2166392773321, 'G_rf'), ('coghlch01', 475000.0, 210.96657872144627, 'G_lf'), ('colvity01', 401000.0, 286.49871575445275, 'G_rf'), ('conrabr01', 400000.0, 160.80150308663679, 'G_3b'), ('coraal01', 1850000.0, 118.8116031055539, 'G_2b'), ('counscr01', 2100000.0, 125.96648471517483, 'G_ss'), ('craigal01', 400000.0, 99.81911940667848, 'G_rf'), ('crawfca02', 10000000.0, 495.17408187427486, 'G_lf'), ('crispco01', 5000000.0, 260.41713047930045, 'G_cf'), ('cruzne02', 440000.0, 322.3482066610087, 'G_rf'), ('cuddymi01', 9416666.0, 354.8977946358648, 'G_1b'), ('damonjo01', 8000000.0, 309.3334201295994, 'G_dh'), ('davisra01', 1350000.0, 288.005300590826, 'G_cf'), ('dejesda01', 4700000.0, 216.95766747010018, 'G_rf'), ('derosma01', 6000000.0, 81.75937420297119, 'G_lf'), ('desmoia01', 400000.0, 257.5449691870584, 'G_ss'), ('dewitbl01', 410000.0, 220.70164797843003, 'G_2b'), ('diazma02', 2550000.0, 147.15860618334466, 'G_lf'), ('dobbsgr01', 1350000.0, 103.67752011069672, 'G_3b'), ('doumiry01', 3650000.0, 200.21703892440016, 'G_c'), ('drewjd01', 14000000.0, 321.71732887486337, 'G_rf'), ('drewst01', 3400000.0, 390.51645968094533, 'G_ss'), ('dunnad01', 12000000.0, 360.6190306146656, 'G_1b'), ('ellisma01', 5500000.0, 214.49498696612298, 'G_2b'), ('ellsbja01', 496500.0, 90.03184031835642, 'G_cf'), ('encared01', 5175000.0, 237.723291300181, 'G_3b'), ('escobal02', 405500.0, 280.77694203699036, 'G_ss'), ('escobyu01', 435000.0, 223.38013918314428, 'G_ss'), ('ethiean01', 5750000.0, 338.0978122150177, 'G_rf'), ('everead01', 
1550000.0, 79.54015336023205, 'G_ss'), ('fieldpr01', 11000000.0, 376.7308201991674, 'G_1b'), ('figgich01', 8500000.0, 278.9522965183272, 'G_2b'), ('fontemi01', 1000000.0, 150.17993015383956, 'G_2b'), ('fowlede01', 406000.0, 344.05193295189133, 'G_cf'), ('foxja02', 405000.0, 111.37698240573701, 'G_dh'), ('francbe01', 470000.0, 146.1043513991554, 'G_lf'), ('francje02', 5000000.0, 250.45671794857302, 'G_rf'), ('francju02', 400000.0, 48.2847192980563, 'G_3b'), ('freesda01', 400000.0, 131.68903568730497, 'G_3b'), ('fukudko01', 14000000.0, 272.99915519896786, 'G_rf'), ('furcara01', 9500000.0, 291.59895431879795, 'G_ss'), ('gardnbr01', 452500.0, 379.9115757902731, 'G_lf'), ('getzch01', 414500.0, 136.07596527368207, 'G_2b'), ('giambja01', 1750000.0, 125.23382442959337, 'G_1b'), ('gloadro01', 1000000.0, 133.79757370256814, 'G_1b'), ('gomesjo01', 800000.0, 293.24047015946167, 'G_lf'), ('gomezca01', 1100000.0, 171.52311789405488, 'G_cf'), ('gonzaad01', 4875000.0, 409.3228403573071, 'G_1b'), ('gonzaal02', 2750000.0, 314.83066754880986, 'G_ss'), ('gonzaal03', 415500.0, 87.30625813742418, 'G_2b'), ('gonzaca01', 406000.0, 509.84295775032024, 'G_lf'), ('gordoal01', 1150000.0, 158.29524348311512, 'G_lf'), ('grandcu01', 5500000.0, 368.08702548218173, 'G_cf'), ('guerrvl01', 5500000.0, 379.62575902181464, 'G_dh'), ('guillca01', 13000000.0, 161.30797882520505, 'G_2b'), ('gutiefr01', 2312500.0, 298.169915932641, 'G_cf'), ('gwynnto02', 419800.0, 208.3797536456247, 'G_cf'), ('hafnetr01', 11500000.0, 235.19776450907722, 'G_dh'), ('hairsje02', 2125000.0, 231.26754709045986, 'G_ss'), ('hairssc01', 2450000.0, 196.14784344960748, 'G_lf'), ('hallbi03', 8525000.0, 221.26108791436872, 'G_lf'), ('hamiljo03', 3250000.0, 441.04282104266287, 'G_lf'), ('hanigry01', 415000.0, 193.17932400211527, 'G_c'), ('hardyjj01', 5100000.0, 230.07846225961703, 'G_ss'), ('harriwi01', 1500000.0, 155.4040351327214, 'G_lf'), ('hartco01', 4800000.0, 367.5357576856989, 'G_rf'), ('hawpebr01', 7500000.0, 205.9565683922828, 
'G_rf'), ('headlch01', 427700.0, 294.8058623090663, 'G_3b'), ('helmswe01', 950000.0, 129.87987471793457, 'G_3b'), ('heltoto01', 17775000.0, 237.8445566668218, 'G_1b'), ('hernara02', 3868376.0, 184.4581319436004, 'G_c'), ('heywaja01', 400000.0, 356.29597437268893, 'G_rf'), ('hillaa01', 4000000.0, 316.10485043530474, 'G_2b'), ('hillko01', 700000.0, 116.49439239050764, 'G_c'), ('hinsker01', 1000000.0, 195.87788393962558, 'G_lf'), ('hollima01', 16333327.0, 429.13173434031165, 'G_lf'), ('howarry01', 19000000.0, 323.9878838825743, 'G_1b'), ('hudsoor01', 5000000.0, 315.8522085782617, 'G_2b'), ('huffau01', 3000000.0, 484.6778611849332, 'G_1b'), ('hundlni01', 415700.0, 163.97249601247825, 'G_c'), ('hunteto01', 18500000.0, 304.47683809227493, 'G_cf'), ('iannech01', 1750000.0, 143.7012771611516, 'G_c'), ('ibanera01', 12166666.0, 324.9673953482142, 'G_lf'), ('infanom01', 2225000.0, 254.77618432408775, 'G_2b'), ('ingebr01', 6600000.0, 213.23327390866734, 'G_3b'), ('izturce01', 2600000.0, 189.54226332366653, 'G_ss'), ('izturma01', 2766666.0, 185.49207704298323, 'G_3b'), ('jacksau01', 400000.0, 331.86115730685, 'G_cf'), ('jacksco01', 3100000.0, 191.1922072429648, 'G_lf'), ('janispa01', 412500.0, 172.4743983401978, 'G_ss'), ('jaramja01', 416500.0, 87.67162732102543, 'G_c'), ('jeterde01', 22600000.0, 419.70140405681803, 'G_ss'), ('johnsch05', 400000.0, 195.31456823143702, 'G_3b'), ('johnske05', 2350000.0, 390.11890492649314, 'G_2b'), ('johnsre02', 800000.0, 116.73589588016523, 'G_lf'), ('johnsro07', 412500.0, 138.1774290953636, 'G_c'), ('jonesad01', 465000.0, 265.8974997513557, 'G_cf'), ('jonesan01', 500000.0, 244.2505460717768, 'G_rf'), ('jonesch06', 14000000.0, 307.9926949091036, 'G_3b'), ('jonesga02', 425000.0, 310.4257154408736, 'G_1b'), ('joycema01', 406000.0, 212.14283896398473, 'G_rf'), ('kearnau01', 750000.0, 219.3753879417324, 'G_lf'), ('kellydo01', 405000.0, 211.01677620029454, 'G_lf'), ('kempma01', 4000000.0, 285.9783735178715, 'G_cf'), ('kendrho01', 1750000.0, 
273.3355452343861, 'G_2b'), ('kennead01', 1250000.0, 242.73812029843276, 'G_2b'), ('keppije01', 1150000.0, 344.21711989600004, 'G_2b'), ('kinslia01', 4200000.0, 330.652735064115, 'G_2b'), ('konerpa01', 12000000.0, 436.8713060455247, 'G_1b'), ('kotsama01', 1500000.0, 185.8453438465574, 'G_dh'), ('kottage01', 408500.0, 192.4474642738038, 'G_c'), ('kouzmke01', 3100000.0, 262.54231977784616, 'G_3b'), ('kubelja01', 4100000.0, 284.11094220540025, 'G_rf'), ('lairdge01', 3950000.0, 126.95804763179046, 'G_c'), ('langery01', 525000.0, 128.42827052119577, 'G_lf'), ('laporma01', 402700.0, 226.6009412535809, 'G_1b'), ('larocad01', 4500000.0, 300.5921612222318, 'G_1b'), ('larocan01', 451000.0, 152.01899383659799, 'G_3b'), ('leeca01', 19000000.0, 296.4945932871304, 'G_lf'), ('leede02', 13250000.0, 324.65760983167735, 'G_1b'), ('lewisfr02', 455000.0, 278.2589190547958, 'G_lf'), ('lindad01', 550000.0, 273.451556530787, 'G_dh'), ('loneyja01', 3100000.0, 294.37427547566, 'G_1b'), ('longoev01', 950000.0, 421.4724970722154, 'G_3b'), ('lopezjo01', 3000000.0, 241.70371424838396, 'G_3b'), ('lowrije01', 434000.0, 218.8978096867915, 'G_2b'), ('ludwiry01', 5450000.0, 250.24373349434478, 'G_rf'), ('markani01', 7100000.0, 392.13864481471416, 'G_rf'), ('marsolo01', 401200.0, 180.79829209138114, 'G_c'), ('martiru01', 5050000.0, 204.870042308729, 'G_c'), ('martivi01', 7700000.0, 354.71801436160007, 'G_c'), ('mathije01', 1300000.0, 109.51192986385945, 'G_c'), ('matsuhi01', 6000000.0, 308.87996704802396, 'G_dh'), ('mauerjo01', 12500000.0, 389.24565371560203, 'G_c'), ('maybica01', 405000.0, 201.48293374415414, 'G_cf'), ('mccanbr01', 5700000.0, 331.1522826889756, 'G_c'), ('mccoymi01', 400700.0, 108.83316758485171, 'G_2b'), ('mccutan01', 422500.0, 419.0606396509639, 'G_cf'), ('mcdonjo03', 1500000.0, 155.76734574042302, 'G_2b'), ('mcgehca01', 427500.0, 337.9428495032749, 'G_3b'), ('mclouna01', 5000000.0, 175.67324309598092, 'G_cf'), ('michaja01', 800000.0, 163.78021855806747, 'G_lf'), ('milesaa01', 
2700000.0, 109.51226992084918, 'G_2b'), ('millela02', 452000.0, 181.38896424858245, 'G_lf'), ('molinjo01', 800000.0, 91.71106686806405, 'G_c'), ('molinya01', 4312500.0, 199.21495788856646, 'G_c'), ('montemi01', 2000000.0, 178.28441333107662, 'G_c'), ('mooread01', 401000.0, 94.08134706285114, 'G_c'), ('moraljo02', 412500.0, 89.54809470438511, 'G_c'), ('morame01', 1275000.0, 203.05357331480644, 'G_3b'), ('morgany01', 426500.0, 208.85976352133372, 'G_cf'), ('morneju01', 15000000.0, 309.07772594555723, 'G_1b'), ('morsemi01', 410000.0, 202.43159589600901, 'G_rf'), ('murphda07', 427670.0, 312.49226043126276, 'G_lf'), ('nadyxa01', 3300000.0, 151.1581908911882, 'G_1b'), ('napolmi01', 3600000.0, 250.5939941981693, 'G_1b'), ('navardi01', 2100000.0, 92.06922431482883, 'G_c'), ('nievewi01', 700000.0, 119.65512768977868, 'G_c'), ('nixja01', 420000.0, 166.07746161846916, 'G_3b'), ('olivomi01', 2000000.0, 182.07411505454132, 'G_c'), ('ordonma01', 17825976.0, 274.882312482131, 'G_rf'), ('ortizda01', 13000000.0, 360.2123621681095, 'G_dh'), ('overbly01', 7950000.0, 278.6408713739923, 'G_1b'), ('paganan01', 1500000.0, 368.96731156061287, 'G_cf'), ('parrage01', 405500.0, 151.45821652407375, 'G_lf'), ('patteer01', 405000.0, 152.88905255720223, 'G_lf'), ('pedrodu01', 3750000.0, 269.7247793016178, 'G_2b'), ('penabr01', 428000.0, 92.50940050361754, 'G_c'), ('penaca01', 10125000.0, 270.18286306310625, 'G_1b'), ('pencehu01', 3500000.0, 440.347057838243, 'G_rf'), ('pennicl01', 405000.0, 270.8590886429096, 'G_ss'), ('peraljh01', 4850000.0, 250.45504427116225, 'G_3b'), ('phillbr01', 6937500.0, 469.80867514401416, 'G_2b'), ('piefe01', 420000.0, 203.09498253236632, 'G_lf'), ('pierrju01', 7000000.0, 480.94303905501226, 'G_lf'), ('pierzaj01', 6750000.0, 258.0649789720932, 'G_c'), ('polanpl01', 5166666.0, 327.88061275889777, 'G_3b'), ('posadjo01', 13100000.0, 253.9909263894321, 'G_c'), ('pradoma01', 440000.0, 393.26903387062424, 'G_2b'), ('pujolal01', 14595953.0, 618.3020238504466, 'G_1b'), 
('puntoni01', 4000000.0, 138.1311194815289, 'G_3b'), ('quentca01', 3200000.0, 338.6181721300017, 'G_rf'), ('quinthu01', 750000.0, 97.607251015581, 'G_c'), ('raburry01', 438000.0, 221.52289008028538, 'G_lf'), ('ramiral03', 1225000.0, 394.0558898215156, 'G_ss'), ('ramirar01', 16750000.0, 309.0035615820236, 'G_3b'), ('ramirha01', 7000000.0, 453.5144949422221, 'G_ss'), ('ramirma02', 18695006.0, 216.11511749525494, 'G_lf'), ('rasmuco01', 418000.0, 338.69145641828476, 'G_cf'), ('renteed01', 10000000.0, 173.4954765674374, 'G_ss'), ('reyesjo01', 9375000.0, 423.9654712811383, 'G_ss'), ('reynoma01', 833333.0, 269.00607652312357, 'G_3b'), ('riosal01', 10200000.0, 446.7909287881416, 'G_cf'), ('riverju01', 4250000.0, 284.1557755746044, 'G_lf'), ('roberbr01', 10000000.0, 187.40997046903902, 'G_2b'), ('rodrial01', 33000000.0, 389.2016205912129, 'G_3b'), ('rodriiv01', 3000000.0, 157.3842168964728, 'G_c'), ('rodrise01', 405500.0, 190.90604332170915, 'G_2b'), ('rolensc01', 7666666.0, 318.97529645745686, 'G_3b'), ('rolliji01', 8500000.0, 268.35041973247587, 'G_ss'), ('rosalad01', 410000.0, 176.1891738049764, 'G_2b'), ('rossco01', 4450000.0, 257.93963857402605, 'G_cf'), ('rossda01', 1600000.0, 114.66943871777968, 'G_c'), ('rowanaa01', 13600000.0, 220.62233202466172, 'G_cf'), ('ruizca01', 1900000.0, 224.80263378093576, 'G_c'), ('ryanbr01', 425000.0, 222.3176754107459, 'G_ss'), ('saltaja01', 418580.0, 92.20685687906726, 'G_c'), ('sanchfr01', 6000000.0, 247.77210320486785, 'G_2b'), ('sanchga01', 401000.0, 304.5043868261765, 'G_1b'), ('sandopa01', 465000.0, 282.2516308639428, 'G_3b'), ('santira01', 1250000.0, 199.68820137972403, 'G_ss'), ('schiena01', 416500.0, 193.54150978019214, 'G_rf'), ('schnebr01', 1125000.0, 145.94761723338073, 'G_c'), ('schumsk01', 2000000.0, 277.2355388219404, 'G_2b'), ('scottlu01', 4050000.0, 343.1239413520237, 'G_dh'), ('scutama01', 5500000.0, 374.5381213349094, 'G_ss'), ('shoppke01', 2250000.0, 85.1031568767749, 'G_c'), ('sizemgr01', 5766666.0, 
114.09554147328913, 'G_cf'), ('smithse01', 407000.0, 284.7299404876276, 'G_lf'), ('snidetr01', 405800.0, 194.59040060594577, 'G_lf'), ('snydech02', 5250000.0, 194.12667161216137, 'G_c'), ('soriaal01', 19000000.0, 288.54347565136635, 'G_lf'), ('sotoge01', 575000.0, 247.86312477961752, 'G_c'), ('spande01', 750000.0, 362.4869991480717, 'G_cf'), ('spilbry01', 1300000.0, 220.13738223179058, 'G_rf'), ('stairma01', 700000.0, 116.3111161867439, 'G_lf'), ('stewaia01', 408000.0, 252.09026873544207, 'G_3b'), ('stubbdr01', 400000.0, 336.48719839965145, 'G_cf'), ('suzukic01', 18000000.0, 418.188067615887, 'G_rf'), ('suzukku01', 420000.0, 283.0138874144269, 'G_c'), ('sweenry01', 420000.0, 188.02091814838502, 'G_rf'), ('swishni01', 6850000.0, 378.723348837584, 'G_rf'), ('teahema01', 3750000.0, 169.46988099722643, 'G_3b'), ('teixema01', 20625000.0, 463.6036272523746, 'G_1b'), ('tejadmi01', 6000000.0, 331.6633715168732, 'G_3b'), ('thamema01', 900000.0, 173.69909952411996, 'G_dh'), ('theriry01', 2600000.0, 342.90715646403817, 'G_2b'), ('thomeji01', 1500000.0, 295.14216685196044, 'G_dh'), ('torrean02', 426000.0, 309.25366642651403, 'G_cf'), ('torreyo01', 750000.0, 192.1570714865482, 'G_c'), ('towlejr01', 406500.0, 90.85457313645273, 'G_c'), ('tulowtr01', 3500000.0, 417.4571183497283, 'G_ss'), ('ugglada01', 7800000.0, 404.12140251793943, 'G_2b'), ('uptonbj01', 3000000.0, 314.15508684107675, 'G_cf'), ('uptonju01', 708333.0, 284.74067416779053, 'G_rf'), ('uribeju01', 3250000.0, 316.7671936384141, 'G_ss'), ('utleych01', 15285714.0, 368.76056664921794, 'G_2b'), ('varitja01', 3000000.0, 139.0543229004523, 'G_c'), ('victosh01', 5000000.0, 426.5153910014275, 'G_cf'), ('vizquom01', 1375000.0, 225.11735595874325, 'G_3b'), ('vottojo01', 525000.0, 498.0662091337048, 'G_1b'), ('weeksri01', 2750000.0, 397.84080131552497, 'G_2b'), ('wellsve01', 15687500.0, 430.3575655918853, 'G_cf'), ('werthja01', 7500000.0, 400.3770901185276, 'G_rf'), ('whiteel03', 405000.0, 139.23571487599128, 'G_c'), 
('wietema01', 400000.0, 197.83700326913618, 'G_c'), ('wiggity01', 3500000.0, 288.9837347371717, 'G_1b'), ('willijo03', 4600000.0, 276.14061345582604, 'G_lf'), ('willire03', 625000.0, 153.25891936345099, 'G_lf'), ('wilsobo02', 402000.0, 112.69501739683736, 'G_c'), ('wilsoja02', 5000000.0, 120.26357618187564, 'G_ss'), ('woodbr01', 410000.0, 96.43187002710744, 'G_3b'), ('wrighda03', 10250000.0, 383.9222697316697, 'G_3b'), ('youklke01', 9375000.0, 361.767851623316, 'G_1b'), ('youngch04', 3450000.0, 384.9815522003823, 'G_cf'), ('youngde03', 2600000.0, 323.2338517765238, 'G_lf'), ('youngmi02', 13174974.0, 395.337904946442, 'G_3b'), ('zimmery01', 6350000.0, 397.1636272960616, 'G_3b'), ('zobribe01', 438100.0, 292.98162745058687, 'G_rf')]
# Maximum total salary allowed for the selected team.
salary_cap = 50000000

# Roster slots to fill, one candidate each. 'pstaff' must stay last: it is
# chosen in a final pass over the completed batting line-ups.
positions = ['G_c', 'G_1b', 'G_2b', 'G_3b', 'G_ss', 'G_lf', 'G_cf', 'G_rf', 'G_dh', 'pstaff']


def _pareto_front(options, cost, value):
    """Return the options that no other option beats on both cost and value.

    ``cost`` and ``value`` are functions that extract the two numbers from an
    option. An option is dropped when another one costs no more and is worth at
    least as much. The result is sorted by ascending cost, which also makes it
    strictly ascending in value. Among exact ties the earliest input is kept.
    """
    front = []
    # Cheapest first, and for equal cost the most valuable first, so one sweep
    # that keeps only strict improvements in value yields the front.
    for option in sorted(options, key=lambda opt: (cost(opt), -value(opt))):
        if not front or value(option) > value(front[-1]):
            front.append(option)
    return front


# Candidates per slot as (name, salary, predicted_score), reduced to the
# salary/score Pareto front: a candidate that costs at least as much and scores
# no better than another can never improve a team.
position_domains = dict((pos, []) for pos in positions)
for player in masterlist:
    position_domains[player[3]].append((player[0], player[1], player[2]))
for pos in positions:
    position_domains[pos] = _pareto_front(
        position_domains[pos], cost=lambda p: p[1], value=lambda p: p[2])

# Partial teams are (total_salary, total_score, [names]), grown one slot at a time.
teams = [(0, 0, [])]
bestTeam = (0, 0, [])
for pos in positions:
    print(pos)
    if pos == 'pstaff':
        for team in teams:
            # The front is sorted by salary and score together, so walking it
            # backwards meets the best affordable staff first.
            for player in reversed(position_domains[pos]):
                if team[0] + player[1] > salary_cap:
                    continue
                if bestTeam[1] < team[1] + player[2]:
                    # Extend this team's own roster, not the previous best's.
                    bestTeam = (team[0] + player[1], team[1] + player[2], team[2] + [player[0]])
                break  # any cheaper staff on the front scores lower
        break

    newTeams = []
    for team in teams:
        for player in position_domains[pos]:
            if team[0] + player[1] > salary_cap:
                break  # sorted by salary: everything after this is dearer
            newTeams.append((team[0] + player[1], team[1] + player[2], team[2] + [player[0]]))
    # Prune dominated partial teams: whatever completes the dearer, lower-scoring
    # team also fits the cheaper, higher-scoring one, so the optimum is unchanged
    # while the number of teams carried forward stays manageable.
    teams = _pareto_front(newTeams, cost=lambda t: t[0], value=lambda t: t[1])
print(bestTeam)