## Import statements and globals

In [2]:
#from pulp import *
from gurobi import *
import math
import pandas as pd
import numpy as np
from sklearn import preprocessing


HitterPositions = ['C', '1B', '2B', 'SS', '3B', 'OF', 'outer', 'inner', 'util']
HitterMetrics = ['R', 'HR', 'RBI', 'SB', 'AVG']

PitcherPositions = ['SP', 'RP']
PitcherMetrics = ['W', 'SV', 'K', 'ERA', 'WHIP']


R = sorted([1183,1145,1125,1100,1082,1064,1046,1028,1006,980,949,893])
HR = sorted([383,366,355,345,338,328,324,314,305,296,282,261])
RBI = sorted([1137,1104,1079,1058,1041,1023,1007,987,964,942,910,855])
SB = sorted([152,138,128,121,114,108,103,96,90,84,76,64])
AVG = sorted([0.278,0.274,0.272,0.270,0.269,0.267,0.266,0.264,0.262,0.260,0.258,0.254])

K = sorted([1622,1557,1509,1470,1438,1404,1370,1332,1290,1245,1182,1070])
W = sorted([103,97,94,91,88,85,83,80,77,74,69,61])
SV = sorted([100,89,82,77,71,66,61,54,48,40,31,18])
ERA = sorted([3.497,3.651,3.754,3.834,3.909,4.041,4.116,4.192,4.276,4.375,4.525],reverse=True)
WHIP = sorted([1.131,1.164,1.184,1.200,1.214,1.228,1.242,1.256,1.270,1.285,1.303,1.329],reverse=True)


hitter_scores = {'R':R,'HR':HR,'RBI':RBI,'SB':SB,'AVG':AVG}
pitcher_scores = {'K':K,'W':W,'SV':SV,'ERA':ERA,'WHIP':WHIP}

hitter_maxes = [max(hitter_scores['R']),max(hitter_scores['HR']),max(hitter_scores['RBI']),max(hitter_scores['SB']),max(hitter_scores['AVG'])]
pitcher_maxes = [max(pitcher_scores['W']),max(pitcher_scores['SV']),max(pitcher_scores['K']),min(pitcher_scores['ERA']),min(pitcher_scores['WHIP'])]

hitter_mins = [hitter_scores['R'][5],hitter_scores['HR'][5],hitter_scores['RBI'][5],hitter_scores['SB'][5],hitter_scores['AVG'][5]]
pitcher_mins = [pitcher_scores['W'][5],pitcher_scores['SV'][5],pitcher_scores['K'][5],pitcher_scores['ERA'][5],pitcher_scores['WHIP'][5]]

hitter_mins

[1046, 324, 1007, 103, 0.266]

## Import data

In [3]:
def get_hitters():
    hitters = pd.read_csv('razzball-hitters.csv', index_col='#', usecols=['#','Name','Team','ESPN','R','HR', 'RBI', 'SB','AVG','AB','H'])
    hitters.rename_axis('Razzball_Rank', inplace=True)
    hitters.reset_index(inplace=True)

    # sort and rank
    for metric in HitterMetrics:
        hitters.sort_values(by=[metric],inplace=True, ascending=False)
        hitters.reset_index(inplace=True, drop=True)
        hitters.index.rename('{} rank'.format(metric), inplace=True)
        hitters.reset_index(inplace=True)
    hitters['Ovr'] = (hitters['AVG rank'] + hitters['SB rank'] + hitters['RBI rank'] + hitters['HR rank'] + hitters['R rank']) / 5
    #hitters['Ovr'] = (hitters['Ovr'] + hitters['Razzball_Rank']) / 2
    hitters.rename(columns={'ESPN':'POS'}, inplace=True)
    
    hitters = hitters.assign(POS=hitters.POS.str.split('/')).explode('POS')
    hitters.sort_values(by=['Ovr'],inplace=True,ascending=True)
    return hitters

def get_starting_pitchers():
    pitchers = pd.read_csv('razzball-pitchers.csv', index_col='#', usecols=['#','Name','Team','POS','W', 'SV', 'K', 'ERA', 'WHIP','IP','BB','H', 'ER'])
    pitchers.rename_axis('Razzball_Rank', inplace=True)
    pitchers.reset_index(inplace=True)
    pitchers.rename(columns={'H':'Hits'}, inplace=True)
    
    pitchers = pitchers.assign(POS=pitchers.POS.str.split('/')).explode('POS')
    sp = pitchers[pitchers['POS'] == 'SP'].reset_index(drop=True)
    rp = pitchers[pitchers['POS'] == 'RP'].reset_index(drop=True)
    
    for metric in PitcherMetrics:
        if(metric != 'SV'):
            sp.sort_values(by=[metric],inplace=True, ascending=(metric=='WHIP' or metric=='ERA'))
            sp.reset_index(inplace=True, drop=True)
            sp.rename_axis('{} rank'.format(metric), inplace=True)
            sp.reset_index(inplace=True)
    
    sp['Ovr'] = (sp['W rank'] + sp['K rank'] + sp['ERA rank'] + sp['WHIP rank']) / 4
    sp.sort_values(by=['Ovr'],inplace=True,ascending=True)
    
    return sp

def get_closing_pitchers():
    pitchers = pd.read_csv('razzball-pitchers.csv', index_col='#', usecols=['#','Name','Team','POS','W', 'SV', 'K', 'ERA', 'WHIP','IP','BB','H', 'ER'])
    pitchers.rename_axis('Razzball_Rank', inplace=True)
    pitchers.reset_index(inplace=True)
    pitchers.rename(columns={'H':'Hits'}, inplace=True)
    
    pitchers = pitchers.assign(POS=pitchers.POS.str.split('/')).explode('POS')
    rp = pitchers[pitchers['POS'] == 'RP'].reset_index(drop=True)
    
    for metric in PitcherMetrics:
        if(metric != 'W'):
            rp.sort_values(by=[metric],inplace=True, ascending=(metric=='WHIP' or metric=='ERA'))
            rp.reset_index(inplace=True, drop=True)
            rp.rename_axis('{} rank'.format(metric), inplace=True)
            rp.reset_index(inplace=True)
    
    rp['Ovr'] = (rp['SV rank'] + rp['K rank'] + rp['ERA rank'] + rp['WHIP rank']) / 4
    rp.sort_values(by=['Ovr'],inplace=True,ascending=True)
    
    return rp
    
def get_hitter_prices(hitters):
    prices = pd.read_csv('razzball-hitters-prices.csv', index_col='#', usecols=['#', 'Name', 'Team', '5×5 $', '$R', '$HR', '$RBI', '$SB', '$AVG (no OBP)'])
    prices.rename(columns={'5×5 $': '$'},inplace=True)
    prices['$'] = prices['$'].apply(lambda x: 1 if x <=1 else x)
    hitters = hitters.merge(prices, left_on=['Name', 'Team'], right_on=['Name','Team'], how='left')
    return hitters

def get_pitcher_prices(pitchers):
    prices = pd.read_csv('razzball-pitchers-prices.csv', index_col='#', usecols=['#','Name','Team','5×5 $','$W (no QS)','$SV (no HLD)','$K','$WHIP','$ERA'])
    prices.rename(columns={'5×5 $': '$'},inplace=True)
    prices['$'] = prices['$'].apply(lambda x: 1 if x <=1 else x)
    pitchers = pitchers.merge(prices, left_on=['Name', 'Team'], right_on=['Name','Team'], how='left')
    return pitchers
    
def split_by_pos(hitters, pos):
    if (pos == 'C'):
        return hitters[hitters['POS'] == 'C']
    elif (pos == '1B'):
        return hitters[hitters['POS'] == '1B']
    elif (pos == '2B'):
        return hitters[hitters['POS'] == '2B']
    elif (pos == 'SS'):
        return hitters[hitters['POS'] == 'SS']
    elif (pos == '3B'):
        return hitters[hitters['POS'] == '3B']
    elif (pos == 'OF'):
        return hitters[hitters['POS'] == 'OF']
    elif (pos == 'corner'):
        return hitters[hitters['POS'] == '1B'].append(hitters[hitters['POS'] == '3B'])
    elif (pos == 'inner'):
        return hitters[hitters['POS'] == '2B'].append(hitters[hitters['POS'] == 'SS'])
    elif (pos == 'SP'):
        return hitters[hitters['POS'] == 'SP']
    elif (pos == 'RP'):
        return hitters[hitters['POS'] == 'RP']
    else: # util
        return hitters
        

## Run calculations

In [4]:
def group_players(players):
    bandwidth = estimate_bandwidth(players, quantile=0.05, n_samples=100)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    labels = ms.fit_predict(players)
    #labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    #labels_unique = np.unique(labels)
    #n_clusters_ = len(labels_unique)

    #print("number of estimated clusters : %d" % n_clusters_)
    players['group'] = labels
    return players

In [5]:
def group_pitchers_pos(pitchers):
    return [group_players(split_by_pos(pitchers, x).drop(['W rank', 'SV rank', 'K rank', 'ERA rank', 'WHIP rank', 'Razzball_Rank','Team','POS'], axis=1)) for x in PitcherPositions]

def group_by_pos(hitters):
    return [group_players(split_by_pos(hitters, x).drop(['AVG rank', 'SB rank', 'RBI rank', 'HR rank', 'R rank', 'Razzball_Rank','Team','POS'], axis=1)) for x in HitterPositions]

### Prep dataframes

In [6]:
hitter_tables = { 'C': "",
                  '1B': "",
                  '2B': "",
                  'SS': "",
                  '3B': "",
                  'inner': "",
                  'outer': "",
                  'OF': "",
                  'util': "" }

pitcher_tables = { 'SP': "",
                   'RP': "" }

In [7]:
hitters = get_hitters().set_index('Name')
groups = []
#C, 1B, 2B, SS, 3B, OF, outer, inner, util
#groups = group_by_pos(hitters)
groups = [split_by_pos(hitters, x) for x in HitterPositions]
#hitters['group'] = group_players(hitters.drop(['AVG rank', 'SB rank', 'RBI rank', 'HR rank', 'R rank', 'Razzball_Rank','Team','ESPN'], axis=1))
for i in range(len(groups)):
    groups[i] = groups[i].sort_values(by=['Ovr'],ascending=True)
    hitter_tables[HitterPositions[i]] = groups[i]
    #groups[i].to_csv('{}.csv'.format(HitterPositions[i]))

pitchers = get_pitchers().set_index('Name')
#pitchers
pitchers.sort_values(by=['Razzball_Rank'],inplace=True,ascending=True)
pitchers
#groups = group_pitchers_pos(pitchers)
#groups = [split_by_pos(pitchers, x) for x in PitcherPositions]
#for i in range (len(groups)):
#    groups[i] = groups[i].sort_values(by=['Ovr'],ascending=True)
#    pitcher_tables[PitcherPositions[i]] = groups[i]
    
#pitcher_tables['RP']


NameError: name 'get_pitchers' is not defined

In [9]:
def find_min():
    # 20 1B, 20 2B, 20 SS, 20 3B
    # 70 OF, 100 SP, 45 RP
    # = 50,400,000,000 iterations
    
    # 18 1B, 18 2B, 18 SS, 18 3B
    # 70 OF, 80 SP, 40 RP
    # = 23,514,624,000 iterations
    prob = LpProblem("IdealRosterProblem",LpMinimize)
    
    all_players = get_all_players()
    
    all_names = list(all_players.index)
    name_list = list(all_players['Name'])
    allCosts = dict(zip(all_names,all_players['$']))
    
    allRuns = dict(zip(all_names,all_players['R rank']))
    allHRs = dict(zip(all_names,all_players['HR rank']))
    allRBIs = dict(zip(all_names,all_players['RBI rank']))
    allSBs = dict(zip(all_names,all_players['SB rank']))
    allAVG = dict(zip(all_names,all_players['AVG rank']))
    
    allWs = dict(zip(all_names,all_players['W rank']))
    allKs = dict(zip(all_names,all_players['K rank']))
    allSVs = dict(zip(all_names,all_players['SV rank']))
    allERA = dict(zip(all_names,all_players['ERA rank']))
    allWHIP = dict(zip(all_names,all_players['WHIP rank']))
    allPOS = dict(zip(all_names,all_players['POS']))
    
    player_vars = LpVariable.dicts("Player",all_names,lowBound=0,upBound=1,cat='Integer')
    player_chosen = LpVariable.dicts("Chosen",name_list,0,1,cat='Integer')
    
    print("all plrs: ", len(all_players.index))
    print('vars: ', len(player_vars))
    print('nms: ', len(all_names))
    
    prob += lpSum([(allRuns[i]+allHRs[i]+allRBIs[i]+allSBs[i]+allAVG[i]+allWs[i]+allKs[i]+allSVs[i]+allERA[i]+allWHIP[i])*player_vars[i] for i in all_names])
    
    prob += lpSum([allCosts[i]*player_vars[i] for i in all_names]) <= 260
    
    prob += lpSum([(allPOS[i]=='C')*player_vars[i] for i in all_names]) == 1
    
    # update these based on position depth
    prob += lpSum([(allPOS[i]=='1B')*player_vars[i] for i in all_names]) == 1
    prob += lpSum([(allPOS[i]=='2B')*player_vars[i] for i in all_names]) == 2
    prob += lpSum([(allPOS[i]=='3B')*player_vars[i] for i in all_names]) == 2
    prob += lpSum([(allPOS[i]=='SS')*player_vars[i] for i in all_names]) == 2
    
    prob += lpSum([(allPOS[i]=='OF')*player_vars[i] for i in all_names]) == 6
    
    prob += lpSum([(allPOS[i]=='SP')*player_vars[i] for i in all_names]) == 8
    
    prob += lpSum([(allPOS[i]=='RP')*player_vars[i] for i in all_names]) == 3
    
    prob += lpSum([(allPOS[i]=='DH')*player_vars[i] for i in all_names]) == 0
    
    
    # ensure no player is selected twice
    #sum(isEntryUsed[x] for x in range(len(entries)) if entries[x].name == name) <= 1
    #prob += lpSum([all_players.iloc[int(player_vars[i].name.split('_')[1])] for i in all_names]) == 1
    #prob += lpSum([all_players.iloc[i] for i in all_names if player_vars[i] == 1]) == 25
    for f in all_names:
        prob += player_vars[f]>= player_chosen[all_players.iloc[f]['Name']]*0.1
        prob += player_vars[f]<= player_chosen[all_players.iloc[f]['Name']]*1e5
    
    prob += lpSum(player_chosen)==25
    
    
    prob.solve()
    Team = pd.DataFrame()
    print("Status:", LpStatus[prob.status])
    #print(prob.variables())
    for v in prob.variables():
        if v.varValue>0 and not 'Chosen' in v.name:
            #print(v.name, "=", v.varValue)
            #print(all_players.index)
            #print(v.name.split('_')[1])
            #print(all_players.loc[int(v.name.split('_')[1])])
            Team = Team.append(all_players.loc[int(v.name.split('_')[1])])
            
            #print(all_players.loc[nm[1] + " " + nm[2]])
    return Team.append({'Name':'Total','AVG':sum(Team['H'])/sum(Team['AB']),'R':sum(Team['R']),'RBI':sum(Team['RBI']),'HR':sum(Team['HR']),'SB':sum(Team['SB']),'WHIP':(sum(Team['BB'])+sum(Team['Hits']))/sum(Team['IP']),'ERA':(sum(Team['ER'])/sum(Team['IP']))*9,'W':sum(Team['W']),'SV':sum(Team['SV']),'K':sum(Team['K']),'$':sum(Team['$'])},ignore_index=True)
            

def find_max():
    prob = LpProblem("IdealRosterProblem",LpMaximize)
    
    #all_players = get_hitter_prices(get_hitters()).nsmallest(150,'Ovr').append(get_pitcher_prices(get_starting_pitchers()).nsmallest(120,'Ovr'), ignore_index=True).append(get_pitcher_prices(get_closing_pitchers()).nsmallest(30,'Ovr'), ignore_index=True)
    #all_players.rename_axis('idx', inplace=True)
    
    #all_players.fillna(0, inplace=True)
    
    #norm_players = all_players[['R', 'HR', 'RBI', 'SB', 'AVG', 'W', 'SV', 'K', 'ERA', 'WHIP']]
    #norm_players = pd.DataFrame(preprocessing.normalize(norm_players),columns=['R', 'HR', 'RBI', 'SB', 'AVG', 'W', 'SV', 'K', 'ERA', 'WHIP'])
    
    
    #return(all_players[['normR', 'normHR', 'normRBI', 'normSB', 'normAVG', 'normW', 'normSV', 'normK', 'normERA', 'normWHIP']])
    all_players = get_all_players()
    
    
    all_names = list(all_players.index)
    name_list = list(all_players['Name'])
    allCosts = dict(zip(all_names,all_players['$']))
    
    #total_R = sum(all_players['R'])
    #total_HR = sum(all_players['HR'])
    #total_RBI = sum(all_players['RBI'])
    #total_SB = sum(all_players['SB'])
    #total_H = sum(all_players['H'])
    #total_AB = sum(all_players['AB'])
    
    total_R = np.mean(all_players['R'])
    total_HR = np.mean(all_players['HR'])
    total_RBI = np.mean(all_players['RBI'])
    total_SB = np.mean(all_players['SB'])
    total_H = np.mean(all_players['H'])
    total_AB = np.mean(all_players['AB'])
    
    
    #all_players['AdjAVG'] = all_players['AVG'].apply(lambda x: 0 if x == 0 else 1/x)
    
    allRuns = dict(zip(all_names,all_players['normR']))
    allHRs = dict(zip(all_names,all_players['normHR']))
    allRBIs = dict(zip(all_names,all_players['normRBI']))
    allSBs = dict(zip(all_names,all_players['normSB']))
    allAVG = dict(zip(all_names,all_players['normAVG']))
    #allH = dict(zip(all_names,all_players['normH']))
    #allAB = dict(zip(all_names,all_players['normAB']))
    #allAdjAVG = dict(zip(all_names,all_players['AVG']/max(all_players['AVG'])))
    
    #total_W = sum(all_players['W'])
    #total_K = sum(all_players['K'])
    #total_SV = sum(all_players['SV'])
    #total_IP = sum(all_players['IP'])
    #total_ER = sum(all_players['ER'])
    #total_BB = sum(all_players['BB'])
    #total_Hits = sum(all_players['Hits'])
    
    total_W = np.mean(all_players['W'])
    total_K = np.mean(all_players['K'])
    total_SV = np.mean(all_players['SV'])
    total_IP = np.mean(all_players['IP'])
    total_ER = np.mean(all_players['ER'])
    total_BB = np.mean(all_players['BB'])
    total_Hits = np.mean(all_players['Hits'])
    
    #all_players['AdjER'] = 0 if all_players['ER'] == 0 else 1/all_players['ER']
    all_players['AdjER'] = all_players['ER'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['AdjBB'] = all_players['BB'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['AdjHits'] = all_players['Hits'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['AdjERA'] = all_players['normERA'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['AdjWHIP'] = all_players['normWHIP'].apply(lambda x: 0 if x == 0 else 1/x)
    
    allWs = dict(zip(all_names,all_players['normW']))
    allKs = dict(zip(all_names,all_players['normK']))
    allSVs = dict(zip(all_names,all_players['normSV']))
    allERA = dict(zip(all_names,all_players['normERA']))
    allWHIP = dict(zip(all_names,all_players['normWHIP']))
    allPOS = dict(zip(all_names,all_players['POS']))
    allIP = dict(zip(all_names,all_players['IP']/total_IP))
    allER = dict(zip(all_names,all_players['AdjER']))
    allBB = dict(zip(all_names,all_players['AdjBB']))
    allHits = dict(zip(all_names,all_players['AdjHits']))
    allAdjERA = dict(zip(all_names,all_players['AdjERA']))
    allAdjWHIP = dict(zip(all_names,all_players['AdjWHIP']))
    
    
    player_vars = LpVariable.dicts("Player",all_names,lowBound=0,upBound=1,cat='Integer')
    player_chosen = LpVariable.dicts("Chosen",name_list,0,1,cat='Integer')
    
    print("all plrs: ", len(all_players.index))
    print('vars: ', len(player_vars))
    print('nms: ', len(all_names))
    
    prob += lpSum([allRuns[i]*player_vars[i] for i in all_names])
    prob += lpSum([allHRs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allRBIs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allSBs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allAVG[i]*player_vars[i] for i in all_names])
    prob += lpSum([allWs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allKs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allSVs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allAdjERA[i]*player_vars[i] for i in all_names])
    prob += lpSum([allAdjWHIP[i]*player_vars[i] for i in all_names])
    
    #prob += lpSum(score({'R':[allRuns[i]*player_vars[i] for i in all_names]}))
    
    
    prob += lpSum([allCosts[i]*player_vars[i] for i in all_names]) <= 260
    
    prob += lpSum([(allPOS[i]=='C')*player_vars[i] for i in all_names]) == 1
    
    # update these based on position depth
    prob += lpSum([(allPOS[i]=='1B')*player_vars[i] for i in all_names]) == 1
    prob += lpSum([(allPOS[i]=='2B')*player_vars[i] for i in all_names]) == 2
    prob += lpSum([(allPOS[i]=='3B')*player_vars[i] for i in all_names]) == 2
    prob += lpSum([(allPOS[i]=='SS')*player_vars[i] for i in all_names]) == 2
    
    prob += lpSum([(allPOS[i]=='OF')*player_vars[i] for i in all_names]) == 6
    
    prob += lpSum([(allPOS[i]=='SP')*player_vars[i] for i in all_names]) == 8
    
    prob += lpSum([(allPOS[i]=='RP')*player_vars[i] for i in all_names]) == 3
    prob += lpSum([(allPOS[i]=='DH')*player_vars[i] for i in all_names]) == 0
    
    
    # ensure no player is selected twice
    #sum(isEntryUsed[x] for x in range(len(entries)) if entries[x].name == name) <= 1
    #prob += lpSum([all_players.iloc[int(player_vars[i].name.split('_')[1])] for i in all_names]) == 1
    #prob += lpSum([all_players.iloc[i] for i in all_names if player_vars[i] == 1]) == 25
    for f in all_names:
        prob += player_vars[f]>= player_chosen[all_players.iloc[f]['Name']]*0.1
        prob += player_vars[f]<= player_chosen[all_players.iloc[f]['Name']]*1e5
    
    prob += lpSum(player_chosen)==25

    LpSolverDefault.msg = 1 
    prob.solve()
    Team = pd.DataFrame()
    print("Status:", LpStatus[prob.status])
    for v in prob.variables():
        if v.varValue>0 and not 'Chosen' in v.name:
            #print(v.name, "=", v.varValue)
            #print(all_players.index)
            #print(v.name.split('_')[1])
            #print(all_players.loc[int(v.name.split('_')[1])])
            Team = Team.append(all_players.loc[int(v.name.split('_')[1])])
            
            #print(all_players.loc[nm[1] + " " + nm[2]])
    return Team.append({'Name':'Total','AVG':sum(Team['H'])/sum(Team['AB']),'R':sum(Team['R']),'RBI':sum(Team['RBI']),'HR':sum(Team['HR']),'SB':sum(Team['SB']),'WHIP':(sum(Team['BB'])+sum(Team['Hits']))/sum(Team['IP']),'ERA':(sum(Team['ER'])/sum(Team['IP']))*9,'W':sum(Team['W']),'SV':sum(Team['SV']),'K':sum(Team['K']),'$':sum(Team['$'])},ignore_index=True)
 

def find_both():
    prob = LpProblem("IdealRosterProblem",LpMaximize)
    
    all_players = get_hitter_prices(get_hitters()).nsmallest(150,'Ovr').append(get_pitcher_prices(get_starting_pitchers()).nsmallest(120,'Ovr'), ignore_index=True).append(get_pitcher_prices(get_closing_pitchers()).nsmallest(30,'Ovr'), ignore_index=True)
    all_players.rename_axis('idx', inplace=True)
    
    all_players.fillna(0, inplace=True)
    
    all_names = list(all_players.index)
    name_list = list(all_players['Name'])
    allCosts = dict(zip(all_names,all_players['$']))
    
    total_R = sum(all_players['R'])
    total_HR = sum(all_players['HR'])
    total_RBI = sum(all_players['RBI'])
    total_SB = sum(all_players['SB'])
    total_H = sum(all_players['H'])
    total_AB = sum(all_players['AB'])
    #all_players['AdjAVG'] = all_players['AVG'].apply(lambda x: 0 if x == 0 else 1/x)
    
    allRuns = dict(zip(all_names,all_players['R']/total_R))
    allHRs = dict(zip(all_names,all_players['HR']/total_HR))
    allRBIs = dict(zip(all_names,all_players['RBI']/total_RBI))
    allSBs = dict(zip(all_names,all_players['SB']/total_SB))
    allAVG = dict(zip(all_names,all_players['AVG']))
    allH = dict(zip(all_names,all_players['H']/total_H))
    allAB = dict(zip(all_names,all_players['AB']/total_AB))
    allAdjAVG = dict(zip(all_names,all_players['AVG']/max(all_players['AVG'])))
    
    total_W = sum(all_players['W'])
    total_K = sum(all_players['K'])
    total_SV = sum(all_players['SV'])
    total_IP = sum(all_players['IP'])
    total_ER = sum(all_players['ER'])
    total_BB = sum(all_players['BB'])
    total_Hits = sum(all_players['Hits'])
    
    #all_players['AdjER'] = 0 if all_players['ER'] == 0 else 1/all_players['ER']
    all_players['AdjER'] = all_players['ER'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['AdjBB'] = all_players['BB'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['AdjHits'] = all_players['Hits'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['AdjERA'] = all_players['ERA'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['AdjWHIP'] = all_players['WHIP'].apply(lambda x: 0 if x == 0 else 1/x)
    
    allWs = dict(zip(all_names,all_players['W']/total_W))
    allKs = dict(zip(all_names,all_players['K']/total_K))
    allSVs = dict(zip(all_names,all_players['SV']/total_SV))
    allERA = dict(zip(all_names,all_players['ERA']))
    allWHIP = dict(zip(all_names,all_players['WHIP']))
    allPOS = dict(zip(all_names,all_players['POS']))
    allIP = dict(zip(all_names,all_players['IP']/total_IP))
    allER = dict(zip(all_names,all_players['AdjER']))
    allBB = dict(zip(all_names,all_players['AdjBB']))
    allHits = dict(zip(all_names,all_players['AdjHits']))
    allAdjERA = dict(zip(all_names,all_players['AdjERA']))
    allAdjWHIP = dict(zip(all_names,all_players['AdjWHIP']))
    
    allRunsRank = dict(zip(all_names,all_players['R rank'].apply(lambda x: 1 if x == 0 else 1/x)))
    allHRsRank = dict(zip(all_names,all_players['HR rank'].apply(lambda x: 1 if x == 0 else 1/x)))
    allRBIsRank = dict(zip(all_names,all_players['RBI rank'].apply(lambda x: 1 if x == 0 else 1/x)))
    allSBsRank = dict(zip(all_names,all_players['SB rank'].apply(lambda x: 1 if x == 0 else 1/x)))
    allAVGRank = dict(zip(all_names,all_players['AVG rank'].apply(lambda x: 1 if x == 0 else 1/x)))
    
    allWsRank = dict(zip(all_names,all_players['W rank'].apply(lambda x: 1 if x == 0 else 1/x)))
    allKsRank = dict(zip(all_names,all_players['K rank'].apply(lambda x: 1 if x == 0 else 1/x)))
    allSVsRank = dict(zip(all_names,all_players['SV rank'].apply(lambda x: 1 if x == 0 else 1/x)))
    allERARank = dict(zip(all_names,all_players['ERA rank'].apply(lambda x: 1 if x == 0 else 1/x)))
    allWHIPRank = dict(zip(all_names,all_players['WHIP rank'].apply(lambda x: 1 if x == 0 else 1/x)))
    
    player_vars = LpVariable.dicts("Player",all_names,lowBound=0,upBound=1,cat='Integer')
    player_chosen = LpVariable.dicts("Chosen",name_list,0,1,cat='Integer')
    
    print("all plrs: ", len(all_players.index))
    print('vars: ', len(player_vars))
    print('nms: ', len(all_names))
    
    prob += lpSum([(allRuns[i]+allHRs[i]+allRBIs[i]+allSBs[i]+allAdjAVG[i]+allWs[i]+allKs[i]+allSVs[i]+allAdjERA[i]+allAdjWHIP[i])*player_vars[i] for i in all_names])
    
    prob += lpSum([(allRunsRank[i]+allSBsRank[i]+allAVGRank[i]+allERARank[i]+allWHIPRank[i])*player_vars[i] for i in all_names])

    
    prob += lpSum([allCosts[i]*player_vars[i] for i in all_names]) <= 260
    
    prob += lpSum([(allPOS[i]=='C')*player_vars[i] for i in all_names]) == 1
    
    # update these based on position depth
    prob += lpSum([(allPOS[i]=='1B')*player_vars[i] for i in all_names]) == 1
    prob += lpSum([(allPOS[i]=='2B')*player_vars[i] for i in all_names]) == 2
    prob += lpSum([(allPOS[i]=='3B')*player_vars[i] for i in all_names]) == 2
    prob += lpSum([(allPOS[i]=='SS')*player_vars[i] for i in all_names]) == 2
    
    prob += lpSum([(allPOS[i]=='OF')*player_vars[i] for i in all_names]) == 6
    
    prob += lpSum([(allPOS[i]=='SP')*player_vars[i] for i in all_names]) == 8
    
    prob += lpSum([(allPOS[i]=='RP')*player_vars[i] for i in all_names]) == 3
    prob += lpSum([(allPOS[i]=='DH')*player_vars[i] for i in all_names]) == 0
    
    
    # ensure no player is selected twice
    #sum(isEntryUsed[x] for x in range(len(entries)) if entries[x].name == name) <= 1
    #prob += lpSum([all_players.iloc[int(player_vars[i].name.split('_')[1])] for i in all_names]) == 1
    #prob += lpSum([all_players.iloc[i] for i in all_names if player_vars[i] == 1]) == 25
    for f in all_names:
        prob += player_vars[f]>= player_chosen[all_players.iloc[f]['Name']]*0.1
        prob += player_vars[f]<= player_chosen[all_players.iloc[f]['Name']]*1e5
    
    prob += lpSum(player_chosen)==25
    
    LpSolverDefault.msg = 1 
    prob.solve()
    Team = pd.DataFrame()
    print("Status:", LpStatus[prob.status])
    for v in prob.variables():
        if 'dummy' not in v.name and v.varValue>0 and not 'Chosen' in v.name:
            #print(v.name, "=", v.varValue)
            #print(all_players.index)
            #print(v.name.split('_')[1])
            #print(all_players.loc[int(v.name.split('_')[1])])
            Team = Team.append(all_players.loc[int(v.name.split('_')[1])])
            
            #print(all_players.loc[nm[1] + " " + nm[2]])
    return Team.append({'Name':'Total','AVG':sum(Team['H'])/sum(Team['AB']),'R':sum(Team['R']),'RBI':sum(Team['RBI']),'HR':sum(Team['HR']),'SB':sum(Team['SB']),'WHIP':(sum(Team['BB'])+sum(Team['Hits']))/sum(Team['IP']),'ERA':(sum(Team['ER'])/sum(Team['IP']))*9,'W':sum(Team['W']),'SV':sum(Team['SV']),'K':sum(Team['K']),'$':sum(Team['$'])},ignore_index=True)
 
Team_max = find_max()
#Team_max[['Name','R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP']]
#print('-- Name --|-- Pos --|-- Avg --|-- R --|-- RBI --|-- HR --|-- SB --|-- WHIP --|-- ERA --|-- W --|-- SV --|-- K --|--$--')
#for i in range(len(Team_max)):
#    print(Team_max.iloc[i]['Name'],Team_max.iloc[i]['POS'],Team_max.iloc[i]['AVG'],Team_max.iloc[i]['R'],Team_max.iloc[i]['RBI'],Team_max.iloc[i]['HR'],Team_max.iloc[i]['SB'],Team_max.iloc[i]['WHIP'],Team_max.iloc[i]['ERA'],Team_max.iloc[i]['W'],Team_max.iloc[i]['SV'],Team_max.iloc[i]['K'],Team_max.iloc[i]['$'])
    
#print('Total','--   --',sum(Team_max['H'])/sum(Team_max['AB']),sum(Team_max['R']),sum(Team_max['RBI']),sum_max(Team['HR']),sum(Team_max['SB']),(sum(Team_max['BB'])+sum(Team_max['Hits']))/sum(Team_max['IP']),(sum(Team_max['ER'])/sum(Team_max['IP']))*9,sum(Team_max['W']),sum(Team_max['SV']),sum(Team_max['K']),sum(Team_max['$']))
#print(Team_max['POS'].describe())
#Team_max[['Name','R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP']]

Team_min = find_min()
#print(Team_max['POS'].describe())
#Team_min[['Name','R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP']]
#print(Team.columns)
#print('-- Name --|-- Pos --|-- Avg --|-- R --|-- RBI --|-- HR --|-- SB --|-- WHIP --|-- ERA --|-- W --|-- SV --|-- K --|--$--')
#for i in range(len(Team_min)):
#    print(Team_min.iloc[i]['Name'],Team_min.iloc[i]['POS'],Team_min.iloc[i]['AVG'],Team_min.iloc[i]['R'],Team_min.iloc[i]['RBI'],Team_min.iloc[i]['HR'],Team_min.iloc[i]['SB'],Team_min.iloc[i]['WHIP'],Team_min.iloc[i]['ERA'],Team_min.iloc[i]['W'],Team_min.iloc[i]['SV'],Team_min.iloc[i]['K'],Team_min.iloc[i]['$'])
    
#print('Total','--   --',sum(Team_min['H'])/sum(Team_min['AB']),sum(Team_min['R']),sum(Team_min['RBI']),sum(Team_min['HR']),sum(Team_min['SB']),(sum(Team_min['BB'])+sum(Team_min['Hits']))/sum(Team_min['IP']),(sum(Team_min['ER'])/sum(Team_min['IP']))*9,sum(Team_min['W']),sum(Team_min['SV']),sum(Team_min['K']),sum(Team_min['$']))
Max_Tot = Team_max[Team_max['Name'] == 'Total']
Min_Tot = Team_min[Team_min['Name'] == 'Total']
#Min_Tot
TeamMaxMin = Max_Tot[['R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP']]-Min_Tot[['R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP']]
#print(Max_Tot[['R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP']])
#print(Min_Tot[['R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP']])
Avg = find_both()
Avg_Tot = Avg[Avg['Name'] == 'Total']
TeamAvgMin = Avg_Tot[['R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP']] - Min_Tot[['R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP']]

TeamAvgMax = Avg_Tot[['R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP']] - Max_Tot[['R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP']]

#Tot = Max_Tot.append(Min_Tot).append(Avg_Tot)
#Tot[['R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP']]

print('Max',get_score(Max_Tot.loc[25]))
print('Min',get_score(Min_Tot.loc[25]))
print('Avg',get_score(Avg_Tot.loc[25]))
#Min_Tot[['R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP','Ovr','$']]
Team_max[['Name','R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP','Ovr','$']]




NameError: name 'LpProblem' is not defined

In [11]:
all_players = get_hitters().set_index('Name').nsmallest(200,'Ovr').append(get_pitchers().set_index('Name').nsmallest(180,'Ovr'))
all_players.sort_values(by=['Ovr'], inplace=True)
all_players.drop(['AVG','R','RBI','HR','SB','WHIP','ERA','W','SV','K'], axis=1, inplace=True)
all_players

NameError: name 'get_pitchers' is not defined

In [34]:
def get_score(tm):
    score = {'R': 0,'HR':0,'RBI':0,'SB':0,'AVG':0,'K':0,'W':0,'SV':0,'ERA':0,'WHIP':0,'Total':0}
    R = sorted([1174,1136,1067,1006,1241,1110,974,997,1159,966,898])
    HR = sorted([433,352,353,284,382,321,291,332,355,302,260])
    RBI = sorted([1198,1088,1077,1030,1147,1016,955,1000,1075,905,897])
    SB = sorted([113,141,97,106,94,110,127,121,123,73,72])
    AVG = sorted([0.2735,0.2592,0.2740,0.2642,0.2768,0.2710,0.2620,0.2601,0.2705,0.2645,0.2641])
    K = sorted([1643,1531,1788,1598,1330,1387,1480,1725,1132,1391,1336])
    W = sorted([97,98,109,112,80,93,85,99,64,84,74])
    SV = sorted([59,73,3,55,115,79,105,42,39,59,54])
    ERA = sorted([3.907, 3.898,4.144,3.665,4.444,4.107,3.760,4.217,3.493,4.112,4.616],reverse=True)
    WHIP = sorted([1.216,1.244,1.262,1.102,1.339,1.247,1.210,1.291,1.131,1.267,1.284],reverse=True)
    
    hitter_scores = {'R':R,'HR':HR,'RBI':RBI,'SB':SB,'AVG':AVG}
    pitcher_scores = {'K':K,'W':W,'SV':SV,'ERA':ERA,'WHIP':WHIP}
    
    for metric in HitterMetrics:
        for s in range(len(hitter_scores[metric])):
            if(tm[metric] < hitter_scores[metric][s]):
                score[metric] = s+1
                score['Total'] += s+1
                break
        if (score[metric] == 0):
            score[metric] = 12
            score['Total'] += 12
    
    for metric in PitcherMetrics:
        if (metric != 'ERA') and (metric != 'WHIP'):
            for s in range(len(pitcher_scores[metric])):
                if(tm[metric] < pitcher_scores[metric][s]):
                    score[metric] = s+1
                    score['Total'] += s+1
                    break
        else:
            for s in range(len(pitcher_scores[metric])):
                if(tm[metric] > pitcher_scores[metric][s]):
                    score[metric] = s+1
                    score['Total'] += s+1
                    break
        if (score[metric] == 0):
            score[metric] = 12
            score['Total'] += 12
            
    print(score)
    return score

In [13]:
get_all_players()[['normR']]

NameError: name 'get_all_players' is not defined

In [10]:
def score(tm,metric):
    R = sorted([1183,1145,1125,1100,1082,1064,1046,1028,1006,980,949,893])
    HR = sorted([383,366,355,345,338,328,324,314,305,296,282,261])
    RBI = sorted([1137,1104,1079,1058,1041,1023,1007,987,964,942,910,855])
    SB = sorted([152,138,128,121,114,108,103,96,90,84,76,64])
    AVG = sorted([0.278,0.274,0.272,0.270,0.269,0.267,0.266,0.264,0.262,0.260,0.258,0.254])
    
    K = sorted([1622,1557,1509,1470,1438,1404,1370,1332,1290,1245,1182,1070])
    W = sorted([103,97,94,91,88,85,83,80,77,74,69,61])
    SV = sorted([100,89,82,77,71,66,61,54,48,40,31,18])
    ERA = sorted([3.497,3.651,3.754,3.834,3.909,4.041,4.116,4.192,4.276,4.375,4.525],reverse=True)
    WHIP = sorted([1.131,1.164,1.184,1.200,1.214,1.228,1.242,1.256,1.270,1.285,1.303,1.329],reverse=True)
    
    hitter_scores = {'R':R,'HR':HR,'RBI':RBI,'SB':SB,'AVG':AVG}
    pitcher_scores = {'K':K,'W':W,'SV':SV,'ERA':ERA,'WHIP':WHIP}
    tm_sc = sum(tm)
    if metric in HitterMetrics:
        for s in range(len(hitter_scores[metric])):
            if(tm_sc < hitter_scores[metric][s]):
                score += s+1
                break
            if(s == len(hitter_scores[metric])):
                score += 12
    
    elif metric in PitcherMetrics:
        if (metric != 'ERA') and (metric != 'WHIP'):
            for s in range(len(pitcher_scores[metric])):
                if(tm_sc < pitcher_scores[metric][s]):
                    score += s+1
                    break
                if(s == len(pitcher_scores[metric])):
                    score += 12
        else:
            for s in range(len(pitcher_scores[metric])):
                if(tm_sc > pitcher_scores[metric][s]):
                    score += s+1
                    break
                if(s == len(pitcher_scores[metric])):
                    score += 12
    print(score)
    return score 

In [11]:
def get_all_players():
    scaler = preprocessing.StandardScaler(with_std=False)
    
    
    
    hitter_avg_maxes = [(hitter_maxes[i]/14) for i in range(len(hitter_maxes)-1)]
    hitter_avg_maxes.append(hitter_maxes[-1])
    pitcher_avg_maxes = [pitcher_maxes[i]/11 for i in range(len(pitcher_maxes)-2)]
    pitcher_avg_maxes.append(1/pitcher_maxes[-2])
    pitcher_avg_maxes.append(1/pitcher_maxes[-1])
    
    
    hitter_avg_mins = [(hitter_mins[i]/14) for i in range(len(hitter_mins)-1)]
    hitter_avg_mins.append(hitter_mins[-1])
    pitcher_avg_mins = [pitcher_mins[i]/11 for i in range(len(pitcher_mins)-2)]
    pitcher_avg_mins.append(1/pitcher_mins[-2])
    pitcher_avg_mins.append(1/pitcher_mins[-1])
    #print(pitcher_avg_mins)
    
    
    hitters = get_hitter_prices(get_hitters()).nsmallest(150,'Ovr')
    norm_hitters = hitters[['R', 'HR', 'RBI', 'SB', 'AVG']]
    norm_hitters = pd.DataFrame(scaler.fit_transform(norm_hitters),columns=['R', 'HR', 'RBI', 'SB', 'AVG'])
    minH = math.inf
    maxH = 0
    for h in HitterMetrics:
        hitters['norm{}'.format(h)] = norm_hitters[h]
        if(max(norm_hitters[h])>maxH):
            maxH = max(norm_hitters[h])
        if(min(norm_hitters[h]<minH)):
            minH = min(norm_hitters[h])
    avg_max_ary = np.array(hitter_avg_maxes).reshape(1,-1)
    avg_min_ary = np.array(hitter_avg_mins).reshape(1,-1)
    hitter_avg_maxes = scaler.transform(avg_max_ary)
    hitterer_avg_mins = scaler.transform(avg_min_ary)
    
    
    scaler2 = preprocessing.MinMaxScaler((minH,maxH))
    starters = get_pitcher_prices(get_starting_pitchers()).nsmallest(120,'Ovr')
    starters['SV']=np.NaN
    norm_starters = starters[['W']]
    norm_starters = pd.DataFrame(scaler.fit_transform(norm_starters),columns=['W'])
    starters['normW'] = norm_starters['W']
    
    win_max_ary = np.array(pitcher_avg_maxes[0]).reshape(1,-1)
    win_min_ary = np.array(pitcher_avg_mins[0]).reshape(1,-1)
    pitcher_temp = scaler.transform(win_max_ary)
    min_temp = scaler.transform(win_min_ary)
    #print(pitcher_temp)
        
    closers = get_pitcher_prices(get_closing_pitchers()).nsmallest(30,'SV rank')
    closers['W']=np.NaN
    norm_closers = closers[['SV']]
    norm_closers = pd.DataFrame(scaler.fit_transform(norm_closers),columns=['SV'])
    closers['normSV'] = norm_closers['SV']
    
    sv_max_ary = np.array(pitcher_avg_maxes[1]).reshape(1,-1)
    sv_min_ary = np.array(pitcher_avg_mins[1]).reshape(1,-1)
    pitcher_temp = np.append(pitcher_temp, scaler.transform(sv_max_ary))
    min_temp = np.append(min_temp,scaler.transform(sv_min_ary))
    #print(pitcher_temp)
        
    
    all_pitchers = starters.append(closers, ignore_index=True)
    all_pitchers['ERA'] = all_pitchers['ERA'].apply(lambda x: 0 if x == 0 else 1/x)
    all_pitchers['WHIP'] = all_pitchers['WHIP'].apply(lambda x: 0 if x == 0 else 1/x)
    
    norm_pitchers = all_pitchers[['K', 'ERA', 'WHIP']]
    norm_pitchers = pd.DataFrame(scaler.fit_transform(norm_pitchers),columns=['K', 'ERA', 'WHIP'])
    for p in ['K', 'ERA', 'WHIP']:
        all_pitchers['norm{}'.format(p)] = norm_pitchers[p]
        
    pitcher_max_ary = np.array(pitcher_avg_maxes[2:5]).reshape(1,-1)
    pitcher_min_ary = np.array(pitcher_avg_mins[2:5]).reshape(1,-1)
    pitcher_temp = np.append(pitcher_temp,scaler.transform(pitcher_max_ary))
    min_temp = np.append(min_temp, scaler.transform(pitcher_min_ary))
    pitcher_avg_maxes = pitcher_temp
    pitcher_avg_mins = min_temp
    
    all_players = hitters.append(all_pitchers, ignore_index=True)
    all_players.rename_axis('idx', inplace=True)
    
    all_players['normW'].fillna(minH, inplace=True)
    all_players['normSV'].fillna(minH, inplace=True)
    all_players.fillna(0, inplace=True)
    return all_players,hitter_avg_maxes[0],pitcher_avg_maxes,hitter_avg_mins,pitcher_avg_mins

    
    
get_all_players()

print(get_all_players()[3])
print(get_all_players()[4])

[74.71428571428571, 23.142857142857142, 71.92857142857143, 7.357142857142857, 0.266]
[ 4.10962121e+00 -2.58787879e+00  7.59394545e+01  3.86459822e-03
  1.76110824e-02]


In [16]:
def find_max_three():
    prob = LpProblem("IdealRosterProblem",LpMaximize)
    scaler = preprocessing.StandardScaler(with_std=False)
    
    #all_players,hitter_avg_maxes,pitcher_avg_maxes,hitter_avg_mins,pitcher_avg_mins = get_all_players()
    all_players = get_hitter_prices(get_hitters()).nsmallest(150,'Ovr').append(get_pitcher_prices(get_starting_pitchers()).nsmallest(120,'Ovr'),ignore_index=True).append(get_pitcher_prices(get_closing_pitchers()).nsmallest(30,'SV rank'),ignore_index=True)
    all_players.fillna(0, inplace=True)
    all_players['ERA'] = all_players['ERA'].apply(lambda x: 0 if x == 0 else 2/x)
    all_players['WHIP'] = all_players['WHIP'].apply(lambda x: 0 if x == 0 else 2/x)
    norm_players = all_players[['R', 'HR', 'RBI', 'SB', 'AVG','W','SV','K','ERA','WHIP']]
    
    norm_players = pd.DataFrame(scaler.fit_transform(norm_players),columns=['R', 'HR', 'RBI', 'SB', 'AVG','W','SV','K', 'ERA', 'WHIP'])
    for h in HitterMetrics:
        all_players['norm{}'.format(h)] = norm_players[h]
    for p in PitcherMetrics:
        all_players['norm{}'.format(p)] = norm_players[p]
    
    
    
    all_names = list(all_players.index)
    name_list = list(all_players['Name'])
    allCosts = dict(zip(all_names,all_players['$']))
    
    print(all_players)
    print('------')
    print(all_names)
    
    allRuns = dict(zip(all_names,all_players['normR']))
    allHRs = dict(zip(all_names,all_players['normHR']))
    allRBIs = dict(zip(all_names,all_players['normRBI']))
    allSBs = dict(zip(all_names,all_players['normSB']))
    allAVG = dict(zip(all_names,all_players['normAVG']))

    #all_players['AdjERA'] = all_players['normERA'].apply(lambda x: 0 if x == 0 else 1/x)
    #all_players['AdjWHIP'] = all_players['normWHIP'].apply(lambda x: 0 if x == 0 else 1/x)
    
    allWs = dict(zip(all_names,all_players['normW']))
    allKs = dict(zip(all_names,all_players['normK']))
    allSVs = dict(zip(all_names,all_players['normSV']))
    allERA = dict(zip(all_names,all_players['normERA']*0.9))
    allWHIP = dict(zip(all_names,all_players['normWHIP']*0.9))
    allPOS = dict(zip(all_names,all_players['POS']))

    #allAdjERA = dict(zip(all_names,all_players['AdjERA']))
    #allAdjWHIP = dict(zip(all_names,all_players['AdjWHIP']))
    
    
    player_vars = LpVariable.dicts("Player",all_names,lowBound=0,upBound=1,cat='Integer')
    player_chosen = LpVariable.dicts("Chosen",name_list,0,1,cat='Integer')
    
    print("all plrs: ", len(all_players.index))
    print('vars: ', len(player_vars))
    print('nms: ', len(all_names))
    
    prob += lpSum([allRuns[i]*player_vars[i] for i in all_names])
    prob += lpSum([allHRs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allRBIs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allSBs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allAVG[i]*player_vars[i] for i in all_names])
    prob += lpSum([allWs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allKs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allSVs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allERA[i]*player_vars[i] for i in all_names])
    prob += lpSum([allWHIP[i]*player_vars[i] for i in all_names])
    
    prob += lpSum([allCosts[i]*player_vars[i] for i in all_names])
    
    
#     prob += lpSum([allRuns[i]*player_vars[i] for i in all_names]) <= (hitter_avg_maxes[0]*14)
#     prob += lpSum([allHRs[i]*player_vars[i] for i in all_names]) <= (hitter_avg_maxes[1]*14)
#     prob += lpSum([allRBIs[i]*player_vars[i] for i in all_names]) <= (hitter_avg_maxes[2]*14)
#     prob += lpSum([allSBs[i]*player_vars[i] for i in all_names]) <= (hitter_avg_maxes[3]*14)
#     prob += lpSum([allAVG[i]*player_vars[i] for i in all_names]) <= (hitter_avg_maxes[4])
#     prob += lpSum([allWs[i]*player_vars[i] for i in all_names]) <= (pitcher_avg_maxes[0]*11)
#     prob += lpSum([allKs[i]*player_vars[i] for i in all_names]) <= (pitcher_avg_maxes[1]*11)
#     prob += lpSum([allSVs[i]*player_vars[i] for i in all_names]) <= (pitcher_avg_maxes[2]*11)
#     prob += lpSum([allERA[i]*player_vars[i] for i in all_names]) <= (pitcher_avg_maxes[3]*11)
#     prob += lpSum([allWHIP[i]*player_vars[i] for i in all_names]) <= (pitcher_avg_maxes[4]*11)
    
    
    #prob += lpSum([allRuns[i]*player_vars[i] for i in all_names]) >= (hitter_avg_mins[0]*14)
    #prob += lpSum([allHRs[i]*player_vars[i] for i in all_names]) >= (hitter_avg_mins[1]*14)
    #prob += lpSum([allRBIs[i]*player_vars[i] for i in all_names]) >= (hitter_avg_mins[2]*14)
    #prob += lpSum([allSBs[i]*player_vars[i] for i in all_names]) >= (hitter_avg_mins[3]*14)
    #prob += lpSum([allAVG[i]*player_vars[i] for i in all_names]) >= (hitter_avg_mins[4])
    #prob += lpSum([allWs[i]*player_vars[i] for i in all_names]) >= (pitcher_avg_mins[0]*11)
    #prob += lpSum([allKs[i]*player_vars[i] for i in all_names]) >= (pitcher_avg_mins[1]*11)
    #prob += lpSum([allSVs[i]*player_vars[i] for i in all_names]) >= (pitcher_avg_mins[2]*11)
    #prob += lpSum([allERA[i]*player_vars[i] for i in all_names]) >= (pitcher_avg_mins[3]*11)
    #prob += lpSum([allWHIP[i]*player_vars[i] for i in all_names]) >= (pitcher_avg_mins[4]*11)
    
    
    prob += lpSum([allCosts[i]*player_vars[i] for i in all_names]) <= 260
    
    prob += lpSum([(allPOS[i]=='C')*player_vars[i] for i in all_names]) == 1
    
    # update these based on position depth
    prob += lpSum([(allPOS[i]=='1B')*player_vars[i] for i in all_names]) == 2
    prob += lpSum([(allPOS[i]=='2B')*player_vars[i] for i in all_names]) == 2
    prob += lpSum([(allPOS[i]=='3B')*player_vars[i] for i in all_names]) == 1
    prob += lpSum([(allPOS[i]=='SS')*player_vars[i] for i in all_names]) == 2
    
    prob += lpSum([(allPOS[i]=='OF')*player_vars[i] for i in all_names]) == 6
    
    prob += lpSum([(allPOS[i]=='SP')*player_vars[i] for i in all_names]) == 8
    
    prob += lpSum([(allPOS[i]=='RP')*player_vars[i] for i in all_names]) == 3
    prob += lpSum([(allPOS[i]=='DH')*player_vars[i] for i in all_names]) == 0
    
    prob += lpSum([allCosts[i]*player_vars[i]*(allPOS[i]=='RP' or allPOS[i]=='SP') for i in all_names]) <= 120
    prob += lpSum([allCosts[i]*player_vars[i]*(allPOS[i]!='RP' and allPOS[i]!='SP') for i in all_names]) <= 140
    
    # ensure no player is selected twice

    for f in all_names:
        prob += player_vars[f]>= player_chosen[all_players.iloc[f]['Name']]*0.1
        prob += player_vars[f]<= player_chosen[all_players.iloc[f]['Name']]*1e5
    
    prob += lpSum(player_chosen)==25

    LpSolverDefault.msg = 1 
    prob.solve(solver=GUROBI(PoolSearchMode=2,PoolSolutions=4))
    Team = pd.DataFrame()
    print("Status:", LpStatus[prob.status])
    for v in prob.variables():
        if v.varValue>0 and not 'Chosen' in v.name:
            Team = Team.append(all_players.loc[int(v.name.split('_')[1])])
    return Team.append({'Name':'Total','AVG':sum(Team['H'])/sum(Team['AB']),'R':sum(Team['R']),'RBI':sum(Team['RBI']),'HR':sum(Team['HR']),'SB':sum(Team['SB']),'WHIP':(sum(Team['BB'])+sum(Team['Hits']))/sum(Team['IP']),'ERA':(sum(Team['ER'])/sum(Team['IP']))*9,'W':sum(Team['W']),'SV':sum(Team['SV']),'K':sum(Team['K']),'$':sum(Team['$'])},ignore_index=True)
 
Team_max = find_max_three()
Team['ERA'] = Team['ERA'].apply(lambda x: 0 if x == 0 else 1/x)
Team['WHIP'] = Team['WHIP'].apply(lambda x: 0 if x == 0 else 1/x)
Max_Tot = Team_max[Team_max['Name'] == 'Total']

print('Max',get_score(Max_Tot.loc[25]))
Team_max[['Name','POS','R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP','Ovr','$']]


NameError: name 'LpProblem' is not defined

# This is the one \/

In [17]:
def find_gurobi():
    m = Model("mip1")
    # this works really well without using the maxes, but I think it might peform better with them
    
    #all_players = get_all_players()[0]
    x,hitter_avg_maxes,pitcher_avg_maxes,hitter_avg_mins,pitcher_avg_mins = get_all_players()
    scaler = preprocessing.StandardScaler(with_std=False)
    #scaler2 = preprocessing.StandardScaler()
    #scaler2 = preprocessing.MinMaxScaler()
    #all_players,hitter_avg_maxes,pitcher_avg_maxes,hitter_avg_mins,pitcher_avg_mins = get_all_players()
    all_players = get_hitter_prices(get_hitters()).nsmallest(150,'Ovr').append(get_pitcher_prices(get_starting_pitchers()).nsmallest(120,'Ovr'),ignore_index=True).append(get_pitcher_prices(get_closing_pitchers()).nsmallest(22,'SV rank'),ignore_index=True) 
    #all_players.fillna(0, inplace=True)
    all_players = all_players[all_players['Name'] != 'Max Kepler'].reset_index(drop=True)
    all_players.fillna(0, inplace=True)          
    #return(all_players.iloc[290])    
                              
    all_players['ERA'] = all_players['ERA'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['WHIP'] = all_players['WHIP'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['AB'] = all_players['AB'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['adjAVG'] = all_players['AVG']*100
    norm_players = all_players[['R', 'HR', 'RBI', 'SB','adjAVG','W','SV','K','ERA','WHIP','AB','H']]
    
    norm_players = pd.DataFrame(scaler.fit_transform(norm_players),columns=['R', 'HR', 'RBI', 'SB','adjAVG','W','SV','K', 'ERA', 'WHIP','H','AB'])
    
#     p_avg_maxes = [(hitter_maxes[i]/14) for i in range(len(hitter_maxes)-1)]
#     p_avg_maxes.append(hitter_maxes[-1]*100)
#     [p_avg_maxes.append(pitcher_maxes[i]/11) for i in range(len(pitcher_maxes)-2)]
#     p_avg_maxes.append(1/pitcher_maxes[-2])
#     p_avg_maxes.append(1/pitcher_maxes[-1])
#     print()
    
#     h_avg_max_ary = np.array(hitter_avg_maxes).reshape(1,-1)
#     p_avg_max_ary = np.array(pitcher_avg_maxes).reshape(1,-1)
#     avg_max_array = np.array(p_avg_maxes).reshape(1,-1)
#     #print(avg_max_array)
#     avg_maxes = scaler.transform(avg_max_array)
#     avg_maxes = avg_maxes[0]
#     print(avg_maxes)
    #hitter_avg_maxes = avg_maxes[0]
    #pitcher_avg_maxes = avg_maxes[1]
    
    
#     maxH = 0
#     minH = math.inf
#     for h in HitterMetrics:
#         if h == 'AVG':
#             continue
#         if(max(norm_players[h])>maxH):
#             maxH = max(norm_players[h])
#         if(min(norm_players[h]<minH)):
#             minH = min(norm_players[h])
#     for h in PitcherMetrics:
#         if(max(norm_players[h])>maxH):
#             maxH = max(norm_players[h])
#         if(min(norm_players[h]<minH)):
#             minH = min(norm_players[h])
            
    #scaler2 = preprocessing.MinMaxScaler((minH,maxH))
    
    #norm_players['AVG'] = all_players[['AVG']]
    
    #norm_players['AVG'] = pd.DataFrame(scaler2.fit_transform(norm_players),columns=['R', 'HR', 'RBI', 'SB','AVG','W','SV','K', 'ERA', 'WHIP'])['AVG']
    for h in HitterMetrics:
        if h == 'AVG':
            all_players['normAVG'] = norm_players['adjAVG']
        else:
            all_players['norm{}'.format(h)] = norm_players[h]
    for p in PitcherMetrics:
        all_players['norm{}'.format(p)] = norm_players[p]
    
    for h in ['AB','H']:
        all_players['norm{}'.format(h)] = norm_players[h]
    
    all_names = list(all_players.index)
    name_list = list(dict.fromkeys(all_players['Name']))
    allCosts = dict(zip(all_names,all_players['$']))
    
    return all_players
    player_vars = m.addVars(all_names,vtype=GRB.INTEGER,lb=0,ub=1,name='players')
    player_chosen = m.addVars(name_list,vtype=GRB.INTEGER,lb=0,ub=1,name='pl_chosen')
    
    allRuns = dict(zip(all_names,all_players['normR']))
    allHRs = dict(zip(all_names,all_players['normHR']))
    allRBIs = dict(zip(all_names,all_players['normRBI']))
    allSBs = dict(zip(all_names,all_players['normSB']*2.5))
    allAVG = dict(zip(all_names,all_players['normAVG']))
    allAB = dict(zip(all_names,all_players['normAB']*0.8))
    allH = dict(zip(all_names,all_players['normH']*0.8))

    #all_players['AdjERA'] = all_players['normERA'].apply(lambda x: 0 if x == 0 else 1/x)
    #all_players['AdjWHIP'] = all_players['normWHIP'].apply(lambda x: 0 if x == 0 else 1/x)
    
    allWs = dict(zip(all_names,all_players['normW']))
    allKs = dict(zip(all_names,all_players['normK']))
    allSVs = dict(zip(all_names,all_players['normSV']))
    allERA = dict(zip(all_names,all_players['normERA']))
    allWHIP = dict(zip(all_names,all_players['normWHIP']))
    allPOS = dict(zip(all_names,all_players['POS']))

    #allAdjERA = dict(zip(all_names,all_players['AdjERA']))
    #allAdjWHIP = dict(zip(all_names,all_players['AdjWHIP']))
    
    
    
    #print("all plrs: ", len(all_players.index))
    ##print('nms: ', len(all_names))
    
    obj = LinExpr()
    
    obj += quicksum([allRuns[i]*player_vars[i] for i in all_names])
    obj += quicksum([allHRs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allRBIs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allSBs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allAVG[i]*player_vars[i] for i in all_names])
    obj += quicksum([allWs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allKs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allSVs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allERA[i]*player_vars[i] for i in all_names])
    obj += quicksum([allWHIP[i]*player_vars[i] for i in all_names])
    obj += quicksum([allCosts[i]*player_vars[i] for i in all_names])
    obj += quicksum([allH[i]*player_vars[i] for i in all_names])
    obj += quicksum([allAB[i]*player_vars[i] for i in all_names])
    m.setObjective(obj, GRB.MAXIMIZE)
    
    m.addConstr(sum([allCosts[i]*player_vars[i] for i in all_names])<= 260)
    
    m.addConstr(sum([(allPOS[i]=='C')*player_vars[i] for i in all_names]) == 1)
    
    # update these based on position depth
    m.addConstr(sum([(allPOS[i]=='1B')*player_vars[i] for i in all_names]) == 2)
    m.addConstr(sum([(allPOS[i]=='2B')*player_vars[i] for i in all_names]) == 2)
    m.addConstr(sum([(allPOS[i]=='3B')*player_vars[i] for i in all_names]) == 1)
    m.addConstr(sum([(allPOS[i]=='SS')*player_vars[i] for i in all_names]) == 2)
    
    m.addConstr(sum([(allPOS[i]=='OF')*player_vars[i] for i in all_names]) == 6)
    
    m.addConstr(sum([(allPOS[i]=='SP')*player_vars[i] for i in all_names]) == 8)
    
    m.addConstr(sum([(allPOS[i]=='RP')*player_vars[i] for i in all_names]) == 3)
    m.addConstr(sum([(allPOS[i]=='DH')*player_vars[i] for i in all_names]) == 0)
    
    
#     m.addConstr(sum([allRuns[i]*player_vars[i] for i in all_names]) <= (avg_maxes[0]*15))
#     m.addConstr(sum([allHRs[i]*player_vars[i] for i in all_names]) <= (avg_maxes[1]*15))
#     m.addConstr(sum([allRBIs[i]*player_vars[i] for i in all_names]) <= (avg_maxes[2]*15))
#     m.addConstr(sum([allSBs[i]*player_vars[i] for i in all_names]) <= (avg_maxes[3]*15))
#     m.addConstr(sum([allAVG[i]*player_vars[i] for i in all_names]) <= (avg_maxes[4]*15))
#     m.addConstr(sum([allWs[i]*player_vars[i] for i in all_names]) <= (avg_maxes[5]*12))
#     m.addConstr(sum([allKs[i]*player_vars[i] for i in all_names]) <= (avg_maxes[6]*12))
#     m.addConstr(sum([allSVs[i]*player_vars[i] for i in all_names]) <= (avg_maxes[7]*12))
#     m.addConstr(sum([allERA[i]*player_vars[i] for i in all_names]) <= (avg_maxes[8]*12))
#     m.addConstr(sum([allWHIP[i]*player_vars[i] for i in all_names]) <= (avg_maxes[9]*12))
    
    
    m.addConstr(sum([allCosts[i]*player_vars[i]*(allPOS[i]=='RP' or allPOS[i]=='SP') for i in all_names]) <= 120)
    m.addConstr(sum([allCosts[i]*player_vars[i]*(allPOS[i]!='RP' and allPOS[i]!='SP') for i in all_names]) <= 140)
    
    
    # ensure no player is selected twice

    for f in all_names:
        m.addConstr(player_vars[f]>= player_chosen[all_players.iloc[f]['Name']]*0.01)
        m.addConstr(player_vars[f]<= player_chosen[all_players.iloc[f]['Name']]*1e8)
    
    
    m.addConstr(sum(player_chosen.values())==25)
    
    m.setParam(GRB.Param.PoolSolutions,10)
    m.setParam(GRB.Param.PoolSearchMode,2)
    
    m.optimize()
    
    nSolns = m.SolCount
    max_tot = 0
    best = 0
    for e in range(nSolns):
        Team = pd.DataFrame()
        m.setParam(GRB.Param.SolutionNumber,e)
        for v in m.getVars():
            if v.Xn > 0.01 and not 'chosen' in v.varName:
                Team = Team.append(all_players.iloc[int(v.varName.split('[')[1].split(']')[0])])
        Team = Team.append(Team.append({'Name':'Total','AVG':sum(Team['H'])/sum(Team['AB']),'R':sum(Team['R']),'RBI':sum(Team['RBI']),'HR':sum(Team['HR']),'SB':sum(Team['SB']),'WHIP':(sum(Team['BB'])+sum(Team['Hits']))/sum(Team['IP']),'ERA':(sum(Team['ER'])/sum(Team['IP']))*9,'W':sum(Team['W']),'SV':sum(Team['SV']),'K':sum(Team['K']),'$':sum(Team['$'])},ignore_index=True))
        sc = get_score(Team[Team['Name'] == 'Total'].iloc[0])['Total']
        if (sc > max_tot):
            best = e
            max_tot = sc
    print('Best Model: {}'.format(best))
    Team = pd.DataFrame()
    m.setParam(GRB.Param.SolutionNumber,best)
    for v in m.getVars():
        if v.Xn > 0.01 and not 'chosen' in v.varName:
            Team = Team.append(all_players.iloc[int(v.varName.split('[')[1].split(']')[0])])
    
    print('Obj:', m.objVal)
    #Team = pd.DataFrame()
    #print("Status:", LpStatus[prob.status])
#     for v in m.getVars():
#         if v.x > 0.01 and not 'chosen' in v.varName:
#             print(v.varName + ": " + str(v.x))
#             Team = Team.append(all_players.iloc[int(v.varName.split('[')[1].split(']')[0])])
#         if v.x > 0.01 and 'chosen' in v.varName:
#             print(v.varName + ": " + str(v.x))
    
    Team['ERA'] = Team['ERA'].apply(lambda x: 0 if x == 0 else 1/x)
    Team['WHIP'] = Team['WHIP'].apply(lambda x: 0 if x == 0 else 1/x)
    return Team.append({'Name':'Total','AVG':sum(Team['H'])/sum(Team['AB']),'R':sum(Team['R']),'RBI':sum(Team['RBI']),'HR':sum(Team['HR']),'SB':sum(Team['SB']),'WHIP':(sum(Team['BB'])+sum(Team['Hits']))/sum(Team['IP']),'ERA':(sum(Team['ER'])/sum(Team['IP']))*9,'W':sum(Team['W']),'SV':sum(Team['SV']),'K':sum(Team['K']),'$':sum(Team['$'])},ignore_index=True)    

Team = find_gurobi()

Team[Team['Name'] == 'Total']

#print('Max',get_score(Max_Tot.iloc[0]))
#Team[['Name','POS','R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP','Ovr','$']] 

#all_players = get_hitter_prices(get_hitters()).nsmallest(150,'Ovr').append(get_pitcher_prices(get_starting_pitchers()).nsmallest(120,'Ovr'),ignore_index=True).append(get_pitcher_prices(get_closing_pitchers()).nsmallest(22,'SV rank'),ignore_index=True)

#pl = all_players[all_players['Name'].str.contains('Mike Trout')]
#pl.loc[0,'POS'] == 'SP' or pl.loc[0,'POS'] == 'RP'

#pl = all_players[all_players['Name'].str.contains('mike trout',case=False)].reset_index(drop=True)
#pl2 = pd.DataFrame()
#pl2.count()
#Team[['H','AB','AVG']]
#help(tupledict()) 
#Team.columns

Unnamed: 0,AVG rank,SB rank,RBI rank,HR rank,R rank,Razzball_Rank,Name,Team,POS,AB,...,normRBI,normSB,normAVG,normW,normSV,normK,normERA,normWHIP,normAB,normH


In [20]:
G_Team = find_gurobi()
G_Team[G_Team['Name'] == 'Total']

Unnamed: 0,AVG rank,SB rank,RBI rank,HR rank,R rank,Razzball_Rank,Name,Team,POS,AB,...,normRBI,normSB,normAVG,normW,normSV,normK,normERA,normWHIP,normAB,normH


In [239]:
def find_newnorm():
    prob = LpProblem("IdealRosterProblem",LpMaximize)
    
    
    all_players = get_all_players()
    
    
    all_names = list(all_players.index)
    name_list = list(all_players['Name'])
    allCosts = dict(zip(all_names,all_players['$']))
    
    
    allRuns = dict(zip(all_names,all_players['R']/max(hitter_scores['R'])))
    allHRs = dict(zip(all_names,all_players['HR']/max(hitter_scores['HR'])))
    allRBIs = dict(zip(all_names,all_players['RBI']/max(hitter_scores['RBI'])))
    allSBs = dict(zip(all_names,all_players['SB']/max(hitter_scores['SB'])))
    allAVG = dict(zip(all_names,all_players['AVG']/max(hitter_scores['AVG'])))

    all_players['AdjERA'] = all_players['ERA'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['AdjWHIP'] = all_players['WHIP'].apply(lambda x: 0 if x == 0 else 1/x)
    
    totAdjERA = 1/min(pitcher_scores['ERA'])
    totAdjWHIP = 1/min(pitcher_scores['WHIP'])
    
    allWs = dict(zip(all_names,all_players['W']/max(pitcher_scores['W'])))
    allKs = dict(zip(all_names,all_players['K']/max(pitcher_scores['K'])))
    allSVs = dict(zip(all_names,all_players['SV']/max(pitcher_scores['SV'])))
    allERA = dict(zip(all_names,all_players['AdjERA']/totAdjERA))
    allWHIP = dict(zip(all_names,all_players['AdjWHIP']/totAdjWHIP))
    allPOS = dict(zip(all_names,all_players['POS']))

    allAdjERA = dict(zip(all_names,all_players['AdjERA']))
    allAdjWHIP = dict(zip(all_names,all_players['AdjWHIP']))
    
    
    player_vars = LpVariable.dicts("Player",all_names,lowBound=0,upBound=1,cat='Integer')
    player_chosen = LpVariable.dicts("Chosen",name_list,0,1,cat='Integer')
    
    print("all plrs: ", len(all_players.index))
    print('vars: ', len(player_vars))
    print('nms: ', len(all_names))
    
    prob += lpSum([allRuns[i]*player_vars[i] for i in all_names])
    prob += lpSum([allHRs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allRBIs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allSBs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allAVG[i]*player_vars[i] for i in all_names])
    prob += lpSum([allWs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allKs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allSVs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allAdjERA[i]*player_vars[i] for i in all_names])
    prob += lpSum([allAdjWHIP[i]*player_vars[i] for i in all_names])
    
    
    
    prob += lpSum([allCosts[i]*player_vars[i] for i in all_names]) <= 260
    
    prob += lpSum([(allPOS[i]=='C')*player_vars[i] for i in all_names]) == 1
    
    # update these based on position depth
    prob += lpSum([(allPOS[i]=='1B')*player_vars[i] for i in all_names]) == 2
    prob += lpSum([(allPOS[i]=='2B')*player_vars[i] for i in all_names]) == 2
    prob += lpSum([(allPOS[i]=='3B')*player_vars[i] for i in all_names]) == 1
    prob += lpSum([(allPOS[i]=='SS')*player_vars[i] for i in all_names]) == 2
    
    prob += lpSum([(allPOS[i]=='OF')*player_vars[i] for i in all_names]) == 6
    
    prob += lpSum([(allPOS[i]=='SP')*player_vars[i] for i in all_names]) == 8
    
    prob += lpSum([(allPOS[i]=='RP')*player_vars[i] for i in all_names]) == 3
    prob += lpSum([(allPOS[i]=='DH')*player_vars[i] for i in all_names]) == 0
    
    
    # ensure no player is selected twice

    for f in all_names:
        prob += player_vars[f]>= player_chosen[all_players.iloc[f]['Name']]*0.1
        prob += player_vars[f]<= player_chosen[all_players.iloc[f]['Name']]*1e5
    
    prob += lpSum(player_chosen)==25

    LpSolverDefault.msg = 1 
    prob.solve(solver=GUROBI())
    Team = pd.DataFrame()
    print("Status:", LpStatus[prob.status])
    for v in prob.variables():
        if v.varValue>0 and not 'Chosen' in v.name:
            Team = Team.append(all_players.loc[int(v.name.split('_')[1])])
            
    return Team.append({'Name':'Total','AVG':sum(Team['H'])/sum(Team['AB']),'R':sum(Team['R']),'RBI':sum(Team['RBI']),'HR':sum(Team['HR']),'SB':sum(Team['SB']),'WHIP':(sum(Team['BB'])+sum(Team['Hits']))/sum(Team['IP']),'ERA':(sum(Team['ER'])/sum(Team['IP']))*9,'W':sum(Team['W']),'SV':sum(Team['SV']),'K':sum(Team['K']),'$':sum(Team['$'])},ignore_index=True)
 
Team_max = find_newnorm()
Max_Tot = Team_max[Team_max['Name'] == 'Total']

print('Max',get_score(Max_Tot.loc[25]))
Team_max[['Name','R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP','Ovr','$']]


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


TypeError: 'builtin_function_or_method' object is not iterable

In [None]:
def find_max_off():
    prob = LpProblem("IdealRosterProblem",LpMaximize)
    
    scaler = preprocessing.MinMaxScaler()
    
    hitters = get_hitter_prices(get_hitters()).nsmallest(150,'Ovr')
    norm_hitters = hitters[['R', 'HR', 'RBI', 'SB', 'AVG']]
    norm_hitters = pd.DataFrame(scaler.fit_transform(norm_hitters),columns=['R', 'HR', 'RBI', 'SB', 'AVG'])
    for h in HitterMetrics:
        hitters['norm{}'.format(h)] = norm_hitters[h]
    
    all_players = hitters
    
    
    all_names = list(all_players.index)
    name_list = list(all_players['Name'])
    allCosts = dict(zip(all_names,all_players['$']))
    
    
    allRuns = dict(zip(all_names,all_players['normR']))
    allHRs = dict(zip(all_names,all_players['normHR']))
    allRBIs = dict(zip(all_names,all_players['normRBI']))
    allSBs = dict(zip(all_names,all_players['normSB']))
    allAVG = dict(zip(all_names,all_players['normAVG']))
    allPOS = dict(zip(all_names,all_players['POS']))
    
    
    player_vars = LpVariable.dicts("Player",all_names,lowBound=0,upBound=1,cat='Integer')
    player_chosen = LpVariable.dicts("Chosen",name_list,0,1,cat='Integer')
    
    print("all plrs: ", len(all_players.index))
    print('vars: ', len(player_vars))
    print('nms: ', len(all_names))
    
    prob += lpSum([allRuns[i]*player_vars[i] for i in all_names])
    prob += lpSum([allHRs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allRBIs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allSBs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allAVG[i]*player_vars[i] for i in all_names])
    
    
    
    prob += lpSum([allCosts[i]*player_vars[i] for i in all_names]) <= 155
    
    prob += lpSum([(allPOS[i]=='C')*player_vars[i] for i in all_names]) == 1
    
    # update these based on position depth
    prob += lpSum([(allPOS[i]=='1B')*player_vars[i] for i in all_names]) == 2
    prob += lpSum([(allPOS[i]=='2B')*player_vars[i] for i in all_names]) == 2
    prob += lpSum([(allPOS[i]=='3B')*player_vars[i] for i in all_names]) == 1
    prob += lpSum([(allPOS[i]=='SS')*player_vars[i] for i in all_names]) == 2
    
    prob += lpSum([(allPOS[i]=='OF')*player_vars[i] for i in all_names]) == 6
    
    prob += lpSum([(allPOS[i]=='DH')*player_vars[i] for i in all_names]) == 0
    
    
    # ensure no player is selected twice

    for f in all_names:
        prob += player_vars[f]>= player_chosen[all_players.iloc[f]['Name']]*0.1
        prob += player_vars[f]<= player_chosen[all_players.iloc[f]['Name']]*1e5
    
    prob += lpSum(player_chosen)==14

    LpSolverDefault.msg = 1 
    prob.solve()
    Team = pd.DataFrame()
    print("Status:", LpStatus[prob.status])
    for v in prob.variables():
        if v.varValue>0 and not 'Chosen' in v.name:
            Team = Team.append(all_players.loc[int(v.name.split('_')[1])])
            
    return Team#Team.append({'Name':'Total','AVG':sum(Team['H'])/sum(Team['AB']),'R':sum(Team['R']),'RBI':sum(Team['RBI']),'HR':sum(Team['HR']),'SB':sum(Team['SB']),'$':sum(Team['$'])},ignore_index=True)
 
def find_max_def():
    scaler = preprocessing.MinMaxScaler()
    
    starters = get_pitcher_prices(get_starting_pitchers()).nsmallest(120,'Ovr')
    norm_starters = starters[['W']]
    norm_starters = pd.DataFrame(scaler.fit_transform(norm_starters),columns=['W'])
    starters['normW'] = norm_starters['W']
        
    closers = get_pitcher_prices(get_closing_pitchers()).nsmallest(30,'SV rank')
    norm_closers = closers[['SV']]
    norm_closers = pd.DataFrame(scaler.fit_transform(norm_closers),columns=['SV'])
    closers['normSV'] = norm_closers['SV']
        
    all_pitchers = starters.append(closers, ignore_index=True)
    norm_pitchers = all_pitchers[['K', 'ERA', 'WHIP']]
    norm_pitchers = pd.DataFrame(scaler.fit_transform(norm_pitchers),columns=['K', 'ERA', 'WHIP'])
    for p in ['K', 'ERA', 'WHIP']:
        all_pitchers['norm{}'.format(p)] = norm_pitchers[p]
        
    prob = LpProblem("IdealRosterProblem",LpMaximize)
    
    
    all_players = all_pitchers
    
    
    all_names = list(all_players.index)
    name_list = list(all_players['Name'])
    allCosts = dict(zip(all_names,all_players['$']))

    all_players['AdjERA'] = all_players['normERA'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['AdjWHIP'] = all_players['normWHIP'].apply(lambda x: 0 if x == 0 else 1/x)
    
    allWs = dict(zip(all_names,all_players['normW']))
    allKs = dict(zip(all_names,all_players['normK']))
    allSVs = dict(zip(all_names,all_players['normSV']))
    allERA = dict(zip(all_names,all_players['normERA']))
    allWHIP = dict(zip(all_names,all_players['normWHIP']))
    allPOS = dict(zip(all_names,all_players['POS']))

    allAdjERA = dict(zip(all_names,all_players['AdjERA']))
    allAdjWHIP = dict(zip(all_names,all_players['AdjWHIP']))
    
    
    player_vars = LpVariable.dicts("Player",all_names,lowBound=0,upBound=1,cat='Integer')
    player_chosen = LpVariable.dicts("Chosen",name_list,0,1,cat='Integer')
    
    print("all plrs: ", len(all_players.index))
    print('vars: ', len(player_vars))
    print('nms: ', len(all_names))
    
    prob += lpSum([allWs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allKs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allSVs[i]*player_vars[i] for i in all_names])
    prob += lpSum([allAdjERA[i]*player_vars[i] for i in all_names])
    prob += lpSum([allAdjWHIP[i]*player_vars[i] for i in all_names])
    
    
    
    prob += lpSum([allCosts[i]*player_vars[i] for i in all_names]) <= 105
    
    
    prob += lpSum([(allPOS[i]=='SP')*player_vars[i] for i in all_names]) == 8
    
    prob += lpSum([(allPOS[i]=='RP')*player_vars[i] for i in all_names]) == 3
    prob += lpSum([(allPOS[i]=='DH')*player_vars[i] for i in all_names]) == 0
    
    
    # ensure no player is selected twice

    for f in all_names:
        prob += player_vars[f]>= player_chosen[all_players.iloc[f]['Name']]*0.1
        prob += player_vars[f]<= player_chosen[all_players.iloc[f]['Name']]*1e5
    
    prob += lpSum(player_chosen)==11

    LpSolverDefault.msg = 1 
    prob.solve()
    Team = pd.DataFrame()
    print("Status:", LpStatus[prob.status])
    for v in prob.variables():
        if v.varValue>0 and not 'Chosen' in v.name:
            Team = Team.append(all_players.loc[int(v.name.split('_')[1])])
            
    return Team#Team.append({'Name':'Total','WHIP':(sum(Team['BB'])+sum(Team['Hits']))/sum(Team['IP']),'ERA':(sum(Team['ER'])/sum(Team['IP']))*9,'W':sum(Team['W']),'SV':sum(Team['SV']),'K':sum(Team['K']),'$':sum(Team['$'])},ignore_index=True)
 
Team_max_off = find_max_off()
Team_max_def = find_max_def()

Team_max = Team_max_off.append(Team_max_def, ignore_index=True)
Team_max
Team_max.fillna(0, inplace=True)
Team_max = Team_max.append({'Name':'Total','AVG':sum(Team_max['H'])/sum(Team_max['AB']),'R':sum(Team_max['R']),'RBI':sum(Team_max['RBI']),'HR':sum(Team_max['HR']),'SB':sum(Team_max['SB']),'WHIP':(sum(Team_max['BB'])+sum(Team_max['Hits']))/sum(Team_max['IP']),'ERA':(sum(Team_max['ER'])/sum(Team_max['IP']))*9,'W':sum(Team_max['W']),'SV':sum(Team_max['SV']),'K':sum(Team_max['K']),'$':sum(Team_max['$'])},ignore_index=True)
Team_max[['Name','R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP','Ovr','$']]
Max_Tot = Team_max[Team_max['Name'] == 'Total']
Max_Tot[['Name','R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP','Ovr','$']]
#print(Max_Tot)
print('Max',get_score(Max_Tot.loc[25]))
Team_max[['Name','POS','R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP','Ovr','$']]


In [13]:
GUROBI().available()

True

In [32]:
get_all_players()[['normW','normSV','normK','normERA','normWHIP','normR','normHR','normRBI','normSB','normAVG']].describe()

  return self.partial_fit(X, y)


Unnamed: 0,normW,normSV,normK,normERA,normWHIP,normR,normHR,normRBI,normSB,normAVG
count,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
mean,0.208201,0.040119,0.200778,0.272091,0.247971,0.232018,0.234332,0.197644,0.094512,0.239333
std,0.287,0.169822,0.254647,0.307216,0.281845,0.292863,0.289443,0.250578,0.159518,0.28413
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.468254,0.0,0.401035,0.570455,0.543478,0.475539,0.430836,0.357531,0.119369,0.476923
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [8]:
def find_w_selected(budget,selected):
    m = Model("mip1")
    # this works really well without using the maxes, but I think it might peform better with them
    
    x,hitter_avg_maxes,pitcher_avg_maxes,hitter_avg_mins,pitcher_avg_mins = get_all_players()
    scaler = preprocessing.StandardScaler(with_std=False)
    all_players = get_hitter_prices(get_hitters()).nsmallest(150,'Ovr').append(get_pitcher_prices(get_starting_pitchers()).nsmallest(120,'Ovr'),ignore_index=True).append(get_pitcher_prices(get_closing_pitchers()).nsmallest(22,'SV rank'),ignore_index=True)
    all_players.fillna(0, inplace=True)
    all_players['ERA'] = all_players['ERA'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['WHIP'] = all_players['WHIP'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['AB'] = all_players['AB'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['adjAVG'] = all_players['AVG']*100
    norm_players = all_players[['R', 'HR', 'RBI', 'SB','adjAVG','W','SV','K','ERA','WHIP','AB','H']]
    
    norm_players = pd.DataFrame(scaler.fit_transform(norm_players),columns=['R', 'HR', 'RBI', 'SB','adjAVG','W','SV','K', 'ERA', 'WHIP','H','AB'])
    
    for h in HitterMetrics:
        if h == 'AVG':
            all_players['normAVG'] = norm_players['adjAVG']
        else:
            all_players['norm{}'.format(h)] = norm_players[h]
    for p in PitcherMetrics:
        all_players['norm{}'.format(p)] = norm_players[p]
    
    for h in ['AB','H']:
        all_players['norm{}'.format(h)] = norm_players[h]
    
    all_names = list(all_players.index)
    name_list = list(dict.fromkeys(all_players['Name']))
    for s in selected:
        all_players.loc[all_players['Name'] == s,'$'] = 0
        print(all_players[all_players['Name'] == s]['$'])
    allCosts = dict(zip(all_names,all_players['$']))
    
    player_vars = m.addVars(all_names,vtype=GRB.INTEGER,lb=0,ub=1,name='players')
    player_chosen = m.addVars(name_list,vtype=GRB.INTEGER,lb=0,ub=1,name='pl_chosen')
    
    allRuns = dict(zip(all_names,all_players['normR']))
    allHRs = dict(zip(all_names,all_players['normHR']))
    allRBIs = dict(zip(all_names,all_players['normRBI']))
    allSBs = dict(zip(all_names,all_players['normSB']*2.5))
    allAVG = dict(zip(all_names,all_players['normAVG']))
    allAB = dict(zip(all_names,all_players['normAB']*0.8))
    allH = dict(zip(all_names,all_players['normH']*0.8))

    
    allWs = dict(zip(all_names,all_players['normW']))
    allKs = dict(zip(all_names,all_players['normK']))
    allSVs = dict(zip(all_names,all_players['normSV']))
    allERA = dict(zip(all_names,all_players['normERA']))
    allWHIP = dict(zip(all_names,all_players['normWHIP']))
    allPOS = dict(zip(all_names,all_players['POS']))
    
    obj = LinExpr()
    
    obj += quicksum([allRuns[i]*player_vars[i] for i in all_names])
    obj += quicksum([allHRs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allRBIs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allSBs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allAVG[i]*player_vars[i] for i in all_names])
    obj += quicksum([allWs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allKs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allSVs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allERA[i]*player_vars[i] for i in all_names])
    obj += quicksum([allWHIP[i]*player_vars[i] for i in all_names])
    obj += quicksum([allCosts[i]*player_vars[i] for i in all_names])
    obj += quicksum([allH[i]*player_vars[i] for i in all_names])
    obj += quicksum([allAB[i]*player_vars[i] for i in all_names])
    m.setObjective(obj, GRB.MAXIMIZE)
    
    m.addConstr(sum([allCosts[i]*player_vars[i] for i in all_names])<= budget)
    
    m.addConstr(sum([(allPOS[i]=='C')*player_vars[i] for i in all_names]) == 1)
    
    # update these based on position depth
    m.addConstr(sum([(allPOS[i]=='1B')*player_vars[i] for i in all_names]) == 2)
    m.addConstr(sum([(allPOS[i]=='2B')*player_vars[i] for i in all_names]) == 2)
    m.addConstr(sum([(allPOS[i]=='3B')*player_vars[i] for i in all_names]) == 1)
    m.addConstr(sum([(allPOS[i]=='SS')*player_vars[i] for i in all_names]) == 2)
    
    m.addConstr(sum([(allPOS[i]=='OF')*player_vars[i] for i in all_names]) == 6)
    
    m.addConstr(sum([(allPOS[i]=='SP')*player_vars[i] for i in all_names]) == 8)
    
    m.addConstr(sum([(allPOS[i]=='RP')*player_vars[i] for i in all_names]) == 3)
    m.addConstr(sum([(allPOS[i]=='DH')*player_vars[i] for i in all_names]) == 0)
    
    
    m.addConstr(sum([allCosts[i]*player_vars[i]*(allPOS[i]=='RP' or allPOS[i]=='SP') for i in all_names]) <= 120)
    m.addConstr(sum([allCosts[i]*player_vars[i]*(allPOS[i]!='RP' and allPOS[i]!='SP') for i in all_names]) <= 140)
    
    
    # ensure no player is selected twice

    for f in all_names:
        m.addConstr(player_vars[f]>= player_chosen[all_players.iloc[f]['Name']]*0.01)
        m.addConstr(player_vars[f]<= player_chosen[all_players.iloc[f]['Name']]*1e8)
    
    
    m.addConstr(sum(player_chosen.values())==25)
    
    for s in selected:
        m.addConstr(player_chosen[s] >= 0.7)
    
    m.setParam(GRB.Param.PoolSolutions,10)
    m.setParam(GRB.Param.PoolSearchMode,2)
    
    m.optimize()
    
    nSolns = m.SolCount
    max_tot = 0
    best = 0
    for e in range(nSolns):
        Team = pd.DataFrame()
        m.setParam(GRB.Param.SolutionNumber,e)
        for v in m.getVars():
            if v.Xn > 0.01 and not 'chosen' in v.varName:
                Team = Team.append(all_players.iloc[int(v.varName.split('[')[1].split(']')[0])])
        Team = Team.append(Team.append({'Name':'Total','AVG':sum(Team['H'])/sum(Team['AB']),'R':sum(Team['R']),'RBI':sum(Team['RBI']),'HR':sum(Team['HR']),'SB':sum(Team['SB']),'WHIP':(sum(Team['BB'])+sum(Team['Hits']))/sum(Team['IP']),'ERA':(sum(Team['ER'])/sum(Team['IP']))*9,'W':sum(Team['W']),'SV':sum(Team['SV']),'K':sum(Team['K']),'$':sum(Team['$'])},ignore_index=True))
        sc = get_score(Team[Team['Name'] == 'Total'].iloc[0])['Total']
        if (sc > max_tot):
            best = e
            max_tot = sc
    print('Best Model: {}'.format(best))
    Team = pd.DataFrame()
    m.setParam(GRB.Param.SolutionNumber,best)
    for v in m.getVars():
        if v.Xn > 0.01 and not 'chosen' in v.varName:
            Team = Team.append(all_players.iloc[int(v.varName.split('[')[1].split(']')[0])])
    
    print('Obj:', m.objVal)
    
    Team['ERA'] = Team['ERA'].apply(lambda x: 0 if x == 0 else 1/x)
    Team['WHIP'] = Team['WHIP'].apply(lambda x: 0 if x == 0 else 1/x)
    return Team.append({'Name':'Total','AVG':sum(Team['H'])/sum(Team['AB']),'R':sum(Team['R']),'RBI':sum(Team['RBI']),'HR':sum(Team['HR']),'SB':sum(Team['SB']),'WHIP':(sum(Team['BB'])+sum(Team['Hits']))/sum(Team['IP']),'ERA':(sum(Team['ER'])/sum(Team['IP']))*9,'W':sum(Team['W']),'SV':sum(Team['SV']),'K':sum(Team['K']),'$':sum(Team['$'])},ignore_index=True)    
Team = find_w_selected(220,['Taylor Rogers',"J.T. Realmuto"])

Max_Tot = Team[Team['Name'] == 'Total']

print('Max',get_score(Max_Tot.iloc[0]))
Team[['Name','POS','R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP','Ovr','$']] 
#help(tupledict()) 
    

Using license file /Users/samozminkowski/gurobi.lic
Academic license - for non-commercial use only


NameError: name 'get_all_players' is not defined

In [21]:
def simple_gurobi():
    m = Model("mip1")
    
    x,hitter_avg_maxes,pitcher_avg_maxes,hitter_avg_mins,pitcher_avg_mins = get_all_players()
    scaler = preprocessing.StandardScaler(with_std=False)
    
    all_players = get_hitter_prices(get_hitters()).nsmallest(150,'Ovr').append(get_pitcher_prices(get_starting_pitchers()).nsmallest(120,'Ovr'),ignore_index=True).append(get_pitcher_prices(get_closing_pitchers()).nsmallest(22,'SV rank'),ignore_index=True) 
    
    all_players = all_players[all_players['Name'] != 'Max Kepler'].reset_index(drop=True)
    all_players.fillna(0, inplace=True)          
        
                              
    all_players['ERA'] = all_players['ERA'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['WHIP'] = all_players['WHIP'].apply(lambda x: 0 if x == 0 else 1/x)
    #all_players['AB'] = all_players['AB'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['adjAVG'] = all_players['AVG']*100
    norm_players = all_players[['R', 'HR', 'RBI', 'SB','adjAVG','W','SV','K','ERA','WHIP','AB','H']]
    
    norm_players = pd.DataFrame(scaler.fit_transform(norm_players),columns=['R', 'HR', 'RBI', 'SB','adjAVG','W','SV','K', 'ERA', 'WHIP','H','AB'])
    
    for h in HitterMetrics:
        if h == 'AVG':
            all_players['normAVG'] = norm_players['adjAVG']
        else:
            all_players['norm{}'.format(h)] = norm_players[h]
    for p in PitcherMetrics:
        all_players['norm{}'.format(p)] = norm_players[p]
    
    for h in ['AB','H']:
        all_players['norm{}'.format(h)] = norm_players[h]
    
    all_names = list(all_players.index)
    name_list = list(dict.fromkeys(all_players['Name']))
    allCosts = dict(zip(all_names,all_players['$']))
    
    player_vars = m.addVars(all_names,vtype=GRB.INTEGER,lb=0,ub=1,name='players')
    player_chosen = m.addVars(name_list,vtype=GRB.INTEGER,lb=0,ub=1,name='pl_chosen')
    
    allRuns = dict(zip(all_names,all_players['normR']))
    allHRs = dict(zip(all_names,all_players['normHR']))
    allRBIs = dict(zip(all_names,all_players['normRBI']))
    allSBs = dict(zip(all_names,all_players['normSB']))
    allAVG = dict(zip(all_names,all_players['normAVG']))
    allAB = dict(zip(all_names,all_players['normAB']))
    allH = dict(zip(all_names,all_players['normH']))

    
    allWs = dict(zip(all_names,all_players['normW']))
    allKs = dict(zip(all_names,all_players['normK']))
    allSVs = dict(zip(all_names,all_players['normSV']))
    allERA = dict(zip(all_names,all_players['normERA']))
    allWHIP = dict(zip(all_names,all_players['normWHIP']))
    allPOS = dict(zip(all_names,all_players['POS']))

    
    obj = LinExpr()
    
    obj += quicksum([allRuns[i]*player_vars[i] for i in all_names])
    obj += quicksum([allHRs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allRBIs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allSBs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allAVG[i]*player_vars[i] for i in all_names])
    obj += quicksum([allWs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allKs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allSVs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allERA[i]*player_vars[i] for i in all_names])
    obj += quicksum([allWHIP[i]*player_vars[i] for i in all_names])
    obj += quicksum([allCosts[i]*player_vars[i] for i in all_names])
    obj += quicksum([allH[i]*player_vars[i] for i in all_names])
    obj += quicksum([allAB[i]*player_vars[i] for i in all_names])
    m.setObjective(obj, GRB.MAXIMIZE)
    
    m.addConstr(sum([allCosts[i]*player_vars[i] for i in all_names])<= 260)
    
    m.addConstr(sum([(allPOS[i]=='C')*player_vars[i] for i in all_names]) == 1)
    
    # update these based on position depth
    m.addConstr(sum([(allPOS[i]=='1B')*player_vars[i] for i in all_names]) == 2)
    m.addConstr(sum([(allPOS[i]=='2B')*player_vars[i] for i in all_names]) == 2)
    m.addConstr(sum([(allPOS[i]=='3B')*player_vars[i] for i in all_names]) == 1)
    m.addConstr(sum([(allPOS[i]=='SS')*player_vars[i] for i in all_names]) == 2)
    
    m.addConstr(sum([(allPOS[i]=='OF')*player_vars[i] for i in all_names]) == 6)
    
    m.addConstr(sum([(allPOS[i]=='SP')*player_vars[i] for i in all_names]) == 8)
    
    m.addConstr(sum([(allPOS[i]=='RP')*player_vars[i] for i in all_names]) == 3)
    m.addConstr(sum([(allPOS[i]=='DH')*player_vars[i] for i in all_names]) == 0)
    
    
    m.addConstr(sum([allCosts[i]*player_vars[i]*(allPOS[i]=='RP' or allPOS[i]=='SP') for i in all_names]) <= 120)
    m.addConstr(sum([allCosts[i]*player_vars[i]*(allPOS[i]!='RP' and allPOS[i]!='SP') for i in all_names]) <= 140)
    
    
    # ensure no player is selected twice

    for f in all_names:
        m.addConstr(player_vars[f]>= player_chosen[all_players.iloc[f]['Name']]*0.01)
        m.addConstr(player_vars[f]<= player_chosen[all_players.iloc[f]['Name']]*1e8)
    
    
    m.addConstr(sum(player_chosen.values())==25)
    
    m.setParam(GRB.Param.PoolSolutions,10)
    m.setParam(GRB.Param.PoolSearchMode,2)
    
    m.optimize()
    
#    for v in m.getVars():
#        if v.Xn > 0.01 and not 'chosen' in v.varName:
#            Team = Team.append(all_players.iloc[int(v.varName.split('[')[1].split(']')[0])])
#    Team = Team.append(Team.append({'Name':'Total','AVG':sum(Team['H'])/sum(Team['AB']),'R':sum(Team['R']),'RBI':sum(Team['RBI']),'HR':sum(Team['HR']),'SB':sum(Team['SB']),'WHIP':(sum(Team['BB'])+sum(Team['Hits']))/sum(Team['IP']),'ERA':(sum(Team['ER'])/sum(Team['IP']))*9,'W':sum(Team['W']),'SV':sum(Team['SV']),'K':sum(Team['K']),'$':sum(Team['$'])},ignore_index=True))

#    Team = pd.DataFrame()
#    for v in m.getVars():
#        if v.Xn > 0.01 and not 'chosen' in v.varName:
#            Team = Team.append(all_players.iloc[int(v.varName.split('[')[1].split(']')[0])])
    
#    print('Obj:', m.objVal)
    Team = pd.DataFrame()
    #print("Status:", LpStatus[prob.status])
    for v in m.getVars():
        if v.x > 0.01 and not 'chosen' in v.varName:
            print(v.varName + ": " + str(v.x))
            Team = Team.append(all_players.iloc[int(v.varName.split('[')[1].split(']')[0])])
        if v.x > 0.01 and 'chosen' in v.varName:
            print(v.varName + ": " + str(v.x))
    
    Team['ERA'] = Team['ERA'].apply(lambda x: 0 if x == 0 else 1/x)
    Team['WHIP'] = Team['WHIP'].apply(lambda x: 0 if x == 0 else 1/x)
    return Team.append({'Name':'Total','AVG':sum(Team['H'])/sum(Team['AB']),'R':sum(Team['R']),'RBI':sum(Team['RBI']),'HR':sum(Team['HR']),'SB':sum(Team['SB']),'WHIP':(sum(Team['BB'])+sum(Team['Hits']))/sum(Team['IP']),'ERA':(sum(Team['ER'])/sum(Team['IP']))*9,'W':sum(Team['W']),'SV':sum(Team['SV']),'K':sum(Team['K']),'$':sum(Team['$'])},ignore_index=True)    
Team = simple_gurobi()

#Max_Tot = Team[Team['Name'] == 'Total']

#print('Max',get_score(Max_Tot.iloc[0]))
Team[['Name','POS','R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP','Ovr','$',"H","AB"]] 

#all_players = get_hitter_prices(get_hitters()).nsmallest(150,'Ovr').append(get_pitcher_prices(get_starting_pitchers()).nsmallest(120,'Ovr'),ignore_index=True).append(get_pitcher_prices(get_closing_pitchers()).nsmallest(22,'SV rank'),ignore_index=True)

#pl = all_players[all_players['Name'].str.contains('Mike Trout')]
#pl.loc[0,'POS'] == 'SP' or pl.loc[0,'POS'] == 'RP'

#pl = all_players[all_players['Name'].str.contains('mike trout',case=False)].reset_index(drop=True)
#pl2 = pd.DataFrame()
#pl2.count()

#help(tupledict()) 
#Team.columns

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


Parameter PoolSolutions unchanged
   Value: 10  Min: 1  Max: 2000000000  Default: 10
Changed value of parameter PoolSearchMode to 2
   Prev: 0  Min: 0  Max: 2  Default: 0
Gurobi Optimizer version 9.0.1 build v9.0.1rc0 (mac64)
Optimize a model with 595 rows, 558 columns and 2302 nonzeros
Model fingerprint: 0x036b2e2d
Variable types: 0 continuous, 558 integer (0 binary)
Coefficient statistics:
  Matrix range     [1e-02, 1e+08]
  Objective range  [2e+02, 6e+02]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 3e+02]
Presolve removed 584 rows and 297 columns
Presolve time: 0.00s
Presolved: 11 rows, 261 columns, 767 nonzeros
Variable types: 0 continuous, 261 integer (261 binary)

Root relaxation: objective 1.445925e+03, 26 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0 1445.92508    0    4          - 1445.92508      -     -    0s
H    0  

Unnamed: 0,Name,POS,R,HR,RBI,SB,AVG,W,SV,K,ERA,WHIP,Ovr,$,H,AB
0,Eddie Rosario,OF,86.8,29.6,99.8,4.8,0.284,0.0,0.0,0.0,0.0,0.0,69.8,19.2,166.5,586.0
1,Jorge Polanco,SS,94.6,19.3,84.7,7.6,0.281,0.0,0.0,0.0,0.0,0.0,77.8,15.2,169.8,605.0
2,Andrew Benintendi,OF,99.4,19.2,78.2,13.5,0.272,0.0,0.0,0.0,0.0,0.0,78.0,16.6,162.8,598.0
3,Nicholas Castellanos,OF,87.7,26.9,88.4,3.0,0.274,0.0,0.0,0.0,0.0,0.0,103.2,13.0,155.3,567.0
4,Bryan Reynolds,OF,83.1,17.9,66.0,6.3,0.288,0.0,0.0,0.0,0.0,0.0,105.2,9.5,163.2,566.0
5,J.T. Realmuto,C,77.8,24.0,76.4,5.8,0.27,0.0,0.0,0.0,0.0,0.0,106.2,20.1,135.7,502.0
6,Matt Chapman,3B,94.2,34.7,98.2,2.7,0.258,0.0,0.0,0.0,0.0,0.0,116.2,15.2,148.3,577.0
7,Eric Hosmer,1B,79.7,23.4,80.2,3.4,0.263,0.0,0.0,0.0,0.0,0.0,128.2,6.8,147.7,561.0
8,Elvis Andrus,SS,69.2,13.5,66.0,20.8,0.262,0.0,0.0,0.0,0.0,0.0,133.0,8.6,152.0,581.0
9,Jason Heyward,OF,72.0,15.8,72.1,6.5,0.261,0.0,0.0,0.0,0.0,0.0,143.6,2.5,133.4,512.0


In [23]:
get_score(Team.loc[25]) 

{'R': 9, 'HR': 5, 'RBI': 9, 'SB': 5, 'AVG': 7, 'K': 12, 'W': 7, 'SV': 8, 'ERA': 7, 'WHIP': 7, 'Total': 76}


{'R': 9,
 'HR': 5,
 'RBI': 9,
 'SB': 5,
 'AVG': 7,
 'K': 12,
 'W': 7,
 'SV': 8,
 'ERA': 7,
 'WHIP': 7,
 'Total': 76}

In [39]:
def fin_model():
    budget = 260
    m = Model("mip1")
    # this works really well without using the maxes

    scaler = preprocessing.StandardScaler(with_std=False)
    all_players = get_hitter_prices(get_hitters()).nsmallest(150,'Ovr').append(get_pitcher_prices(get_starting_pitchers()).nsmallest(120,'Ovr'),ignore_index=True).append(get_pitcher_prices(get_closing_pitchers()).nsmallest(22,'SV rank'),ignore_index=True)

    all_players.fillna(0, inplace=True)
    all_players['ERA'] = all_players['ERA'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['WHIP'] = all_players['WHIP'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['AB'] = all_players['AB'].apply(lambda x: 0 if x == 0 else 1/x)
    all_players['adjAVG'] = all_players['AVG']*100
    norm_players = all_players[['R', 'HR', 'RBI', 'SB','adjAVG','W','SV','K','ERA','WHIP','AB','H']]

    norm_players = pd.DataFrame(scaler.fit_transform(norm_players),columns=['R', 'HR', 'RBI', 'SB','adjAVG','W','SV','K', 'ERA', 'WHIP','H','AB'])

    for h in HitterMetrics:
        if h == 'AVG':
            all_players['normAVG'] = norm_players['adjAVG']
        else:
            all_players['norm{}'.format(h)] = norm_players[h]
    for p in PitcherMetrics:
        all_players['norm{}'.format(p)] = norm_players[p]

    for h in ['AB','H']:
        all_players['norm{}'.format(h)] = norm_players[h]

    all_names = list(all_players.index)
    name_list = list(dict.fromkeys(all_players['Name']))
    allCosts = dict(zip(all_names,all_players['$']))

    player_vars = m.addVars(all_names,vtype=GRB.INTEGER,lb=0,ub=1,name='players')
    player_chosen = m.addVars(name_list,vtype=GRB.INTEGER,lb=0,ub=1,name='pl_chosen')

    allRuns = dict(zip(all_names,all_players['normR']))
    allHRs = dict(zip(all_names,all_players['normHR']))
    allRBIs = dict(zip(all_names,all_players['normRBI']))
    allSBs = dict(zip(all_names,all_players['normSB']*2.5))
    allAVG = dict(zip(all_names,all_players['normAVG']))
    allAB = dict(zip(all_names,all_players['normAB']*0.8))
    allH = dict(zip(all_names,all_players['normH']*0.8))


    allWs = dict(zip(all_names,all_players['normW']))
    allKs = dict(zip(all_names,all_players['normK']))
    allSVs = dict(zip(all_names,all_players['normSV']))
    allERA = dict(zip(all_names,all_players['normERA']))
    allWHIP = dict(zip(all_names,all_players['normWHIP']))
    allPOS = dict(zip(all_names,all_players['POS']))

    obj = LinExpr()

    obj += quicksum([allRuns[i]*player_vars[i] for i in all_names])
    obj += quicksum([allHRs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allRBIs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allSBs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allAVG[i]*player_vars[i] for i in all_names])
    obj += quicksum([allWs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allKs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allSVs[i]*player_vars[i] for i in all_names])
    obj += quicksum([allERA[i]*player_vars[i] for i in all_names])
    obj += quicksum([allWHIP[i]*player_vars[i] for i in all_names])
    obj += quicksum([allCosts[i]*player_vars[i] for i in all_names])
    obj += quicksum([allH[i]*player_vars[i] for i in all_names])
    obj += quicksum([allAB[i]*player_vars[i] for i in all_names])
    m.setObjective(obj, GRB.MAXIMIZE)
    
    m.addConstr(sum([allCosts[i]*player_vars[i] for i in all_names])<= budget)
    
    m.addConstr(sum([(allPOS[i]=='C')*player_vars[i] for i in all_names]) == 1)
    
    # update these based on position depth
    m.addConstr(sum([(allPOS[i]=='1B')*player_vars[i] for i in all_names]) == 2)
    m.addConstr(sum([(allPOS[i]=='2B')*player_vars[i] for i in all_names]) == 2)
    m.addConstr(sum([(allPOS[i]=='3B')*player_vars[i] for i in all_names]) == 1)
    m.addConstr(sum([(allPOS[i]=='SS')*player_vars[i] for i in all_names]) == 2)
    
    m.addConstr(sum([(allPOS[i]=='OF')*player_vars[i] for i in all_names]) == 6)
    
    m.addConstr(sum([(allPOS[i]=='SP')*player_vars[i] for i in all_names]) == 8)
    
    m.addConstr(sum([(allPOS[i]=='RP')*player_vars[i] for i in all_names]) == 3)
    m.addConstr(sum([(allPOS[i]=='DH')*player_vars[i] for i in all_names]) == 0)
    
    
    m.addConstr(sum([allCosts[i]*player_vars[i]*(allPOS[i]=='RP' or allPOS[i]=='SP') for i in all_names]) <= 120)
    m.addConstr(sum([allCosts[i]*player_vars[i]*(allPOS[i]!='RP' and allPOS[i]!='SP') for i in all_names]) <= 140)
    
    
    # ensure no player is selected twice

    for f in all_names:
        m.addConstr(player_vars[f]>= player_chosen[all_players.iloc[f]['Name']]*0.01)
        m.addConstr(player_vars[f]<= player_chosen[all_players.iloc[f]['Name']]*1e8)
    
    
    m.addConstr(sum(player_chosen.values())==25)
    
    
    m.setParam(GRB.Param.PoolSolutions,10)
    m.setParam(GRB.Param.PoolSearchMode,2)
    
    m.setParam(GRB.Param.OutputFlag,0)

    m.optimize()
    
    nSolns = m.SolCount
    max_tot = 0 
    best = 0 
    all_players['AB'] = all_players['AB'].apply(lambda x: 0 if x == 0 else 1/x)

    for e in range(nSolns):
        Team = pd.DataFrame()
        m.setParam(GRB.Param.SolutionNumber,e)
        for v in m.getVars():
            if v.Xn > 0.01 and not 'chosen' in v.varName:
                Team = Team.append(all_players.iloc[int(v.varName.split('[')[1].split(']')[0])])
        Team = Team.append(Team.append({'Name':'Total','AVG':sum(Team['H'])/sum(Team['AB']),'R':sum(Team['R']),'RBI':sum(Team['RBI']),'HR':sum(Team['HR']),'SB':sum(Team['SB']),'WHIP':(sum(Team['BB'])+sum(Team['Hits']))/sum(Team['IP']),'ERA':(sum(Team['ER'])/sum(Team['IP']))*9,'W':sum(Team['W']),'SV':sum(Team['SV']),'K':sum(Team['K']),'$':sum(Team['$'])},ignore_index=True))
        sc = get_score(Team[Team['Name'] == 'Total'].iloc[0])['Total']
        if (sc > max_tot):
            best = e 
            max_tot = sc
    Team = pd.DataFrame()
    m.setParam(GRB.Param.SolutionNumber,best)
    for v in m.getVars():
        if v.Xn > 0.01 and not 'chosen' in v.varName:
            Team = Team.append(all_players.iloc[int(v.varName.split('[')[1].split(']')[0])])
    
    
    Team['ERA'] = Team['ERA'].apply(lambda x: 0 if x == 0 else 1/x)
    Team['WHIP'] = Team['WHIP'].apply(lambda x: 0 if x == 0 else 1/x)
    return Team.append({'Name':'Total','AVG':(sum(Team['H'])/sum(Team['AB'])),'R':sum(Team['R']),'RBI':sum(Team['RBI']),'HR':sum(Team['HR']),'SB':sum(Team['SB']),'WHIP':(sum(Team['BB'])+sum(Team['Hits']))/sum(Team['IP']),'ERA':(sum(Team['ER'])/sum(Team['IP']))*9,'W':sum(Team['W']),'SV':sum(Team['SV']),'K':sum(Team['K']),'$':sum(Team['$'])},ignore_index=True)  

In [40]:
fin = fin_model()
fin[['Name','POS','R', 'HR', 'RBI', 'SB', 'AVG','W', 'SV', 'K', 'ERA', 'WHIP','Ovr','$',"H","AB"]]

Parameter PoolSolutions unchanged
   Value: 10  Min: 1  Max: 2000000000  Default: 10
Changed value of parameter PoolSearchMode to 2
   Prev: 0  Min: 0  Max: 2  Default: 0
{'R': 1, 'HR': 1, 'RBI': 1, 'SB': 1, 'AVG': 9, 'K': 1, 'W': 1, 'SV': 2, 'ERA': 7, 'WHIP': 6, 'Total': 30}
{'R': 1, 'HR': 1, 'RBI': 1, 'SB': 1, 'AVG': 9, 'K': 1, 'W': 1, 'SV': 2, 'ERA': 7, 'WHIP': 6, 'Total': 30}
{'R': 1, 'HR': 1, 'RBI': 1, 'SB': 1, 'AVG': 9, 'K': 1, 'W': 1, 'SV': 2, 'ERA': 7, 'WHIP': 5, 'Total': 29}
{'R': 1, 'HR': 1, 'RBI': 1, 'SB': 1, 'AVG': 9, 'K': 1, 'W': 1, 'SV': 2, 'ERA': 7, 'WHIP': 6, 'Total': 30}
{'R': 1, 'HR': 1, 'RBI': 1, 'SB': 1, 'AVG': 7, 'K': 1, 'W': 1, 'SV': 2, 'ERA': 7, 'WHIP': 6, 'Total': 28}
{'R': 1, 'HR': 1, 'RBI': 1, 'SB': 1, 'AVG': 7, 'K': 1, 'W': 1, 'SV': 2, 'ERA': 7, 'WHIP': 6, 'Total': 28}
{'R': 1, 'HR': 1, 'RBI': 1, 'SB': 1, 'AVG': 9, 'K': 1, 'W': 1, 'SV': 2, 'ERA': 7, 'WHIP': 4, 'Total': 28}
{'R': 1, 'HR': 1, 'RBI': 1, 'SB': 1, 'AVG': 7, 'K': 1, 'W': 1, 'SV': 2, 'ERA': 7, 'WHIP

Unnamed: 0,Name,POS,R,HR,RBI,SB,AVG,W,SV,K,ERA,WHIP,Ovr,$,H,AB
0,Ozzie Albies,2B,19.8,9.1,20.3,5.1,0.288,0.0,0.0,0.0,0.0,0.0,49.6,22.4,64.6,224.0
1,Yoan Moncada,3B,19.9,10.2,19.5,4.4,0.267,0.0,0.0,0.0,0.0,0.0,72.6,17.1,58.4,219.0
2,Andrew Benintendi,OF,18.9,7.1,17.1,5.3,0.272,0.0,0.0,0.0,0.0,0.0,88.2,14.1,60.0,220.0
3,Lorenzo Cain,OF,16.6,5.9,14.5,7.0,0.277,0.0,0.0,0.0,0.0,0.0,100.8,12.5,59.7,215.0
4,J.T. Realmuto,C,16.1,8.9,18.0,2.1,0.27,0.0,0.0,0.0,0.0,0.0,106.0,19.6,50.4,187.0
5,Adam Eaton,OF,16.0,5.2,14.3,4.3,0.281,0.0,0.0,0.0,0.0,0.0,114.6,7.4,58.1,207.0
6,Bryan Reynolds,OF,14.8,6.5,15.0,2.3,0.288,0.0,0.0,0.0,0.0,0.0,118.0,6.4,59.1,205.0
7,Amed Rosario,SS,14.4,5.4,14.3,7.2,0.276,0.0,0.0,0.0,0.0,0.0,118.4,9.9,58.0,210.0
8,Joey Votto,1B,18.0,8.1,17.1,1.3,0.27,0.0,0.0,0.0,0.0,0.0,122.2,7.2,53.4,198.0
9,Eric Hosmer,1B,16.0,8.5,18.3,1.2,0.263,0.0,0.0,0.0,0.0,0.0,131.8,5.6,53.4,202.0
