## Import statements and globals

In [10]:
import pulp
import pandas as pd
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth
from scipy import optimize


HitterPositions = ['C', '1B', '2B', 'SS', '3B', 'OF', 'outer', 'inner', 'util']
HitterMetrics = ['R', 'HR', 'RBI', 'SB', 'AVG']

PitcherPositions = ['SP', 'RP']
PitcherMetrics = ['W', 'SV', 'K', 'ERA', 'WHIP']

## Import data

In [2]:
def get_hitters():
    hitters = pd.read_csv('razzball-hitters.csv', index_col='#', usecols=['#','Name','Team','ESPN','R','HR', 'RBI', 'SB','AVG',])
    hitters.rename_axis('Razzball_Rank', inplace=True)
    hitters.reset_index(inplace=True)

    # sort and rank
    for metric in HitterMetrics:
        hitters.sort_values(by=[metric],inplace=True, ascending=False)
        hitters.reset_index(inplace=True, drop=True)
        hitters.index.rename('{} rank'.format(metric), inplace=True)
        hitters.reset_index(inplace=True)
    hitters['Ovr'] = (hitters['AVG rank'] + hitters['SB rank'] + hitters['RBI rank'] + hitters['HR rank'] + hitters['R rank']) / 5
    #hitters['Ovr'] = (hitters['Ovr'] + hitters['Razzball_Rank']) / 2
    
    hitters = hitters.assign(ESPN=hitters.ESPN.str.split('/')).explode('ESPN')
    hitters.sort_values(by=['Ovr'],inplace=True,ascending=True)
    return hitters

def get_pitchers():
    pitchers = pd.read_csv('razzball-pitchers.csv', index_col='#', usecols=['#','Name','Team','POS','W', 'SV', 'K', 'ERA', 'WHIP'])
    pitchers.rename_axis('Razzball_Rank', inplace=True)
    pitchers.reset_index(inplace=True)
    
    for metric in PitcherMetrics:
        pitchers.sort_values(by=[metric],inplace=True, ascending=(metric=='WHIP' or metric=='ERA'))
        pitchers.reset_index(inplace=True, drop=True)
        pitchers.rename_axis('{} rank'.format(metric), inplace=True)
        pitchers.reset_index(inplace=True)
    pitchers['Ovr'] = (pitchers['W rank'] + pitchers['SV rank'] + pitchers['K rank'] + pitchers['ERA rank'] + pitchers['WHIP rank']) / 5
    pitchers = pitchers.assign(POS=pitchers.POS.str.split('/')).explode('POS')
    pitchers.sort_values(by=['Ovr'],inplace=True,ascending=True)
    return pitchers
    
def get_hitter_prices(hitters):
    prices = pd.read_csv('razzball-hitters-prices.csv', index_col='#', usecols=['#', 'Name', 'Team', '$', '$R', '$HR', '$RBI', '$SB', '$AVG'])
    hitters = hitters.merge(prices, left_on=['Name', 'Team'], right_on=['Name','Team'], how='left')
    return hitters

def get_pitcher_prices(pitchers):
    prices = pd.read_csv('razzball-pitchers-prices.csv', index_col='#', usecols=['#','Name','Team','$','$W','$SV','$K','$WHIP','$ERA'])
    pitchers = pitchers.merge(prices, left_on['Name', 'Team'], right_on=['Name','Team'], how='left')
    return pitchers
    
def split_by_pos(hitters, pos):
    if (pos == 'C'):
        return hitters[hitters['ESPN'] == 'C']
    elif (pos == '1B'):
        return hitters[hitters['ESPN'] == '1B']
    elif (pos == '2B'):
        return hitters[hitters['ESPN'] == '2B']
    elif (pos == 'SS'):
        return hitters[hitters['ESPN'] == 'SS']
    elif (pos == '3B'):
        return hitters[hitters['ESPN'] == '3B']
    elif (pos == 'OF'):
        return hitters[hitters['ESPN'] == 'OF']
    elif (pos == 'corner'):
        return hitters[hitters['ESPN'] == '1B'].append(hitters[hitters['ESPN'] == '3B'])
    elif (pos == 'inner'):
        return hitters[hitters['ESPN'] == '2B'].append(hitters[hitters['ESPN'] == 'SS'])
    elif (pos == 'SP'):
        return hitters[hitters['POS'] == 'SP']
    elif (pos == 'RP'):
        return hitters[hitters['POS'] == 'RP']
    else: # util
        return hitters
        

## Run calculations

In [3]:
def group_players(players):
    bandwidth = estimate_bandwidth(players, quantile=0.05, n_samples=100)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    labels = ms.fit_predict(players)
    #labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    #labels_unique = np.unique(labels)
    #n_clusters_ = len(labels_unique)

    #print("number of estimated clusters : %d" % n_clusters_)
    players['group'] = labels
    return players

In [4]:
def group_pitchers_pos(pitchers):
    return [group_players(split_by_pos(pitchers, x).drop(['W rank', 'SV rank', 'K rank', 'ERA rank', 'WHIP rank', 'Razzball_Rank','Team','POS'], axis=1)) for x in PitcherPositions]

def group_by_pos(hitters):
    return [group_players(split_by_pos(hitters, x).drop(['AVG rank', 'SB rank', 'RBI rank', 'HR rank', 'R rank', 'Razzball_Rank','Team','ESPN'], axis=1)) for x in HitterPositions]

### Prep dataframes

In [5]:
hitter_tables = { 'C': "",
                  '1B': "",
                  '2B': "",
                  'SS': "",
                  '3B': "",
                  'inner': "",
                  'outer': "",
                  'OF': "",
                  'util': "" }

pitcher_tables = { 'SP': "",
                   'RP': "" }

In [6]:
hitters = get_hitters().set_index('Name')
groups = []
#C, 1B, 2B, SS, 3B, OF, outer, inner, util
groups = group_by_pos(hitters)

#hitters['group'] = group_players(hitters.drop(['AVG rank', 'SB rank', 'RBI rank', 'HR rank', 'R rank', 'Razzball_Rank','Team','ESPN'], axis=1))
for i in range(len(groups)):
    groups[i].sort_values(by=['Ovr'],inplace=True,ascending=True)
    hitter_tables[HitterPositions[i]] = groups[i]
    #groups[i].to_csv('{}.csv'.format(HitterPositions[i]))

pitchers = get_pitchers().set_index('Name')
pitchers.sort_values(by=['Razzball_Rank'],inplace=True,ascending=True)

groups = group_pitchers_pos(pitchers)
for i in range (len(groups)):
    groups[i].sort_values(by=['Ovr'],inplace=True,ascending=True)
    pitcher_tables[PitcherPositions[i]] = groups[i]
    
hitter_tables

{'C':                       R    HR   RBI   SB    AVG    Ovr  group
 Name                                                         
 J.T. Realmuto      78.0  24.0  76.5  5.8  0.270  107.4     17
 Willson Contreras  65.5  19.1  66.9  3.2  0.257  175.8     19
 Christian Vazquez  54.3  13.3  53.8  4.8  0.263  192.6     20
 Salvador Perez     60.0  24.7  72.7  2.0  0.252  194.2     14
 Yasmani Grandal    72.2  25.4  76.1  3.2  0.239  195.2     14
 ...                 ...   ...   ...  ...    ...    ...    ...
 Beau Taylor         1.7   0.3   1.5  0.1  0.215  660.0      7
 Christian Kelley    1.4   0.3   1.4  0.1  0.207  667.6      7
 Ian Rice            1.2   0.3   1.2  0.1  0.209  669.0      7
 Oscar Hernandez     1.1   0.3   1.2  0.1  0.207  669.6      7
 Austin Bossart      1.1   0.2   1.1  0.1  0.209  669.6      7
 
 [115 rows x 7 columns],
 '1B':                       R    HR    RBI    SB    AVG    Ovr  group
 Name                                                           
 Cody Belling

In [7]:
all_hitters = group_by_pos(get_hitters().set_index('Name'))[0].sort_values(by='Ovr',ascending=True).reset_index();
 
hitters


Unnamed: 0_level_0,AVG rank,SB rank,RBI rank,HR rank,R rank,Razzball_Rank,Team,ESPN,R,HR,RBI,SB,AVG,Ovr
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Mike Trout,7,42,9,1,4,19,LAA,OF,115.8,43.0,109.8,13.2,0.297,12.6
Cody Bellinger,29,47,1,3,1,31,LAD,1B,118.9,42.6,118.1,12.5,0.287,16.2
Cody Bellinger,29,47,1,3,1,31,LAD,OF,118.9,42.6,118.1,12.5,0.287,16.2
Christian Yelich,3,15,33,26,9,42,MIL,OF,112.2,34.7,96.5,21.4,0.304,17.2
Ronald Acuna Jr.,39,4,36,13,6,6,ATL,OF,113.7,36.9,94.8,29.4,0.283,19.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Ronnie Dawson,674,631,676,662,673,678,HOU,OF,1.3,0.3,1.2,0.3,0.203,663.2
Christian Kelley,667,669,667,663,672,658,PIT,C,1.4,0.3,1.4,0.1,0.207,667.6
Ian Rice,656,666,675,673,675,673,CHC,C,1.2,0.3,1.2,0.1,0.209,669.0
Oscar Hernandez,665,667,674,664,678,672,STL,C,1.1,0.3,1.2,0.1,0.207,669.6


In [9]:
def minimization_fun(x):
    sm = 0
    count = 0
    for key in hitter_tables:
        for metric in HitterMetrics:
            sm = sm + hitter_tables[key].iloc[int(x[count])].loc[metric]
        count = count + 1
    for metric in HitterMetrics:
        sm = sm + hitter_tables['util'].iloc[int(x[count])].loc[metric]
    count = count + 1
    for key in range(4):
        for metric in HitterMetrics:
            sm = sm + hitter_tables['OF'].iloc[int(x[count])].loc[metric]
        count = count + 1
    for key in range(8):
        for metric in PitcherMetrics:
            sm = sm + pitcher_tables['SP'].iloc[int(x[count])].loc[metric]
        count = count + 1
    for key in range(3):
        for metric in PitcherMetrics:
            sm = sm + pitcher_tables['RP'].iloc[int(x[count])].loc[metric]
        count = count + 1
    return sm

#minimization_fun([10]*25)
# need to add constraint, need to make minimize only check discrete values
#res = optimize.minimize(minimization_fun, x0=[10]*25, method = 'Nelder-Mead', options={'maxiter':10000})
#res.x

def find_min(x):
    # 20 1B, 20 2B, 20 SS, 20 3B
    # 70 OF, 100 SP, 45 RP
    # = 50,400,000,000 iterations
    
    # 18 1B, 18 2B, 18 SS, 18 3B
    # 70 OF, 80 SP, 40 RP
    # = 23,514,624,000 iterations
    prob = LpProblem("Ideal Roster Problem",LpMinimize)
    
    # C
    cNames = list(hitter_tables['C']['Name'])
    cCosts = dict(zip(hitter_tables['C']['$']))
    cRuns = dict(zip(hitter_tables['C']['R']))
    cHRs = dict(zip(hitter_tables['C']['HR']))
    cRBIs = dict(zip(hitter_tables['C']['RBI']))
    cSBs = dict(zip(hitter_tables['C']['SB']))
    cAVG = dict(zip(hitter_tables['C']['AVG']))
    
    # 1B
    fstNames = list(hitter_tables['1B']['Name'])
    fstCosts = dict(zip(hitter_tables['1B']['$']))
    fstRuns = dict(zip(hitter_tables['1B']['R']))
    fstHRs = dict(zip(hitter_tables['1B']['HR']))
    fstRBIs = dict(zip(hitter_tables['1B']['RBI']))
    fstSBs = dict(zip(hitter_tables['1B']['SB']))
    fstAVG = dict(zip(hitter_tables['1B']['AVG']))
    
    # 2B
    sndNames = list(hitter_tables['2B']['Name'])
    sndCosts = dict(zip(hitter_tables['2B']['$']))
    sndRuns = dict(zip(hitter_tables['2B']['R']))
    sndHRs = dict(zip(hitter_tables['2B']['HR']))
    sndRBIs = dict(zip(hitter_tables['2B']['RBI']))
    sndSBs = dict(zip(hitter_tables['2B']['SB']))
    sndAVG = dict(zip(hitter_tables['2B']['AVG']))
    
    # 3B
    trdNames = list(hitter_tables['3B']['Name'])
    trdCosts = dict(zip(hitter_tables['3B']['$']))
    trdRuns = dict(zip(hitter_tables['3B']['R']))
    trdHRs = dict(zip(hitter_tables['3B']['HR']))
    trdRBIs = dict(zip(hitter_tables['3B']['RBI']))
    trdSBs = dict(zip(hitter_tables['3B']['SB']))
    trdAVG = dict(zip(hitter_tables['3B']['AVG']))
    
    # SS
    ssNames = list(hitter_tables['SS']['Name'])
    ssCosts = dict(zip(hitter_tables['SS']['$']))
    ssRuns = dict(zip(hitter_tables['SS']['R']))
    ssHRs = dict(zip(hitter_tables['SS']['HR']))
    ssRBIs = dict(zip(hitter_tables['SS']['RBI']))
    ssSBs = dict(zip(hitter_tables['SS']['SB']))
    ssAVG = dict(zip(hitter_tables['SS']['AVG']))
    
    # OF 
    ofNames = list(hitter_tables['OF']['Name'])
    ofCosts = dict(zip(hitter_tables['OF']['$']))
    ofRuns = dict(zip(hitter_tables['OF']['R']))
    ofHRs = dict(zip(hitter_tables['OF']['HR']))
    ofRBIs = dict(zip(hitter_tables['OF']['RBI']))
    ofSBs = dict(zip(hitter_tables['OF']['SB']))
    ofAVG = dict(zip(hitter_tables['OF']['AVG']))
    
    # SP
    spNames = list(pitcher_tables['SP']['Name'])
    spCosts = dict(zip(pitcher_tables['SP']['$']))
    spWs = dict(zip(pitcher_tables['SP']['W']))
    spKs = dict(zip(pitcher_tables['SP']['K']))
    spSVs = dict(zip(pitcher_tables['SP']['SV']))
    spERA = dict(zip(pitcher_tables['SP']['ERA']))
    spWHIP = dict(zip(pitcher_tables['SP']['WHIP']))
    
    # RP
    rpNames = list(pitcher_tables['RP']['Name'])
    rpCosts = dict(zip(pitcher_tables['RP']['$']))
    rpWs = dict(zip(pitcher_tables['RP']['W']))
    rpKs = dict(zip(pitcher_tables['RP']['K']))
    rpSVs = dict(zip(pitcher_tables['RP']['SV']))
    rpERA = dict(zip(pitcher_tables['RP']['ERA']))
    rpWHIP = dict(zip(pitcher_tables['RP']['WHIP']))
    
    
                                

SyntaxError: unexpected EOF while parsing (<ipython-input-9-1f54a4748377>, line 43)