## Import statements and globals

In [157]:
import pandas as pd
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth
from scipy import optimize

HitterPositions = ['C', '1B', '2B', 'SS', '3B', 'OF', 'outer', 'inner', 'util']
HitterMetrics = ['R', 'HR', 'RBI', 'SB', 'AVG']

PitcherPositions = ['SP', 'RP']
PitcherMetrics = ['W', 'SV', 'K', 'ERA', 'WHIP']

## Import data

In [170]:
def get_hitters():
    hitters = pd.read_csv('razzball-hitters.csv', index_col='#', usecols=['#','Name','Team','ESPN','R','HR', 'RBI', 'SB','AVG',])
    hitters.rename_axis('Razzball_Rank', inplace=True)
    hitters.reset_index(inplace=True)

    # sort and rank
    for metric in HitterMetrics:
        hitters.sort_values(by=[metric],inplace=True, ascending=False)
        hitters.reset_index(inplace=True, drop=True)
        hitters.rename_axis('{} rank'.format(metric), inplace=True)
        hitters.reset_index(inplace=True)
    hitters['Ovr'] = (hitters['AVG rank'] + hitters['SB rank'] + hitters['RBI rank'] + hitters['HR rank'] + hitters['R rank']) / 5
    #hitters['Ovr'] = (hitters['Ovr'] + hitters['Razzball_Rank']) / 2
    
    hitters = hitters.assign(ESPN=hitters.ESPN.str.split('/')).explode('ESPN')
    hitters.sort_values(by=['Ovr'],inplace=True,ascending=True)
    return hitters

def get_pitchers():
    pitchers = pd.read_csv('razzball-pitchers.csv', index_col='#', usecols=['#','Name','Team','POS','W', 'SV', 'K', 'ERA', 'WHIP'])
    pitchers.rename_axis('Razzball_Rank', inplace=True)
    pitchers.reset_index(inplace=True)
    
    for metric in PitcherMetrics:
        pitchers.sort_values(by=[metric],inplace=True, ascending=(metric=='WHIP' or metric=='ERA'))
        pitchers.reset_index(inplace=True, drop=True)
        pitchers.rename_axis('{} rank'.format(metric), inplace=True)
        pitchers.reset_index(inplace=True)
    pitchers['Ovr'] = (pitchers['W rank'] + pitchers['SV rank'] + pitchers['K rank'] + pitchers['ERA rank'] + pitchers['WHIP rank']) / 5
    pitchers = pitchers.assign(POS=pitchers.POS.str.split('/')).explode('POS')
    pitchers.sort_values(by=['Ovr'],inplace=True,ascending=True)
    return pitchers
    
def get_hitter_prices(hitters):
    prices = pd.read_csv('razzball-hitters-prices.csv', index_col='#', usecols=['#', 'Name', 'Team', '$', '$R', '$HR', '$RBI', '$SB', '$AVG'])
    hitters = hitters.merge(prices, left_on=['Name', 'Team'], right_on=['Name','Team'], how='left')
    return hitters

def get_pitcher_prices(pitchers):
    prices = pd.read_csv('razzball-pitchers-prices.csv', index_col='#', usecols=['#','Name','Team','$','$W','$SV','$K','$WHIP','$ERA'])
    pitchers = pitchers.merge(prices, left_on['Name', 'Team'], right_on=['Name','Team'], how='left')
    return pitchers
    
def split_by_pos(hitters, pos):
    if (pos == 'C'):
        return hitters[hitters['ESPN'] == 'C']
    elif (pos == '1B'):
        return hitters[hitters['ESPN'] == '1B']
    elif (pos == '2B'):
        return hitters[hitters['ESPN'] == '2B']
    elif (pos == 'SS'):
        return hitters[hitters['ESPN'] == 'SS']
    elif (pos == '3B'):
        return hitters[hitters['ESPN'] == '3B']
    elif (pos == 'OF'):
        return hitters[hitters['ESPN'] == 'OF']
    elif (pos == 'corner'):
        return hitters[hitters['ESPN'] == '1B'].append(hitters[hitters['ESPN'] == '3B'])
    elif (pos == 'inner'):
        return hitters[hitters['ESPN'] == '2B'].append(hitters[hitters['ESPN'] == 'SS'])
    elif (pos == 'SP'):
        return hitters[hitters['POS'] == 'SP']
    elif (pos == 'RP'):
        return hitters[hitters['POS'] == 'RP']
    else: # util
        return hitters
        

## Run calculations

In [152]:
def group_players(players):
    bandwidth = estimate_bandwidth(players, quantile=0.05, n_samples=100)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    labels = ms.fit_predict(players)
    #labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    #labels_unique = np.unique(labels)
    #n_clusters_ = len(labels_unique)

    #print("number of estimated clusters : %d" % n_clusters_)
    players['group'] = labels
    return players

In [167]:
def group_pitchers_pos(pitchers):
    return [group_players(split_by_pos(pitchers, x).drop(['W rank', 'SV rank', 'K rank', 'ERA rank', 'WHIP rank', 'Razzball_Rank','Team','POS'], axis=1)) for x in PitcherPositions]

def group_by_pos(hitters):
    return [group_players(split_by_pos(hitters, x).drop(['AVG rank', 'SB rank', 'RBI rank', 'HR rank', 'R rank', 'Razzball_Rank','Team','ESPN'], axis=1)) for x in HitterPositions]

### Prep dataframes

In [176]:
hitter_tables = { 'C': "",
                  '1B': "",
                  '2B': "",
                  'SS': "",
                  '3B': "",
                  'inner': "",
                  'outer': "",
                  'OF': "",
                  'util': "" }

pitcher_tables = { 'SP': "",
                   'RP': "" }

In [177]:
hitters = get_hitters().set_index('Name')
groups = []
#C, 1B, 2B, SS, 3B, OF, outer, inner, util
groups = group_by_pos(hitters)

#hitters['group'] = group_players(hitters.drop(['AVG rank', 'SB rank', 'RBI rank', 'HR rank', 'R rank', 'Razzball_Rank','Team','ESPN'], axis=1))
for i in range(len(groups)):
    groups[i].sort_values(by=['Ovr'],inplace=True,ascending=True)
    hitter_tables[HitterPositions[i]] = groups[i]
    #groups[i].to_csv('{}.csv'.format(HitterPositions[i]))

pitchers = get_pitchers().set_index('Name')
pitchers.sort_values(by=['Razzball_Rank'],inplace=True,ascending=True)

groups = group_pitchers_pos(pitchers)
for i in range (len(groups)):
    groups[i].sort_values(by=['Ovr'],inplace=True,ascending=True)
    pitcher_tables[PitcherPositions[i]] = groups[i]
    
hitter_tables

{'1B':                       R    HR    RBI    SB    AVG    Ovr  group
 Name                                                           
 Cody Bellinger    118.9  42.6  118.1  12.5  0.287   16.2     17
 Freddie Freeman   110.8  32.9   99.8   6.4  0.293   43.6     15
 Anthony Rizzo     113.4  31.8   91.8   5.6  0.283   57.0     15
 Paul Goldschmidt   99.2  30.5   86.1   5.2  0.273   79.0     14
 Yuli Gurriel       81.2  22.9   91.4   4.7  0.289   85.4     14
 ...                 ...   ...    ...   ...    ...    ...    ...
 Lewin Diaz          5.3   1.8    5.8   0.2  0.240  563.4     12
 Taylor Jones        6.6   1.6    6.0   0.2  0.231  576.2     12
 Cheslor Cuthbert    3.0   0.9    3.2   0.1  0.248  582.6     12
 Andrew Vaughn       4.6   1.2    4.3   0.3  0.212  619.4     22
 Lucas Duda          0.8   0.3    0.8   0.0  0.226  660.2     23
 
 [85 rows x 7 columns],
 '2B':                     R    HR    RBI    SB    AVG    Ovr  group
 Name                                                 

In [143]:
all_hitters = group_by_pos(get_hitters().set_index('Name'))[0].sort_values(by='Ovr',ascending=True).reset_index();
 
hitters


1


Unnamed: 0,Name,R,HR,RBI,SB,AVG,Ovr,group
0,J.T. Realmuto,73.8,23.9,72.6,5.8,0.270,110.8,19
2,Salvador Perez,60.0,24.7,72.7,2.0,0.252,190.2,17
3,Christian Vazquez,54.2,13.3,53.6,4.8,0.262,190.8,18
4,Yasmani Grandal,72.1,25.3,76.0,3.2,0.239,192.8,17
5,Gary Sanchez,65.4,30.7,81.1,1.4,0.242,215.6,20
...,...,...,...,...,...,...,...,...
110,Rocky Gale,1.4,0.2,1.4,0.1,0.221,660.2,7
111,Ian Rice,1.1,0.3,1.2,0.1,0.209,668.0,7
112,Christian Kelley,1.3,0.3,1.3,0.1,0.207,668.0,7
113,Oscar Hernandez,1.0,0.3,1.1,0.1,0.207,670.2,7


In [203]:
def minimization_fun(x):
    sm = 0
    count = 0
    for key in hitter_tables:
        for metric in HitterMetrics:
            sm = sm + hitter_tables[key].iloc[int(x[count])].loc[metric]
        count = count + 1
    for metric in HitterMetrics:
        sm = sm + hitter_tables['util'].iloc[int(x[count])].loc[metric]
    count = count + 1
    for key in range(4):
        for metric in HitterMetrics:
            sm = sm + hitter_tables['OF'].iloc[int(x[count])].loc[metric]
        count = count + 1
    for key in range(8):
        for metric in PitcherMetrics:
            sm = sm + pitcher_tables['SP'].iloc[int(x[count])].loc[metric]
        count = count + 1
    for key in range(3):
        for metric in PitcherMetrics:
            sm = sm + pitcher_tables['RP'].iloc[int(x[count])].loc[metric]
        count = count + 1
    return sm

#minimization_fun([10]*25)
# need to add constraint, need to make minimize only check discrete values
res = optimize.minimize(minimization_fun, x0=[10]*25, method = 'Nelder-Mead', options={'maxiter':10000})
res.x

array([10.11188212,  9.9618385 , 10.34019593,  9.98259032,  9.99677301,
        9.97614382,  9.96456936,  9.97959646, 10.13096635,  9.9502453 ,
        9.73816502,  9.94460971,  9.94775003,  9.94681017, 10.08061469,
       10.0937216 , 10.08143901, 10.08154834, 10.0636711 , 10.06695803,
       10.09557269, 10.07751865,  9.98221668,  9.96175937,  9.93321801])