## Import statements and globals

In [128]:
import pandas as pd
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth
from scipy import optimize

HitterPositions = ['C', '1B', '2B', 'SS', '3B', 'OF', 'outer', 'inner', 'util']
HitterMetrics = ['R', 'HR', 'RBI', 'SB', 'AVG']

PitcherPositions = ['SP', 'RP']
PitcherMetrics = ['W', 'SV', 'K', 'ERA', 'WHIP']

## Import data

In [145]:
def get_hitters():
    hitters = pd.read_csv('razzball-hitters.csv', index_col='#', usecols=['#','Name','Team','ESPN','R','HR', 'RBI', 'SB','AVG',])
    hitters.rename_axis('Razzball_Rank', inplace=True)
    hitters.reset_index(inplace=True)

    # sort and rank
    for metric in HitterMetrics:
        hitters.sort_values(by=[metric],inplace=True, ascending=False)
        hitters.reset_index(inplace=True, drop=True)
        hitters.rename_axis('{} rank'.format(metric), inplace=True)
        hitters.reset_index(inplace=True)
    hitters['Ovr'] = (hitters['AVG rank'] + hitters['SB rank'] + hitters['RBI rank'] + hitters['HR rank'] + hitters['R rank']) / 5
    #hitters['Ovr'] = (hitters['Ovr'] + hitters['Razzball_Rank']) / 2
    
    hitters = hitters.assign(ESPN=hitters.ESPN.str.split('/')).explode('ESPN')
    hitters.sort_values(by=['Ovr'],inplace=True,ascending=True)
    return hitters

def get_pitchers():
    pitchers = pd.read_csv('razzball-pitchers.csv', index_col='#', usecols=[])
    pitchers.rename_axis('Razzball_Rank', inplace=True)
    pitchers.reset_index(inplace=True)
    
    for metric in PitcherMetrics:
        pitchers.sort_values(by=[metric],inplace=True, ascending=False)
        pitchers.reset_index(inplace=True, drop=True)
        pitchers.rename_axis('{} rank'.format(metric), inplace=True)
        pitchers.reset_index(inplace=True)
    pitchers['Ovr'] = (pitchers['W rank'] + pitchers['SV rank'] + pitchers['K rank'] + pitchers['ERA rank'] + pitchers['WHIP rank']) / 5
    pitchers = pitchers.assign(ESPN=pitchers.ESPN.str.split('/')).explode('ESPN')
    pitchers.sort_values(by=['Ovr'],inplace=True,ascending=True)
    return pitchers
    
def get_hitter_prices(hitters):
    prices = pd.read_csv('razzball-hitters-prices.csv', index_col='#', usecols=['#', 'Name', 'Team', '$', '$R', '$HR', '$RBI', '$SB', '$AVG'])
    hitters = hitters.merge(prices, left_on=['Name', 'Team'], right_on=['Name','Team'], how='left')
    return hitters

def get_pitcher_prices(pitchers):
    prices = pd.read_csv('razzball-pitchers-prices.csv', index_col='#', usecols=['#','Name','Team','$','$W','$SV','$K','$WHIP','$ERA'])
    pitchers = pitchers.merge(prices, left_on['Name', 'Team'], right_on=['Name','Team'], how='left'))
    return pitchers
    
def split_by_pos(hitters, pos):
    if (pos == 'C'):
        return hitters[hitters['ESPN'] == 'C']
    elif (pos == '1B'):
        return hitters[hitters['ESPN'] == '1B']
    elif (pos == '2B'):
        return hitters[hitters['ESPN'] == '2B']
    elif (pos == 'SS'):
        return hitters[hitters['ESPN'] == 'SS']
    elif (pos == '3B'):
        return hitters[hitters['ESPN'] == '3B']
    elif (pos == 'OF'):
        return hitters[hitters['ESPN'] == 'OF']
    elif (pos == 'corner'):
        return hitters[hitters['ESPN'] == '1B'].append(hitters[hitters['ESPN'] == '3B'])
    elif (pos == 'inner'):
        return hitters[hitters['ESPN'] == '2B'].append(hitters[hitters['ESPN'] == 'SS'])
    elif (pos == 'SP'):
        return hitters[hitters['ESPN'] == 'SP']
    elif (pos == 'RP'):
        return hitters[hitters['ESPN'] == 'RP']
    else: # util
        return hitters
        

IndentationError: expected an indented block (<ipython-input-145-06d2177ebeae>, line 41)

Unnamed: 0,AVG rank,SB rank,RBI rank,HR rank,R rank,Razzball_Rank,Name,Team,ESPN,R,HR,RBI,SB,AVG,Ovr
28,28,48,4,3,2,31,Cody Bellinger,LAD,1B,113.4,42.5,113.3,12.5,0.287,17.0
15,15,132,29,35,14,33,Freddie Freeman,ATL,1B,105.2,32.9,95.6,6.4,0.292,45.0
34,34,155,50,37,10,13,Anthony Rizzo,CHC,1B,107.5,31.7,87.5,5.6,0.283,57.2
97,97,166,63,46,34,41,Paul Goldschmidt,STL,1B,94.1,30.4,82.5,5.2,0.273,81.2
25,25,173,40,94,75,93,Yuli Gurriel,HOU,1B,82.7,23.3,93.1,4.8,0.288,81.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
383,383,616,627,600,626,631,Nolan Jones,CLE,3B,4.3,1.1,3.9,0.3,0.244,570.4
346,346,681,637,613,642,647,Cheslor Cuthbert,CHW,3B,3.0,0.9,3.1,0.1,0.247,583.8
544,544,592,636,621,636,637,Jonathan India,CIN,3B,3.2,0.8,3.1,0.4,0.232,605.8
676,676,637,620,580,615,619,Sherten Apostel,TEX,3B,4.8,1.2,4.5,0.3,0.202,625.6


## Run calculations

In [125]:
def group_players(players):
    bandwidth = estimate_bandwidth(players, quantile=0.05, n_samples=100)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    labels = ms.fit_predict(players)
    #labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    #labels_unique = np.unique(labels)
    #n_clusters_ = len(labels_unique)

    #print("number of estimated clusters : %d" % n_clusters_)
    players['group'] = labels
    return players

In [129]:
def group_by_pos(hitters):
    return [group_players(split_by_pos(hitters, x).drop(['AVG rank', 'SB rank', 'RBI rank', 'HR rank', 'R rank', 'Razzball_Rank','Team','ESPN'], axis=1)) for x in HitterPositions]

In [136]:
hitters = get_hitters().set_index('Name')
groups = []
#C, 1B, 2B, SS, 3B, OF, outer, inner, util
groups = group_by_pos(hitters)

#hitters['group'] = group_players(hitters.drop(['AVG rank', 'SB rank', 'RBI rank', 'HR rank', 'R rank', 'Razzball_Rank','Team','ESPN'], axis=1))
for i in range(len(groups)):
    groups[i].sort_values(by=['Ovr'],inplace=True,ascending=True)
    hitter_tables[HitterPositions[i]] = groups[i]
    #groups[i].to_csv('{}.csv'.format(HitterPositions[i]))

pitchers = get_pitchers().set_index('Name')
    
groups[0]

Unnamed: 0_level_0,R,HR,RBI,SB,AVG,Ovr,group
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
J.T. Realmuto,73.8,23.9,72.6,5.8,0.270,110.8,19
Willson Contreras,62.0,19.0,63.4,3.2,0.257,176.8,18
Salvador Perez,60.0,24.7,72.7,2.0,0.252,190.2,17
Christian Vazquez,54.2,13.3,53.6,4.8,0.262,190.8,18
Yasmani Grandal,72.1,25.3,76.0,3.2,0.239,192.8,17
...,...,...,...,...,...,...,...
Rocky Gale,1.4,0.2,1.4,0.1,0.221,660.2,7
Ian Rice,1.1,0.3,1.2,0.1,0.209,668.0,7
Christian Kelley,1.3,0.3,1.3,0.1,0.207,668.0,7
Oscar Hernandez,1.0,0.3,1.1,0.1,0.207,670.2,7


In [143]:
all_hitters = group_by_pos(get_hitters().set_index('Name'))[0].sort_values(by='Ovr',ascending=True).reset_index();
 
hitters


1


Unnamed: 0,Name,R,HR,RBI,SB,AVG,Ovr,group
0,J.T. Realmuto,73.8,23.9,72.6,5.8,0.270,110.8,19
2,Salvador Perez,60.0,24.7,72.7,2.0,0.252,190.2,17
3,Christian Vazquez,54.2,13.3,53.6,4.8,0.262,190.8,18
4,Yasmani Grandal,72.1,25.3,76.0,3.2,0.239,192.8,17
5,Gary Sanchez,65.4,30.7,81.1,1.4,0.242,215.6,20
...,...,...,...,...,...,...,...,...
110,Rocky Gale,1.4,0.2,1.4,0.1,0.221,660.2,7
111,Ian Rice,1.1,0.3,1.2,0.1,0.209,668.0,7
112,Christian Kelley,1.3,0.3,1.3,0.1,0.207,668.0,7
113,Oscar Hernandez,1.0,0.3,1.1,0.1,0.207,670.2,7


In [None]:
def minimization_fun(x):
    return sum()