In [1]:
import pandas as pd
import plotly.express as px
import statsmodels.api as sm
import numpy as np

In [19]:
df = pd.read_excel(r"C:\Users\mikej\Desktop\fantrax\fantrax_season_data.xlsm")
df.columns

Index(['team', 'players_started', 'pars_num', 'bird_num', 'eag_num', 'bog_num',
       'dbog_num', 'alb_num', 'h1_num', 'other_num', 'plc_total', 'week',
       'win_loss', 'opponent', 'fin_1', 'fin_2', 'fin_3', 'fin_4', 'fin_5',
       'fin_6', 'median', 'cuts_made', 'median_delta', 'fin_1_pts',
       'fin_2_pts', 'fin_3_pts', 'fin_4_pts', 'fin_5_pts', 'fin_6_pts',
       'pars_pts', 'bird_pts', 'eag_pts', 'bog_pts', 'dbog_pts', 'alb_pts',
       'h1_pts', 'other_pts', 'plc_pts', 'total_pts', 'total_holes',
       'pp_hole'],
      dtype='object')

In [20]:
team_color={
            "Philly919": 'rgb(14,195,210)',
            "unit_circle": 'rgb(194,139,221)',
            "AlphaWired": 'rgb(247,160,93)',
            "Sneads Foot": 'rgb(70,214,113)',
            "New Team 4": 'rgb(247,94,56)',
            "Team Gamble": 'rgb(38,147,190)',
            "txmoonshine": 'rgb(219,197,48)',
            "Putt Pirates": 'rgb(115,112,106)'
            }


In [21]:
df2 = df.groupby(['week','team','win_loss'],as_index=False)['total_pts'].sum()
df2['week_rank'] = df2.groupby('week')['total_pts'].rank(ascending=False).astype(int)
df2 = df2.sort_values(by=['week', 'week_rank'])

df3 = df[['team','week','win_loss']]

df4 = pd.merge(df2,df3, how='left', on=['team','week']).drop(columns='win_loss_y').rename(columns={'win_loss_x':'win_loss'})
df4

Unnamed: 0,week,team,win_loss,total_pts,week_rank
0,1,Team Gamble,1,523.0,1
1,1,Philly919,1,454.5,2
2,1,unit_circle,0,436.5,3
3,1,Putt Pirates,1,412.0,4
4,1,txmoonshine,1,406.5,5
...,...,...,...,...,...
123,16,Philly919,0,365.5,4
124,16,Snead's Foot,0,362.5,5
125,16,Putt Pirates,1,320.0,6
126,16,New Team 4,0,310.5,7


In [32]:
df4['win_loss'] = df4['win_loss'].astype('category')
df4['week_rank'] = -df4['week_rank'].astype(int)

px.scatter(df4[df4.team=='unit_circle'],
       x='week',
       y='week_rank',
       color='win_loss',
       width=700,
       height=350,
       title='Weekly Win/Loss for unit_circle'
       ).update_yaxes(autorange="reversed", range=[1,8], tickmode='array', tickvals=[1,2,3,4,5,6,7,8], ticktext=[1,2,3,4,5,6,7,8]
       ).update_layout(template='plotly_white',title_x=.5)

In [46]:
# unlucky losses
df4[(df4.win_loss == 0) & (df4.week_rank < 5)].team.value_counts()

unit_circle     3
New Team 4      2
Snead's Foot    2
txmoonshine     2
Team Gamble     1
AlphaWired      1
Philly919       1
Name: team, dtype: int64

In [45]:
# lucky wins
df4[(df4.win_loss == 1) & (df4.week_rank > 5)].team.value_counts()

txmoonshine     2
AlphaWired      2
Putt Pirates    2
Philly919       2
unit_circle     1
Name: team, dtype: int64

In [None]:
### starts here

In [47]:
# ###  PER TOURNAMENT AVERAGES  ###

team_stat_avgs = df.groupby('team')[['total_pts','cuts_made','total_holes','pp_hole','bird_num','eag_num','bog_num','dbog_num','plc_pts']].median()#.reset_index()
team_stat_avgs.columns = 'Total Pts','Cuts Made','Holes Played','Pts/Hole','Birdies','Eagles','Bogeys','Doubles','Place Pts'
team_stat_avgs[['Total Pts','Holes Played','Bogeys','Birdies','Place Pts']] = team_stat_avgs[['Total Pts','Holes Played','Bogeys','Birdies','Place Pts']].astype('int')
team_stat_avgs = team_stat_avgs.sort_values('Total Pts',ascending=False).round({'Cuts Made':1,'Pts/Hole':2,'Eagles':1,'Doubles':1})

team_stat_avgs

Unnamed: 0_level_0,Total Pts,Cuts Made,Holes Played,Pts/Hole,Birdies,Eagles,Bogeys,Doubles,Place Pts
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
unit_circle,433,6.0,414,1.04,93,3.0,48,3.0,26
Putt Pirates,408,5.0,396,1.08,81,2.5,45,5.0,34
Team Gamble,366,4.0,360,1.04,78,2.5,46,7.0,25
New Team 4,354,4.0,342,1.07,74,2.5,41,4.0,22
Snead's Foot,344,4.0,360,0.96,75,2.0,45,6.0,13
txmoonshine,344,4.0,360,1.0,77,1.0,48,4.5,17
Philly919,330,4.0,360,0.96,71,1.0,45,4.5,14
AlphaWired,327,4.0,342,0.95,70,1.0,42,4.5,21


In [48]:
### SEASON TO DATE SCORE VS WEEKLY MEDIAN  ###

team_median_deltas = pd.DataFrame(df.groupby('team',as_index=False)['median_delta'].sum()).sort_values(by='median_delta',ascending=False).reset_index(drop=True)

median_delta_bar = px.bar(team_median_deltas,
                          text_auto='.3s',
                          color='team',
                          color_discrete_map=team_color,
                          title='Total Pts vs. Weekly Median<br>thru 14 Weeks',
                          template='plotly_white',
                          labels={'index':'', 'value':''},
                          height=350
                         )

median_delta_bar.update_layout(title_x=.5,legend=dict(title=None))

median_delta_bar.update_xaxes(showticklabels=False)
median_delta_bar.update_yaxes(showticklabels=False, showgrid=False)
median_delta_bar.update_layout(hoverlabel=dict(font_size=18,font_family="Rockwell"),title_x=.45)

median_delta_bar.show()

In [49]:
### WEEKLY SCORE VS WEEKLY MEDIAN FOR EACH TEAM ###

team_weekly_deltas = pd.DataFrame(df[['team','week','median','median_delta']].groupby(['team','week'],as_index=False)[['median_delta','median']].sum())
px.bar(team_weekly_deltas.sort_values('median_delta',ascending=False),
       x='week',
       y='median_delta',
       color='team',
       color_discrete_map=team_color,        
       facet_col='team',
       facet_col_wrap=2,
       facet_col_spacing=.1,
       facet_row_spacing=.16,
       height=800,
       width=800,
       labels={'median_delta':'','week':''},
       template='plotly_white',
       text_auto='.3s'
       ).update_yaxes(showticklabels=False,showgrid=False,gridcolor="#B1A999",tickfont=dict(color='#5A5856', size=10)#,matches=None
       ).update_xaxes(tickfont=dict(color='#5A5856', size=10),title_font=dict(color='#5A5856',size=10),showticklabels=True,tickangle= -90,tickmode='array',tickvals = [1,2,3,4,5,6,7,8,9,10,11,12],ticktext = ['Sony','Amex','Farmers','AT&T','Waste Mgmt','Genesis','Mexico Open','Cognizant','Arnold Palmer','PLAYERS','Valspar','Houston Open'],ticklabelposition='outside'
       ).update_layout(hoverlabel=dict(font_size=14,font_family="Rockwell"),showlegend=False
       ).for_each_annotation(lambda a: a.update(text=a.text.replace("team=", "")))

In [50]:
# wins with negative median delta

temp = df[['team','week','opponent','win_loss','median','total_pts','median_delta']]
temp = temp[(temp.win_loss==True) & (temp.median_delta < 0)]
temp

Unnamed: 0,team,week,opponent,win_loss,median,total_pts,median_delta
6,txmoonshine,1,Snead's Foot,1,409.3,406.5,-2.8
22,txmoonshine,3,Team Gamble,1,343.8,334.0,-9.8
40,AlphaWired,6,Philly919,1,348.5,342.0,-6.5
48,AlphaWired,7,New Team 4,1,366.0,306.5,-59.5
57,New Team 4,8,Philly919,1,356.0,354.5,-1.5
59,Putt Pirates,8,AlphaWired,1,356.0,344.0,-12.0
66,Philly919,9,txmoonshine,1,378.5,335.5,-43.0
78,txmoonshine,10,Team Gamble,1,350.5,297.0,-53.5
90,Philly919,12,unit_circle,1,334.0,250.5,-83.5
108,Putt Pirates,14,Team Gamble,1,320.0,312.5,-7.5


In [52]:
### WEEKLY BUBBLES WIN / LOSS  ###

newnames={'False':'Loss','True':'Win'}
temp_df = df.copy()
temp_df['win_loss'] = temp_df['win_loss'].astype('bool')
scatter_fig = px.scatter(temp_df,
                        x='week',
                        y='total_pts',
                        color='win_loss',
                        template='plotly_dark',
                        size='cuts_made',
                        size_max=14,
                        hover_name='team',
                        color_discrete_sequence=px.colors.qualitative.Pastel1,
                        title='Weekly Wins & Losses by Pts Scored',
                        labels={'week':'','total_pts':'Points Scored'},
                        height=800,
                        # width=600,                        
                        ).update_layout(hoverlabel=dict(font_size=18,font_family="Rockwell"),showlegend=True,title_x=.35,
                                        legend=dict(orientation='h',yanchor="bottom",y=1,xanchor="center",x=.5,title='',font_color='#5A5856')
                        ).update_xaxes(tickangle= -45,tickvals = [1,2,3,4,5,6,7,8,9,10,11],
                                       ticktext = ['Sony','Amex','Farmers','AT&T','Waste Mgmt','Genesis','Mexico Open','Cognizant','Arnold Palmer','PLAYERS','Valspar'],
                                       tickfont=dict(color='#5A5856', size=13),title_font=dict(color='#5A5856',size=14)
                        ).update_yaxes(tickfont=dict(color='#5A5856', size=13),title_font=dict(color='#5A5856',size=14),tickcolor='darkgrey', gridcolor='darkgrey'
                        ).update_traces(marker=dict(line_color='black')
                        ).for_each_trace(lambda t: t.update(name = newnames[t.name],legendgroup = newnames[t.name],hovertemplate = t.hovertemplate.replace(t.name, newnames[t.name])))


scatter_fig.show()#,use_container_width=True, config=config)

In [66]:
### CUTS MADE DISTRIBUTION  ###

newnames={'0':'Loss','1':'Win'}

['#B6E880', '#FF97FF']

cuts_made_hist = px.histogram(df[(df.week!=4) & (df.week!=15)].sort_values('cuts_made',ascending=False),
                    x='cuts_made',
                    text_auto='.2s',
                    title='Win Percentage by Cuts Made',
                    template='plotly_dark',
                    labels={'cuts_made':'Players Thru Cut','count':''},
                    histfunc='count',
                    barnorm='percent',
                    barmode='stack',
                    color='win_loss',
#                     color_discrete_sequence=['red', 'green'],
                    color_discrete_sequence=px.colors.qualitative.Safe,
                    height=350
                             ).update_layout(title_x=.23,legend=dict(title="",x=.45,y=1.25,orientation='h'))

cuts_made_hist.update_layout(title_x=.5, bargap=0.2)
cuts_made_hist.for_each_trace(lambda t: t.update(name = newnames[t.name],legendgroup = newnames[t.name],hovertemplate = t.hovertemplate.replace(t.name, newnames[t.name])))

cuts_made_hist.update_xaxes(tickvals = [1,2,3,4,5,6],
                            ticktext = ['1/6','2/6','3/6','4/6','5/6','6/6'])

cuts_made_hist.update_yaxes(showticklabels=False, showgrid=False,visible= False)

In [70]:
### CUTS MADE DISTRIBUTION  ###
# df['rounded_percentage'] = (df['cuts_made'] * 100).round().astype(int).astype(str) + '%'

cuts_made_hist = px.histogram(df[(df.week!=4) & (df.week!=15)].sort_values('cuts_made', ascending=False),
                              x='cuts_made',
                              template='plotly_dark',
                              labels={'cuts_made':'', 'count':''},
                              title="%'s Cuts Made",
                              histnorm='percent',
                              color_discrete_sequence=['grey'],
                              height=350,
                              text_auto='.0f'
                             )


cuts_made_hist.update_layout(title_x=.5, bargap=0.2, legend=dict(title="", x=.45, y=1.2, orientation='h'))
cuts_made_hist.update_xaxes(tickvals=[1, 2, 3, 4, 5, 6], ticktext=['1/6', '2/6', '3/6', '4/6', '5/6', '6/6'])
cuts_made_hist.update_yaxes(showticklabels=False, showgrid=False, tickfont=dict(color='#5A5856'),
                             title_font_color='#5A5856', visible=False)

cuts_made_hist.show()


In [71]:
### FINISHING POSITION COMPARISON

finish_medians = round(df[['team','fin_1','fin_2','fin_3','fin_4','fin_5','fin_6']].groupby('team').median(),1).reset_index()
finish_medians.columns = 'Team','Top Finisher','2nd','3rd','4th','5th','Worst Finisher'
melted_finish_medians = finish_medians.melt(id_vars='Team',value_vars=['Top Finisher','2nd','3rd','4th','5th','Worst Finisher'])

# print(finish_medians)

fin_place_scatter = px.scatter(melted_finish_medians,
          x='variable',
          y='value',
          color='Team',
          color_discrete_map=team_color,
          template='plotly_white',
          labels={'value':'Median Finish','variable':''},
#           width=550,
          log_y=True,
          ).update_traces(marker_size=12)

# fin_place_scatter.update_layout(title_x=.5,
#                                legend=dict(
#                                    title=None,
#                                    orientation='h',
#                                    x=0,
#                                    y=1.3
#                                )
#                               )
fin_place_scatter.show()

In [72]:
### CORRELATION TO WINS BY STAT - SCATTER PLOTS WITH SLIDER / TOGGLE / RADIO BUTTON FOR EACH STAT

df['bb_ratio'] = df.bird_num / df.bog_num
stats_to_compare = ['bird_num','bb_ratio','median_delta','total_pts','plc_pts','cuts_made','pp_hole','pars_num','eag_num','dbog_num','bog_num']

for stat in stats_to_compare:
    scatter_df = df.groupby(['team'],as_index=False)[[stat,'win_loss']].sum()
    fig = px.scatter(scatter_df,
              x=stat,
              y='win_loss',
              color='team',
              color_discrete_map=team_color,
              template='plotly_white',
              height=400,
              width=600,
              labels={'win_loss':'Wins'},
              title=f"Corr between {stat} vs Wins",
              trendline='ols',trendline_scope='overall',trendline_color_override='black',
              ).update_traces(marker=dict(size=15,line_color='black')
              ).update_layout(showlegend=False)
    results = px.get_trendline_results(fig).px_fit_results.iloc[0].rsquared
    print(f"{stat}")
    print(f"R-Squared Value: {results:.2f}")
    print(fig.show())

bird_num
R-Squared Value: 0.77


None
bb_ratio
R-Squared Value: 0.71


None
median_delta
R-Squared Value: 0.66


None
total_pts
R-Squared Value: 0.67


None
plc_pts
R-Squared Value: 0.57


None
cuts_made
R-Squared Value: 0.42


None
pp_hole
R-Squared Value: 0.50


None
pars_num
R-Squared Value: 0.11


None
eag_num
R-Squared Value: 0.11


None
dbog_num
R-Squared Value: 0.15


None
bog_num
R-Squared Value: 0.20


None


In [75]:
px.get_trendline_results(fig).px_fit_results.iloc[0].rsquared

0.1412152537645025

In [73]:
scatter_df = df.groupby(['team'],as_index=False)[[stat,'win_loss']].mean()
fig = px.scatter(scatter_df,
          x=stat,
          y='win_loss',
          color='team',
          color_discrete_map=team_color,
          trendline='ols',trendline_scope='overall',trendline_color_override='black',
      ).update_traces(marker_size=12
      ).update_layout(legend=dict(title=None,orientation='h',x=0,y=1.3))

results = round(px.get_trendline_results(fig).px_fit_results.iloc[0].rsquared,2)
print(results)
fig.show()

0.2


In [76]:
###  CUTS PER WEEK PAR  ###

cuts_per_week = pd.DataFrame(df[(df.week!=4) & (df.week!=15)].groupby('team')[['cuts_made','total_pts']].mean())
px.bar(cuts_per_week.sort_values(by='cuts_made',ascending=False).round(1),
       y='cuts_made',
      log_y=True,
      text_auto=True,
      template='plotly_white',
      color='total_pts',
      height=350,
      color_continuous_scale=px.colors.sequential.Blues)

In [None]:
### PLAYGROUND BELOW HERE ###

In [77]:
temp = df.groupby('team')['pars_pts','bird_pts','eag_pts','total_pts'].sum()
temp['bird_eagle_pts'] = temp.bird_pts + temp.eag_pts
temp['portion'] = round(temp.bird_eagle_pts / temp.total_pts,2)
temp['par_portion'] = round(temp.pars_pts / temp.total_pts,2)
temp


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0_level_0,pars_pts,bird_pts,eag_pts,total_pts,bird_eagle_pts,portion,par_portion
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AlphaWired,1795.5,3753,216,5693.5,3969,0.7,0.32
New Team 4,1677.5,3624,352,5640.5,3976,0.7,0.3
Philly919,1813.0,3645,184,5449.5,3829,0.7,0.33
Putt Pirates,1901.5,4140,368,6548.5,4508,0.69,0.29
Snead's Foot,1836.0,3621,288,5512.5,3909,0.71,0.33
Team Gamble,1739.5,3912,336,5974.5,4248,0.71,0.29
txmoonshine,1772.5,3993,192,5754.0,4185,0.73,0.31
unit_circle,1891.0,4059,344,6374.5,4403,0.69,0.3


In [78]:
df.groupby(['team']).plc_pts.sum()

team
AlphaWired      363
New Team 4      429
Philly919       262
Putt Pirates    602
Snead's Foot    244
Team Gamble     458
txmoonshine     292
unit_circle     518
Name: plc_pts, dtype: int64

In [81]:
# percentage of time players make cut

with_cut_events = df[(df.week!=4) & (df.week!=15)]


with_cut_events['cuts_per_start'] = with_cut_events.cuts_made / with_cut_events.players_started
(
    with_cut_events.groupby('team')
    ['cuts_per_start']
    .mean()
    .round(2)
)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



team
AlphaWired      0.63
New Team 4      0.70
Philly919       0.68
Putt Pirates    0.75
Snead's Foot    0.65
Team Gamble     0.63
txmoonshine     0.67
unit_circle     0.77
Name: cuts_per_start, dtype: float64