In [1]:
import pandas as pd
import plotly.express as px
import statsmodels.api as sm

In [5]:
df = pd.read_excel(r"C:\Users\mikej\Desktop\fantrax\fantrax_season_data.xlsm")
merge = df.copy()
df.columns

Index(['team', 'players_started', 'pars_num', 'bird_num', 'eag_num', 'bog_num',
       'dbog_num', 'alb_num', 'h1_num', 'other_num', 'plc_total', 'week',
       'win_loss', 'opponent', 'fin_1', 'fin_2', 'fin_3', 'fin_4', 'fin_5',
       'fin_6', 'median', 'cuts_made', 'median_delta', 'fin_1_pts',
       'fin_2_pts', 'fin_3_pts', 'fin_4_pts', 'fin_5_pts', 'fin_6_pts',
       'pars_pts', 'bird_pts', 'eag_pts', 'bog_pts', 'dbog_pts', 'alb_pts',
       'h1_pts', 'other_pts', 'plc_pts', 'total_pts', 'total_holes',
       'pp_hole'],
      dtype='object')

In [6]:
team_color={
            "Philly919": 'rgb(14,195,210)',
            "unit_circle": 'rgb(194,139,221)',
            "AlphaWired": 'rgb(247,160,93)',
            "Snead's Foot": 'rgb(70,214,113)',
            "New Team 4": 'rgb(247,94,56)',
            "Team Gamble": 'rgb(38,147,190)',
            "txmoonshine": 'rgb(219,197,48)',
            "Putt Pirates": 'rgb(115,112,106)'
            }

In [7]:
# ###  PER TOURNAMENT AVERAGES  ###

team_stat_medians = df.groupby('team')[['total_pts','cuts_made','total_holes','pp_hole','bird_num','eag_num','bog_num','dbog_num','plc_pts']].mean()#.reset_index()
team_stat_medians.columns = 'Total Pts','Cuts Made','Holes Played','Pts/Hole','Birdies','Eagles','Bogeys','Doubles','Place Pts'
team_stat_medians[['Total Pts','Holes Played','Bogeys','Birdies','Place Pts']] = team_stat_medians[['Total Pts','Holes Played','Bogeys','Birdies','Place Pts']].astype('int')
team_stat_medians = team_stat_medians.sort_values('Total Pts',ascending=False).round({'Cuts Made':1,'Pts/Hole':2,'Eagles':1,'Doubles':1})

team_stat_medians

Unnamed: 0_level_0,Total Pts,Cuts Made,Holes Played,Pts/Hole,Birdies,Eagles,Bogeys,Doubles,Place Pts
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Putt Pirates,434,4.8,381,1.13,91,3.5,43,4.7,38
unit_circle,427,5.1,394,1.09,92,3.2,51,3.6,31
Team Gamble,391,4.1,354,1.1,86,3.2,45,5.8,30
AlphaWired,389,4.5,367,1.05,85,2.2,43,4.1,25
txmoonshine,361,4.0,349,1.03,83,2.2,45,5.2,17
New Team 4,356,3.9,334,1.07,77,3.3,44,4.5,22
Philly919,346,4.1,352,0.98,77,1.8,44,3.9,14
Snead's Foot,336,3.9,347,0.96,75,2.3,44,5.7,10


In [9]:
### SEASON TO DATE SCORE VS WEEKLY MEDIAN  ###

team_medians = pd.DataFrame(df.groupby('team',as_index=False)['median_delta'].sum()).sort_values(by='median_delta',ascending=False).reset_index(drop=True)

median_delta_bar = px.bar(team_medians,
                          text_auto='.2s',
                          color='team',
                          color_discrete_map=team_color,
                          title='Total Pts vs. Weekly Median<br>thru 10 Weeks',
                          template='plotly_white',
                          labels={'index':'', 'value':''},
                          height=350
                         )

median_delta_bar.update_layout(title_x=.5,legend=dict(title=None))

median_delta_bar.update_xaxes(showticklabels=False)
median_delta_bar.update_yaxes(showticklabels=False, showgrid=False)
median_delta_bar.update_layout(hoverlabel=dict(font_size=18,font_family="Rockwell"),title_x=.45)

median_delta_bar.show()

In [12]:
### WEEKLY BUBBLES WIN / LOSS  ###

temp_df = df.copy()
temp_df['win_loss'] = temp_df['win_loss'].astype('bool')

scatter_fig = px.scatter(temp_df,
                        x='week',
                        y='total_pts',
                        color='win_loss',
                        height=800,
                        width=600,
                        size='cuts_made',
                        size_max=14,
                        template='plotly_white',
                        hover_name='team',
                        title='Weekly Scores by Winner/Loser',
                        color_discrete_sequence=px.colors.qualitative.Pastel1,
                        log_y=True,
                        labels={'week':'','total_pts':'Points Scored'}
                        )
scatter_fig.update_layout(hoverlabel=dict(font_size=18,font_family="Rockwell"),
                              title_x=.5,
                              showlegend=True,
                              legend=dict(orientation='h',
                                          yanchor="bottom",
                                          y=1,
                                          xanchor="center",
                                          x=.5,
                                          title='')
                              )

scatter_fig.update_xaxes(tickangle= -45,
                         tickvals = [1,2,3,4,5,6,7,8,9,10],
                         ticktext = ['Sony','Amex','Farmers','AT&T','Waste Mgmt','Genesis','Mexico Open','Cognizant','Arnold Palmer','The PLAYERS'])

scatter_fig.update_traces(marker=dict(
#     size=12,
    line_color='black',
    opacity=.8))

scatter_fig.show()

In [59]:
### CUTS MADE DISTRIBUTION  ###

cuts_made_hist = px.histogram(df,
                    x='cuts_made',
                    text_auto='.2s',
                    title='Cuts Made Distribution',
                    template='plotly_white',
                    labels={'cuts_made':'Cuts Made','count':''},
                    histnorm='percent',
                             )

cuts_made_hist.update_layout(title_x=.5, bargap=0.1)

cuts_made_hist.update_xaxes(tickvals = [1,2,3,4,5,6],
                            ticktext = ['1/6','2/6','3/6','4/6','5/6','6/6'])

cuts_made_hist.update_yaxes(showticklabels=False, showgrid=False,visible= False)

In [65]:
### FINISHING POSITION COMPARISON

finish_medians = round(df[['team','fin_1','fin_2','fin_3','fin_4','fin_5','fin_6']].groupby('team').median(),1).reset_index()
finish_medians.columns = 'Team','Top Finisher','2nd','3rd','4th','5th','Worst Finisher'
melted_finish_medians = finish_medians.melt(id_vars='Team',value_vars=['Top Finisher','2nd','3rd','4th','5th','Worst Finisher'])

print(finish_medians)

fin_place_scatter = px.scatter(melted_finish_medians,
          x='variable',
          y='value',
          color='Team',
          color_discrete_map=team_color,
          template='plotly_white',
          labels={'value':'Median Finish','variable':''},
#           width=550,
          log_y=True,
          ).update_traces(marker_size=12)

fin_place_scatter.update_layout(title_x=.5,
                               legend=dict(
                                   title=None,
                                   orientation='h',
                                   x=0,
                                   y=1.3
                               )
                              )

           Team  Top Finisher   2nd   3rd   4th   5th  Worst Finisher
0    AlphaWired           7.5  11.0  22.0  39.5  59.0            69.0
1    New Team 4           8.0  13.0  44.0  66.0  68.0            70.0
2     Philly919          13.5  27.5  48.0  57.5  67.0            69.0
3  Putt Pirates           4.5   9.0  24.0  31.5  68.5            68.5
4  Snead's Foot          19.5  32.0  41.0  64.0  67.0            68.0
5   Team Gamble           3.0  17.0  24.5  37.5  62.0            68.5
6   txmoonshine          10.5  21.5  44.0  58.0  67.0            66.0
7   unit_circle           7.5  17.5  25.5  39.0  53.0            67.0


In [66]:
### CORRELATION TO WINS BY STAT - SCATTER PLOTS WITH SLIDER / TOGGLE / RADIO BUTTON FOR EACH STAT


stats_to_compare = ['pars_num','bird_num','eag_num','bog_num','dbog_num','plc_pts','cuts_made','median_delta','pp_hole']

for stat in stats_to_compare:
    scatter_df = df.groupby(['team'],as_index=False)[[stat,'win_loss']].sum()
    fig = px.scatter(scatter_df,
              x=stat,
              y='win_loss',
              color='team',
              color_discrete_map=team_color,
              trendline='ols',trendline_scope='overall',trendline_color_override='black',
              ).update_traces(marker=dict(size=15,line_color='black'))
    results = px.get_trendline_results(fig).px_fit_results.iloc[0].rsquared
    print(stat)
    print(f"R-Squared Value: {results:.2f}")
    print(fig.show())

pars_num
R-Squared Value: 0.44


None
bird_num
R-Squared Value: 0.81


None
eag_num
R-Squared Value: 0.18


None
bog_num
R-Squared Value: 0.00


None
dbog_num
R-Squared Value: 0.06


None
plc_pts
R-Squared Value: 0.74


None
cuts_made
R-Squared Value: 0.58


None
median_delta
R-Squared Value: 0.79


None
pp_hole
R-Squared Value: 0.56


None


In [20]:
###  CUTS PER WEEK PAR  ###

cuts_per_week = pd.DataFrame(df.groupby('team')[['cuts_made','total_pts']].sum())
cuts_per_week['cuts_per_week'] = cuts_per_week['cuts_made'] / 10
px.bar(cuts_per_week.sort_values(by='cuts_per_week',ascending=False).round(1),
       y='cuts_per_week',
      log_y=True,
      text_auto=True,
      template='plotly_white',
      color='total_pts',
      height=350,
      color_continuous_scale=px.colors.sequential.Blues)

In [None]:
### PLAYGROUND BELOW HERE ###

In [21]:
temp = df.groupby('team')['pars_pts','bird_pts','eag_pts','total_pts'].sum()
temp['bird_eagle_pts'] = temp.bird_pts + temp.eag_pts
temp['portion'] = round(temp.bird_eagle_pts / temp.total_pts,2)
temp['par_portion'] = round(temp.pars_pts / temp.total_pts,2)
temp


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0_level_0,pars_pts,bird_pts,eag_pts,total_pts,bird_eagle_pts,portion,par_portion
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AlphaWired,1152.0,2574,176,3892.0,2750,0.71,0.3
New Team 4,1025.0,2325,264,3567.5,2589,0.73,0.29
Philly919,1118.5,2331,144,3465.0,2475,0.71,0.32
Putt Pirates,1191.5,2751,280,4346.0,3031,0.7,0.27
Snead's Foot,1090.0,2268,184,3360.0,2452,0.73,0.32
Team Gamble,1070.0,2580,256,3918.0,2836,0.72,0.27
txmoonshine,1060.0,2496,176,3616.0,2672,0.74,0.29
unit_circle,1220.0,2760,256,4270.0,3016,0.71,0.29


In [23]:
df.groupby(['team']).plc_pts.sum()

team
AlphaWired      259
New Team 4      223
Philly919       146
Putt Pirates    384
Snead's Foot    104
Team Gamble     300
txmoonshine     173
unit_circle     311
Name: plc_pts, dtype: int64

In [27]:
# percentage of time players make cut
df['cuts_per_start'] = df.cuts_made / df.players_started
(
    df.groupby('team')
    ['cuts_per_start']
    .mean()
    .round(2)
)

team
AlphaWired      0.76
New Team 4      0.72
Philly919       0.69
Putt Pirates    0.80
Snead's Foot    0.66
Team Gamble     0.68
txmoonshine     0.68
unit_circle     0.85
Name: cuts_per_start, dtype: float64