In [39]:
# Run the shared utilities notebook before anything else in this notebook.
# NOTE(review): `project_dir` is used below but never defined in this notebook —
# presumably it is provided by utils.ipynb; confirm.
%run ../code/utils.ipynb

In [40]:

import logging
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from ipywidgets import interact


In [41]:
# Set matplotlib's log level to "warning" for the rest of the notebook.
plt.set_loglevel("warning")

In [42]:
# Directory holding the matchup/player CSV exports.
# Built with os.path.join so the separators match the host OS instead of being
# hard-coded as Windows backslashes. The final "" component keeps a trailing
# separator, because file names are appended to this string directly.
# NOTE(review): `project_dir` is not defined in this notebook — presumably it is
# provided by the utils notebook run at the top; confirm.
data_dir = os.path.join(project_dir, "data", "matchup_player_data", "")

In [43]:
# Load the matchup/player CSV from data_dir into the frame every cell below reads.
matchup_csv_path = f"{data_dir}matchup_player_data_through_2023.csv"
df = pd.read_csv(matchup_csv_path)

In [44]:
# Boolean row masks over `df`, keyed by season label ('2015' through '2023').
masks = {str(year): df['season'] == year for year in range(2015, 2024)}

In [45]:
# Roster-slot column names; each slot has a companion "<slot>_points" column.
position_columns = [
    'QB',
    'WR1', 'WR2', 'WR3',
    'RB1', 'RB2',
    'TE',
    'W/R/T',
    'DEF',
]
points_columns = [f'{slot}_points' for slot in position_columns]

In [46]:

@interact
def position_over_time_plt( pos = position_columns ):
    """
    Draw one box per season of the weekly points scored at the selected position.

    Parameters
    ----------
    pos : str
        Position column name picked from the ``position_columns`` dropdown;
        the matching ``<pos>_points`` column of ``df`` is what gets plotted.
    """
    points_col = pos + '_points'

    # Work on a copy of just the two needed columns so `df` is never modified.
    season_points = df[['season', points_col]].copy()

    # Seasons are used purely as categories, so label them with the year text
    # and pin the box order explicitly (4-digit year strings sort chronologically).
    season_points['season'] = season_points['season'].astype(str)
    season_order = sorted(season_points['season'].unique())

    fig, ax = plt.subplots(figsize=(10, 10))
    sns.boxplot(data=season_points,
                x='season',
                y=points_col,
                order=season_order,
                hue='season',
                hue_order=season_order,
                legend=False,
                ax=ax)
    ax.set_title(f"{pos} points by season")
    ax.set_xlabel('Season')
    ax.set_ylabel('Points Scored')
    plt.show()
    

interactive(children=(Dropdown(description='pos', options=('QB', 'WR1', 'WR2', 'WR3', 'RB1', 'RB2', 'TE', 'W/R…

In [47]:
# Summary statistics of QB_points, one row per manager.
qb_points_by_manager = df.groupby('manager')['QB_points']
qb_points_by_manager.describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
manager,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Big Rick,16.0,16.038125,7.222801,-0.03,13.4675,18.815,19.9025,27.77
Buddy,147.0,18.015238,8.461334,0.0,11.87,17.28,22.985,43.76
Eric,147.0,15.71932,9.41757,-7.83,10.61,15.18,21.69,41.82
Frags,147.0,16.645918,7.761891,-0.23,11.75,16.37,21.44,38.1
Jake,147.0,17.012109,8.252643,-0.48,10.95,17.1,22.705,40.93
Jones,147.0,18.520272,8.469762,0.0,12.69,18.02,24.125,41.88
Kyle,147.0,16.890272,7.771029,-3.0,11.1,16.54,21.59,40.06
Lauren,147.0,16.669932,8.271044,0.0,11.07,16.1,22.425,39.88
Michael,147.0,16.948027,7.810023,-1.3,12.095,16.67,22.105,37.92
Nate,131.0,16.654733,8.297715,-2.17,11.15,16.37,22.06,43.46


In [48]:
@interact
def position_points_explorer(manager = df['manager'].unique(), pos = position_columns, season = df['season'].unique()):
    """
    Paint an interactive lollipop chart of weekly points for one roster position,
    selectable by manager, position, and season.

    Each lollipop is one week: the dot is the points scored by whoever filled the
    position, and the stick runs from the league's season average to that dot.
    Dots are coloured per player.

    Parameters
    ----------
    manager : value from df['manager']
        Manager whose roster is charted.
    pos : str
        Position column name from ``position_columns``.
    season : value from df['season']
        Season to chart.

    Returns
    -------
    str or None
        A short message when the manager has no rows in the selected season;
        otherwise None (the chart is shown as a side effect).
    """
    points_col = pos + '_points'

    # All league rows for the season, then only this manager's rows, ordered by week.
    # For example: manager Michael, season 2018, columns week, QB & QB_points.
    season_rows = df[df['season'] == season]
    manager_rows = (
        season_rows.loc[season_rows['manager'] == manager, ['week', pos, points_col]]
        .sort_values('week')
        .reset_index(drop=True)
    )

    # An empty frame means the manager didn't appear in the selected season;
    # bail out before any plotting work is done.
    if manager_rows.empty:
        return "It appears the selected manager did not play in this season!"

    # League-wide average for the season at this position, used as the reference line.
    league_average = season_rows[points_col].mean()

    # RGB colors, sufficiently different from each other to make reading easier.
    label_colors = [(0.0, 0.286, 0.572),
                    (0.0, 0.619, 0.450),
                    (0.6, 0.8, 0.196),
                    (1.0, 0.6, 0.6),
                    (1.0, 0.8, 0.0),
                    (1.0, 0.6, 0.0),
                    (0.901, 0.098, 0.294),
                    (0.6, 0.0, 0.6),
                    (1.0, 0.4, 1.0),
                    (1.0, 0.8, 0.6),
                    (0.6, 0.4, 0.2),
                    (0.588, 0.588, 0.588),
                    (0.4, 0.6, 0.8),
                    (0.6, 0.8, 1.0),
                    (0.8, 0.6, 1.0)]

    # Establish the figure and draw the horizontal season-average reference line.
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.axhline(league_average, linestyle='dotted', color='r', alpha=0.5,
               label=f'{season} League Average')

    # One scatter series (and matching sticks) per player who filled the position.
    for color_index, player in enumerate(manager_rows[pos].unique()):
        # Wrap around the palette so a slot filled by more players than there
        # are colors reuses colors instead of raising IndexError.
        player_color = label_colors[color_index % len(label_colors)]
        player_rows = manager_rows[manager_rows[pos] == player]

        # x is the real week number from the data, so the "Week" axis is accurate.
        ax.scatter(x=player_rows['week'], y=player_rows[points_col],
                   label=player, color=player_color, s=75)
        # The lollipop "stick", anchored at the league average.
        ax.vlines(x=player_rows['week'],
                  ymin=league_average,
                  ymax=player_rows[points_col],
                  colors=player_color)

    # Additional figure elements.
    ax.set_xticks(sorted(manager_rows['week'].unique()))
    ax.grid(visible=True, axis='y', alpha=0.3)
    ax.set_title(f"{pos} position for {manager}, {season} Season")
    ax.set_xlabel('Week', size=12)
    ax.set_ylabel('Points Scored')
    ax.set_ylim(-10, 50)
    ax.legend()
    plt.show()

interactive(children=(Dropdown(description='manager', options=('Ricky', 'Peter', 'Tim', 'Big Rick', 'Michael',…

### Ad-Hoc Analysis of Regular and Playoff seasons for 2023

In [53]:
# 2023 subsets used by the ad-hoc cells below:
#   dfr - 2023 rows with week < 15
#   dfp - every 2023 row
# NOTE(review): dfp is the whole 2023 season, not only weeks 15 and later —
# confirm that the "_p" results are meant to cover the full season.
is_2023 = df['season'] == 2023
dfr = df[is_2023 & (df['week'] < 15)]
dfp = df[is_2023]

In [57]:
# Row(s) of dfr holding its highest QB score.
# The mask is built from dfr itself rather than the full `df`, so it matches the
# frame being indexed without relying on index alignment.
top_qb_r = dfr['QB_points'] == dfr['QB_points'].max()
dfr.loc[top_qb_r, ['season', 'week', 'QB', 'QB_points', 'manager']]

Unnamed: 0,season,week,QB,QB_points,manager
1935,2023,9,C.J. Stroud,39.8,Eric


In [58]:
# Row(s) of dfp holding its highest QB score.
# The mask is built from dfp itself rather than the full `df`, so it matches the
# frame being indexed without relying on index alignment.
top_qb_p = dfp['QB_points'] == dfp['QB_points'].max()
dfp.loc[top_qb_p, ['season', 'week', 'QB', 'QB_points', 'manager']]

Unnamed: 0,season,week,QB,QB_points,manager
1935,2023,9,C.J. Stroud,39.8,Eric


In [78]:
# Highest and lowest single score in dfr across the WR1, WR2 and flex (W/R/T) slots.
# NOTE(review): WR3_points is not part of this selection — confirm that is intended.
wr_points_r = dfr[['WR1_points', 'WR2_points', 'W/R/T_points']]
high_wr_r = wr_points_r.max().max()
low_wr_r = wr_points_r.min().min()

print(f"{high_wr_r} | {low_wr_r}")

45.0 | -1.1


In [84]:
# Rows of dfr where WR1, WR2 or the flex slot scored exactly high_wr_r.
hit_high_wr_r = dfr[['WR1_points', 'WR2_points', 'W/R/T_points']].eq(high_wr_r).any(axis=1)
dfr.loc[hit_high_wr_r,
        ['season', 'week', 'WR1', 'WR1_points', 'WR2', 'WR2_points', 'W/R/T', 'W/R/T_points', 'manager']]

Unnamed: 0,season,week,WR1,WR1_points,WR2,WR2_points,W/R/T,W/R/T_points,manager
1878,2023,5,DJ Moore,45.0,Tyler Boyd,6.9,Rashee Rice,11.3,Tim


In [85]:
# Rows of dfr where WR1, WR2 or the flex slot scored exactly low_wr_r.
# The mask is built from dfr (the frame being indexed), not dfp, so the lookup
# does not depend on aligning a mask taken from a different frame.
hit_low_wr_r = dfr[['WR1_points', 'WR2_points', 'W/R/T_points']].eq(low_wr_r).any(axis=1)
dfr.loc[hit_low_wr_r,
        ['season', 'week', 'WR1', 'WR1_points', 'WR2', 'WR2_points', 'W/R/T', 'W/R/T_points', 'manager']]

Unnamed: 0,season,week,WR1,WR1_points,WR2,WR2_points,W/R/T,W/R/T_points,manager
1888,2023,5,Ja'Marr Chase,44.7,Marvin Mims Jr.,-1.1,Khalil Herbert,7.6,Nate


In [None]:
# Highest and lowest single score in dfp across the WR1, WR2 and flex (W/R/T) slots.
wr_points_p = dfp[['WR1_points', 'WR2_points', 'W/R/T_points']]
high_wr_p = wr_points_p.max().max()
low_wr_p = wr_points_p.min().min()

print(f"{high_wr_p} | {low_wr_p}")

In [86]:
# Highest and lowest single score in dfr across the RB1, RB2 and flex (W/R/T) slots.
rb_points_r = dfr[['RB1_points', 'RB2_points', 'W/R/T_points']]
high_rb_r = rb_points_r.max().max()
low_rb_r = rb_points_r.min().min()

print(f"{high_rb_r} | {low_rb_r}")

45.2 | -0.3


In [87]:
# Highest and lowest single score in dfp across the RB1, RB2 and flex (W/R/T) slots.
rb_points_p = dfp[['RB1_points', 'RB2_points', 'W/R/T_points']]
high_rb_p = rb_points_p.max().max()
low_rb_p = rb_points_p.min().min()

print(f"{high_rb_p} | {low_rb_p}")

45.2 | -0.6


In [89]:
# Rows of dfr where RB1, RB2 or the flex slot scored exactly high_rb_r.
hit_high_rb_r = dfr[['RB1_points', 'RB2_points', 'W/R/T_points']].eq(high_rb_r).any(axis=1)
dfr.loc[hit_high_rb_r,
        ['season', 'week', 'RB1', 'RB1_points', 'RB2', 'RB2_points', 'W/R/T', 'W/R/T_points', 'manager']]

Unnamed: 0,season,week,RB1,RB1_points,RB2,RB2_points,W/R/T,W/R/T_points,manager
1867,2023,4,Christian McCaffrey,45.2,Joe Mixon,8.1,Adam Thielen,11.7,Nick


In [90]:
# Rows of dfr where RB1, RB2 or the flex slot scored exactly low_rb_r.
hit_low_rb_r = dfr[['RB1_points', 'RB2_points', 'W/R/T_points']].eq(low_rb_r).any(axis=1)
dfr.loc[hit_low_rb_r,
        ['season', 'week', 'RB1', 'RB1_points', 'RB2', 'RB2_points', 'W/R/T', 'W/R/T_points', 'manager']]

Unnamed: 0,season,week,RB1,RB1_points,RB2,RB2_points,W/R/T,W/R/T_points,manager
1841,2023,2,Dalvin Cook,-0.3,AJ Dillon,6.8,Zay Flowers,8.8,Buddy


In [91]:
# Rows of dfp where RB1, RB2 or the flex slot scored exactly low_rb_p.
# All three comparisons run against dfp; checking RB2 and W/R/T against dfr
# would leave the rows that exist only in dfp unable to match on those slots.
hit_low_rb_p = dfp[['RB1_points', 'RB2_points', 'W/R/T_points']].eq(low_rb_p).any(axis=1)
dfp.loc[hit_low_rb_p,
        ['season', 'week', 'RB1', 'RB1_points', 'RB2', 'RB2_points', 'W/R/T', 'W/R/T_points', 'manager']]

Unnamed: 0,season,week,RB1,RB1_points,RB2,RB2_points,W/R/T,W/R/T_points,manager
2017,2023,15,D'Onta Foreman,-0.6,Jerick McKinnon,14.66,Tee Higgins,20.1,Pat


In [101]:
# Highest and lowest TE score in dfr.
te_points_r = dfr['TE_points']
high_te_r = te_points_r.max()
low_te_r = te_points_r.min()
print(f"{high_te_r} | {low_te_r}")

29.9 | 0.0


In [99]:
# Highest and lowest TE score in dfp.
te_points_p = dfp['TE_points']
high_te_p = te_points_p.max()
low_te_p = te_points_p.min()
print(f"{high_te_p} | {low_te_p}")

29.9 | 0.0


In [95]:
# Rows of dfr where the TE slot or the flex slot scored exactly low_te_r.
hit_low_te_r = dfr[['TE_points', 'W/R/T_points']].eq(low_te_r).any(axis=1)
dfr.loc[hit_low_te_r,
        ['season', 'week', 'TE', 'TE_points', 'W/R/T', 'W/R/T_points', 'manager']]

Unnamed: 0,season,week,TE,TE_points,W/R/T,W/R/T_points,manager
1825,2023,1,Mark Andrews,0.0,David Montgomery,13.4,Nick
1827,2023,1,Travis Kelce,0.0,George Pickens,6.1,Buddy
1829,2023,1,Chigoziem Okonkwo,0.0,Alexander Mattison,11.9,Jones
1831,2023,1,Dallas Goedert,0.0,Antonio Gibson,0.4,Jake
1853,2023,3,Mark Andrews,5.5,David Montgomery,0.0,Nick
1885,2023,5,Hunter Henry,0.0,Raheem Mostert,13.8,Jones
1909,2023,7,Mark Andrews,20.3,Jameson Williams,0.0,Nick
1914,2023,7,Cole Kmet,0.0,Jakobi Meyers,14.5,Michael
1915,2023,7,none,0.0,Marquise Brown,6.4,Jake
1934,2023,9,Tyler Higbee,0.0,Mike Evans,10.7,Tim


In [96]:
# Rows of dfp where the TE slot or the flex slot scored exactly low_te_p.
# The threshold is dfp's own minimum (low_te_p), not the one computed from dfr,
# so this lookup stays correct if the two minimums ever differ.
hit_low_te_p = dfp[['TE_points', 'W/R/T_points']].eq(low_te_p).any(axis=1)
dfp.loc[hit_low_te_p,
        ['season', 'week', 'TE', 'TE_points', 'W/R/T', 'W/R/T_points', 'manager']]

Unnamed: 0,season,week,TE,TE_points,W/R/T,W/R/T_points,manager
1825,2023,1,Mark Andrews,0.0,David Montgomery,13.4,Nick
1827,2023,1,Travis Kelce,0.0,George Pickens,6.1,Buddy
1829,2023,1,Chigoziem Okonkwo,0.0,Alexander Mattison,11.9,Jones
1831,2023,1,Dallas Goedert,0.0,Antonio Gibson,0.4,Jake
1853,2023,3,Mark Andrews,5.5,David Montgomery,0.0,Nick
1885,2023,5,Hunter Henry,0.0,Raheem Mostert,13.8,Jones
1909,2023,7,Mark Andrews,20.3,Jameson Williams,0.0,Nick
1914,2023,7,Cole Kmet,0.0,Jakobi Meyers,14.5,Michael
1915,2023,7,none,0.0,Marquise Brown,6.4,Jake
1934,2023,9,Tyler Higbee,0.0,Mike Evans,10.7,Tim


In [102]:
# Rows of dfr where the TE slot or the flex slot scored exactly high_te_r.
hit_high_te_r = dfr[['TE_points', 'W/R/T_points']].eq(high_te_r).any(axis=1)
dfr.loc[hit_high_te_r,
        ['season', 'week', 'TE', 'TE_points', 'W/R/T', 'W/R/T_points', 'manager']]

Unnamed: 0,season,week,TE,TE_points,W/R/T,W/R/T_points,manager
1911,2023,7,Travis Kelce,29.9,Zay Flowers,9.5,Buddy


In [103]:
# Highest and lowest DEF score in dfr.
def_points_r = dfr['DEF_points']
high_def_r = def_points_r.max()
low_def_r = def_points_r.min()
print(f"{high_def_r} | {low_def_r}")

42.0 | 0.0


In [105]:
# Column-wise minimum of DEF_points in dfr, returned as a one-element Series.
dfr[['DEF_points']].min()

DEF_points    0.0
dtype: float64

In [106]:
# Highest and lowest DEF score in dfp.
def_points_p = dfp['DEF_points']
high_def_p = def_points_p.max()
low_def_p = def_points_p.min()
print(f"{high_def_p} | {low_def_p}")

42.0 | 0.0


In [108]:
# Rows of dfr whose DEF score equals high_def_r.
hit_high_def_r = dfr['DEF_points'].eq(high_def_r)
dfr.loc[hit_high_def_r, ['season', 'week', 'DEF', 'DEF_points',  'manager']]

Unnamed: 0,season,week,DEF,DEF_points,manager
1821,2023,1,Dallas,42.0,Pat


In [109]:
# Rows of dfr whose DEF score equals low_def_r.
hit_low_def_r = dfr['DEF_points'].eq(low_def_r)
dfr.loc[hit_low_def_r, ['season', 'week', 'DEF', 'DEF_points',  'manager']]

Unnamed: 0,season,week,DEF,DEF_points,manager
1936,2023,9,Detroit,0.0,Frags
