In [1]:
# Import python packages
import os
import numpy as np
import pandas as pd
import rpy2
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri

In [2]:
# Can uncomment to install R packages as needed
#utils = importr('utils')
#utils.install_packages('nflreadr')
#utils.install_packages('ffscrapr')

In [3]:
# Import R packages
ffscrapr = importr('ffscrapr')

In [4]:
# Config for the calculation

# MFL id for league
mfl_id = 60206

# Season being evaluated, and the earlier seasons to collect data for
current_season = 2024
past_seasons = [2023, 2022, 2021, 2020]

# Week of the current season being evaluated (0 is treated as pre-season)
current_week = 0

# Maximum week number to include in calculation
max_week = 17

# Minimum number of scored games for a player-season to be flagged as robust
min_games = 8

# Filename prefix used for every CSV that is read or written
save_label = 'Analytics_Dynasty_League_2024'

# When True, reuse previously saved CSVs instead of recomputing/rescraping
load_data = True

In [5]:
# Map from teams to conferences.
# Teams are listed per conference and then sorted by name, so the resulting
# dict has alphabetical key order. Both Washington names are kept so that
# older seasons still resolve.
conf_dic = dict(sorted(
    [(team, 'AFC') for team in (
        'Baltimore Ravens',
        'Buffalo Bills',
        'Cincinnati Bengals',
        'Cleveland Browns',
        'Denver Broncos',
        'Houston Texans',
        'Indianapolis Colts',
        'Jacksonville Jaguars',
        'Kansas City Chiefs',
        'Las Vegas Raiders',
        'Los Angeles Chargers',
        'Miami Dolphins',
        'New England Patriots',
        'New York Jets',
        'Pittsburgh Steelers',
        'Tennessee Titans',
    )]
    + [(team, 'NFC') for team in (
        'Arizona Cardinals',
        'Atlanta Falcons',
        'Carolina Panthers',
        'Chicago Bears',
        'Dallas Cowboys',
        'Detroit Lions',
        'Green Bay Packers',
        'Los Angeles Rams',
        'Minnesota Vikings',
        'New Orleans Saints',
        'New York Giants',
        'Philadelphia Eagles',
        'San Francisco 49ers',
        'Seattle Seahawks',
        'Tampa Bay Buccaneers',
        'Washington Commanders',
        'Washington Football Team',
    )]
))

In [6]:
# Map from positions to rank floors (looked up by position code)
rank_floor_dic = dict(
    CB=36,
    DE=40,
    DT=36,
    LB=40,
    PK=16,
    PN=16,
    QB=16,
    RB=28,
    S=40,
    TE=16,
    WR=52,
)

In [None]:
# Check whether or not to load roster and playerscore data from disk.
# The cached path is only taken when load_data is set AND both CSVs exist;
# otherwise both tables are scraped again and the CSVs are (re)written.
if load_data and os.path.isfile(f'{save_label}_Rosters.csv') and os.path.isfile(f'{save_label}_PlayerScores.csv'):
    # Will load roster and playerscore data from disk
    print('Loading roster and playerscore data from disk')
    
    rosters_df = pd.read_csv(f'{save_label}_Rosters.csv')
    playerscores_df = pd.read_csv(f'{save_label}_PlayerScores.csv')

else:
    # Will scrape roster and playerscore data from MFL
    print('Scraping roster and playerscore data from MFL')
    
    # Dicts to store one DataFrame per season, keyed by season.
    # Note: these names are rebound to single concatenated DataFrames below.
    rosters_df = {}
    playerscores_df = {}

    # !!! NEED TO SCRAPE DATA FOR CURRENT SEASON TOO, ADD WHEN APPROPRIATE !!!
    for s in past_seasons:
        # Scrape player positions from MFL and convert to pandas df
        # NOTE(review): rate_limit_number=1 / rate_limit_seconds=6 presumably throttles
        # to one request per six seconds -- confirm against the ffscrapr docs.
        mfl = ffscrapr.mfl_connect(season=s, league_id=mfl_id, rate_limit_number=1, rate_limit_seconds=6)
        
        rosters_df_r = ffscrapr.ff_rosters(mfl)
        # Request weeks 1..max_week (inclusive)
        playerscores_df_r = ffscrapr.ff_playerscores(mfl, season=s, week=[i+1 for i in range(max_week)])
        
        # Convert the R data frames to pandas inside a local conversion context
        with (ro.default_converter + pandas2ri.converter).context():
            rosters_df[s] = ro.conversion.get_conversion().rpy2py(rosters_df_r)
            playerscores_df[s] = ro.conversion.get_conversion().rpy2py(playerscores_df_r)
    
        # Cleanup some of the data: stamp the rosters with the scraped season
        # and cast the key/score columns to numeric dtypes
        rosters_df[s]['season'] = s
        rosters_df[s]['player_id'] = rosters_df[s]['player_id'].astype(int)
    
        playerscores_df[s]['season'] = playerscores_df[s]['season'].astype(int)
        playerscores_df[s]['week'] = playerscores_df[s]['week'].astype(int)
        playerscores_df[s]['player_id'] = playerscores_df[s]['player_id'].astype(int)
        playerscores_df[s]['points'] = playerscores_df[s]['points'].astype(float)
        playerscores_df[s] = playerscores_df[s].drop('is_available', axis=1)
    
    # Merge seasons to single df (concat over a dict stacks its values;
    # ignore_index=True discards the season keys and renumbers the rows)
    rosters_df = pd.concat(rosters_df, ignore_index=True)
    playerscores_df = pd.concat(playerscores_df, ignore_index=True)
    
    # Sort by the key columns for cleaner display
    rosters_df = rosters_df.set_index(['player_id','season']).sort_index(level=[0,1],ascending=[True,True]).reset_index()
    playerscores_df = playerscores_df.set_index(['player_id','season','week']).sort_index(level=[0,1,2],ascending=[True,True,True]).reset_index()
    
    # Save a copy of the roster and playerscore dfs so that later runs with
    # load_data=True can skip the scrape
    rosters_df.to_csv(f'{save_label}_Rosters.csv', index=False)
    playerscores_df.to_csv(f'{save_label}_PlayerScores.csv', index=False)

In [None]:
# Display the roster table produced by the previous cell
rosters_df

In [None]:
# Display the weekly player score table produced by the load/scrape cell
playerscores_df

In [None]:
# Check whether or not to load contract data from disk
if load_data and os.path.isfile(f'{save_label}_Contracts.csv'):
    # Will load contract data from disk
    print('Loading contract data from disk')
    
    contracts_df = pd.read_csv(f'{save_label}_Contracts.csv')

else:
    # Will calculate contract data from roster and playerscore data
    print('Calculating contract data from roster and playerscore data')

    # Determine if player participated in enough games for season to be considered robust.
    # The frame is indexed by (player_id, season, week) so the groupbys below can
    # group on index level names; count() only counts non-null 'points'.
    # NOTE(review): the num_games assignment puts a (player_id, season)-indexed Series
    # into a (player_id, season, week)-indexed frame, i.e. it relies on pandas aligning
    # on the leading index levels -- confirm on the pandas version in use.
    playerscores_df = playerscores_df.set_index(['player_id','season','week']).sort_index(level=[0,1,2],ascending=[True,True,True])
    playerscores_df['num_games'] = playerscores_df['points'].groupby(['player_id','season']).count()
    playerscores_df['is_robust'] = playerscores_df['num_games'] >= min_games
    
    # Aggregate and rank qualifying player scores by position.
    # head(1) keeps the first row of each player-season (rows are sorted by week),
    # droplevel(2) removes the week level, leaving one row per (player_id, season).
    contracts_df = playerscores_df.groupby(['player_id','season']).head(1).droplevel(2).drop('points', axis=1)
    contracts_df['tot_pts'] = playerscores_df['points'].groupby(['player_id','season']).sum()
    contracts_df['avg_pts'] = playerscores_df['points'].groupby(['player_id','season']).mean()

    # Restore playerscores_df to a flat frame; it keeps the added num_games / is_robust columns
    playerscores_df = playerscores_df.reset_index()
    contracts_df = contracts_df.reset_index().set_index(['pos','season']).sort_index(level=[0,1],ascending=[True,True])
    
    # Rank 1 = most points within each (pos, season). method='average' gives tied
    # players a shared, possibly fractional (x.5) rank. '.values' strips the index,
    # so the ranks are assigned positionally.
    contracts_df['tot_pts_rank'] = contracts_df['tot_pts'].groupby(['pos','season']).rank(method='average',ascending=False).values
    contracts_df['avg_pts_rank'] = contracts_df['avg_pts'].groupby(['pos','season']).rank(method='average',ascending=False).values
    # Per-position rank floor; raises KeyError for a position missing from rank_floor_dic
    contracts_df['floor_pts_rank'] = contracts_df.index.get_level_values(0).map(lambda x: rank_floor_dic[x])
    
    contracts_df = contracts_df.reset_index().set_index(['player_id','season']).sort_index(level=[0,1],ascending=[True,True])
    
    # Merge in aggregated playerscores to roster_df.
    # Left merge: every roster row is kept; player-seasons without score rows get NaN
    # in the merged columns.
    contracts_df = rosters_df.set_index(['player_id','season']).sort_index(level=[0,1],ascending=[True,True]).merge(contracts_df[['tot_pts','tot_pts_rank','avg_pts','avg_pts_rank','floor_pts_rank','num_games','is_robust']], how='left', left_index=True, right_index=True)
    # Conference is looked up from the franchise name; raises KeyError for a name missing from conf_dic
    contracts_df['conf'] = contracts_df['franchise_name'].map(lambda x: conf_dic[x])
    
    # Rank the salaries by position (rank 1 = highest salary within each (pos, season)).
    # method='first' breaks ties by row order, so ranks within a group are distinct.
    contracts_df = contracts_df.reset_index().set_index(['pos','season']).sort_index(level=[0,1],ascending=[True,True])
    contracts_df['salary_rank'] = contracts_df['salary'].groupby(['pos','season']).rank(method='first',ascending=False).values
    contracts_df = contracts_df.reset_index()
    
    # Sort by position, season, salary rank and conference for cleaner display
    contracts_df = contracts_df.set_index(['pos','season','salary_rank','conf']).sort_index(level=[0,1,2,3],ascending=[True,True,True,True]).reset_index()
    
    # Save a copy of the contract df
    contracts_df.to_csv(f'{save_label}_Contracts.csv', index=False)

In [None]:
# Display the contract table produced by the previous cell
contracts_df

In [None]:
###
def PtsRankToSalary(pts_rank, salaries):
    """Map a points rank to the salary implied by a ranked list of salaries.

    Parameters
    ----------
    pts_rank : float
        Points rank, where 1 is the best. May be NaN, and may be fractional
        (k + 0.5) when tied players share an averaged rank.
    salaries : sequence of float
        Salaries indexed by position, with ``salaries[0]`` treated as salary
        rank 1. Expected to be ordered by salary rank.

    Returns
    -------
    float
        * NaN when ``pts_rank`` is NaN.
        * For ``pts_rank == 1``:
          ``salaries[0] + salaries[1] - 0.5*salaries[2] - 0.5*salaries[3]``.
        * Otherwise the mean of the salaries at salary ranks
          ``2*pts_rank - 3`` and ``2*pts_rank - 2``, each clamped to at least 1.

    Raises
    ------
    IndexError
        If ``salaries`` is too short for the salary ranks that are needed.
    """
    # Map points rank to corresponding salary
    if np.isnan(pts_rank):
        return np.nan

    if pts_rank == 1:
        return salaries[0] + salaries[1] - 0.5*salaries[2] - 0.5*salaries[3]

    # A tie for first place gives pts_rank == 1.5, for which 2*pts_rank - 3 is 0.
    # Without the clamp that becomes index -1 and silently picks the LAST salary
    # in the list, so salary ranks are floored at 1 (the top salary).
    sal_eval_rank_high = max(int(np.round(2*pts_rank - 3)), 1)
    sal_eval_rank_low = max(int(np.round(2*pts_rank - 2)), 1)

    # Salary ranks are 1-based, list positions are 0-based
    sal_eval_high = salaries[sal_eval_rank_high - 1]
    sal_eval_low = salaries[sal_eval_rank_low - 1]

    return 0.5*sal_eval_high + 0.5*sal_eval_low

###
def CalculateBaselineEYS(df, player_id, player_conference, current_season, current_week, min_salary=1.7):
    """Calculate the baseline extended years salary (EYS) for one player.

    The result is the largest of several candidate salaries:

    * ``min_salary``
    * the player's own salary in the query season
    * the salary implied by the player's points ranks in the query season
    * the salaries implied by the points ranks of the player's two most recent
      robust seasons before ``current_season``

    For every season the points rank used is
    ``min(tot_pts_rank, avg_pts_rank, floor_pts_rank)``, and it is converted to
    a salary with ``PtsRankToSalary`` against the query season's salaries for
    the player's position.

    Parameters
    ----------
    df : pandas.DataFrame
        Contract table. Columns read here: player_id, conf, season, pos,
        salary, salary_rank, tot_pts_rank, avg_pts_rank, floor_pts_rank,
        is_robust.
    player_id
        Value matched against ``df['player_id']``.
    player_conference
        Value matched against ``df['conf']``.
    current_season : int
        Season being evaluated.
    current_week : int
        Week being evaluated. 0 means pre-season: the previous season's
        contracts are used and all salaries are multiplied by 1.1.
    min_salary : float, optional
        Lower bound for the returned salary.

    Returns
    -------
    float
        The largest candidate salary, rounded to 2 decimals.

    Raises
    ------
    IndexError
        If the player has no row for the query season in that conference.
    """
    # Some logic to make sure pre-season vs in-season calculation is handled correctly
    is_preseason = (current_week == 0)
    query_season = current_season - 1 if is_preseason else current_season
    salary_multiplier = 1.1 if is_preseason else 1.

    # Downselect data based on player_id and conference
    player_df = df[ (df['player_id'] == player_id) & (df['conf'] == player_conference) ]

    # Query-season salaries indexed by (pos, salary_rank), so that
    # salary_df.loc[pos] yields that position's salaries in salary-rank order
    salary_df = salary_multiplier * df[ (df['season'] == query_season) ][['pos','salary_rank','salary']].set_index(['pos','salary_rank']).sort_index(level=[0,1],ascending=[True,True])['salary']

    # The player's own row(s) for the query season
    query_df = player_df[ player_df['season'] == query_season ]
    current_salary = salary_multiplier * query_df['salary'].values[0]

    # Candidate salaries; min_salary goes first so it acts as the floor
    calculated_salaries = [min_salary, current_salary]

    # Two most recent robust seasons before current_season. Sort by season
    # explicitly: the caller's frame may be ordered by position first, in which
    # case tail(2) alone could pick the wrong seasons for a player whose
    # position changed.
    robust_df = player_df[ (player_df['season'] < current_season) & (player_df['is_robust']) ].sort_values('season', kind='stable').tail(2)

    # Same rank -> salary conversion for the query season and the robust seasons
    for season_df in (query_df, robust_df):
        for pos, tot_rank, avg_rank, floor_rank in zip(season_df['pos'].values, season_df['tot_pts_rank'].values, season_df['avg_pts_rank'].values, season_df['floor_pts_rank'].values):
            calculated_salaries.append( PtsRankToSalary( min(tot_rank, avg_rank, floor_rank), salary_df.loc[pos].values ) )

    # Floor the calculated salary using the relevant minimums
    return np.round(max(calculated_salaries), 2)

def SmoothNewContract():
    """Placeholder (not implemented yet).

    Intended to calculate the new smoothed contract based on years remaining
    plus years extended. Currently does nothing and returns None.
    """
    return None

In [None]:
# Calculate BaselineEYS for every player in the specified current season-week.
# Pre-season (current_week == 0) works from the previous season's contracts.
# .copy() makes ext_df an independent frame so the column assignments below do
# not write into a slice of contracts_df.
ext_df = contracts_df[ contracts_df['season'] == ( current_season if (current_week > 0) else current_season-1 ) ][['player_id','player_name','conf','pos','salary','contract_years']].copy()

# Use the configured season/week rather than hard-coded values so that this
# call always agrees with the season filter above.
ext_df['baselineEYS'] = ext_df.apply(lambda x: CalculateBaselineEYS(contracts_df, x['player_id'], x['conf'], current_season, current_week), axis=1)

# Update some data, if necessary: in pre-season the salary is bumped by 10% and
# one contract year is removed
ext_df['salary'] = ext_df['salary'] * ( 1. if (current_week > 0) else 1.1 )
ext_df['contract_years'] = ext_df['contract_years'] - ( 0. if (current_week > 0) else 1 )
ext_df['current_season'] = current_season
ext_df['current_week'] = current_week

# Save a copy of the BaselineEYS df
ext_df.to_csv(f'{save_label}_BaselineEYS.csv', index=False)

In [None]:
# Display the BaselineEYS table produced by the previous cell
ext_df