This file assumes all the other feature engineering code has already been run.

### Fantasy Points Data

In [1]:
import pandas as pd

# Let wide frames display up to 100 columns.
pd.set_option('display.max_columns', 100)

# Weekly fantasy points; only the columns used further down are kept.
points_columns = ['Season', 'Week', 'Name', 'Position', 'Team', 'Fantasy Points']
df_points = pd.read_parquet(r'..\..\data\fantasy_points\footballguys_half_ppr.parquet')[points_columns]

# Restrict to the 2020 season onward and renumber the rows from zero.
df_points = df_points[df_points['Season'] >= 2020].reset_index(drop=True)

df_points

Unnamed: 0,Season,Week,Name,Position,Team,Fantasy Points
0,2020,1,Davante Adams,WR,GB,34.6
1,2020,1,Calvin Ridley,WR,ATL,29.4
2,2020,1,Adam Thielen,WR,MIN,26.0
3,2020,1,Darius Slayton,WR,NYG,25.2
4,2020,1,DeAndre Hopkins,WR,ARI,22.1
...,...,...,...,...,...,...
28828,2023,17,Tampa Bay Buccaneers,DST,TB,1.0
28829,2023,17,Washington Commanders,DST,WAS,1.0
28830,2023,17,Seattle Seahawks,DST,SEA,0.0
28831,2023,17,Atlanta Falcons,DST,ATL,-1.0


In [2]:
# Normalise player names so they can be joined against the other sources.
# DST rows are skipped and keep their full team name.
is_player = df_points['Position'] != 'DST'

player_names = df_points.loc[is_player, 'Name']
player_names = player_names.str.split().str[:2].str.join(' ')  # keep only the first two words
player_names = player_names.str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)  # drop non-alphanumeric characters
player_names = player_names.str.lower()  # lowercase so matching ignores case

df_points.loc[is_player, 'Name'] = player_names

df_points

Unnamed: 0,Season,Week,Name,Position,Team,Fantasy Points
0,2020,1,davante adams,WR,GB,34.6
1,2020,1,calvin ridley,WR,ATL,29.4
2,2020,1,adam thielen,WR,MIN,26.0
3,2020,1,darius slayton,WR,NYG,25.2
4,2020,1,deandre hopkins,WR,ARI,22.1
...,...,...,...,...,...,...
28828,2023,17,Tampa Bay Buccaneers,DST,TB,1.0
28829,2023,17,Washington Commanders,DST,WAS,1.0
28830,2023,17,Seattle Seahawks,DST,SEA,0.0
28831,2023,17,Atlanta Falcons,DST,ATL,-1.0


In [3]:
# Full team name -> abbreviation, built from the DST rows (their Name column
# holds the team name and their Team column the abbreviation).
dst_rows = df_points.loc[df_points['Position'] == 'DST', ['Name', 'Team']].drop_duplicates()
team_to_abbreviation = dst_rows.set_index('Name')['Team'].to_dict()

team_to_abbreviation

{'New Orleans Saints': 'NO',
 'Baltimore Ravens': 'BAL',
 'Washington Commanders': 'WAS',
 'New England Patriots': 'NE',
 'Los Angeles Chargers': 'LAC',
 'Buffalo Bills': 'BUF',
 'Pittsburgh Steelers': 'PIT',
 'Kansas City Chiefs': 'KC',
 'New York Jets': 'NYJ',
 'Arizona Cardinals': 'ARI',
 'Jacksonville Jaguars': 'JAX',
 'Seattle Seahawks': 'SEA',
 'Green Bay Packers': 'GB',
 'Indianapolis Colts': 'IND',
 'Dallas Cowboys': 'DAL',
 'Denver Broncos': 'DEN',
 'New York Giants': 'NYG',
 'Los Angeles Rams': 'LAR',
 'San Francisco 49ers': 'SF',
 'Chicago Bears': 'CHI',
 'Cincinnati Bengals': 'CIN',
 'Tennessee Titans': 'TEN',
 'Philadelphia Eagles': 'PHI',
 'Miami Dolphins': 'MIA',
 'Detroit Lions': 'DET',
 'Cleveland Browns': 'CLE',
 'Houston Texans': 'HOU',
 'Las Vegas Raiders': 'LV',
 'Tampa Bay Buccaneers': 'TB',
 'Atlanta Falcons': 'ATL',
 'Carolina Panthers': 'CAR',
 'Minnesota Vikings': 'MIN'}

### Expert Rankings Data

In [4]:
# Stack the superflex and kicker rankings into one frame.
ranking_files = [
    r'..\..\data\fantasypros_in_season_rankings\superflex_rankings.parquet',
    r'..\..\data\fantasypros_in_season_rankings\kicker_rankings.parquet',
]
df_player_rankings = pd.concat([pd.read_parquet(path) for path in ranking_files], ignore_index=True)
df_player_rankings = df_player_rankings.sort_values(['Year', 'Week', 'RK', 'POS RK'], ignore_index=True)

# Keep 2020 onward, then drop rows that have no player name.
df_player_rankings = df_player_rankings[df_player_rankings['Year'] >= 2020].reset_index(drop=True)
df_player_rankings = df_player_rankings[df_player_rankings['Player Name'].notna()].reset_index(drop=True)

# make names easier to match
ranked_names = df_player_rankings['Player Name']
ranked_names = ranked_names.str.split().str[:2].str.join(' ')  # keep only the first two words
ranked_names = ranked_names.str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)  # drop non-alphanumeric characters
ranked_names = ranked_names.str.lower()  # lowercase so matching ignores case
df_player_rankings['Player Name'] = ranked_names

df_player_rankings

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK
0,2020,1,1.0,patrick mahomes,QB,1
1,2020,1,2.0,lamar jackson,QB,2
2,2020,1,3.0,christian mccaffrey,RB,1
3,2020,1,4.0,dak prescott,QB,3
4,2020,1,5.0,russell wilson,QB,4
...,...,...,...,...,...,...
32017,2023,17,,eddy pineiro,K,29
32018,2023,17,,greg zuerlein,K,30
32019,2023,17,,joey slye,K,31
32020,2023,17,,chad ryland,K,32


In [5]:
df_defense_rankings = pd.read_parquet(r'..\..\data\fantasypros_in_season_rankings\defense_rankings.parquet')

# Keep 2020 onward and drop rows that have no team name.
keep_defense_rows = (df_defense_rankings['Year'] >= 2020) & df_defense_rankings['Player Name'].notna()
df_defense_rankings = df_defense_rankings[keep_defense_rows].reset_index(drop=True)

df_defense_rankings

Unnamed: 0,Year,Week,POS RK,Player Name,POS,OPP
0,2020,1,1,Buffalo Bills,DST,vs. NYJ
1,2020,1,2,Pittsburgh Steelers,DST,at NYG
2,2020,1,3,Baltimore Ravens,DST,vs. CLE
3,2020,1,4,New England Patriots,DST,vs. MIA
4,2020,1,5,Philadelphia Eagles,DST,at WAS
...,...,...,...,...,...,...
2011,2023,17,28,Cincinnati Bengals,DST,at KC
2012,2023,17,29,Detroit Lions,DST,at DAL
2013,2023,17,30,New York Giants,DST,vs. LAR
2014,2023,17,31,Arizona Cardinals,DST,at PHI


### Prop Bets Data

In [6]:
# Each props file is tagged with the position it covers before stacking.
prop_sources = [
    (r'..\..\data\betting_lines\qb1_props.parquet', 'QB'),
    (r'..\..\data\betting_lines\rb1_props.parquet', 'RB'),
    (r'..\..\data\betting_lines\rb2_props.parquet', 'RB'),
    (r'..\..\data\betting_lines\wr1_props.parquet', 'WR'),
    (r'..\..\data\betting_lines\wr2_props.parquet', 'WR'),
    (r'..\..\data\betting_lines\wr3_props.parquet', 'WR'),
    (r'..\..\data\betting_lines\te1_props.parquet', 'TE'),
]

df_props = pd.concat(
    [pd.read_parquet(path).assign(Position=position) for path, position in prop_sources],
    ignore_index=True,
)
df_props = df_props.sort_values(['Year', 'Week'], ignore_index=True)

# Move Position so it sits directly after Player Name.
position_slot = df_props.columns.get_loc('Player Name') + 1
df_props.insert(position_slot, 'Position', df_props.pop('Position'))

df_props

Unnamed: 0,Year,Week,Player Name,Position,Team,Passing Touchdowns Projection,Passing Touchdowns Over,Passing Touchdowns Under,Passing Yards Projection,Passing Yards Over,Passing Yards Under,Anytime Touchdown Line,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under,Interceptions Projection,Interceptions Over,Interceptions Under,Adjusted Passing Touchdowns Projection,Adjusted Passing Yards Projection,Adjusted Rushing Yards Projection,Adjusted Interceptions Projection,Anytime Touchdown Probability,Receiving Yards Projection,Receiving Yards Over,Receiving Yards Under,Receptions Projection,Receptions Over,Receptions Under,Adjusted Receiving Yards Projection,Adjusted Receptions Projection
0,2020,1,patrick mahomes,QB,KC,2.5,104.0,-132.0,306.5,-110.0,-110.0,300.0,20.5,-105.0,-115.0,,,,2.460615,306.5,20.488656,,0.250000,,,,,,,,
1,2020,1,lamar jackson,QB,BAL,1.5,-152.0,-120.0,222.5,-110.0,-110.0,110.0,60.5,-110.0,-110.0,,,,1.528860,222.5,60.500000,,0.476190,,,,,,,,
2,2020,1,dak prescott,QB,DAL,2.5,-175.0,-182.0,285.5,-110.0,-110.0,333.0,11.5,-132.0,-115.0,,,,2.495487,285.5,11.517041,,0.230947,,,,,,,,
3,2020,1,russell wilson,QB,SEA,1.5,-140.0,120.0,259.5,-110.0,-110.0,400.0,21.5,-110.0,-110.0,,,,1.564394,259.5,21.500000,,0.200000,,,,,,,,
4,2020,1,deshaun watson,QB,HOU,1.5,-152.0,120.0,275.5,-110.0,-110.0,190.0,30.5,-122.0,-110.0,,,,1.574315,275.5,30.512870,,0.344828,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14084,2023,17,mike gesicki,TE,NE,,,,,,,500.0,,,,,,,,,,,0.166667,17.5,-115.0,-110.0,2.5,116.0,-150.0,17.505537,2.431481
14085,2023,17,josh oliver,TE,MIN,,,,,,,450.0,,,,,,,,,,,0.181818,10.5,-115.0,-115.0,,,,10.500000,
14086,2023,17,austin hooper,TE,LV,,,,,,,450.0,,,,,,,,,,,0.181818,20.5,-113.0,-110.0,2.5,100.0,-135.0,20.503353,2.462766
14087,2023,17,adam trautman,TE,DEN,,,,,,,400.0,,,,,,,,,,,0.200000,5.5,-110.0,-110.0,0.5,-174.0,130.0,5.500000,0.600127


### Game Lines Data

In [7]:
df_game_lines = pd.read_parquet(r'..\..\data\betting_lines\projected_game_scores.parquet')

# Keep 2020 onward. The source's team columns hold town names (e.g. 'Houston'),
# so they are renamed to '... Town'.
df_game_lines = (
    df_game_lines[df_game_lines['Season'] >= 2020]
    .reset_index(drop=True)
    .rename(columns={'Away Team': 'Away Town', 'Home Team': 'Home Town'})
)

df_game_lines

Unnamed: 0,Season,Week,Away Town,Home Town,Total,Over Odds,Under Odds,Spread,Away Odds,Home Odds,Projected Away Score,Projected Home Score
0,2020,1,Houston,Kansas City,53.5,-110.0,-110.0,9.5,-110,-110,22.00,31.50
1,2020,1,Green Bay,Minnesota,44.5,-110.0,-110.0,1.5,-110,-110,21.50,23.00
2,2020,1,Chicago,Detroit,42.5,-110.0,-110.0,2.5,-110,-110,20.00,22.50
3,2020,1,Indianapolis,Jacksonville,44.5,-110.0,-110.0,-7.0,-110,-110,25.75,18.75
4,2020,1,Las Vegas,Carolina,48.0,-110.0,-110.0,-3.0,-105,-115,25.50,22.50
...,...,...,...,...,...,...,...,...,...,...,...,...
1001,2023,17,San Francisco,Washington,48.5,-110.0,-110.0,-14.0,-115,-105,31.25,17.25
1002,2023,17,Pittsburgh,Seattle,40.5,-112.0,-108.0,4.5,-110,-110,18.00,22.50
1003,2023,17,Cincinnati,Kansas City,46.0,-110.0,-110.0,7.0,-105,-115,19.50,26.50
1004,2023,17,Los Angeles,Denver,39.5,-110.0,-110.0,3.5,-110,-110,18.00,21.50


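Map each team's town to its abbreviation. Towns shared by two teams (New York and Los Angeles) are ambiguous, so they are left out of the mapping here and resolved separately below.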

In [8]:
# Town name -> team abbreviation for every town that identifies a single team.
# 'New York' (NYG / NYJ) and 'Los Angeles' (LAR / LAC) are deliberately absent:
# the town alone cannot tell the two teams apart.
town_to_abbreviation = dict([
    ('Minnesota', 'MIN'),
    ('Baltimore', 'BAL'),
    ('Cleveland', 'CLE'),
    ('Chicago', 'CHI'),
    ('Jacksonville', 'JAX'),
    ('Miami', 'MIA'),
    ('Washington', 'WAS'),
    ('Carolina', 'CAR'),
    ('Cincinnati', 'CIN'),
    ('Denver', 'DEN'),
    ('Tennessee', 'TEN'),
    ('Philadelphia', 'PHI'),
    ('Kansas City', 'KC'),
    ('Pittsburgh', 'PIT'),
    ('New England', 'NE'),
    ('Houston', 'HOU'),
    ('Detroit', 'DET'),
    ('Atlanta', 'ATL'),
    ('Seattle', 'SEA'),
    ('Tampa Bay', 'TB'),
    ('Green Bay', 'GB'),
    ('Dallas', 'DAL'),
    ('San Francisco', 'SF'),
    ('Indianapolis', 'IND'),
    ('Arizona', 'ARI'),
    ('Las Vegas', 'LV'),
    ('Buffalo', 'BUF'),
    ('New Orleans', 'NO'),
])

len(town_to_abbreviation)

28

In [9]:
# Translate each town column into a team-abbreviation column (home first, then
# away). Towns without an entry in the mapping come out as NaN.
for team_col, town_col in [('Home Team', 'Home Town'), ('Away Team', 'Away Town')]:
    df_game_lines[team_col] = df_game_lines[town_col].map(town_to_abbreviation)

df_game_lines

Unnamed: 0,Season,Week,Away Town,Home Town,Total,Over Odds,Under Odds,Spread,Away Odds,Home Odds,Projected Away Score,Projected Home Score,Home Team,Away Team
0,2020,1,Houston,Kansas City,53.5,-110.0,-110.0,9.5,-110,-110,22.00,31.50,KC,HOU
1,2020,1,Green Bay,Minnesota,44.5,-110.0,-110.0,1.5,-110,-110,21.50,23.00,MIN,GB
2,2020,1,Chicago,Detroit,42.5,-110.0,-110.0,2.5,-110,-110,20.00,22.50,DET,CHI
3,2020,1,Indianapolis,Jacksonville,44.5,-110.0,-110.0,-7.0,-110,-110,25.75,18.75,JAX,IND
4,2020,1,Las Vegas,Carolina,48.0,-110.0,-110.0,-3.0,-105,-115,25.50,22.50,CAR,LV
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1001,2023,17,San Francisco,Washington,48.5,-110.0,-110.0,-14.0,-115,-105,31.25,17.25,WAS,SF
1002,2023,17,Pittsburgh,Seattle,40.5,-112.0,-108.0,4.5,-110,-110,18.00,22.50,SEA,PIT
1003,2023,17,Cincinnati,Kansas City,46.0,-110.0,-110.0,7.0,-105,-115,19.50,26.50,KC,CIN
1004,2023,17,Los Angeles,Denver,39.5,-110.0,-110.0,3.5,-110,-110,18.00,21.50,DEN,


In [10]:
# Build a home/away matchup table from the defense rankings: each DST row
# carries its opponent in OPP as 'vs. XXX' or 'at XXX'.
df_matchup = df_defense_rankings[['Year', 'Week', 'Player Name', 'OPP']].copy()

# Location: 1 for a 'vs. ' prefix, -1 for an 'at ' prefix, 0 when neither is present.
# startswith() is a literal prefix test, so the '.' in 'vs. ' is not treated as
# a regex wildcard. na=False makes a missing OPP count as "no prefix" instead of
# producing a NaN entry in the boolean mask, which .loc cannot index with.
df_matchup['Location'] = 0
df_matchup.loc[df_matchup['OPP'].str.startswith('vs. ', na=False), 'Location'] = 1
df_matchup.loc[df_matchup['OPP'].str.startswith('at ', na=False), 'Location'] = -1

# Keep only the opponent abbreviation (the last space-separated token).
df_matchup['OPP'] = df_matchup['OPP'].str.split(' ').str[-1]

# OPP spells Jacksonville as 'JAC'; the rest of this notebook uses 'JAX'.
df_matchup.loc[df_matchup['OPP'] == 'JAC', 'OPP'] = 'JAX'

# Replace the full team name with its abbreviation.
df_matchup['Player Name'] = df_matchup['Player Name'].map(team_to_abbreviation)

# Keep only the Location == 1 rows, so the ranked team becomes 'Home Team' and
# its opponent 'Away Team'; Location is no longer needed afterwards.
df_matchup = (
    df_matchup.loc[df_matchup['Location'] == 1, :]
    .reset_index(drop=True)
    .rename(columns={'Year': 'Season', 'Player Name': 'Home Team', 'OPP': 'Away Team'})
    .drop(columns=['Location'])
)

df_matchup

Unnamed: 0,Season,Week,Home Team,Away Team
0,2020,1,BUF,NYJ
1,2020,1,BAL,CLE
2,2020,1,NE,MIA
3,2020,1,SF,ARI
4,2020,1,KC,HOU
...,...,...,...,...
1003,2023,17,DAL,DET
1004,2023,17,TB,NO
1005,2023,17,MIN,GB
1006,2023,17,NYG,LAR


In [11]:
# For game lines whose away team is still missing, look the opponent up in the
# matchup table.
away_missing = df_game_lines['Away Team'].isna()
away_lookup_keys = df_game_lines.loc[away_missing, ['Season', 'Week', 'Home Team']]

# No `on=` is given, so merge() joins on the columns the two frames have in
# common. The left join keeps the key order of the left frame; rows without a
# match yield NaN.
fill_away_teams = away_lookup_keys.merge(df_matchup, how='left')['Away Team'].to_numpy()

fill_away_teams

array(['NYJ', 'LAC', 'NYG', 'LAR', 'LAR', 'NYJ', 'LAC', nan, 'LAR', 'NYG',
       'LAC', 'NYJ', 'LAR', 'NYG', 'NYJ', 'LAR', 'LAC', 'NYG', 'LAC', nan,
       'LAR', 'LAC', 'NYG', 'LAR', 'NYG', 'NYJ', 'LAC', nan, 'NYG', 'LAR',
       'LAC', 'NYJ', 'NYG', 'LAR', 'LAC', 'NYJ', 'NYG', 'LAR', 'NYJ',
       'NYG', nan, 'LAC', 'NYJ', 'LAR', 'NYG', 'NYJ', 'LAC', 'LAR', 'NYG',
       'NYJ', 'LAC', 'LAR', 'NYG', 'LAC', nan, 'LAR', 'NYJ', 'NYG', 'LAC',
       'LAR', 'NYG', 'LAR', 'NYG', 'LAC', 'NYJ', 'LAR', 'LAC', 'NYJ',
       'LAR', 'NYG', 'LAC', 'NYJ', 'NYG', 'NYJ', 'NYG', 'LAC', 'LAR',
       'LAC', 'NYJ', 'LAR', 'NYG', 'LAC', 'LAR', 'NYJ', 'LAC', 'NYJ',
       'NYG', 'LAR', 'NYG', 'LAC', 'NYJ', nan, 'LAR', 'LAC', 'NYG', 'NYJ',
       'NYG', 'LAC', 'LAR', 'LAR', 'NYG', 'NYJ', 'NYG', 'LAC', 'LAR', nan,
       'LAR', 'NYG', nan, 'NYG', 'NYJ', 'NYG', 'LAC', 'NYJ', 'LAR', 'LAC',
       'LAR', 'LAC', 'NYG', 'NYJ', 'NYG', 'NYJ', nan, 'LAC'], dtype=object)

In [12]:
# Write the looked-up opponents, position by position, into the rows that are
# still missing an away team.
needs_away_team = df_game_lines['Away Team'].isna()
df_game_lines.loc[needs_away_team, 'Away Team'] = fill_away_teams

df_game_lines

Unnamed: 0,Season,Week,Away Town,Home Town,Total,Over Odds,Under Odds,Spread,Away Odds,Home Odds,Projected Away Score,Projected Home Score,Home Team,Away Team
0,2020,1,Houston,Kansas City,53.5,-110.0,-110.0,9.5,-110,-110,22.00,31.50,KC,HOU
1,2020,1,Green Bay,Minnesota,44.5,-110.0,-110.0,1.5,-110,-110,21.50,23.00,MIN,GB
2,2020,1,Chicago,Detroit,42.5,-110.0,-110.0,2.5,-110,-110,20.00,22.50,DET,CHI
3,2020,1,Indianapolis,Jacksonville,44.5,-110.0,-110.0,-7.0,-110,-110,25.75,18.75,JAX,IND
4,2020,1,Las Vegas,Carolina,48.0,-110.0,-110.0,-3.0,-105,-115,25.50,22.50,CAR,LV
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1001,2023,17,San Francisco,Washington,48.5,-110.0,-110.0,-14.0,-115,-105,31.25,17.25,WAS,SF
1002,2023,17,Pittsburgh,Seattle,40.5,-112.0,-108.0,4.5,-110,-110,18.00,22.50,SEA,PIT
1003,2023,17,Cincinnati,Kansas City,46.0,-110.0,-110.0,7.0,-105,-115,19.50,26.50,KC,CIN
1004,2023,17,Los Angeles,Denver,39.5,-110.0,-110.0,3.5,-110,-110,18.00,21.50,DEN,LAC


In [13]:
# For game lines whose home team is still missing, look the home side up in the
# matchup table.
home_missing = df_game_lines['Home Team'].isna()
home_lookup_keys = df_game_lines.loc[home_missing, ['Season', 'Week', 'Away Team']]

# No `on=` is given, so merge() joins on the columns the two frames have in
# common. The left join keeps the key order of the left frame; rows without a
# match yield NaN.
fill_home_teams = home_lookup_keys.merge(df_matchup, how='left')['Home Team'].to_numpy()

fill_home_teams

array(['LAR', 'NYG', 'NYJ', 'LAC', 'NYG', 'LAC', 'NYJ', nan, 'NYJ', 'NYG',
       'NYJ', 'LAC', 'LAR', 'NYG', 'LAC', 'NYJ', 'NYG', 'LAR', nan, 'NYJ',
       'LAR', 'NYJ', 'LAC', 'LAR', 'NYG', 'LAC', nan, 'NYG', 'NYJ', 'LAC',
       'NYG', 'LAR', 'NYJ', 'LAC', 'NYG', 'LAR', 'NYJ', 'LAR', 'LAC',
       'LAC', nan, 'NYG', 'LAR', 'NYJ', 'LAC', 'NYG', 'LAR', 'NYJ', 'LAC',
       'NYJ', 'LAC', 'NYG', 'NYJ', 'LAR', 'NYJ', nan, 'LAC', 'NYG', 'LAR',
       'NYJ', 'NYJ', 'LAC', 'LAR', 'NYJ', 'LAC', 'NYG', 'LAR', 'NYJ',
       'LAC', 'NYG', 'NYG', 'NYJ', 'LAR', 'NYG', 'LAR', 'LAC', 'LAC',
       'NYJ', 'LAR', 'NYJ', 'NYG', 'LAR', 'NYG', 'LAC', 'NYJ', 'NYG',
       'LAR', 'LAR', 'NYG', 'LAC', 'NYJ', 'LAC', 'NYJ', 'LAR', 'NYG', nan,
       'LAC', 'NYG', 'NYJ', 'LAR', 'NYJ', 'LAC', 'NYJ', 'NYG', 'LAR',
       'LAR', 'NYJ', 'LAC', 'NYG', 'LAR', nan, 'LAC', nan, 'LAC', 'LAR',
       'NYJ', 'NYG', 'LAC', 'NYJ', 'LAR', 'NYJ', 'LAC', 'NYG', 'LAR',
       'LAR', 'LAC', 'NYJ', nan], dtype=object)

In [14]:
# Write the looked-up home teams, position by position, into the rows that are
# still missing a home team.
needs_home_team = df_game_lines['Home Team'].isna()
df_game_lines.loc[needs_home_team, 'Home Team'] = fill_home_teams

df_game_lines

Unnamed: 0,Season,Week,Away Town,Home Town,Total,Over Odds,Under Odds,Spread,Away Odds,Home Odds,Projected Away Score,Projected Home Score,Home Team,Away Team
0,2020,1,Houston,Kansas City,53.5,-110.0,-110.0,9.5,-110,-110,22.00,31.50,KC,HOU
1,2020,1,Green Bay,Minnesota,44.5,-110.0,-110.0,1.5,-110,-110,21.50,23.00,MIN,GB
2,2020,1,Chicago,Detroit,42.5,-110.0,-110.0,2.5,-110,-110,20.00,22.50,DET,CHI
3,2020,1,Indianapolis,Jacksonville,44.5,-110.0,-110.0,-7.0,-110,-110,25.75,18.75,JAX,IND
4,2020,1,Las Vegas,Carolina,48.0,-110.0,-110.0,-3.0,-105,-115,25.50,22.50,CAR,LV
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1001,2023,17,San Francisco,Washington,48.5,-110.0,-110.0,-14.0,-115,-105,31.25,17.25,WAS,SF
1002,2023,17,Pittsburgh,Seattle,40.5,-112.0,-108.0,4.5,-110,-110,18.00,22.50,SEA,PIT
1003,2023,17,Cincinnati,Kansas City,46.0,-110.0,-110.0,7.0,-105,-115,19.50,26.50,KC,CIN
1004,2023,17,Los Angeles,Denver,39.5,-110.0,-110.0,3.5,-110,-110,18.00,21.50,DEN,LAC


In [15]:
# Games that still have no home team after the lookups above.
df_game_lines[df_game_lines['Home Team'].isna()]

Unnamed: 0,Season,Week,Away Town,Home Town,Total,Over Odds,Under Odds,Spread,Away Odds,Home Odds,Projected Away Score,Projected Home Score,Home Team,Away Team
57,2020,4,New York,Los Angeles,49.5,-110.0,-110.0,13.5,-110,-110,18.0,31.5,,
156,2020,11,New York,Los Angeles,47.0,-110.0,-110.0,9.5,-110,-110,18.75,28.25,,
219,2020,15,New York,Los Angeles,44.5,-110.0,-110.0,17.0,-110,-110,13.75,30.75,,
326,2021,6,Los Angeles,New York,49.0,-110.0,-110.0,-7.5,-105,-110,28.25,20.75,,
443,2021,14,New York,Los Angeles,43.5,-110.0,-110.0,9.0,-110,-110,17.25,26.25,,
748,2022,17,Los Angeles,Los Angeles,41.5,-110.0,-110.0,6.5,-110,-110,17.5,24.0,,
860,2023,8,New York,New York,34.5,-110.0,-110.0,-3.0,-115,-105,18.75,15.75,,
885,2023,9,Los Angeles,New York,41.0,-110.0,-110.0,-3.0,-115,-105,22.0,19.0,,
999,2023,17,Los Angeles,New York,43.5,-110.0,-110.0,-6.0,-110,-110,24.75,18.75,,


Fix issues where New York and Los Angeles teams play each other

In [16]:
# Game lines where both towns are 'New York'.
both_new_york = (df_game_lines['Away Town'] == 'New York') & (df_game_lines['Home Town'] == 'New York')

# Matchups in which both abbreviations start with 'NY'.
ny_vs_ny_matchups = df_matchup.loc[
    df_matchup['Home Team'].str.contains('^NY') & df_matchup['Away Team'].str.contains('^NY'),
    :
]

# Line the matchups up with the game-line rows by season and week.
fix_new_york = (
    df_game_lines.loc[both_new_york, ['Season', 'Week']]
    .merge(ny_vs_ny_matchups, how='left', on=['Season', 'Week'])
    [['Home Team', 'Away Team']]
)

for ny_team_col in ['Home Team', 'Away Team']:
    df_game_lines.loc[both_new_york, ny_team_col] = fix_new_york[ny_team_col].to_numpy()

df_game_lines.loc[both_new_york, :]

Unnamed: 0,Season,Week,Away Town,Home Town,Total,Over Odds,Under Odds,Spread,Away Odds,Home Odds,Projected Away Score,Projected Home Score,Home Team,Away Team
860,2023,8,New York,New York,34.5,-110.0,-110.0,-3.0,-115,-105,18.75,15.75,NYG,NYJ


In [17]:
# Game lines where both towns are 'Los Angeles'.
both_los_angeles = (df_game_lines['Away Town'] == 'Los Angeles') & (df_game_lines['Home Town'] == 'Los Angeles')

# Matchups in which both abbreviations start with 'LA'.
la_vs_la_matchups = df_matchup.loc[
    df_matchup['Home Team'].str.contains('^LA') & df_matchup['Away Team'].str.contains('^LA'),
    :
]

# Line the matchups up with the game-line rows by season and week.
fix_los_angeles = (
    df_game_lines.loc[both_los_angeles, ['Season', 'Week']]
    .merge(la_vs_la_matchups, how='left', on=['Season', 'Week'])
    [['Home Team', 'Away Team']]
)

for la_team_col in ['Home Team', 'Away Team']:
    df_game_lines.loc[both_los_angeles, la_team_col] = fix_los_angeles[la_team_col].to_numpy()

df_game_lines.loc[both_los_angeles, :]

Unnamed: 0,Season,Week,Away Town,Home Town,Total,Over Odds,Under Odds,Spread,Away Odds,Home Odds,Projected Away Score,Projected Home Score,Home Team,Away Team
748,2022,17,Los Angeles,Los Angeles,41.5,-110.0,-110.0,6.5,-110,-110,17.5,24.0,LAC,LAR


In [18]:
# Games that still have no home team after the same-town fixes.
df_game_lines[df_game_lines['Home Team'].isna()]

Unnamed: 0,Season,Week,Away Town,Home Town,Total,Over Odds,Under Odds,Spread,Away Odds,Home Odds,Projected Away Score,Projected Home Score,Home Team,Away Team
57,2020,4,New York,Los Angeles,49.5,-110.0,-110.0,13.5,-110,-110,18.0,31.5,,
156,2020,11,New York,Los Angeles,47.0,-110.0,-110.0,9.5,-110,-110,18.75,28.25,,
219,2020,15,New York,Los Angeles,44.5,-110.0,-110.0,17.0,-110,-110,13.75,30.75,,
326,2021,6,Los Angeles,New York,49.0,-110.0,-110.0,-7.5,-105,-110,28.25,20.75,,
443,2021,14,New York,Los Angeles,43.5,-110.0,-110.0,9.0,-110,-110,17.25,26.25,,
885,2023,9,Los Angeles,New York,41.0,-110.0,-110.0,-3.0,-115,-105,22.0,19.0,,
999,2023,17,Los Angeles,New York,43.5,-110.0,-110.0,-6.0,-110,-110,24.75,18.75,,


In [19]:
# Resolve games with a Los Angeles team visiting a New York team.
la_at_ny = (df_game_lines['Away Town'] == 'Los Angeles') & (df_game_lines['Home Town'] == 'New York')

# Matchups with an 'LA*' away team and an 'NY*' home team.
ny_vs_la = df_matchup.loc[(df_matchup['Away Team'].str.contains('^LA')) & (df_matchup['Home Team'].str.contains('^NY')), :]

# Join on (Season, Week) so every game line receives the teams of its own game,
# rather than relying on both frames listing the same games in the same order.
# A game line without a matchup gets NaN and stays visible in the NaN checks.
# NOTE: two such games in the same week cannot be told apart from the towns
# alone; the join would then return extra rows and the assignment below would
# fail on a length mismatch instead of guessing.
ny_vs_la_aligned = df_game_lines.loc[la_at_ny, ['Season', 'Week']].merge(
    ny_vs_la,
    how='left',
    on=['Season', 'Week'],
)

df_game_lines.loc[la_at_ny, 'Home Team'] = ny_vs_la_aligned['Home Team'].to_numpy()
df_game_lines.loc[la_at_ny, 'Away Team'] = ny_vs_la_aligned['Away Team'].to_numpy()

df_game_lines.loc[la_at_ny, :]

Unnamed: 0,Season,Week,Away Town,Home Town,Total,Over Odds,Under Odds,Spread,Away Odds,Home Odds,Projected Away Score,Projected Home Score,Home Team,Away Team
326,2021,6,Los Angeles,New York,49.0,-110.0,-110.0,-7.5,-105,-110,28.25,20.75,NYG,LAR
885,2023,9,Los Angeles,New York,41.0,-110.0,-110.0,-3.0,-115,-105,22.0,19.0,NYJ,LAC
999,2023,17,Los Angeles,New York,43.5,-110.0,-110.0,-6.0,-110,-110,24.75,18.75,NYG,LAR


In [20]:
# Resolve games with a New York team visiting a Los Angeles team.
ny_at_la = (df_game_lines['Home Town'] == 'Los Angeles') & (df_game_lines['Away Town'] == 'New York')

# Matchups with an 'LA*' home team and an 'NY*' away team.
la_vs_ny = df_matchup.loc[(df_matchup['Home Team'].str.contains('^LA')) & (df_matchup['Away Team'].str.contains('^NY')), :]

# Join on (Season, Week) so every game line receives the teams of its own game,
# rather than relying on both frames listing the same games in the same order.
# A game line without a matchup gets NaN and stays visible in the NaN checks.
# NOTE: two such games in the same week cannot be told apart from the towns
# alone; the join would then return extra rows and the assignment below would
# fail on a length mismatch instead of guessing.
la_vs_ny_aligned = df_game_lines.loc[ny_at_la, ['Season', 'Week']].merge(
    la_vs_ny,
    how='left',
    on=['Season', 'Week'],
)

df_game_lines.loc[ny_at_la, 'Home Team'] = la_vs_ny_aligned['Home Team'].to_numpy()
df_game_lines.loc[ny_at_la, 'Away Team'] = la_vs_ny_aligned['Away Team'].to_numpy()

df_game_lines.loc[ny_at_la, :]

Unnamed: 0,Season,Week,Away Town,Home Town,Total,Over Odds,Under Odds,Spread,Away Odds,Home Odds,Projected Away Score,Projected Home Score,Home Team,Away Team
57,2020,4,New York,Los Angeles,49.5,-110.0,-110.0,13.5,-110,-110,18.0,31.5,LAR,NYG
156,2020,11,New York,Los Angeles,47.0,-110.0,-110.0,9.5,-110,-110,18.75,28.25,LAC,NYJ
219,2020,15,New York,Los Angeles,44.5,-110.0,-110.0,17.0,-110,-110,13.75,30.75,LAR,NYJ
443,2021,14,New York,Los Angeles,43.5,-110.0,-110.0,9.0,-110,-110,17.25,26.25,LAC,NYG


In [21]:
# Every game line must have a home team by now. Fail loudly if any are left,
# because 'Home Team' is used as a join key further down.
unresolved_home = df_game_lines.loc[df_game_lines['Home Team'].isna(), :]
assert unresolved_home.empty, f'{len(unresolved_home)} game lines still have no Home Team'

unresolved_home

Unnamed: 0,Season,Week,Away Town,Home Town,Total,Over Odds,Under Odds,Spread,Away Odds,Home Odds,Projected Away Score,Projected Home Score,Home Team,Away Team


In [22]:
# Every game line must have an away team by now. Fail loudly if any are left,
# because 'Away Team' is used as a join key further down.
unresolved_away = df_game_lines.loc[df_game_lines['Away Team'].isna(), :]
assert unresolved_away.empty, f'{len(unresolved_away)} game lines still have no Away Team'

unresolved_away

Unnamed: 0,Season,Week,Away Town,Home Town,Total,Over Odds,Under Odds,Spread,Away Odds,Home Odds,Projected Away Score,Projected Home Score,Home Team,Away Team


### Merge

In [23]:
# Attach the adjusted prop projections to each ranked player-week.
player_join_keys = ['Year', 'Week', 'Player Name', 'Position']
projection_columns = [
    'Adjusted Passing Yards Projection',
    'Adjusted Passing Touchdowns Projection',
    'Adjusted Interceptions Projection',
    'Adjusted Rushing Yards Projection',
    'Adjusted Receiving Yards Projection',
    'Adjusted Receptions Projection',
    'Anytime Touchdown Probability',
]

rankings_renamed = df_player_rankings.rename(
    columns={'POS': 'Position', 'POS RK': 'Position Rank', 'RK': 'Rank'}
)

# Left join: ranked players without a props row keep NaN projections.
df_players = rankings_renamed.merge(
    df_props[player_join_keys + projection_columns],
    how='left',
    on=player_join_keys,
)

df_players

Unnamed: 0,Year,Week,Rank,Player Name,Position,Position Rank,Adjusted Passing Yards Projection,Adjusted Passing Touchdowns Projection,Adjusted Interceptions Projection,Adjusted Rushing Yards Projection,Adjusted Receiving Yards Projection,Adjusted Receptions Projection,Anytime Touchdown Probability
0,2020,1,1.0,patrick mahomes,QB,1,306.5,2.460615,,20.488656,,,0.250000
1,2020,1,2.0,lamar jackson,QB,2,222.5,1.528860,,60.500000,,,0.476190
2,2020,1,3.0,christian mccaffrey,RB,1,,,,73.512870,51.488656,6.544513,0.652778
3,2020,1,4.0,dak prescott,QB,3,285.5,2.495487,,11.517041,,,0.230947
4,2020,1,5.0,russell wilson,QB,4,259.5,1.564394,,21.500000,,,0.200000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
32017,2023,17,,eddy pineiro,K,29,,,,,,,
32018,2023,17,,greg zuerlein,K,30,,,,,,,
32019,2023,17,,joey slye,K,31,,,,,,,
32020,2023,17,,chad ryland,K,32,,,,,,,


In [24]:
# Append the defense rankings below the player rows. They contribute only the
# five columns selected here, so Rank and the projection columns are NaN for them.
defense_rows = df_defense_rankings.rename(columns={'POS': 'Position', 'POS RK': 'Position Rank'})
defense_rows = defense_rows[['Year', 'Week', 'Player Name', 'Position', 'Position Rank']]

df_mod = pd.concat([df_players, defense_rows], ignore_index=True)
df_mod = df_mod.sort_values(['Year', 'Week', 'Rank', 'Position Rank'], ignore_index=True)

df_mod

Unnamed: 0,Year,Week,Rank,Player Name,Position,Position Rank,Adjusted Passing Yards Projection,Adjusted Passing Touchdowns Projection,Adjusted Interceptions Projection,Adjusted Rushing Yards Projection,Adjusted Receiving Yards Projection,Adjusted Receptions Projection,Anytime Touchdown Probability
0,2020,1,1.0,patrick mahomes,QB,1,306.5,2.460615,,20.488656,,,0.250000
1,2020,1,2.0,lamar jackson,QB,2,222.5,1.528860,,60.500000,,,0.476190
2,2020,1,3.0,christian mccaffrey,RB,1,,,,73.512870,51.488656,6.544513,0.652778
3,2020,1,4.0,dak prescott,QB,3,285.5,2.495487,,11.517041,,,0.230947
4,2020,1,5.0,russell wilson,QB,4,259.5,1.564394,,21.500000,,,0.200000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
34033,2023,17,,joey slye,K,31,,,,,,,
34034,2023,17,,Arizona Cardinals,DST,31,,,,,,,
34035,2023,17,,chad ryland,K,32,,,,,,,
34036,2023,17,,Washington Commanders,DST,32,,,,,,,


In [25]:
# Join the fantasy points. The inner join keeps only the player-weeks that
# appear in both the rankings and the points data.
points_join_keys = ['Season', 'Week', 'Name', 'Position']
df_mod = df_mod.rename(columns={'Year': 'Season', 'Player Name': 'Name'})
df_mod = df_mod.merge(df_points, how='inner', on=points_join_keys)

# Reorder columns: Team right after Position, Fantasy Points right after Team.
team_slot = df_mod.columns.get_loc('Position') + 1
df_mod.insert(team_slot, 'Team', df_mod.pop('Team'))

points_slot = df_mod.columns.get_loc('Team') + 1
df_mod.insert(points_slot, 'Fantasy Points', df_mod.pop('Fantasy Points'))

# The slot is computed while Rank is still in the frame. Rank sits to the left
# of Position Rank here, so popping it shifts Position Rank one place left and
# "location of Position Rank minus one" puts Rank immediately before it.
rank_slot = df_mod.columns.get_loc('Position Rank') - 1
df_mod.insert(rank_slot, 'Rank', df_mod.pop('Rank'))

df_mod

Unnamed: 0,Season,Week,Name,Position,Team,Fantasy Points,Rank,Position Rank,Adjusted Passing Yards Projection,Adjusted Passing Touchdowns Projection,Adjusted Interceptions Projection,Adjusted Rushing Yards Projection,Adjusted Receiving Yards Projection,Adjusted Receptions Projection,Anytime Touchdown Probability
0,2020,1,patrick mahomes,QB,KC,20.4,1.0,1,306.5,2.460615,,20.488656,,,0.250000
1,2020,1,lamar jackson,QB,BAL,27.5,2.0,2,222.5,1.528860,,60.500000,,,0.476190
2,2020,1,christian mccaffrey,RB,CAR,26.9,3.0,1,,,,73.512870,51.488656,6.544513,0.652778
3,2020,1,dak prescott,QB,DAL,17.6,4.0,3,285.5,2.495487,,11.517041,,,0.230947
4,2020,1,russell wilson,QB,SEA,31.8,5.0,4,259.5,1.564394,,21.500000,,,0.200000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28050,2023,17,New York Giants,DST,NYG,16.0,,30,,,,,,,
28051,2023,17,joey slye,K,WAS,5.0,,31,,,,,,,
28052,2023,17,Arizona Cardinals,DST,ARI,2.0,,31,,,,,,,
28053,2023,17,chad ryland,K,NE,2.0,,32,,,,,,,


In [26]:
# Reshape the game lines from one row per game into one row per team: the home
# teams first (Location 1), followed by the away teams (Location -1).
n_games = df_game_lines.shape[0]

game_seasons = df_game_lines['Season'].to_list()
game_weeks = df_game_lines['Week'].to_list()
home_teams = df_game_lines['Home Team'].to_list()
away_teams = df_game_lines['Away Team'].to_list()
home_scores = df_game_lines['Projected Home Score'].to_list()
away_scores = df_game_lines['Projected Away Score'].to_list()

df_gl_to_mod = pd.DataFrame(
    {
        'Season': game_seasons + game_seasons,
        'Week': game_weeks + game_weeks,
        'Team': home_teams + away_teams,
        'Location': [1] * n_games + [-1] * n_games,
        'Team Projected Score': home_scores + away_scores,
        'Opponent Projected Score': away_scores + home_scores,
    },
)

df_gl_to_mod

Unnamed: 0,Season,Week,Team,Location,Team Projected Score,Opponent Projected Score
0,2020,1,KC,1,31.50,22.00
1,2020,1,MIN,1,23.00,21.50
2,2020,1,DET,1,22.50,20.00
3,2020,1,JAX,1,18.75,25.75
4,2020,1,CAR,1,22.50,25.50
...,...,...,...,...,...,...
2007,2023,17,SF,-1,31.25,17.25
2008,2023,17,PIT,-1,18.00,22.50
2009,2023,17,CIN,-1,19.50,26.50
2010,2023,17,LAC,-1,18.00,21.50


In [27]:
# Attach each team's location and projected scores for the week. The left join
# keeps player-weeks that have no game line; their new columns are NaN.
rows_before_game_lines = len(df_mod)

df_mod = df_mod.merge(
    df_gl_to_mod,
    how='left',
    on=['Season', 'Week', 'Team'],
)

# A team should match at most one game-line row per week. More than one match
# would silently duplicate player rows, so check the row count is unchanged.
assert len(df_mod) == rows_before_game_lines, 'game-line merge duplicated player rows'

df_mod

Unnamed: 0,Season,Week,Name,Position,Team,Fantasy Points,Rank,Position Rank,Adjusted Passing Yards Projection,Adjusted Passing Touchdowns Projection,Adjusted Interceptions Projection,Adjusted Rushing Yards Projection,Adjusted Receiving Yards Projection,Adjusted Receptions Projection,Anytime Touchdown Probability,Location,Team Projected Score,Opponent Projected Score
0,2020,1,patrick mahomes,QB,KC,20.4,1.0,1,306.5,2.460615,,20.488656,,,0.250000,1.0,31.50,22.00
1,2020,1,lamar jackson,QB,BAL,27.5,2.0,2,222.5,1.528860,,60.500000,,,0.476190,1.0,27.25,20.25
2,2020,1,christian mccaffrey,RB,CAR,26.9,3.0,1,,,,73.512870,51.488656,6.544513,0.652778,1.0,22.50,25.50
3,2020,1,dak prescott,QB,DAL,17.6,4.0,3,285.5,2.495487,,11.517041,,,0.230947,-1.0,25.50,26.50
4,2020,1,russell wilson,QB,SEA,31.8,5.0,4,259.5,1.564394,,21.500000,,,0.200000,-1.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28050,2023,17,New York Giants,DST,NYG,16.0,,30,,,,,,,,1.0,18.75,24.75
28051,2023,17,joey slye,K,WAS,5.0,,31,,,,,,,,1.0,17.25,31.25
28052,2023,17,Arizona Cardinals,DST,ARI,2.0,,31,,,,,,,,-1.0,17.75,30.25
28053,2023,17,chad ryland,K,NE,2.0,,32,,,,,,,,-1.0,12.75,27.25


### Filter to Relevant Players

In [29]:
# For every team and week, keep the single QB with the lowest Position Rank.
qb_rows = df_mod[df_mod['Position'] == 'QB']
qb_rows = qb_rows.sort_values(['Season', 'Week', 'Position Rank'], ignore_index=True)
df_qb = qb_rows.groupby(['Season', 'Week', 'Team']).head(1)

df_qb

Unnamed: 0,Season,Week,Name,Position,Team,Fantasy Points,Rank,Position Rank,Adjusted Passing Yards Projection,Adjusted Passing Touchdowns Projection,Adjusted Interceptions Projection,Adjusted Rushing Yards Projection,Adjusted Receiving Yards Projection,Adjusted Receptions Projection,Anytime Touchdown Probability,Location,Team Projected Score,Opponent Projected Score
0,2020,1,patrick mahomes,QB,KC,20.4,1.0,1,306.500000,2.460615,,20.488656,,,0.250000,1.0,31.50,22.00
1,2020,1,lamar jackson,QB,BAL,27.5,2.0,2,222.500000,1.528860,,60.500000,,,0.476190,1.0,27.25,20.25
2,2020,1,dak prescott,QB,DAL,17.6,4.0,3,285.500000,2.495487,,11.517041,,,0.230947,-1.0,25.50,26.50
3,2020,1,russell wilson,QB,SEA,31.8,5.0,4,259.500000,1.564394,,21.500000,,,0.200000,-1.0,,
4,2020,1,deshaun watson,QB,HOU,20.8,6.0,5,275.500000,1.574315,,30.512870,,,0.344828,-1.0,22.00,31.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2299,2023,17,taylor heinicke,QB,ATL,15.1,52.0,28,203.494463,1.304627,0.510823,10.504409,,,0.153846,-1.0,18.25,20.75
2300,2023,17,cj beathard,QB,JAX,9.5,54.0,29,214.495591,1.351543,0.567442,10.500000,,,0.133333,1.0,20.75,17.25
2301,2023,17,sam howell,QB,WAS,6.7,59.0,30,234.500000,1.394584,0.639448,12.501128,,,0.153846,1.0,17.25,31.25
2302,2023,17,bailey zappe,QB,NE,12.1,71.0,31,195.500000,0.615610,0.616228,5.489177,,,0.058824,-1.0,12.75,27.25


In [30]:
# For every team and week, keep the two RBs with the lowest Position Rank.
rb_rows = df_mod[df_mod['Position'] == 'RB']
rb_rows = rb_rows.sort_values(['Season', 'Week', 'Position Rank'], ignore_index=True)
df_rb = rb_rows.groupby(['Season', 'Week', 'Team']).head(2)

df_rb

Unnamed: 0,Season,Week,Name,Position,Team,Fantasy Points,Rank,Position Rank,Adjusted Passing Yards Projection,Adjusted Passing Touchdowns Projection,Adjusted Interceptions Projection,Adjusted Rushing Yards Projection,Adjusted Receiving Yards Projection,Adjusted Receptions Projection,Anytime Touchdown Probability,Location,Team Projected Score,Opponent Projected Score
0,2020,1,christian mccaffrey,RB,CAR,26.9,3.0,1,,,,73.512870,51.488656,6.544513,0.652778,1.0,22.50,25.50
1,2020,1,ezekiel elliott,RB,DAL,26.2,16.0,2,,,,81.517442,29.500000,3.574315,0.619772,-1.0,25.50,26.50
2,2020,1,dalvin cook,RB,MIN,17.3,20.0,3,,,,74.500000,28.500000,3.544513,0.579832,1.0,23.00,21.50
3,2020,1,josh jacobs,RB,LV,33.9,22.0,4,,,,79.500000,16.511344,2.477273,0.607843,-1.0,25.50,22.50
4,2020,1,saquon barkley,RB,NYG,9.6,24.0,5,,,,72.500000,33.500000,3.471510,0.523810,1.0,19.00,25.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6114,2023,17,matt breida,RB,NYG,0.0,207.0,63,,,,,,,0.125000,1.0,18.75,24.75
6116,2023,17,latavius murray,RB,BUF,0.0,224.0,65,,,,,,,0.153846,1.0,27.25,12.75
6118,2023,17,trey sermon,RB,IND,5.1,228.0,67,,,,,,,0.200000,1.0,23.00,19.50
6126,2023,17,deuce vaughn,RB,DAL,1.7,254.0,77,,,,9.494463,,,0.190476,1.0,29.00,24.50


In [31]:
# Wide receivers: for every (Season, Week, Team) keep the three rows with the
# lowest Position Rank. The sort puts them first so `head` selects them.
df_wr = (
    df_mod[df_mod['Position'].eq('WR')]
    .sort_values(by=['Season', 'Week', 'Position Rank'], ignore_index=True)
    .groupby(by=['Season', 'Week', 'Team'])
    .head(n=3)
)

df_wr

Unnamed: 0,Season,Week,Name,Position,Team,Fantasy Points,Rank,Position Rank,Adjusted Passing Yards Projection,Adjusted Passing Touchdowns Projection,Adjusted Interceptions Projection,Adjusted Rushing Yards Projection,Adjusted Receiving Yards Projection,Adjusted Receptions Projection,Anytime Touchdown Probability,Location,Team Projected Score,Opponent Projected Score
0,2020,1,michael thomas,WR,NO,3.2,21.0,1,,,,,,,,1.0,26.25,22.25
1,2020,1,davante adams,WR,GB,34.6,33.0,2,,,,,87.500000,7.466125,0.476190,-1.0,21.50,23.00
2,2020,1,julio jones,WR,ATL,20.2,36.0,3,,,,,92.500000,6.544513,0.476190,1.0,,
3,2020,1,tyreek hill,WR,KC,13.1,37.0,4,,,,,74.500000,5.442461,0.565217,1.0,31.50,22.00
4,2020,1,chris godwin,WR,TB,10.9,45.0,5,,,,,70.500000,5.548948,0.444444,-1.0,22.25,26.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9637,2023,17,equanimeous st,WR,CHI,0.0,266.0,110,,,,,,,,1.0,20.75,18.25
9639,2023,17,van jefferson,WR,ATL,3.4,270.0,112,,,,,11.488149,1.339031,0.125000,-1.0,18.25,20.75
9640,2023,17,julio jones,WR,PHI,16.4,272.0,113,,,,,8.500000,1.343000,0.100000,1.0,30.25,17.75
9646,2023,17,khadarel hodge,WR,ATL,0.0,297.0,119,,,,,8.500000,0.640297,0.083333,-1.0,18.25,20.75


In [32]:
# Tight ends: for every (Season, Week, Team) keep only the row with the
# lowest Position Rank (the first row of each group after sorting).
df_te = (
    df_mod[df_mod['Position'].eq('TE')]
    .sort_values(by=['Season', 'Week', 'Position Rank'], ignore_index=True)
    .groupby(by=['Season', 'Week', 'Team'])
    .head(n=1)
)

df_te

Unnamed: 0,Season,Week,Name,Position,Team,Fantasy Points,Rank,Position Rank,Adjusted Passing Yards Projection,Adjusted Passing Touchdowns Projection,Adjusted Interceptions Projection,Adjusted Rushing Yards Projection,Adjusted Receiving Yards Projection,Adjusted Receptions Projection,Anytime Touchdown Probability,Location,Team Projected Score,Opponent Projected Score
0,2020,1,george kittle,TE,SF,7.3,43.0,1,,,,,71.500000,6.464916,0.500000,1.0,26.75,20.25
1,2020,1,travis kelce,TE,KC,14.0,48.0,2,,,,,69.524775,5.557539,0.545455,1.0,31.50,22.00
2,2020,1,mark andrews,TE,BAL,20.3,70.0,3,,,,,48.500000,4.436640,0.420168,1.0,27.25,20.25
3,2020,1,zach ertz,TE,PHI,9.3,76.0,4,,,,,54.500000,5.458984,0.380228,-1.0,22.00,16.50
4,2020,1,darren waller,TE,LV,7.5,87.0,5,,,,,52.500000,4.544513,0.420168,-1.0,25.50,22.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5868,2023,17,mike gesicki,TE,NE,5.5,197.0,31,,,,,17.505537,2.431481,0.166667,-1.0,12.75,27.25
5869,2023,17,josh oliver,TE,MIN,2.2,200.0,32,,,,,10.500000,,0.181818,1.0,21.75,20.75
5871,2023,17,austin hooper,TE,LV,4.3,217.0,34,,,,,20.503353,2.462766,0.181818,-1.0,19.50,23.00
5872,2023,17,adam trautman,TE,DEN,1.4,225.0,35,,,,,5.500000,0.600127,0.200000,1.0,21.50,18.00


In [33]:
# Defense / special teams: for every (Season, Week, Team) keep only the row
# with the lowest Position Rank (the first row of each group after sorting).
df_dst = (
    df_mod[df_mod['Position'].eq('DST')]
    .sort_values(by=['Season', 'Week', 'Position Rank'], ignore_index=True)
    .groupby(by=['Season', 'Week', 'Team'])
    .head(n=1)
)

df_dst

Unnamed: 0,Season,Week,Name,Position,Team,Fantasy Points,Rank,Position Rank,Adjusted Passing Yards Projection,Adjusted Passing Touchdowns Projection,Adjusted Interceptions Projection,Adjusted Rushing Yards Projection,Adjusted Receiving Yards Projection,Adjusted Receptions Projection,Anytime Touchdown Probability,Location,Team Projected Score,Opponent Projected Score
0,2020,1,Buffalo Bills,DST,BUF,8.0,,1,,,,,,,,1.0,23.75,17.25
1,2020,1,Pittsburgh Steelers,DST,PIT,8.0,,2,,,,,,,,-1.0,25.00,19.00
2,2020,1,Baltimore Ravens,DST,BAL,15.0,,3,,,,,,,,1.0,27.25,20.25
3,2020,1,New England Patriots,DST,NE,11.0,,4,,,,,,,,1.0,24.50,17.00
4,2020,1,Philadelphia Eagles,DST,PHI,3.0,,5,,,,,,,,-1.0,22.00,16.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011,2023,17,Cincinnati Bengals,DST,CIN,4.0,,28,,,,,,,,-1.0,19.50,26.50
2012,2023,17,Detroit Lions,DST,DET,6.0,,29,,,,,,,,-1.0,24.50,29.00
2013,2023,17,New York Giants,DST,NYG,16.0,,30,,,,,,,,1.0,18.75,24.75
2014,2023,17,Arizona Cardinals,DST,ARI,2.0,,31,,,,,,,,-1.0,17.75,30.25


In [34]:
# Kickers: for every (Season, Week, Team) keep only the row with the lowest
# Position Rank (the first row of each group after sorting).
df_k = (
    df_mod[df_mod['Position'].eq('K')]
    .sort_values(by=['Season', 'Week', 'Position Rank'], ignore_index=True)
    .groupby(by=['Season', 'Week', 'Team'])
    .head(n=1)
)

df_k

Unnamed: 0,Season,Week,Name,Position,Team,Fantasy Points,Rank,Position Rank,Adjusted Passing Yards Projection,Adjusted Passing Touchdowns Projection,Adjusted Interceptions Projection,Adjusted Rushing Yards Projection,Adjusted Receiving Yards Projection,Adjusted Receptions Projection,Anytime Touchdown Probability,Location,Team Projected Score,Opponent Projected Score
0,2020,1,harrison butker,K,KC,10.0,,1,,,,,,,,1.0,31.50,22.00
1,2020,1,justin tucker,K,BAL,9.0,,2,,,,,,,,1.0,27.25,20.25
2,2020,1,wil lutz,K,NO,10.0,,3,,,,,,,,1.0,26.25,22.25
3,2020,1,greg zuerlein,K,DAL,4.0,,4,,,,,,,,-1.0,25.50,26.50
4,2020,1,robbie gould,K,SF,10.0,,5,,,,,,,,1.0,26.75,20.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1955,2023,17,matt prater,K,ARI,9.0,,27,,,,,,,,-1.0,17.75,30.25
1956,2023,17,mason crosby,K,NYG,5.0,,28,,,,,,,,1.0,18.75,24.75
1957,2023,17,greg zuerlein,K,NYJ,9.0,,30,,,,,,,,-1.0,13.25,20.25
1958,2023,17,joey slye,K,WAS,5.0,,31,,,,,,,,1.0,17.25,31.25


In [35]:
# Stitch the per-position selections back into one frame and rebind `df_mod`
# to it. Rows are then ordered by season, week, overall Rank and Position
# Rank; both steps discard the old index in favour of a fresh 0..n-1 one.
df_mod = (
    pd.concat([df_qb, df_rb, df_wr, df_te, df_dst, df_k], ignore_index=True)
    .sort_values(by=['Season', 'Week', 'Rank', 'Position Rank'], ignore_index=True)
)

df_mod

Unnamed: 0,Season,Week,Name,Position,Team,Fantasy Points,Rank,Position Rank,Adjusted Passing Yards Projection,Adjusted Passing Touchdowns Projection,Adjusted Interceptions Projection,Adjusted Rushing Yards Projection,Adjusted Receiving Yards Projection,Adjusted Receptions Projection,Anytime Touchdown Probability,Location,Team Projected Score,Opponent Projected Score
0,2020,1,patrick mahomes,QB,KC,20.4,1.0,1,306.5,2.460615,,20.488656,,,0.250000,1.0,31.50,22.00
1,2020,1,lamar jackson,QB,BAL,27.5,2.0,2,222.5,1.528860,,60.500000,,,0.476190,1.0,27.25,20.25
2,2020,1,christian mccaffrey,RB,CAR,26.9,3.0,1,,,,73.512870,51.488656,6.544513,0.652778,1.0,22.50,25.50
3,2020,1,dak prescott,QB,DAL,17.6,4.0,3,285.5,2.495487,,11.517041,,,0.230947,-1.0,25.50,26.50
4,2020,1,russell wilson,QB,SEA,31.8,5.0,4,259.5,1.564394,,21.500000,,,0.200000,-1.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18036,2023,17,greg zuerlein,K,NYJ,9.0,,30,,,,,,,,-1.0,13.25,20.25
18037,2023,17,Arizona Cardinals,DST,ARI,2.0,,31,,,,,,,,-1.0,17.75,30.25
18038,2023,17,joey slye,K,WAS,5.0,,31,,,,,,,,1.0,17.25,31.25
18039,2023,17,Washington Commanders,DST,WAS,1.0,,32,,,,,,,,1.0,17.25,31.25


### Save

In [36]:
# Write the final modelling frame to parquet, then print a completion marker
# so it is obvious in the cell output that the write call returned.
df_mod.to_parquet(path='../../data/model_data/model_data_single_output.parquet')

print('Done')

Done
