Find relevant runningbacks to webscrape

In [1]:
import pandas as pd

pd.set_option('display.max_columns', 100)

df_superflex_rankings = pd.read_parquet(r'..\..\data\fantasypros_in_season_rankings\superflex_rankings.parquet')

df_superflex_rankings = df_superflex_rankings.loc[df_superflex_rankings['Player Name'].notna(), :].reset_index(drop=True)

df_superflex_rankings

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK
0,2018,1,1,Todd Gurley II,RB,1
1,2018,1,2,David Johnson,RB,2
2,2018,1,3,Alvin Kamara,RB,3
3,2018,1,4,Melvin Gordon III,RB,4
4,2018,1,5,Ezekiel Elliott,RB,5
...,...,...,...,...,...,...
41050,2023,17,461,Steven Sims Jr.,WR,183
41051,2023,17,462,Jashaun Corbin,RB,112
41052,2023,17,463,Dee Eskridge,WR,184
41053,2023,17,464,Clayton Tune,QB,65


In [2]:
df_rb = df_superflex_rankings.loc[df_superflex_rankings['POS'] == 'RB', :].sort_values(['Year', 'Week', 'POS RK'], ignore_index=True)

df_rb = df_rb.loc[df_rb['Year'] >= 2020, :].reset_index(drop=True)  # for now

df_rb.sort_values(['Year', 'Week', 'POS RK'], ignore_index=True, inplace=True)

df_rb

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK
0,2020,1,3,Christian McCaffrey,RB,1
1,2020,1,16,Ezekiel Elliott,RB,2
2,2020,1,20,Dalvin Cook,RB,3
3,2020,1,22,Josh Jacobs,RB,4
4,2020,1,24,Saquon Barkley,RB,5
...,...,...,...,...,...,...
7994,2023,17,396,Adam Prentice,RB,108
7995,2023,17,412,Jonathan Ward,RB,109
7996,2023,17,415,Sean Tucker,RB,110
7997,2023,17,416,Ty Montgomery II,RB,111


In [3]:
df_rb.loc[df_rb['Player Name'].str.contains('-'), 'Player Name'].unique()

array(['Clyde Edwards-Helaire', 'Tony Brooks-James', 'Tyrion Davis-Price'],
      dtype=object)

In [4]:
df_rb.loc[df_rb['Player Name'].str.contains(r'(Sr\.|Jr\.)$', regex=True), 'Player Name'].unique()

  df_rb.loc[df_rb['Player Name'].str.contains(r'(Sr\.|Jr\.)$', regex=True), 'Player Name'].unique()


array(['Duke Johnson Jr.', 'Darrell Henderson Jr.', 'Benny Snell Jr.',
       'Wayne Gallman Jr.', 'Jeff Wilson Jr.', 'Alex Armah Jr.',
       "De'Angelo Henderson Sr.", 'Anthony McFarland Jr.',
       'Kerrith Whyte Jr.', 'Patrick Taylor Jr.', 'Tony Jones Jr.',
       'Adrian Killins Jr.', 'Demetric Felton Jr.', 'Pooka Williams Jr.',
       'John Kelly Jr.', 'A.J. Rose Jr.', 'Travis Etienne Jr.',
       'Pierre Strong Jr.', 'Brian Robinson Jr.', 'Chris Rodriguez Jr.',
       'SaRodorick Thompson Jr.'], dtype=object)

In [5]:
# make names easier to match
df_rb['Player Name'] = (
    df_rb['Player Name']
    .str.split().str[:2].str.join(' ')  # make names just the first two words
    .str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)  # remove non-alphanumeric characters
    .str.lower()  # lowercase only (helps with matching)
)

df_rb

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK
0,2020,1,3,christian mccaffrey,RB,1
1,2020,1,16,ezekiel elliott,RB,2
2,2020,1,20,dalvin cook,RB,3
3,2020,1,22,josh jacobs,RB,4
4,2020,1,24,saquon barkley,RB,5
...,...,...,...,...,...,...
7994,2023,17,396,adam prentice,RB,108
7995,2023,17,412,jonathan ward,RB,109
7996,2023,17,415,sean tucker,RB,110
7997,2023,17,416,ty montgomery,RB,111


Filter to relevant players

In [6]:
df_teams = pd.read_parquet('../../data/fantasy_points/footballguys_half_ppr.parquet')[['Season', 'Week', 'Name', 'Team', 'Position']]

df_teams['Name'] = (
    df_teams['Name'].str.split().str[:2].str.join(' ')  # make names just the first two words
    .str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)  # remove non-alphanumeric characters
    .str.lower()  # lowercase only (helps with matching)
)

df_teams = df_teams.loc[df_teams['Position'] == 'RB', :].reset_index(drop=True)

df_teams

Unnamed: 0,Season,Week,Name,Team,Position
0,2018,1,alvin kamara,NO,RB
1,2018,1,james conner,PIT,RB
2,2018,1,adrian peterson,WAS,RB
3,2018,1,joe mixon,CIN,RB
4,2018,1,isaiah crowell,NYJ,RB
...,...,...,...,...,...
9248,2023,17,hunter luepke,DAL,RB
9249,2023,17,latavius murray,BUF,RB
9250,2023,17,dare ogunbowale,HOU,RB
9251,2023,17,cordarrelle patterson,ATL,RB


Fix players with difficult names

In [7]:
df_teams.loc[df_teams['Name'] == 'ken walker', 'Name'] = 'kenneth walker'

In [8]:
df_rb = df_rb.merge(
    df_teams.rename(columns={'Season': 'Year', 'Name': 'Player Name', 'Position': 'POS'}),
    how='left',
    on=['Year', 'Week', 'Player Name', 'POS'],
)

df_rb.loc[(df_rb['Team'].isna()) & (df_rb['POS RK'] <= 25), :]

# 2022 Week 17 Bills at Bengals was cancelled

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK,Team
1488,2020,13,73,clyde edwardshelaire,RB,18,


In [9]:
df_rb.loc[(df_rb['Team'].isna()) & (df_rb['POS RK'] <= 32*2), :].groupby(['Player Name'])['POS'].count().sort_values(ascending=False).head(10)

Player Name
ty montgomery        4
boston scott         3
jordan mason         3
leonard fournette    3
keaontay ingram      3
royce freeman        2
zamir white          2
trey sermon          2
gary brightwell      2
benny snell          2
Name: POS, dtype: int64

In [10]:
df_rb = df_rb.loc[df_rb['Team'].notna(), :].reset_index(drop=True)

df_rb

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK,Team
0,2020,1,3,christian mccaffrey,RB,1,CAR
1,2020,1,16,ezekiel elliott,RB,2,DAL
2,2020,1,20,dalvin cook,RB,3,MIN
3,2020,1,22,josh jacobs,RB,4,LV
4,2020,1,24,saquon barkley,RB,5,NYG
...,...,...,...,...,...,...,...
6170,2023,17,369,jakob johnson,RB,102,LV
6171,2023,17,377,alex armah,RB,105,WAS
6172,2023,17,388,nick bawden,RB,107,NYJ
6173,2023,17,396,adam prentice,RB,108,NO


In [11]:
df_rb = df_rb.groupby(['Year', 'Week', 'Team']).nth(0).reset_index(drop=True)

df_rb

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK,Team
0,2020,1,3,christian mccaffrey,RB,1,CAR
1,2020,1,16,ezekiel elliott,RB,2,DAL
2,2020,1,20,dalvin cook,RB,3,MIN
3,2020,1,22,josh jacobs,RB,4,LV
4,2020,1,24,saquon barkley,RB,5,NYG
...,...,...,...,...,...,...,...
2011,2023,17,84,jaylen warren,RB,29,PIT
2012,2023,17,89,jerome ford,RB,30,CLE
2013,2023,17,99,gus edwards,RB,32,BAL
2014,2023,17,104,khalil herbert,RB,33,CHI


In [12]:
df_rb['Player Name URL'] = df_rb['Player Name'].str.replace(' ', '-')

# fix players who are known to have issues
df_rb.loc[df_rb['Player Name'] == 'clyde edwardshelaire', 'Player Name URL'] = 'clyde-edwards-helaire'
df_rb.loc[df_rb['Player Name'] == 'benny snell', 'Player Name URL'] = 'benjamin-snell-jr'
df_rb.loc[df_rb['Player Name'] == 'jeff wilson', 'Player Name URL'] = 'jeffery-wilson'
df_rb.loc[df_rb['Player Name'] == 'tony jones', 'Player Name URL'] = 'tony-jones-rb'
df_rb.loc[df_rb['Player Name'] == 'pierre strong', 'Player Name URL'] = 'pierre-strong-jr'
df_rb.loc[df_rb['Player Name'] == 'brian robinson', 'Player Name URL'] = 'brian-robinson-jr'
df_rb.loc[df_rb['Player Name'] == 'chris rodriguez', 'Player Name URL'] = 'chris-rodriguez-jr'
df_rb.loc[df_rb['Player Name'] == 'david johnson', 'Player Name URL'] = 'david-johnson-rb'
df_rb.loc[df_rb['Player Name'] == 'ronald jones', 'Player Name URL'] = 'ronald-jones-ii'
df_rb.loc[df_rb['Player Name'] == 'damien harris', 'Player Name URL'] = 'damien-harris-rb'
df_rb.loc[df_rb['Player Name'] == 'mike davis', 'Player Name URL'] = 'mike-davis-rb'
df_rb.loc[df_rb['Player Name'] == 'adrian peterson', 'Player Name URL'] = 'adrian-peterson-min'
df_rb.loc[df_rb['Player Name'] == 'justin jackson', 'Player Name URL'] = 'justin-jackson-rb'
df_rb.loc[df_rb['Player Name'] == 'rodney smith', 'Player Name URL'] = 'rodney-smith-rb'
df_rb.loc[df_rb['Player Name'] == 'buddy howell', 'Player Name URL'] = 'gregory-howell'
df_rb.loc[df_rb['Player Name'] == 'najee harris', 'Player Name URL'] = 'najee-harris-rb'
df_rb.loc[df_rb['Player Name'] == 'kenneth walker', 'Player Name URL'] = 'kenneth-walker-rb'
df_rb.loc[df_rb['Player Name'] == 'zonovan knight', 'Player Name URL'] = 'zonovan-bam-knight'
df_rb.loc[df_rb['Player Name'] == 'michael carter', 'Player Name URL'] = 'michael-carter-rb'
df_rb.loc[df_rb['Player Name'] == 'tyrion davisprice', 'Player Name URL'] = 'tyrion-davis-price'
df_rb.loc[df_rb['Player Name'] == 'spencer brown', 'Player Name URL'] = 'spencer-brown-rb'


df_rb

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK,Team,Player Name URL
0,2020,1,3,christian mccaffrey,RB,1,CAR,christian-mccaffrey
1,2020,1,16,ezekiel elliott,RB,2,DAL,ezekiel-elliott
2,2020,1,20,dalvin cook,RB,3,MIN,dalvin-cook
3,2020,1,22,josh jacobs,RB,4,LV,josh-jacobs
4,2020,1,24,saquon barkley,RB,5,NYG,saquon-barkley
...,...,...,...,...,...,...,...,...
2011,2023,17,84,jaylen warren,RB,29,PIT,jaylen-warren
2012,2023,17,89,jerome ford,RB,30,CLE,jerome-ford
2013,2023,17,99,gus edwards,RB,32,BAL,gus-edwards
2014,2023,17,104,khalil herbert,RB,33,CHI,khalil-herbert


In [13]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

options = Options()
# options.add_argument('--headless')

driver = webdriver.Chrome(options=options)

In [14]:
import time
from selenium.webdriver.common.by import By

def scrape_props(player_name, player_name_url, year, week, team):
    driver.get(url=f"https://www.bettingpros.com/nfl/odds/player-props/{player_name_url}/?season={year}&week={week}")
    time.sleep(5)

    # the page uses lazy loading
    # this loop helps all relevant elements load before scraping
    for _ in range(3):
        # find all loaded rows
        rows = driver.find_elements(
            by=By.CLASS_NAME,
            value="grouped-items-with-sticky-footer__content",
        )

        # scroll to the last loaded row, which should load subsequent rows if there are any
        if len(rows) > 0:
            driver.execute_script(f"window.scrollTo(0, {rows[-1].location['y']});")

        time.sleep(3)

    rows = driver.find_elements(
        by=By.CLASS_NAME,
        value="grouped-items-with-sticky-footer__content",
    )

    if len(rows) == 0:
        print(f"Warning: No rows for {player_name} {year} week {week}")
        
    data = {
        'Year': year,
        'Week': week,
        'Player Name': player_name,
        'Team': team,
    }
    for row in rows:
        t = row.text.split('\n')
        if t[0] == 'Rushing Yards Over/Under':
            data['Rushing Yards Projection'] = t[-4]
            data['Rushing Yards Over'] = t[-3]
            data['Rushing Yards Under'] = t[-1]
        elif t[0] == 'Receiving Yards Over/Under':
            data['Receiving Yards Projection'] = t[-4]
            data['Receiving Yards Over'] = t[-3]
            data['Receiving Yards Under'] = t[-1]
        elif t[0] == 'Receptions Over/Under':
            data['Receptions Projection'] = t[-4]
            data['Receptions Over'] = t[-3]
            data['Receptions Under'] = t[-1]
        elif t[0] == 'Anytime Touchdown Scorer':
            data['Anytime Touchdown Line'] = t[-1]

    return data

In [15]:
from tqdm.autonotebook import tqdm

d = []
for index, row in tqdm(list(df_rb.iterrows())):
    try:
        d.append(scrape_props(row['Player Name'], row['Player Name URL'], row['Year'], row['Week'], row['Team']))
    except Exception as e:
        print(f"Error with {row['Player Name'], row['Player Name URL'], row['Year'], row['Week'], row['Team']}: {e}")

  from tqdm.autonotebook import tqdm


  0%|          | 0/2016 [00:00<?, ?it/s]



In [16]:
driver.quit()

In [17]:
d

[{'Year': 2020,
  'Week': 1,
  'Player Name': 'christian mccaffrey',
  'Team': 'CAR',
  'Anytime Touchdown Line': '-188',
  'Receiving Yards Projection': 'O 51.5',
  'Receiving Yards Over': '(-105)',
  'Receiving Yards Under': '(-115)',
  'Receptions Projection': 'O 6.5',
  'Receptions Over': '(-130)',
  'Receptions Under': '(+110)',
  'Rushing Yards Projection': 'O 73.5',
  'Rushing Yards Over': '(-122)',
  'Rushing Yards Under': '(-110)'},
 {'Year': 2020,
  'Week': 1,
  'Player Name': 'ezekiel elliott',
  'Team': 'DAL',
  'Receiving Yards Projection': 'O 29.5',
  'Receiving Yards Over': '(-110)',
  'Receiving Yards Under': '(-110)',
  'Receptions Projection': 'O 3.5',
  'Receptions Over': '(-152)',
  'Receptions Under': '(+120)',
  'Rushing Yards Projection': 'O 81.5',
  'Rushing Yards Over': '(-115)',
  'Rushing Yards Under': '(EVEN)',
  'Anytime Touchdown Line': '-163'},
 {'Year': 2020,
  'Week': 1,
  'Player Name': 'dalvin cook',
  'Team': 'MIN',
  'Anytime Touchdown Line': '-138'

In [18]:
import pandas as pd

df = pd.json_normalize(d)

df

Unnamed: 0,Year,Week,Player Name,Team,Anytime Touchdown Line,Receiving Yards Projection,Receiving Yards Over,Receiving Yards Under,Receptions Projection,Receptions Over,Receptions Under,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under
0,2020,1,christian mccaffrey,CAR,-188,O 51.5,(-105),(-115),O 6.5,(-130),(+110),O 73.5,(-122),(-110)
1,2020,1,ezekiel elliott,DAL,-163,O 29.5,(-110),(-110),O 3.5,(-152),(+120),O 81.5,(-115),(EVEN)
2,2020,1,dalvin cook,MIN,-138,O 28.5,(-110),(-110),O 3.5,(-130),(+110),O 74.5,(-110),(-110)
3,2020,1,josh jacobs,LV,-155,O 16.5,(-115),(-105),O 2.5,(EVEN),(-120),O 79.5,(-110),(-110)
4,2020,1,saquon barkley,NYG,-110,O 33.5,(-110),(-110),O 3.5,(-134),(-170),O 72.5,(-110),(-110)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011,2023,17,jaylen warren,PIT,+210,O 25.5,(-115),(-115),O 3.5,(-125),(-105),O 36.5,(-115),(-115)
2012,2023,17,jerome ford,CLE,+160,O 19.5,(-120),(-110),O 3.5,(+133),(-180),O 47.5,(-115),(-115)
2013,2023,17,gus edwards,BAL,EVEN,O 7.5,(-110),(-115),O 1.5,(+150),(-200),O 41.5,(-113),(-110)
2014,2023,17,khalil herbert,CHI,+175,O 7.5,(-110),(-110),O 1.5,(EVEN),(-131),O 49.5,(-115),(-110)


In [19]:
for column in ('Rushing Yards Projection', 'Receiving Yards Projection', 'Receptions Projection'):
    df[column] = df[column].str.extract(r'(\d+\.\d+)').astype(float)

df

Unnamed: 0,Year,Week,Player Name,Team,Anytime Touchdown Line,Receiving Yards Projection,Receiving Yards Over,Receiving Yards Under,Receptions Projection,Receptions Over,Receptions Under,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under
0,2020,1,christian mccaffrey,CAR,-188,51.5,(-105),(-115),6.5,(-130),(+110),73.5,(-122),(-110)
1,2020,1,ezekiel elliott,DAL,-163,29.5,(-110),(-110),3.5,(-152),(+120),81.5,(-115),(EVEN)
2,2020,1,dalvin cook,MIN,-138,28.5,(-110),(-110),3.5,(-130),(+110),74.5,(-110),(-110)
3,2020,1,josh jacobs,LV,-155,16.5,(-115),(-105),2.5,(EVEN),(-120),79.5,(-110),(-110)
4,2020,1,saquon barkley,NYG,-110,33.5,(-110),(-110),3.5,(-134),(-170),72.5,(-110),(-110)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011,2023,17,jaylen warren,PIT,+210,25.5,(-115),(-115),3.5,(-125),(-105),36.5,(-115),(-115)
2012,2023,17,jerome ford,CLE,+160,19.5,(-120),(-110),3.5,(+133),(-180),47.5,(-115),(-115)
2013,2023,17,gus edwards,BAL,EVEN,7.5,(-110),(-115),1.5,(+150),(-200),41.5,(-113),(-110)
2014,2023,17,khalil herbert,CHI,+175,7.5,(-110),(-110),1.5,(EVEN),(-131),49.5,(-115),(-110)


In [20]:
for column in ('Rushing Yards Over', 'Rushing Yards Under', 'Receiving Yards Over', 'Receiving Yards Under', 'Receptions Over', 'Receptions Under'):
    df.loc[df[column].str.contains('EVEN', regex=False, na=False), column] = '100'
    df.loc[df[column] == '--', column] = 'nan'
    df[column] = df[column].str.replace(r'\(|\)', '', regex=True).astype(float)

df

Unnamed: 0,Year,Week,Player Name,Team,Anytime Touchdown Line,Receiving Yards Projection,Receiving Yards Over,Receiving Yards Under,Receptions Projection,Receptions Over,Receptions Under,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under
0,2020,1,christian mccaffrey,CAR,-188,51.5,-105.0,-115.0,6.5,-130.0,110.0,73.5,-122.0,-110.0
1,2020,1,ezekiel elliott,DAL,-163,29.5,-110.0,-110.0,3.5,-152.0,120.0,81.5,-115.0,100.0
2,2020,1,dalvin cook,MIN,-138,28.5,-110.0,-110.0,3.5,-130.0,110.0,74.5,-110.0,-110.0
3,2020,1,josh jacobs,LV,-155,16.5,-115.0,-105.0,2.5,100.0,-120.0,79.5,-110.0,-110.0
4,2020,1,saquon barkley,NYG,-110,33.5,-110.0,-110.0,3.5,-134.0,-170.0,72.5,-110.0,-110.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011,2023,17,jaylen warren,PIT,+210,25.5,-115.0,-115.0,3.5,-125.0,-105.0,36.5,-115.0,-115.0
2012,2023,17,jerome ford,CLE,+160,19.5,-120.0,-110.0,3.5,133.0,-180.0,47.5,-115.0,-115.0
2013,2023,17,gus edwards,BAL,EVEN,7.5,-110.0,-115.0,1.5,150.0,-200.0,41.5,-113.0,-110.0
2014,2023,17,khalil herbert,CHI,+175,7.5,-110.0,-110.0,1.5,100.0,-131.0,49.5,-115.0,-110.0


In [21]:
for column in ('Anytime Touchdown Line', ):
    df.loc[df[column].str.contains('EVEN', regex=False, na=False), column] = '100'
    df.loc[df[column] == '--', column] = 'nan'
    df[column] = df[column].astype(float)

df

Unnamed: 0,Year,Week,Player Name,Team,Anytime Touchdown Line,Receiving Yards Projection,Receiving Yards Over,Receiving Yards Under,Receptions Projection,Receptions Over,Receptions Under,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under
0,2020,1,christian mccaffrey,CAR,-188.0,51.5,-105.0,-115.0,6.5,-130.0,110.0,73.5,-122.0,-110.0
1,2020,1,ezekiel elliott,DAL,-163.0,29.5,-110.0,-110.0,3.5,-152.0,120.0,81.5,-115.0,100.0
2,2020,1,dalvin cook,MIN,-138.0,28.5,-110.0,-110.0,3.5,-130.0,110.0,74.5,-110.0,-110.0
3,2020,1,josh jacobs,LV,-155.0,16.5,-115.0,-105.0,2.5,100.0,-120.0,79.5,-110.0,-110.0
4,2020,1,saquon barkley,NYG,-110.0,33.5,-110.0,-110.0,3.5,-134.0,-170.0,72.5,-110.0,-110.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011,2023,17,jaylen warren,PIT,210.0,25.5,-115.0,-115.0,3.5,-125.0,-105.0,36.5,-115.0,-115.0
2012,2023,17,jerome ford,CLE,160.0,19.5,-120.0,-110.0,3.5,133.0,-180.0,47.5,-115.0,-115.0
2013,2023,17,gus edwards,BAL,100.0,7.5,-110.0,-115.0,1.5,150.0,-200.0,41.5,-113.0,-110.0
2014,2023,17,khalil herbert,CHI,175.0,7.5,-110.0,-110.0,1.5,100.0,-131.0,49.5,-115.0,-110.0


In [22]:
def american_odds_to_probability(odds):
    """
    Converts American betting odds to implied probability.
    
    Args:
        odds (int or float): The American odds value (e.g., -150, +200).

    Returns:
        float: The implied probability as a decimal (e.g., 0.60 for 60%).
    """
    if odds > 0:
        # Positive odds: (100 / (odds + 100))
        probability = 100 / (odds + 100)
    else:
        # Negative odds: (-odds / (-odds + 100))
        probability = -odds / (-odds + 100)
    
    return probability

Adjust projections based on over/under lines

In [23]:
for stat in ('Rushing Yards', 'Receiving Yards', 'Receptions'):
    df[f'Adjusted {stat} Projection'] = (
        df[f'{stat} Projection'] +
        0.5 * df[f'{stat} Over'].apply(american_odds_to_probability) -
        0.5 * df[f'{stat} Under'].apply(american_odds_to_probability)
    )

df

Unnamed: 0,Year,Week,Player Name,Team,Anytime Touchdown Line,Receiving Yards Projection,Receiving Yards Over,Receiving Yards Under,Receptions Projection,Receptions Over,Receptions Under,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under,Adjusted Rushing Yards Projection,Adjusted Receiving Yards Projection,Adjusted Receptions Projection
0,2020,1,christian mccaffrey,CAR,-188.0,51.5,-105.0,-115.0,6.5,-130.0,110.0,73.5,-122.0,-110.0,73.512870,51.488656,6.544513
1,2020,1,ezekiel elliott,DAL,-163.0,29.5,-110.0,-110.0,3.5,-152.0,120.0,81.5,-115.0,100.0,81.517442,29.500000,3.574315
2,2020,1,dalvin cook,MIN,-138.0,28.5,-110.0,-110.0,3.5,-130.0,110.0,74.5,-110.0,-110.0,74.500000,28.500000,3.544513
3,2020,1,josh jacobs,LV,-155.0,16.5,-115.0,-105.0,2.5,100.0,-120.0,79.5,-110.0,-110.0,79.500000,16.511344,2.477273
4,2020,1,saquon barkley,NYG,-110.0,33.5,-110.0,-110.0,3.5,-134.0,-170.0,72.5,-110.0,-110.0,72.500000,33.500000,3.471510
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011,2023,17,jaylen warren,PIT,210.0,25.5,-115.0,-115.0,3.5,-125.0,-105.0,36.5,-115.0,-115.0,36.500000,25.500000,3.521680
2012,2023,17,jerome ford,CLE,160.0,19.5,-120.0,-110.0,3.5,133.0,-180.0,47.5,-115.0,-115.0,47.500000,19.510823,3.393164
2013,2023,17,gus edwards,BAL,100.0,7.5,-110.0,-115.0,1.5,150.0,-200.0,41.5,-113.0,-110.0,41.503353,7.494463,1.366667
2014,2023,17,khalil herbert,CHI,175.0,7.5,-110.0,-110.0,1.5,100.0,-131.0,49.5,-115.0,-110.0,49.505537,7.500000,1.466450


In [24]:
df['Anytime Touchdown Probability'] = df['Anytime Touchdown Line'].apply(american_odds_to_probability)

df

Unnamed: 0,Year,Week,Player Name,Team,Anytime Touchdown Line,Receiving Yards Projection,Receiving Yards Over,Receiving Yards Under,Receptions Projection,Receptions Over,Receptions Under,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under,Adjusted Rushing Yards Projection,Adjusted Receiving Yards Projection,Adjusted Receptions Projection,Anytime Touchdown Probability
0,2020,1,christian mccaffrey,CAR,-188.0,51.5,-105.0,-115.0,6.5,-130.0,110.0,73.5,-122.0,-110.0,73.512870,51.488656,6.544513,0.652778
1,2020,1,ezekiel elliott,DAL,-163.0,29.5,-110.0,-110.0,3.5,-152.0,120.0,81.5,-115.0,100.0,81.517442,29.500000,3.574315,0.619772
2,2020,1,dalvin cook,MIN,-138.0,28.5,-110.0,-110.0,3.5,-130.0,110.0,74.5,-110.0,-110.0,74.500000,28.500000,3.544513,0.579832
3,2020,1,josh jacobs,LV,-155.0,16.5,-115.0,-105.0,2.5,100.0,-120.0,79.5,-110.0,-110.0,79.500000,16.511344,2.477273,0.607843
4,2020,1,saquon barkley,NYG,-110.0,33.5,-110.0,-110.0,3.5,-134.0,-170.0,72.5,-110.0,-110.0,72.500000,33.500000,3.471510,0.523810
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011,2023,17,jaylen warren,PIT,210.0,25.5,-115.0,-115.0,3.5,-125.0,-105.0,36.5,-115.0,-115.0,36.500000,25.500000,3.521680,0.322581
2012,2023,17,jerome ford,CLE,160.0,19.5,-120.0,-110.0,3.5,133.0,-180.0,47.5,-115.0,-115.0,47.500000,19.510823,3.393164,0.384615
2013,2023,17,gus edwards,BAL,100.0,7.5,-110.0,-115.0,1.5,150.0,-200.0,41.5,-113.0,-110.0,41.503353,7.494463,1.366667,0.500000
2014,2023,17,khalil herbert,CHI,175.0,7.5,-110.0,-110.0,1.5,100.0,-131.0,49.5,-115.0,-110.0,49.505537,7.500000,1.466450,0.363636


In [25]:
df.to_parquet(r'..\..\data\betting_lines\rb1_props.parquet')

print('Done')

Done
