Find relevant quarterbacks to webscrape

In [1]:
import pandas as pd

# Let displayed frames show up to 100 columns before pandas truncates them
pd.set_option('display.max_columns', 100)

# Forward slashes resolve on Windows as well as on POSIX systems (the previous
# backslash path only worked on Windows) and match the other parquet read in this notebook
df_superflex_rankings = pd.read_parquet('../../data/fantasypros_in_season_rankings/superflex_rankings.parquet')

# Drop rows that have no player name, then renumber the index
df_superflex_rankings = df_superflex_rankings.loc[df_superflex_rankings['Player Name'].notna(), :].reset_index(drop=True)

# make names easier to match
df_superflex_rankings['Player Name'] = (
    df_superflex_rankings['Player Name']
    .str.split().str[:2].str.join(' ')  # make names just the first two words
    .str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)  # remove non-alphanumeric characters
    .str.lower()  # lowercase only (helps with matching)
)

df_superflex_rankings

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK
0,2018,1,1,todd gurley,RB,1
1,2018,1,2,david johnson,RB,2
2,2018,1,3,alvin kamara,RB,3
3,2018,1,4,melvin gordon,RB,4
4,2018,1,5,ezekiel elliott,RB,5
...,...,...,...,...,...,...
41050,2023,17,461,steven sims,WR,183
41051,2023,17,462,jashaun corbin,RB,112
41052,2023,17,463,dee eskridge,WR,184
41053,2023,17,464,clayton tune,QB,65


In [2]:
# Quarterbacks only, restricted to 2020 onward
is_recent_qb = (
    (df_superflex_rankings['POS'] == 'QB')
    & (df_superflex_rankings['Year'] >= 2020)  # for now
)

# Filter first, then sort once: week by week, best-ranked quarterback first.
# sort_values returns a new frame, so no in-place mutation is needed.
df_qb = (
    df_superflex_rankings
    .loc[is_recent_qb, :]
    .sort_values(['Year', 'Week', 'POS RK'], ignore_index=True)
)

df_qb

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK
0,2020,1,1,patrick mahomes,QB,1
1,2020,1,2,lamar jackson,QB,2
2,2020,1,4,dak prescott,QB,3
3,2020,1,5,russell wilson,QB,4
4,2020,1,6,deshaun watson,QB,5
...,...,...,...,...,...,...
3695,2023,17,453,jameis winston,QB,61
3696,2023,17,454,jeff driskel,QB,62
3697,2023,17,455,brett rypien,QB,63
3698,2023,17,456,pj walker,QB,64


Filter to relevant players

In [3]:
# Only the columns needed to look up each player's team per week
team_columns = ['Season', 'Week', 'Name', 'Team', 'Position']
df_teams = pd.read_parquet('../../data/fantasy_points/footballguys_half_ppr.parquet')[team_columns]

# Normalise names the same way as the rankings so the two sources can be joined on name
first_two_words = df_teams['Name'].str.split().str[:2].str.join(' ')  # make names just the first two words
alphanumeric_only = first_two_words.str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)  # remove non-alphanumeric characters
df_teams['Name'] = alphanumeric_only.str.lower()  # lowercase only (helps with matching)

# Quarterbacks only
is_quarterback = df_teams['Position'] == 'QB'
df_teams = df_teams.loc[is_quarterback, :].reset_index(drop=True)

df_teams

Unnamed: 0,Season,Week,Name,Team,Position
0,2018,1,ryan fitzpatrick,TB,QB
1,2018,1,drew brees,NO,QB
2,2018,1,patrick mahomes,KC,QB
3,2018,1,philip rivers,LAC,QB
4,2018,1,aaron rodgers,GB,QB
...,...,...,...,...,...
3608,2023,17,mike white,MIA,QB
3609,2023,17,kyle allen,BUF,QB
3610,2023,17,sam darnold,SF,QB
3611,2023,17,matt barkley,JAX,QB


In [4]:
# Give df_teams the same key column names as df_qb so both frames can be joined on them
merge_keys = ['Year', 'Week', 'Player Name', 'POS']
teams_for_merge = df_teams.rename(columns={'Season': 'Year', 'Name': 'Player Name', 'Position': 'POS'})

# Left join: every ranked quarterback row is kept; 'Team' is missing where no match was found
df_qb = df_qb.merge(teams_for_merge, how='left', on=merge_keys)

# Show quarterbacks ranked in the top 25 at their position that did not match a team
team_missing = df_qb['Team'].isna()
ranked_top_25 = df_qb['POS RK'] <= 25
df_qb.loc[team_missing & ranked_top_25, :]

# 2022 Week 17 Bills at Bengals was cancelled
# 2020 Week 2 Tyrod Taylor got hurt right before the game

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK,Team
88,2020,2,38,tyrod taylor,QB,24,


Fix players with difficult names

In [5]:
# df_teams.loc[df_teams['Name'] == 'will fuller', 'Name'] = 'william fuller'

In [6]:
# Rows ranked within the top 32*1 quarterbacks that found no team in the merge
unmatched_ranked_qbs = df_qb.loc[(df_qb['Team'].isna()) & (df_qb['POS RK'] <= 32*1), :]

# Ten players with the most unmatched rows (likely candidates for a name fix)
(
    unmatched_ranked_qbs
    .groupby(['Player Name'])['POS']
    .count()
    .sort_values(ascending=False)
    .head(10)
)

Player Name
cj beathard          6
jameis winston       6
trey lance           6
mitchell trubisky    5
jarrett stidham      5
pj walker            5
malik willis         4
bailey zappe         4
cooper rush          3
drew lock            3
Name: POS, dtype: int64

In [7]:
# Keep only the rows that matched a team in the merge, then renumber the index
has_team = df_qb['Team'].notna()
df_qb = df_qb.loc[has_team, :].reset_index(drop=True)

df_qb

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK,Team
0,2020,1,1,patrick mahomes,QB,1,KC
1,2020,1,2,lamar jackson,QB,2,BAL
2,2020,1,4,dak prescott,QB,3,DAL
3,2020,1,5,russell wilson,QB,4,SEA
4,2020,1,6,deshaun watson,QB,5,HOU
...,...,...,...,...,...,...,...
2310,2023,17,430,desmond ridder,QB,47,ATL
2311,2023,17,431,sean clifford,QB,48,GB
2312,2023,17,436,davis mills,QB,52,HOU
2313,2023,17,450,matt barkley,QB,60,JAX


In [8]:
# Keep one quarterback per team per week: the first row of each (Year, Week, Team) group.
# drop_duplicates keeps Year/Week/Team as ordinary columns on every pandas version.
# Before pandas 2.0, groupby(...).nth(0) moved the group keys into the index, so the
# following reset_index(drop=True) threw those three columns away.
# NOTE(review): "first" is the best-ranked QB only while df_qb is still ordered by
# POS RK within each week — confirm no earlier cell reorders it.
df_qb = df_qb.drop_duplicates(subset=['Year', 'Week', 'Team'], keep='first').reset_index(drop=True)

df_qb

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK,Team
0,2020,1,1,patrick mahomes,QB,1,KC
1,2020,1,2,lamar jackson,QB,2,BAL
2,2020,1,4,dak prescott,QB,3,DAL
3,2020,1,5,russell wilson,QB,4,SEA
4,2020,1,6,deshaun watson,QB,5,HOU
...,...,...,...,...,...,...,...
2000,2023,17,52,taylor heinicke,QB,28,ATL
2001,2023,17,54,cj beathard,QB,29,JAX
2002,2023,17,59,sam howell,QB,30,WAS
2003,2023,17,71,bailey zappe,QB,31,NE


In [9]:
# Default URL slug: the normalised name with spaces turned into hyphens
df_qb['Player Name URL'] = df_qb['Player Name'].str.replace(' ', '-')

# fix players who are known to have issues
url_overrides = {
    'josh allen': 'josh-allen-qb',
    'mitchell trubisky': 'mitch-trubisky',
    'pj walker': 'phillip-walker',
    'robert griffin': 'robert-griffin-iii',
    'dorian thompsonrobinson': 'dorian-thompson-robinson',
}
for known_name, url_slug in url_overrides.items():
    df_qb.loc[df_qb['Player Name'] == known_name, 'Player Name URL'] = url_slug

df_qb

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK,Team,Player Name URL
0,2020,1,1,patrick mahomes,QB,1,KC,patrick-mahomes
1,2020,1,2,lamar jackson,QB,2,BAL,lamar-jackson
2,2020,1,4,dak prescott,QB,3,DAL,dak-prescott
3,2020,1,5,russell wilson,QB,4,SEA,russell-wilson
4,2020,1,6,deshaun watson,QB,5,HOU,deshaun-watson
...,...,...,...,...,...,...,...,...
2000,2023,17,52,taylor heinicke,QB,28,ATL,taylor-heinicke
2001,2023,17,54,cj beathard,QB,29,JAX,cj-beathard
2002,2023,17,59,sam howell,QB,30,WAS,sam-howell
2003,2023,17,71,bailey zappe,QB,31,NE,bailey-zappe


In [10]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Chrome configuration for the Selenium session
options = Options()
# Uncomment the next line to run Chrome without opening a visible window
# options.add_argument('--headless')

# Start Chrome; scrape_props uses this module-level `driver`, and it is closed
# with driver.quit() once scraping has finished
driver = webdriver.Chrome(options=options)

In [11]:
import time
from selenium.webdriver.common.by import By

def scrape_props(player_name, player_name_url, year, week, team):
    """
    Scrape one player's prop lines for a single week from bettingpros.com.

    Loads the player's prop page with the module-level Selenium ``driver``,
    scrolls so that lazily loaded rows get a chance to render, then reads the
    markets of interest from the text of each row.

    Args:
        player_name: Player name; copied into the result and used in warnings.
        player_name_url: Player slug inserted into the page URL.
        year: Season, sent as the ``season`` query parameter.
        week: Week, sent as the ``week`` query parameter.
        team: Team; copied into the result unchanged.

    Returns:
        dict: 'Year', 'Week', 'Player Name' and 'Team', plus the raw text of
        every market found on the page: '<stat> Projection', '<stat> Over' and
        '<stat> Under' for Passing Touchdowns, Passing Yards, Rushing Yards and
        Interceptions, and 'Anytime Touchdown Line'. Markets that are not on
        the page (or whose row is too short to parse) are absent from the dict.
    """
    row_class = "grouped-items-with-sticky-footer__content"

    # Row title -> stat name for the markets that share the projection/over/under layout
    over_under_stats = {
        'Passing Touchdowns Over/Under': 'Passing Touchdowns',
        'Passing Yards Over/Under': 'Passing Yards',
        'Rushing Yards Over/Under': 'Rushing Yards',
        'Interceptions Over/Under': 'Interceptions',
    }

    driver.get(url=f"https://www.bettingpros.com/nfl/odds/player-props/{player_name_url}/?season={year}&week={week}")
    time.sleep(5)  # fixed wait for the initial render

    # the page uses lazy loading
    # this loop helps all relevant elements load before scraping
    for _ in range(3):
        # find all loaded rows
        rows = driver.find_elements(by=By.CLASS_NAME, value=row_class)

        # scroll to the last loaded row, which should load subsequent rows if there are any
        if len(rows) > 0:
            driver.execute_script(f"window.scrollTo(0, {rows[-1].location['y']});")

        time.sleep(3)

    # Final read, after the scrolling passes have had time to load everything
    rows = driver.find_elements(by=By.CLASS_NAME, value=row_class)

    if len(rows) == 0:
        print(f"Warning: No rows for {player_name} {year} week {week}")

    data = {
        'Year': year,
        'Week': week,
        'Player Name': player_name,
        'Team': team,
    }
    for row in rows:
        t = row.text.split('\n')
        title = t[0]

        if title == 'Anytime Touchdown Scorer':
            data['Anytime Touchdown Line'] = t[-1]
        elif title in over_under_stats:
            # The projection, over odds and under odds are read from the 4th-last,
            # 3rd-last and last lines (presumably the 2nd-last line is the under
            # label — TODO confirm). A shorter row cannot be parsed; skip it rather
            # than raise IndexError and lose every other market for this player.
            if len(t) < 4:
                print(f"Warning: Unexpected row layout for {player_name} {year} week {week}: {title}")
                continue

            stat = over_under_stats[title]
            data[f'{stat} Projection'] = t[-4]
            data[f'{stat} Over'] = t[-3]
            data[f'{stat} Under'] = t[-1]

    return data

In [12]:
from tqdm.autonotebook import tqdm

# One dict of scraped props per (player, week) row; failures are reported and skipped
d = []
for index, row in tqdm(list(df_qb.iterrows())):
    try:
        scraped = scrape_props(row['Player Name'], row['Player Name URL'], row['Year'], row['Week'], row['Team'])
    except Exception as e:
        # Best effort: report the failing player/week and carry on with the next row
        print(f"Error with {row['Player Name'], row['Player Name URL'], row['Year'], row['Week'], row['Team']}: {e}")
    else:
        d.append(scraped)

  from tqdm.autonotebook import tqdm


  0%|          | 0/2005 [00:00<?, ?it/s]



In [13]:
# Scraping is finished: shut down the Selenium-driven Chrome session
driver.quit()

In [14]:
# Preview only the first few scraped records; displaying the whole list
# (one dict per player-week) floods the notebook output
d[:3]

[{'Year': 2020,
  'Week': 1,
  'Player Name': 'patrick mahomes',
  'Team': 'KC',
  'Passing Touchdowns Projection': 'O 2.5',
  'Passing Touchdowns Over': '(+104)',
  'Passing Touchdowns Under': '(-132)',
  'Passing Yards Projection': 'O 306.5',
  'Passing Yards Over': '(-110)',
  'Passing Yards Under': '(-110)',
  'Anytime Touchdown Line': '+300',
  'Rushing Yards Projection': 'O 20.5',
  'Rushing Yards Over': '(-105)',
  'Rushing Yards Under': '(-115)'},
 {'Year': 2020,
  'Week': 1,
  'Player Name': 'lamar jackson',
  'Team': 'BAL',
  'Passing Touchdowns Projection': 'O 1.5',
  'Passing Touchdowns Over': '(-152)',
  'Passing Touchdowns Under': '(-120)',
  'Passing Yards Projection': 'O 222.5',
  'Passing Yards Over': '(-110)',
  'Passing Yards Under': '(-110)',
  'Anytime Touchdown Line': '+110',
  'Rushing Yards Projection': 'O 60.5',
  'Rushing Yards Over': '(-110)',
  'Rushing Yards Under': '(-110)'},
 {'Year': 2020,
  'Week': 1,
  'Player Name': 'dak prescott',
  'Team': 'DAL',
  

In [19]:
import pandas as pd

# One row per scraped record; a market missing from a record shows up as a
# missing value in that row
df = pd.json_normalize(d)

df

Unnamed: 0,Year,Week,Player Name,Team,Passing Touchdowns Projection,Passing Touchdowns Over,Passing Touchdowns Under,Passing Yards Projection,Passing Yards Over,Passing Yards Under,Anytime Touchdown Line,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under,Interceptions Projection,Interceptions Over,Interceptions Under
0,2020,1,patrick mahomes,KC,O 2.5,(+104),(-132),O 306.5,(-110),(-110),+300,O 20.5,(-105),(-115),,,
1,2020,1,lamar jackson,BAL,O 1.5,(-152),(-120),O 222.5,(-110),(-110),+110,O 60.5,(-110),(-110),,,
2,2020,1,dak prescott,DAL,O 2.5,(-175),(-182),O 285.5,(-110),(-110),+333,O 11.5,(-132),(-115),,,
3,2020,1,russell wilson,SEA,O 1.5,(-140),(+120),O 259.5,(-110),(-110),+400,O 21.5,(-110),(-110),,,
4,2020,1,deshaun watson,HOU,O 1.5,(-152),(+120),O 275.5,(-110),(-110),+190,O 30.5,(-122),(-110),,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2000,2023,17,taylor heinicke,ATL,O 1.5,(+195),(-270),O 203.5,(-110),(-115),+550,O 10.5,(-115),(-111),O 0.5,(-120),(-110)
2001,2023,17,cj beathard,JAX,O 1.5,(+160),(-214),O 214.5,(-111),(-115),+650,O 10.5,(-115),(-115),O 0.5,(-150),(+115)
2002,2023,17,sam howell,WAS,O 1.5,(+135),(-175),O 234.5,(-115),(-115),+550,O 12.5,(-111),(-110),O 0.5,(-204),(+155)
2003,2023,17,bailey zappe,NE,O 0.5,(-184),(+140),O 195.5,(-115),(-115),+1600,O 5.5,(-110),(-120),O 0.5,(-185),(+140)


In [20]:
projection_columns = (
    'Passing Touchdowns Projection',
    'Passing Yards Projection',
    'Rushing Yards Projection',
    'Interceptions Projection',
)

# Scraped projections look like 'O 2.5'; keep only the decimal number, as a float.
# NOTE(review): text with no decimal point (e.g. a whole-number line) does not match
# the pattern and becomes NaN — confirm that is intended.
for column in projection_columns:
    line_text = df[column].str.extract(r'(\d+\.\d+)', expand=False)
    df[column] = line_text.astype(float)

df

Unnamed: 0,Year,Week,Player Name,Team,Passing Touchdowns Projection,Passing Touchdowns Over,Passing Touchdowns Under,Passing Yards Projection,Passing Yards Over,Passing Yards Under,Anytime Touchdown Line,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under,Interceptions Projection,Interceptions Over,Interceptions Under
0,2020,1,patrick mahomes,KC,2.5,(+104),(-132),306.5,(-110),(-110),+300,20.5,(-105),(-115),,,
1,2020,1,lamar jackson,BAL,1.5,(-152),(-120),222.5,(-110),(-110),+110,60.5,(-110),(-110),,,
2,2020,1,dak prescott,DAL,2.5,(-175),(-182),285.5,(-110),(-110),+333,11.5,(-132),(-115),,,
3,2020,1,russell wilson,SEA,1.5,(-140),(+120),259.5,(-110),(-110),+400,21.5,(-110),(-110),,,
4,2020,1,deshaun watson,HOU,1.5,(-152),(+120),275.5,(-110),(-110),+190,30.5,(-122),(-110),,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2000,2023,17,taylor heinicke,ATL,1.5,(+195),(-270),203.5,(-110),(-115),+550,10.5,(-115),(-111),0.5,(-120),(-110)
2001,2023,17,cj beathard,JAX,1.5,(+160),(-214),214.5,(-111),(-115),+650,10.5,(-115),(-115),0.5,(-150),(+115)
2002,2023,17,sam howell,WAS,1.5,(+135),(-175),234.5,(-115),(-115),+550,12.5,(-111),(-110),0.5,(-204),(+155)
2003,2023,17,bailey zappe,NE,0.5,(-184),(+140),195.5,(-115),(-115),+1600,5.5,(-110),(-120),0.5,(-185),(+140)


In [21]:
over_under_odds_columns = (
    'Passing Touchdowns Over',
    'Passing Touchdowns Under',
    'Passing Yards Over',
    'Passing Yards Under',
    'Rushing Yards Over',
    'Rushing Yards Under',
    'Interceptions Over',
    'Interceptions Under',
)

# Turn scraped odds text such as '(-110)' into floats
for column in over_under_odds_columns:
    # Any value containing 'EVEN' is recorded as 100
    is_even = df[column].str.contains('EVEN', regex=False, na=False)
    df.loc[is_even, column] = '100'

    # '--' and 'NL' become the text 'nan', which the float conversion reads as NaN
    no_line = (df[column] == '--') | (df[column] == 'NL')
    df.loc[no_line, column] = 'nan'

    # Strip the surrounding parentheses, then convert
    without_parens = df[column].str.replace(r'\(|\)', '', regex=True)
    df[column] = without_parens.astype(float)

df

Unnamed: 0,Year,Week,Player Name,Team,Passing Touchdowns Projection,Passing Touchdowns Over,Passing Touchdowns Under,Passing Yards Projection,Passing Yards Over,Passing Yards Under,Anytime Touchdown Line,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under,Interceptions Projection,Interceptions Over,Interceptions Under
0,2020,1,patrick mahomes,KC,2.5,104.0,-132.0,306.5,-110.0,-110.0,+300,20.5,-105.0,-115.0,,,
1,2020,1,lamar jackson,BAL,1.5,-152.0,-120.0,222.5,-110.0,-110.0,+110,60.5,-110.0,-110.0,,,
2,2020,1,dak prescott,DAL,2.5,-175.0,-182.0,285.5,-110.0,-110.0,+333,11.5,-132.0,-115.0,,,
3,2020,1,russell wilson,SEA,1.5,-140.0,120.0,259.5,-110.0,-110.0,+400,21.5,-110.0,-110.0,,,
4,2020,1,deshaun watson,HOU,1.5,-152.0,120.0,275.5,-110.0,-110.0,+190,30.5,-122.0,-110.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2000,2023,17,taylor heinicke,ATL,1.5,195.0,-270.0,203.5,-110.0,-115.0,+550,10.5,-115.0,-111.0,0.5,-120.0,-110.0
2001,2023,17,cj beathard,JAX,1.5,160.0,-214.0,214.5,-111.0,-115.0,+650,10.5,-115.0,-115.0,0.5,-150.0,115.0
2002,2023,17,sam howell,WAS,1.5,135.0,-175.0,234.5,-115.0,-115.0,+550,12.5,-111.0,-110.0,0.5,-204.0,155.0
2003,2023,17,bailey zappe,NE,0.5,-184.0,140.0,195.5,-115.0,-115.0,+1600,5.5,-110.0,-120.0,0.5,-185.0,140.0


In [22]:
# Same clean-up as the over/under odds, except these values have no parentheses to strip
column = 'Anytime Touchdown Line'

# Any value containing 'EVEN' is recorded as 100
is_even = df[column].str.contains('EVEN', regex=False, na=False)
df.loc[is_even, column] = '100'

# '--' and 'NL' become the text 'nan', which the float conversion reads as NaN
no_line = (df[column] == '--') | (df[column] == 'NL')
df.loc[no_line, column] = 'nan'

df[column] = df[column].astype(float)

df

Unnamed: 0,Year,Week,Player Name,Team,Passing Touchdowns Projection,Passing Touchdowns Over,Passing Touchdowns Under,Passing Yards Projection,Passing Yards Over,Passing Yards Under,Anytime Touchdown Line,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under,Interceptions Projection,Interceptions Over,Interceptions Under
0,2020,1,patrick mahomes,KC,2.5,104.0,-132.0,306.5,-110.0,-110.0,300.0,20.5,-105.0,-115.0,,,
1,2020,1,lamar jackson,BAL,1.5,-152.0,-120.0,222.5,-110.0,-110.0,110.0,60.5,-110.0,-110.0,,,
2,2020,1,dak prescott,DAL,2.5,-175.0,-182.0,285.5,-110.0,-110.0,333.0,11.5,-132.0,-115.0,,,
3,2020,1,russell wilson,SEA,1.5,-140.0,120.0,259.5,-110.0,-110.0,400.0,21.5,-110.0,-110.0,,,
4,2020,1,deshaun watson,HOU,1.5,-152.0,120.0,275.5,-110.0,-110.0,190.0,30.5,-122.0,-110.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2000,2023,17,taylor heinicke,ATL,1.5,195.0,-270.0,203.5,-110.0,-115.0,550.0,10.5,-115.0,-111.0,0.5,-120.0,-110.0
2001,2023,17,cj beathard,JAX,1.5,160.0,-214.0,214.5,-111.0,-115.0,650.0,10.5,-115.0,-115.0,0.5,-150.0,115.0
2002,2023,17,sam howell,WAS,1.5,135.0,-175.0,234.5,-115.0,-115.0,550.0,12.5,-111.0,-110.0,0.5,-204.0,155.0
2003,2023,17,bailey zappe,NE,0.5,-184.0,140.0,195.5,-115.0,-115.0,1600.0,5.5,-110.0,-120.0,0.5,-185.0,140.0


In [23]:
def american_odds_to_probability(odds):
    """
    Turn an American odds line into the probability it implies.

    Args:
        odds (int or float): American odds, e.g. -150 or +200.

    Returns:
        float: Implied probability as a fraction of 1 (0.60 means 60%).
    """
    # Positive line: 100 / (odds + 100)
    if odds > 0:
        return 100 / (odds + 100)

    # Every other value (negative lines, zero, NaN): -odds / (-odds + 100)
    stake = -odds
    return stake / (stake + 100)

Adjust projections based on over/under lines

In [24]:
# Shift each projection by half the gap between the implied probabilities of the
# over and the under: up when the over is the more likely side, down otherwise
for stat in ('Passing Touchdowns', 'Passing Yards', 'Rushing Yards', 'Interceptions'):
    over_probability = df[f'{stat} Over'].apply(american_odds_to_probability)
    under_probability = df[f'{stat} Under'].apply(american_odds_to_probability)

    df[f'Adjusted {stat} Projection'] = (
        df[f'{stat} Projection'] + 0.5 * over_probability - 0.5 * under_probability
    )

df

Unnamed: 0,Year,Week,Player Name,Team,Passing Touchdowns Projection,Passing Touchdowns Over,Passing Touchdowns Under,Passing Yards Projection,Passing Yards Over,Passing Yards Under,Anytime Touchdown Line,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under,Interceptions Projection,Interceptions Over,Interceptions Under,Adjusted Passing Touchdowns Projection,Adjusted Passing Yards Projection,Adjusted Rushing Yards Projection,Adjusted Interceptions Projection
0,2020,1,patrick mahomes,KC,2.5,104.0,-132.0,306.5,-110.0,-110.0,300.0,20.5,-105.0,-115.0,,,,2.460615,306.500000,20.488656,
1,2020,1,lamar jackson,BAL,1.5,-152.0,-120.0,222.5,-110.0,-110.0,110.0,60.5,-110.0,-110.0,,,,1.528860,222.500000,60.500000,
2,2020,1,dak prescott,DAL,2.5,-175.0,-182.0,285.5,-110.0,-110.0,333.0,11.5,-132.0,-115.0,,,,2.495487,285.500000,11.517041,
3,2020,1,russell wilson,SEA,1.5,-140.0,120.0,259.5,-110.0,-110.0,400.0,21.5,-110.0,-110.0,,,,1.564394,259.500000,21.500000,
4,2020,1,deshaun watson,HOU,1.5,-152.0,120.0,275.5,-110.0,-110.0,190.0,30.5,-122.0,-110.0,,,,1.574315,275.500000,30.512870,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2000,2023,17,taylor heinicke,ATL,1.5,195.0,-270.0,203.5,-110.0,-115.0,550.0,10.5,-115.0,-111.0,0.5,-120.0,-110.0,1.304627,203.494463,10.504409,0.510823
2001,2023,17,cj beathard,JAX,1.5,160.0,-214.0,214.5,-111.0,-115.0,650.0,10.5,-115.0,-115.0,0.5,-150.0,115.0,1.351543,214.495591,10.500000,0.567442
2002,2023,17,sam howell,WAS,1.5,135.0,-175.0,234.5,-115.0,-115.0,550.0,12.5,-111.0,-110.0,0.5,-204.0,155.0,1.394584,234.500000,12.501128,0.639448
2003,2023,17,bailey zappe,NE,0.5,-184.0,140.0,195.5,-115.0,-115.0,1600.0,5.5,-110.0,-120.0,0.5,-185.0,140.0,0.615610,195.500000,5.489177,0.616228


In [25]:
# Implied probability of the anytime-touchdown line, computed row by row with
# american_odds_to_probability (missing lines stay NaN)
df['Anytime Touchdown Probability'] = df['Anytime Touchdown Line'].apply(american_odds_to_probability)

df

Unnamed: 0,Year,Week,Player Name,Team,Passing Touchdowns Projection,Passing Touchdowns Over,Passing Touchdowns Under,Passing Yards Projection,Passing Yards Over,Passing Yards Under,Anytime Touchdown Line,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under,Interceptions Projection,Interceptions Over,Interceptions Under,Adjusted Passing Touchdowns Projection,Adjusted Passing Yards Projection,Adjusted Rushing Yards Projection,Adjusted Interceptions Projection,Anytime Touchdown Probability
0,2020,1,patrick mahomes,KC,2.5,104.0,-132.0,306.5,-110.0,-110.0,300.0,20.5,-105.0,-115.0,,,,2.460615,306.500000,20.488656,,0.250000
1,2020,1,lamar jackson,BAL,1.5,-152.0,-120.0,222.5,-110.0,-110.0,110.0,60.5,-110.0,-110.0,,,,1.528860,222.500000,60.500000,,0.476190
2,2020,1,dak prescott,DAL,2.5,-175.0,-182.0,285.5,-110.0,-110.0,333.0,11.5,-132.0,-115.0,,,,2.495487,285.500000,11.517041,,0.230947
3,2020,1,russell wilson,SEA,1.5,-140.0,120.0,259.5,-110.0,-110.0,400.0,21.5,-110.0,-110.0,,,,1.564394,259.500000,21.500000,,0.200000
4,2020,1,deshaun watson,HOU,1.5,-152.0,120.0,275.5,-110.0,-110.0,190.0,30.5,-122.0,-110.0,,,,1.574315,275.500000,30.512870,,0.344828
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2000,2023,17,taylor heinicke,ATL,1.5,195.0,-270.0,203.5,-110.0,-115.0,550.0,10.5,-115.0,-111.0,0.5,-120.0,-110.0,1.304627,203.494463,10.504409,0.510823,0.153846
2001,2023,17,cj beathard,JAX,1.5,160.0,-214.0,214.5,-111.0,-115.0,650.0,10.5,-115.0,-115.0,0.5,-150.0,115.0,1.351543,214.495591,10.500000,0.567442,0.133333
2002,2023,17,sam howell,WAS,1.5,135.0,-175.0,234.5,-115.0,-115.0,550.0,12.5,-111.0,-110.0,0.5,-204.0,155.0,1.394584,234.500000,12.501128,0.639448,0.153846
2003,2023,17,bailey zappe,NE,0.5,-184.0,140.0,195.5,-115.0,-115.0,1600.0,5.5,-110.0,-120.0,0.5,-185.0,140.0,0.615610,195.500000,5.489177,0.616228,0.058824


In [26]:
# Save the cleaned props. Forward slashes resolve on Windows as well as on POSIX
# systems; the previous backslash path only pointed into ../../data on Windows.
df.to_parquet('../../data/betting_lines/qb1_props.parquet')

print('Done')

Done
