Find the relevant wide receivers to web-scrape

In [1]:
import pandas as pd

pd.set_option('display.max_columns', 100)

# Forward slashes resolve on Windows, macOS and Linux alike; the previous
# backslash-separated raw string was only a valid relative path on Windows.
df_superflex_rankings = pd.read_parquet('../../data/fantasypros_in_season_rankings/superflex_rankings.parquet')

# Rows without a player name cannot be matched to anything later on, so drop them.
df_superflex_rankings = df_superflex_rankings.loc[df_superflex_rankings['Player Name'].notna(), :].reset_index(drop=True)

df_superflex_rankings

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK
0,2018,1,1,Todd Gurley II,RB,1
1,2018,1,2,David Johnson,RB,2
2,2018,1,3,Alvin Kamara,RB,3
3,2018,1,4,Melvin Gordon III,RB,4
4,2018,1,5,Ezekiel Elliott,RB,5
...,...,...,...,...,...,...
41050,2023,17,461,Steven Sims Jr.,WR,183
41051,2023,17,462,Jashaun Corbin,RB,112
41052,2023,17,463,Dee Eskridge,WR,184
41053,2023,17,464,Clayton Tune,QB,65


In [2]:
# Keep wide receivers from the 2020 season onward (for now), ordered by season,
# week and positional rank. Filtering first and sorting once yields the same
# frame as the earlier sort -> filter -> in-place re-sort sequence, without the
# redundant second sort or the inplace mutation.
is_recent_wr = (df_superflex_rankings['POS'] == 'WR') & (df_superflex_rankings['Year'] >= 2020)

df_wr = (
    df_superflex_rankings
    .loc[is_recent_wr, :]
    .sort_values(['Year', 'Week', 'POS RK'], ignore_index=True)
)

df_wr

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK
0,2020,1,21,Michael Thomas,WR,1
1,2020,1,33,Davante Adams,WR,2
2,2020,1,36,Julio Jones,WR,3
3,2020,1,37,Tyreek Hill,WR,4
4,2020,1,45,Chris Godwin,WR,5
...,...,...,...,...,...,...
11449,2023,17,458,Lucky Jackson,WR,180
11450,2023,17,459,DJ Turner,WR,181
11451,2023,17,460,Laquon Treadwell,WR,182
11452,2023,17,461,Steven Sims Jr.,WR,183


In [3]:
# List the distinct receiver names that contain a hyphen.
has_hyphen = df_wr['Player Name'].str.contains('-')
df_wr.loc[has_hyphen, 'Player Name'].unique()

array(['JuJu Smith-Schuster', 'Marquez Valdes-Scantling',
       'J.J. Arcega-Whiteside', 'Antonio Gandy-Golden',
       'Ray-Ray McCloud III', 'Nick Westbrook-Ikhine',
       'Tyron Billy-Johnson', 'Donovan Peoples-Jones',
       'Amon-Ra St. Brown', 'Ihmir Smith-Marsette',
       'Anthony Ratliff-Williams', 'Jaxon Smith-Njigba'], dtype=object)

In [4]:
# List the distinct receiver names ending in a "Sr." / "Jr." suffix.
# The alternation uses a non-capturing group, (?:...): str.contains warns when the
# pattern has capture groups, because it only tests for a match and never extracts them.
df_wr.loc[df_wr['Player Name'].str.contains(r'(?:Sr\.|Jr\.)$', regex=True), 'Player Name'].unique()

  df_wr.loc[df_wr['Player Name'].str.contains(r'(Sr\.|Jr\.)$', regex=True), 'Player Name'].unique()


array(['DJ Chark Jr.', 'Odell Beckham Jr.', 'Marvin Jones Jr.',
       'Mecole Hardman Jr.', 'Steven Sims Jr.', 'Michael Pittman Jr.',
       'Laviska Shenault Jr.', 'Jakeem Grant Sr.', 'Ted Ginn Jr.',
       'Keelan Cole Sr.', 'Mohamed Sanu Sr.', 'KJ Hill Jr.',
       'Cedrick Wilson Jr.', 'Jason Moore Jr.', 'Richie James Jr.',
       'Trent Sherfield Sr.', 'Steven Mitchell Jr.', 'Cyril Grayson Jr.',
       'Lynn Bowden Jr.', 'Victor Bolden Jr.', 'Paul Richardson Jr.',
       'Deebo Samuel Sr.', 'Stanley Morgan Jr.', 'Terrace Marshall Jr.',
       'Easop Winston Jr.', 'Jeff Cotton Jr.', 'Kaylon Geiger Sr.',
       'Justin Hardee Sr.', 'Velus Jones Jr.', 'Marvin Mims Jr.',
       'Austin Watkins Jr.'], dtype=object)

In [5]:
# Normalise the ranking names so they are easier to match against other sources.
# Step 1: keep only the first two whitespace-separated words.
first_two_words = df_wr['Player Name'].str.split().str[:2].str.join(' ')
# Step 2: strip every character that is not a letter, digit or whitespace.
alphanumeric_only = first_two_words.str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)
# Step 3: lowercase, so matching is case-insensitive.
df_wr['Player Name'] = alphanumeric_only.str.lower()

df_wr

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK
0,2020,1,21,michael thomas,WR,1
1,2020,1,33,davante adams,WR,2
2,2020,1,36,julio jones,WR,3
3,2020,1,37,tyreek hill,WR,4
4,2020,1,45,chris godwin,WR,5
...,...,...,...,...,...,...
11449,2023,17,458,lucky jackson,WR,180
11450,2023,17,459,dj turner,WR,181
11451,2023,17,460,laquon treadwell,WR,182
11452,2023,17,461,steven sims,WR,183


Filter to relevant players

In [6]:
# Load the weekly player table and keep only the columns needed to look up a team.
team_columns = ['Season', 'Week', 'Name', 'Team', 'Position']
df_teams = pd.read_parquet('../../data/fantasy_points/footballguys_half_ppr.parquet')[team_columns]

# Apply the same normalisation used for the ranking names:
# first two words only, non-alphanumeric characters removed, lowercase.
two_word_names = df_teams['Name'].str.split().str[:2].str.join(' ')
df_teams['Name'] = (
    two_word_names
    .str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)
    .str.lower()
)

# Only wide receivers are relevant here.
is_wide_receiver = df_teams['Position'] == 'WR'
df_teams = df_teams.loc[is_wide_receiver, :].reset_index(drop=True)

df_teams

Unnamed: 0,Season,Week,Name,Team,Position
0,2018,1,tyreek hill,KC,WR
1,2018,1,michael thomas,NO,WR
2,2018,1,desean jackson,TB,WR
3,2018,1,randall cobb,GB,WR
4,2018,1,kenny stills,MIA,WR
...,...,...,...,...,...
14200,2023,17,laquon treadwell,BAL,WR
14201,2023,17,kavontae turpin,DAL,WR
14202,2023,17,tylan wallace,BAL,WR
14203,2023,17,olamide zaccheaus,PHI,WR


Fix players with difficult names

In [7]:
# Rename 'will fuller' in the team table to 'william fuller'
# (presumably the spelling used by the rankings data, so the merge can match him).
is_will_fuller = df_teams['Name'] == 'will fuller'
df_teams.loc[is_will_fuller, 'Name'] = 'william fuller'

In [8]:
# Attach each receiver's team by matching on season, week, normalised name and position.
# A left join keeps every ranked receiver; unmatched ones end up with a missing Team.
team_lookup = df_teams.rename(columns={'Season': 'Year', 'Name': 'Player Name', 'Position': 'POS'})
df_wr = df_wr.merge(
    right=team_lookup,
    on=['Year', 'Week', 'Player Name', 'POS'],
    how='left',
)

# Sanity check: top-25 receivers that did not get a team.
unmatched_top_25 = df_wr['Team'].isna() & (df_wr['POS RK'] <= 25)
df_wr.loc[unmatched_top_25, :]

# 2022 Week 17 Bills at Bengals was cancelled

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK,Team
8359,2022,17,20,jamarr chase,WR,2,


In [9]:
# Count, per player, the unmatched rows among the top 32*3 ranked receivers.
unmatched_top_96 = df_wr['Team'].isna() & (df_wr['POS RK'] <= 32*3)
df_wr.loc[unmatched_top_96, :].groupby(['Player Name'])['POS'].count()

Player Name
aj green                   1
albert wilson              2
antonio gandygolden        1
bisi johnson               3
brandon zylstra            1
braxton berrios            1
dee eskridge               1
denzel mims                1
donovan peoplesjones       1
gabe davis                 1
greg dortch                1
jamarr chase               1
jarvis landry              1
josh reynolds              1
khadarel hodge             1
marquez valdesscantling    1
mecole hardman             1
parris campbell            1
rayray mccloud             1
terrace marshall           2
tom kennedy                1
trenton irwin              1
Name: POS, dtype: int64

In [10]:
# Discard receivers for whom no team was found in the merge.
df_wr = df_wr.dropna(subset=['Team']).reset_index(drop=True)

df_wr

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK,Team
0,2020,1,21,michael thomas,WR,1,NO
1,2020,1,33,davante adams,WR,2,GB
2,2020,1,36,julio jones,WR,3,ATL
3,2020,1,37,tyreek hill,WR,4,KC
4,2020,1,45,chris godwin,WR,5,TB
...,...,...,...,...,...,...,...
9697,2023,17,414,jalen brooks,WR,168,DAL
9698,2023,17,423,keelan doss,WR,171,LAC
9699,2023,17,448,austin trammell,WR,177,LAR
9700,2023,17,458,lucky jackson,WR,180,MIN


In [11]:
# Keep the third-ranked wide receiver (WR3) of every team for each season/week.

# Sort explicitly so "third" is defined by POS RK rather than by whatever order
# earlier cells happened to leave the frame in.
df_wr = df_wr.sort_values(['Year', 'Week', 'POS RK'], ignore_index=True)

# cumcount() numbers the rows 0, 1, 2, ... inside each (Year, Week, Team) group,
# so position 2 is the third receiver. Unlike groupby().nth(2), whose return shape
# changed in pandas 2.0 (it used to move the group keys into the index, which
# reset_index(drop=True) would then throw away), this keeps every column on any version.
position_within_team = df_wr.groupby(['Year', 'Week', 'Team']).cumcount()
df_wr = df_wr.loc[position_within_team == 2, :].reset_index(drop=True)

df_wr

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK,Team
0,2020,1,114,ceedee lamb,WR,41,DAL
1,2020,1,140,sammy watkins,WR,51,KC
2,2020,1,145,curtis samuel,WR,52,CAR
3,2020,1,151,randall cobb,WR,56,HOU
4,2020,1,153,larry fitzgerald,WR,58,ARI
...,...,...,...,...,...,...,...
2008,2023,17,250,david bell,WR,103,CLE
2009,2023,17,266,equanimeous st,WR,110,CHI
2010,2023,17,272,julio jones,WR,113,PHI
2011,2023,17,297,khadarel hodge,WR,119,ATL


In [12]:
# Default URL slug: the normalised name with spaces turned into hyphens.
df_wr['Player Name URL'] = df_wr['Player Name'].str.replace(' ', '-')

# Players whose default slug is known not to work, mapped to the slug to use instead.
# NOTE(review): two-word truncation leaves names such as 'equanimeous st' with the
# default slug 'equanimeous-st' — confirm whether those need an override as well.
url_overrides = {
    'juju smithschuster': 'juju-smith-schuster',
    'jj arcegawhiteside': 'jj-arcega-whiteside',
    'rayray mccloud': 'ray-ray-mccloud',
    'donovan peoplesjones': 'donovan-peoples-jones',
    'amonra st': 'amonra-stbrown',
    'jaxon smithnjigba': 'jaxon-smith-njigba',
    'ted ginn': 'ted-ginn-jr',
}
for normalized_name, url_slug in url_overrides.items():
    df_wr.loc[df_wr['Player Name'] == normalized_name, 'Player Name URL'] = url_slug


df_wr

Unnamed: 0,Year,Week,RK,Player Name,POS,POS RK,Team,Player Name URL
0,2020,1,114,ceedee lamb,WR,41,DAL,ceedee-lamb
1,2020,1,140,sammy watkins,WR,51,KC,sammy-watkins
2,2020,1,145,curtis samuel,WR,52,CAR,curtis-samuel
3,2020,1,151,randall cobb,WR,56,HOU,randall-cobb
4,2020,1,153,larry fitzgerald,WR,58,ARI,larry-fitzgerald
...,...,...,...,...,...,...,...,...
2008,2023,17,250,david bell,WR,103,CLE,david-bell
2009,2023,17,266,equanimeous st,WR,110,CHI,equanimeous-st
2010,2023,17,272,julio jones,WR,113,PHI,julio-jones
2011,2023,17,297,khadarel hodge,WR,119,ATL,khadarel-hodge


In [13]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Chrome launch options; uncomment the --headless flag below to run without a visible window.
options = Options()
# options.add_argument('--headless')

# Start the Chrome WebDriver session used by scrape_props.
driver = webdriver.Chrome(options=options)

In [14]:
import time
from selenium.webdriver.common.by import By

def scrape_props(player_name, player_name_url, year, week, team):
    """Scrape one player's prop lines for a given season and week.

    Loads the player's bettingpros.com props page with the module-level
    Selenium ``driver``, scrolls to trigger lazy loading, and reads the
    text of every prop row found.

    Args:
        player_name: Name stored in the result under 'Player Name'.
        player_name_url: Slug inserted into the page URL.
        year: Season passed as the ``season`` query parameter.
        week: Week passed as the ``week`` query parameter.
        team: Team stored in the result under 'Team'.

    Returns:
        dict: 'Year', 'Week', 'Player Name' and 'Team', plus the
        Projection/Over/Under fields for rushing yards, receiving yards and
        receptions and the 'Anytime Touchdown Line' for whichever of those
        rows were present on the page.
    """
    row_class = "grouped-items-with-sticky-footer__content"

    def find_loaded_rows():
        # Every prop row currently present in the DOM.
        return driver.find_elements(by=By.CLASS_NAME, value=row_class)

    driver.get(url=f"https://www.bettingpros.com/nfl/odds/player-props/{player_name_url}/?season={year}&week={week}")
    time.sleep(5)

    # The page lazy-loads its rows: scrolling to the last row that is already
    # loaded should pull in the following ones, so do that a few times.
    for _ in range(3):
        loaded = find_loaded_rows()
        if loaded:
            last_row_y = loaded[-1].location['y']
            driver.execute_script(f"window.scrollTo(0, {last_row_y});")

        time.sleep(3)

    rows = find_loaded_rows()
    if not rows:
        print(f"Warning: No rows for {player_name} {year} week {week}")

    data = {'Year': year, 'Week': week, 'Player Name': player_name, 'Team': team}

    # Row heading -> prefix of the result keys for the over/under style props.
    over_under_props = {
        'Rushing Yards Over/Under': 'Rushing Yards',
        'Receiving Yards Over/Under': 'Receiving Yards',
        'Receptions Over/Under': 'Receptions',
    }

    for row in rows:
        lines = row.text.split('\n')
        heading = lines[0]
        if heading in over_under_props:
            stat = over_under_props[heading]
            # Read from the end of the row text: projection, over odds, ..., under odds.
            data[f'{stat} Projection'] = lines[-4]
            data[f'{stat} Over'] = lines[-3]
            data[f'{stat} Under'] = lines[-1]
        elif heading == 'Anytime Touchdown Scorer':
            data['Anytime Touchdown Line'] = lines[-1]

    return data

In [15]:
from tqdm.autonotebook import tqdm

# Scrape every selected receiver/week; a failure is reported and that row is skipped.
d = []
wr_rows = list(df_wr.iterrows())
for _, wr in tqdm(wr_rows):
    try:
        scraped = scrape_props(wr['Player Name'], wr['Player Name URL'], wr['Year'], wr['Week'], wr['Team'])
        d.append(scraped)
    except Exception as e:
        print(f"Error with {wr['Player Name'], wr['Player Name URL'], wr['Year'], wr['Week'], wr['Team']}: {e}")

  from tqdm.autonotebook import tqdm


  0%|          | 0/2013 [00:00<?, ?it/s]



In [16]:
# Close the browser and end the WebDriver session now that scraping is finished.
driver.quit()

In [17]:
# Preview a few scraped records instead of dumping the whole list into the output.
d[:3]

[{'Year': 2020,
  'Week': 1,
  'Player Name': 'ceedee lamb',
  'Team': 'DAL',
  'Receiving Yards Projection': 'O 50.5',
  'Receiving Yards Over': '(-110)',
  'Receiving Yards Under': '(-110)',
  'Receptions Projection': 'O 3.5',
  'Receptions Over': '(+125)',
  'Receptions Under': '(-145)',
  'Anytime Touchdown Line': '+260'},
 {'Year': 2020,
  'Week': 1,
  'Player Name': 'sammy watkins',
  'Team': 'KC',
  'Receptions Projection': 'O 3.5',
  'Receptions Over': '(+108)',
  'Receptions Under': '(-136)',
  'Anytime Touchdown Line': '+240',
  'Receiving Yards Projection': 'O 42.5',
  'Receiving Yards Over': '(-110)',
  'Receiving Yards Under': '(-110)'},
 {'Year': 2020,
  'Week': 1,
  'Player Name': 'curtis samuel',
  'Team': 'CAR',
  'Anytime Touchdown Line': '+275',
  'Receiving Yards Projection': 'O 36.5',
  'Receiving Yards Over': '(-110)',
  'Receiving Yards Under': '(-110)',
  'Receptions Projection': 'O 3.5',
  'Receptions Over': '(+125)',
  'Receptions Under': '(-145)'},
 {'Year': 

In [18]:
import pandas as pd

# Turn the list of per-player dicts into a table; props a player lacks become missing values.
df = pd.json_normalize(data=d)

df

Unnamed: 0,Year,Week,Player Name,Team,Receiving Yards Projection,Receiving Yards Over,Receiving Yards Under,Receptions Projection,Receptions Over,Receptions Under,Anytime Touchdown Line,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under
0,2020,1,ceedee lamb,DAL,O 50.5,(-110),(-110),O 3.5,(+125),(-145),+260,,,
1,2020,1,sammy watkins,KC,O 42.5,(-110),(-110),O 3.5,(+108),(-136),+240,,,
2,2020,1,curtis samuel,CAR,O 36.5,(-110),(-110),O 3.5,(+125),(-145),+275,,,
3,2020,1,randall cobb,HOU,O 35.5,(-110),(-110),O 2.5,(-172),(+134),+350,,,
4,2020,1,larry fitzgerald,ARI,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2008,2023,17,david bell,CLE,O 13.5,(-110),(-120),O 1.5,(-113),(-113),+600,,,
2009,2023,17,equanimeous st,CHI,,,,,,,,,,
2010,2023,17,julio jones,PHI,O 8.5,(-110),(-110),O 1.5,(+165),(-224),+900,,,
2011,2023,17,khadarel hodge,ATL,O 8.5,(-110),(-110),O 0.5,(-210),(+152),+1100,,,


In [19]:
# Pull the numeric line out of strings such as 'O 50.5'.
# The fractional part is optional so that a whole-number line like 'O 50' is kept;
# the stricter r'(\d+\.\d+)' pattern silently turned those into NaN.
# expand=False makes extract return a Series rather than a one-column DataFrame.
for column in ('Rushing Yards Projection', 'Receiving Yards Projection', 'Receptions Projection'):
    df[column] = df[column].str.extract(r'(\d+(?:\.\d+)?)', expand=False).astype(float)

df

Unnamed: 0,Year,Week,Player Name,Team,Receiving Yards Projection,Receiving Yards Over,Receiving Yards Under,Receptions Projection,Receptions Over,Receptions Under,Anytime Touchdown Line,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under
0,2020,1,ceedee lamb,DAL,50.5,(-110),(-110),3.5,(+125),(-145),+260,,,
1,2020,1,sammy watkins,KC,42.5,(-110),(-110),3.5,(+108),(-136),+240,,,
2,2020,1,curtis samuel,CAR,36.5,(-110),(-110),3.5,(+125),(-145),+275,,,
3,2020,1,randall cobb,HOU,35.5,(-110),(-110),2.5,(-172),(+134),+350,,,
4,2020,1,larry fitzgerald,ARI,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2008,2023,17,david bell,CLE,13.5,(-110),(-120),1.5,(-113),(-113),+600,,,
2009,2023,17,equanimeous st,CHI,,,,,,,,,,
2010,2023,17,julio jones,PHI,8.5,(-110),(-110),1.5,(+165),(-224),+900,,,
2011,2023,17,khadarel hodge,ATL,8.5,(-110),(-110),0.5,(-210),(+152),+1100,,,


In [20]:
# Convert the over/under odds strings, e.g. '(-110)', into floats.
odds_columns = [
    f'{stat} {side}'
    for stat in ('Rushing Yards', 'Receiving Yards', 'Receptions')
    for side in ('Over', 'Under')
]
for column in odds_columns:
    # 'EVEN' odds are written as 100.
    is_even = df[column].str.contains('EVEN', regex=False, na=False)
    df.loc[is_even, column] = '100'

    # '--' placeholders become NaN once cast to float.
    is_placeholder = df[column] == '--'
    df.loc[is_placeholder, column] = 'nan'

    # Drop the surrounding parentheses before the cast.
    without_parentheses = df[column].str.replace(r'\(|\)', '', regex=True)
    df[column] = without_parentheses.astype(float)

df

Unnamed: 0,Year,Week,Player Name,Team,Receiving Yards Projection,Receiving Yards Over,Receiving Yards Under,Receptions Projection,Receptions Over,Receptions Under,Anytime Touchdown Line,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under
0,2020,1,ceedee lamb,DAL,50.5,-110.0,-110.0,3.5,125.0,-145.0,+260,,,
1,2020,1,sammy watkins,KC,42.5,-110.0,-110.0,3.5,108.0,-136.0,+240,,,
2,2020,1,curtis samuel,CAR,36.5,-110.0,-110.0,3.5,125.0,-145.0,+275,,,
3,2020,1,randall cobb,HOU,35.5,-110.0,-110.0,2.5,-172.0,134.0,+350,,,
4,2020,1,larry fitzgerald,ARI,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2008,2023,17,david bell,CLE,13.5,-110.0,-120.0,1.5,-113.0,-113.0,+600,,,
2009,2023,17,equanimeous st,CHI,,,,,,,,,,
2010,2023,17,julio jones,PHI,8.5,-110.0,-110.0,1.5,165.0,-224.0,+900,,,
2011,2023,17,khadarel hodge,ATL,8.5,-110.0,-110.0,0.5,-210.0,152.0,+1100,,,


In [21]:
# Convert the anytime-touchdown odds strings, e.g. '+260', into floats.
touchdown_column = 'Anytime Touchdown Line'

# 'EVEN' odds are written as 100.
is_even = df[touchdown_column].str.contains('EVEN', regex=False, na=False)
df.loc[is_even, touchdown_column] = '100'

# '--' placeholders become NaN once cast to float.
is_placeholder = df[touchdown_column] == '--'
df.loc[is_placeholder, touchdown_column] = 'nan'

df[touchdown_column] = df[touchdown_column].astype(float)

df

Unnamed: 0,Year,Week,Player Name,Team,Receiving Yards Projection,Receiving Yards Over,Receiving Yards Under,Receptions Projection,Receptions Over,Receptions Under,Anytime Touchdown Line,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under
0,2020,1,ceedee lamb,DAL,50.5,-110.0,-110.0,3.5,125.0,-145.0,260.0,,,
1,2020,1,sammy watkins,KC,42.5,-110.0,-110.0,3.5,108.0,-136.0,240.0,,,
2,2020,1,curtis samuel,CAR,36.5,-110.0,-110.0,3.5,125.0,-145.0,275.0,,,
3,2020,1,randall cobb,HOU,35.5,-110.0,-110.0,2.5,-172.0,134.0,350.0,,,
4,2020,1,larry fitzgerald,ARI,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2008,2023,17,david bell,CLE,13.5,-110.0,-120.0,1.5,-113.0,-113.0,600.0,,,
2009,2023,17,equanimeous st,CHI,,,,,,,,,,
2010,2023,17,julio jones,PHI,8.5,-110.0,-110.0,1.5,165.0,-224.0,900.0,,,
2011,2023,17,khadarel hodge,ATL,8.5,-110.0,-110.0,0.5,-210.0,152.0,1100.0,,,


In [22]:
def american_odds_to_probability(odds):
    """
    Translate American (moneyline) odds into the probability they imply.

    Args:
        odds (int or float): American odds, e.g. -150 for a favourite or
            +200 for an underdog.

    Returns:
        float: Implied probability as a fraction between 0 and 1
            (e.g. 0.60 for 60%).
    """
    if not odds > 0:
        # Zero or negative odds: risk = -odds, probability = risk / (risk + 100)
        risk = -odds
        return risk / (risk + 100)

    # Positive odds: probability = 100 / (odds + 100)
    return 100 / (odds + 100)

Adjust projections based on over/under lines

In [23]:
# Nudge each projection by half the gap between the implied over and under probabilities.
for stat in ('Rushing Yards', 'Receiving Yards', 'Receptions'):
    over_probability = df[f'{stat} Over'].apply(american_odds_to_probability)
    under_probability = df[f'{stat} Under'].apply(american_odds_to_probability)

    df[f'Adjusted {stat} Projection'] = (
        df[f'{stat} Projection'] + 0.5 * over_probability - 0.5 * under_probability
    )

df

Unnamed: 0,Year,Week,Player Name,Team,Receiving Yards Projection,Receiving Yards Over,Receiving Yards Under,Receptions Projection,Receptions Over,Receptions Under,Anytime Touchdown Line,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under,Adjusted Rushing Yards Projection,Adjusted Receiving Yards Projection,Adjusted Receptions Projection
0,2020,1,ceedee lamb,DAL,50.5,-110.0,-110.0,3.5,125.0,-145.0,260.0,,,,,50.500000,3.426304
1,2020,1,sammy watkins,KC,42.5,-110.0,-110.0,3.5,108.0,-136.0,240.0,,,,,42.500000,3.452249
2,2020,1,curtis samuel,CAR,36.5,-110.0,-110.0,3.5,125.0,-145.0,275.0,,,,,36.500000,3.426304
3,2020,1,randall cobb,HOU,35.5,-110.0,-110.0,2.5,-172.0,134.0,350.0,,,,,35.500000,2.602501
4,2020,1,larry fitzgerald,ARI,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2008,2023,17,david bell,CLE,13.5,-110.0,-120.0,1.5,-113.0,-113.0,600.0,,,,,13.489177,1.500000
2009,2023,17,equanimeous st,CHI,,,,,,,,,,,,,
2010,2023,17,julio jones,PHI,8.5,-110.0,-110.0,1.5,165.0,-224.0,900.0,,,,,8.500000,1.343000
2011,2023,17,khadarel hodge,ATL,8.5,-110.0,-110.0,0.5,-210.0,152.0,1100.0,,,,,8.500000,0.640297


In [24]:
# Implied probability of the player scoring a touchdown, derived from the anytime-touchdown odds.
touchdown_odds = df['Anytime Touchdown Line']
df['Anytime Touchdown Probability'] = touchdown_odds.apply(american_odds_to_probability)

df

Unnamed: 0,Year,Week,Player Name,Team,Receiving Yards Projection,Receiving Yards Over,Receiving Yards Under,Receptions Projection,Receptions Over,Receptions Under,Anytime Touchdown Line,Rushing Yards Projection,Rushing Yards Over,Rushing Yards Under,Adjusted Rushing Yards Projection,Adjusted Receiving Yards Projection,Adjusted Receptions Projection,Anytime Touchdown Probability
0,2020,1,ceedee lamb,DAL,50.5,-110.0,-110.0,3.5,125.0,-145.0,260.0,,,,,50.500000,3.426304,0.277778
1,2020,1,sammy watkins,KC,42.5,-110.0,-110.0,3.5,108.0,-136.0,240.0,,,,,42.500000,3.452249,0.294118
2,2020,1,curtis samuel,CAR,36.5,-110.0,-110.0,3.5,125.0,-145.0,275.0,,,,,36.500000,3.426304,0.266667
3,2020,1,randall cobb,HOU,35.5,-110.0,-110.0,2.5,-172.0,134.0,350.0,,,,,35.500000,2.602501,0.222222
4,2020,1,larry fitzgerald,ARI,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2008,2023,17,david bell,CLE,13.5,-110.0,-120.0,1.5,-113.0,-113.0,600.0,,,,,13.489177,1.500000,0.142857
2009,2023,17,equanimeous st,CHI,,,,,,,,,,,,,,
2010,2023,17,julio jones,PHI,8.5,-110.0,-110.0,1.5,165.0,-224.0,900.0,,,,,8.500000,1.343000,0.100000
2011,2023,17,khadarel hodge,ATL,8.5,-110.0,-110.0,0.5,-210.0,152.0,1100.0,,,,,8.500000,0.640297,0.083333


In [25]:
# Persist the cleaned WR3 prop lines.
# Forward slashes resolve on Windows, macOS and Linux alike; with the previous
# backslash-separated raw string, a POSIX system would create a file literally
# named '..\..\data\betting_lines\wr3_props.parquet' in the working directory.
df.to_parquet('../../data/betting_lines/wr3_props.parquet')

print('Done')

Done
