## Scrape odds continuously from DK

In [2]:
# Libraries
import pandas as pd
import requests
from bs4 import BeautifulSoup
import numpy as np
import datetime as dt

In [1]:
# Variables to use in the rest of notebook
# CSV file that the moneyline/puckline odds history is read from and written back to
path_to_team_odds = '../data/game_team_odds.csv'
# CSV file that the O/U (totals) odds history is read from and written back to
path_to_total_odds = '../data/game_total_odds.csv'
# DraftKings NHL game-lines page that is passed to retrieve_today_odds as the page to scrape
dk_hockey_main_url = 'https://sportsbook.draftkings.com/leagues/hockey/nhl?category=game-lines&subcategory=game'

In [17]:
# Function to return data frames from DK containing the cleaned odds information for 1) Moneyline/Puckline and 2) O/U's
# The private helpers below each handle one step; retrieve_today_odds is the entry point.
def _find_today_table(sportsbook_tables):
    """Return the sportsbook table whose header title reads 'tomorrow'.

    Note: If games for the day have NOT started yet, the word 'tomorrow' still
    implies 'today' on DK's website, which is why that title is searched for.

    Raises:
        ValueError: If none of the tables carries a 'tomorrow' header title.
    """
    today_table = None
    for table in sportsbook_tables:
        header_titles = [
            header.text.strip().lower()
            for header in table.find_all(class_ = 'sportsbook-table-header__title')
        ]
        if 'tomorrow' in header_titles:
            # Keep scanning so that the last matching table wins
            today_table = table

    if today_table is None:
        raise ValueError("No sportsbook table titled 'tomorrow' was found on the page.")
    return today_table


def _build_game_odds_frame(dt_now, today_teams, today_lines, today_odds):
    """Build the moneyline/puckline data frame (two rows per team)."""
    # Each team appears twice: one row for its puckline, one for its moneyline
    today_teams_game = np.repeat(today_teams, 2)

    # One (puckline, moneyline) pair per listed team. Counting the listed teams
    # (rather than the unique names) keeps this column the same length as the
    # repeated team column.
    today_bet_types_game = ['puckline', 'moneyline'] * len(today_teams)

    # Every other scraped line is a puckline (the rest are O/U lines, handled
    # separately). A moneyline has no line, so each puckline is followed by NaN.
    today_lines_game = []
    for puckline in today_lines[::2]:
        today_lines_game.extend([puckline, np.nan])

    # Assumes the odds cells come in groups of three per team with the O/U odds
    # in the middle; dropping index % 3 == 1 leaves puckline and moneyline odds.
    today_odds_game = [odds for ind, odds in enumerate(today_odds) if ind % 3 != 1]

    return pd.DataFrame({
        'date_recorded':dt_now.date(),
        'time_recorded':dt_now.strftime('%H:%M:%S'),
        'date_game':dt_now.date(),
        'team':today_teams_game,
        'bet_type':today_bet_types_game,
        'line':today_lines_game,
        'odds':today_odds_game
    })


def _build_total_odds_frame(dt_now, today_teams, today_lines, today_odds):
    """Build the O/U data frame (two rows per game: over, then under)."""
    # Teams are treated as listed away-then-home; each is repeated for the O and U rows
    today_away_teams = np.repeat(today_teams[::2], 2)
    today_home_teams = np.repeat(today_teams[1::2], 2)

    # List of O/U lines (ex: 6.5, 6.5, 5.5, 5.5, 6, 6, etc...)
    today_ou_lines = today_lines[1::2]

    # List of O/U bet types (O then U repeated once per game)
    today_ou_bet_type = ['O', 'U'] * (len(today_ou_lines) // 2)

    # The O/U odds are the middle entry of every group of three odds cells
    today_ou_odds = today_odds[1::3]

    return pd.DataFrame({
        'date_recorded':dt_now.date(),
        'time_recorded':dt_now.strftime('%H:%M:%S'),
        'date_game':dt_now.date(),
        'home':today_home_teams,
        'away':today_away_teams,
        'bet_type':today_ou_bet_type,
        'line':today_ou_lines,
        'odds':today_ou_odds
    })


def retrieve_today_odds(url, get_game_odds = True, get_total_odds = True, timeout = 30):
    """Scrape the DK page at `url` and return today's odds as data frames.

    Args:
        url: Address of the page to download and parse.
        get_game_odds: If True, build the moneyline/puckline data frame.
        get_total_odds: If True, build the O/U data frame.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        (game odds frame, total odds frame) when both flags are True, the
        single requested frame when only one flag is True, or None (after
        printing a notice) when neither flag is True. Lines and odds are
        stored as the text scraped from the page.

    Raises:
        requests.HTTPError: If the page answers with an error status code.
        ValueError: If no table titled 'tomorrow' is found on the page.
    """
    if not (get_game_odds or get_total_odds):
        print('No odds were specified.')
        return

    # Record the current date and time
    dt_now = dt.datetime.now()

    # Record the HTML code from url; fail early on an error response instead of
    # parsing an error page that contains no odds tables
    response = requests.get(url, timeout = timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Isolate the table for today's games among the sportsbook tables on the page
    today_table = _find_today_table(soup.find_all(class_ = 'sportsbook-table'))

    # List of teams playing today, in page order
    today_teams = [team.text.strip() for team in today_table.find_all(class_ = 'event-cell__name-text')]

    # Puck line and O/U lines (ex: -1.5, 6.5, +1.5, 6.5, etc...)
    today_lines = [line.text for line in today_table.find_all(class_ = 'sportsbook-outcome-cell__line')]

    # All odds for today (puckline, O/U and moneyline mixed together)
    today_odds = [odds.text for odds in today_table.find_all(class_ = 'sportsbook-outcome-cell__elements')]

    # Build only the frames the user asked for
    if get_game_odds:
        df_today_game = _build_game_odds_frame(dt_now, today_teams, today_lines, today_odds)
    if get_total_odds:
        df_today_total = _build_total_odds_frame(dt_now, today_teams, today_lines, today_odds)

    # Return the scraped odds data frames
    if get_game_odds and get_total_odds:
        return df_today_game, df_today_total
    if get_game_odds:
        return df_today_game
    return df_today_total


In [27]:
# Scrape the DK page once and unpack the two resulting data frames:
# moneyline/puckline odds first, O/U odds second
df_today_team_odds, df_today_total_odds = retrieve_today_odds(
    url=dk_hockey_main_url,
    get_game_odds=True,
    get_total_odds=True,
)

In [30]:
# Sanity check: preview the first five rows of each scraped data frame
display(df_today_team_odds.head(n=5))
display(df_today_total_odds.head(n=5))

Unnamed: 0,date_recorded,time_recorded,date_game,team,bet_type,line,odds
0,2023-02-14,20:03:28,2023-02-14,CHI Blackhawks,puckline,1.5,−125
1,2023-02-14,20:03:28,2023-02-14,CHI Blackhawks,moneyline,,+390
2,2023-02-14,20:03:28,2023-02-14,MTL Canadiens,puckline,-1.5,−105
3,2023-02-14,20:03:28,2023-02-14,MTL Canadiens,moneyline,,−540
4,2023-02-14,20:03:28,2023-02-14,CAR Hurricanes,puckline,-1.5,−105


Unnamed: 0,date_recorded,time_recorded,date_game,home,away,bet_type,line,odds
0,2023-02-14,20:03:28,2023-02-14,MTL Canadiens,CHI Blackhawks,O,2.5,−265
1,2023-02-14,20:03:28,2023-02-14,MTL Canadiens,CHI Blackhawks,U,2.5,+210
2,2023-02-14,20:03:28,2023-02-14,WAS Capitals,CAR Hurricanes,O,7.5,+155
3,2023-02-14,20:03:28,2023-02-14,WAS Capitals,CAR Hurricanes,U,7.5,−190
4,2023-02-14,20:03:28,2023-02-14,NY Islanders,OTT Senators,O,3.5,+110


### Write data frames to files or update existing ones

In [12]:
# Append today's moneyline/puckline odds to the stored history and write it back.
# Only ordinary errors are wrapped (so Ctrl-C is not swallowed) and the original
# cause is chained so that the real failure stays visible in the traceback.
try:
    try:
        current_game_team_odds = pd.read_csv(path_to_team_odds)
        team_frames_to_store = [current_game_team_odds, df_today_team_odds]
    except FileNotFoundError:
        # No history file yet, so nothing can be overwritten: start it with today's rows
        team_frames_to_store = [df_today_team_odds]
    updated_game_team_odds = pd.concat(team_frames_to_store, axis=0).reset_index(drop=True)
    updated_game_team_odds.to_csv(path_to_team_odds, header=True, index=False)
except Exception as err:
    raise Exception('New data was not able to be concatenated') from err

In [16]:
# Append today's O/U odds to the stored history and write it back.
# Only ordinary errors are wrapped (so Ctrl-C is not swallowed) and the original
# cause is chained so that the real failure stays visible in the traceback.
try:
    try:
        current_game_total_odds = pd.read_csv(path_to_total_odds)
        total_frames_to_store = [current_game_total_odds, df_today_total_odds]
    except FileNotFoundError:
        # No history file yet, so nothing can be overwritten: start it with today's rows
        total_frames_to_store = [df_today_total_odds]
    updated_game_total_odds = pd.concat(total_frames_to_store, axis=0).reset_index(drop=True)
    updated_game_total_odds.to_csv(path_to_total_odds, header=True, index=False)
except Exception as err:
    raise Exception('New data was not able to be concatenated') from err