## Scrape odds continuously from DraftKings (DK)

In [None]:
# Libraries
import numpy as np
import pandas as pd
import datetime as dt
import requests
from bs4 import BeautifulSoup

In [None]:
# Notebook-wide settings: one CSV path per odds type, plus the DK page that gets scraped
path_to_ml_odds = "../data/odds/ml_odds.csv"
path_to_pl_odds = "../data/odds/pl_odds.csv"
path_to_total_odds = "../data/odds/total_odds.csv"
dk_hockey_main_url = (
    "https://sportsbook.draftkings.com/leagues/hockey/nhl"
    "?category=game-lines&subcategory=game"
)

In [None]:
# Function to return data frames from DK containing the cleaned odds information for 1) moneyline, 2) puckline and 3) O/U totals
def retrieve_today_odds(url, get_ml_odds = True, get_pl_odds = True, get_total_odds = True):
    """Scrape the odds for today's games from a DK sportsbook page.

    The page at ``url`` is downloaded and parsed, the sportsbook table for
    today's games is located, and one data frame is built per requested
    odds type. Every frame carries the date and time of the scrape
    (``date_recorded``, ``time_recorded``) and uses the scrape date as
    ``date_game``.

    The parsing relies on the order in which cells appear in the table:
    odds cells are read as repeating (puckline, O/U, moneyline) triples,
    line cells as repeating (puckline, O/U) pairs, and team names as
    repeating (away, home) pairs.

    Args:
        url: Address of the DK page to scrape.
        get_ml_odds: Build the moneyline frame (columns ``team``, ``ml_odds``).
        get_pl_odds: Build the puckline frame (columns ``team``, ``pl_line``,
            ``pl_odds``).
        get_total_odds: Build the O/U frame (columns ``home``, ``away``,
            ``bet_type``, ``total_line``, ``total_odds``), two rows per game.

    Returns:
        ``None`` if no odds type was requested; a single data frame if
        exactly one was requested; otherwise a tuple of the requested
        frames, always ordered moneyline, puckline, O/U.

    Raises:
        requests.RequestException: The page could not be downloaded
            (connection problem, timeout, or HTTP error status).
        ValueError: No sportsbook table headed 'tomorrow' exists on the page.
    """
    if not (get_ml_odds or get_pl_odds or get_total_odds):
        print('No odds were specified.')
        return

    # Record the current date and time once so every frame shares the same stamp
    dt_now = dt.datetime.now()
    date_recorded = dt_now.date()
    time_recorded = dt_now.strftime('%H:%M:%S')

    # Download the page; a timeout keeps a stalled connection from hanging the
    # notebook, and an HTTP error status is surfaced instead of parsing an error page
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Isolate table for today's games
    # Note: If games for the day have NOT started yet, the word 'tomorrow' still implies 'today' on DK's website
    # Once games begin, the word 'today' actually correspond to 'today' on DK's website
    # Only a table headed 'tomorrow' is matched here; if several match, the last one is used
    today_table = None
    for table in soup.find_all(class_ = 'sportsbook-table'):
        titles = [title.text.strip().lower() for title in table.find_all(class_ = 'sportsbook-table-header__title')]
        if 'tomorrow' in titles:
            today_table = table

    if today_table is None:
        raise ValueError("No sportsbook table with a 'tomorrow' header was found at " + str(url))

    # Teams playing today, in page order
    today_teams = [team.text.strip() for team in today_table.find_all(class_ = 'event-cell__name-text')]

    # Puck line and O/U lines (ex: -1.5, 6.5, +1.5, 6.5, etc...)
    today_lines = [line.text for line in today_table.find_all(class_ = 'sportsbook-outcome-cell__line')]

    # All odds cells for today
    today_odds = [odds.text for odds in today_table.find_all(class_ = 'sportsbook-outcome-cell__elements')]

    # Requested frames, collected in the fixed order moneyline, puckline, O/U
    frames = []

    if get_ml_odds:
        # Moneyline odds are every third odds cell, starting at the third
        frames.append(pd.DataFrame({
            'date_recorded':date_recorded,
            'time_recorded':time_recorded,
            'date_game':date_recorded,
            'team':today_teams,
            'ml_odds':today_odds[2::3]
            }))

    if get_pl_odds:
        # Puckline lines are every second line cell; their odds are every third odds cell
        frames.append(pd.DataFrame({
            'date_recorded':date_recorded,
            'time_recorded':time_recorded,
            'date_game':date_recorded,
            'team':today_teams,
            'pl_line':today_lines[::2],
            'pl_odds':today_odds[::3]
            }))

    if get_total_odds:
        # Each game yields two rows (O and U), so each team name is repeated twice
        today_home_teams = np.repeat(today_teams[1::2], 2)
        today_away_teams = np.repeat(today_teams[::2], 2)

        # O/U lines (ex: 6.5, 6.5, 5.5, 5.5, 6, 6, etc...)
        today_ou_lines = today_lines[1::2]

        # O/U bet types (O then U repeated), one pair per two lines
        today_ou_bet_type = ['O', 'U'] * (len(today_ou_lines) // 2)

        frames.append(pd.DataFrame({
            'date_recorded':date_recorded,
            'time_recorded':time_recorded,
            'date_game':date_recorded,
            'home':today_home_teams,
            'away':today_away_teams,
            'bet_type':today_ou_bet_type,
            'total_line':today_ou_lines,
            'total_odds':today_odds[1::3]
        }))

    # A single requested frame is returned bare; several are returned as a tuple
    if len(frames) == 1:
        return frames[0]
    return tuple(frames)


In [None]:
# Scrape the moneyline, puckline and O/U odds into three separate data frames
(
    df_today_ml_odds,
    df_today_pl_odds,
    df_today_total_odds,
) = retrieve_today_odds(
    url=dk_hockey_main_url,
    get_ml_odds=True,
    get_pl_odds=True,
    get_total_odds=True,
)

### Write data frames to new files or update existing ones

In [None]:
# Append today's moneyline odds to the stored history, creating the CSV on first use.
# Only a missing file is handled; any other read/write problem propagates with its real error.
try:
    current_ml_odds = pd.read_csv(path_to_ml_odds)
except FileNotFoundError:
    # No history on disk yet: today's odds become the entire file
    updated_ml_odds = df_today_ml_odds.reset_index(drop=True)
else:
    updated_ml_odds = pd.concat([current_ml_odds, df_today_ml_odds], axis=0).reset_index(drop=True)
#display(updated_ml_odds)
updated_ml_odds.to_csv(path_to_ml_odds, header=True, index=False)

In [None]:
# Append today's puckline odds to the stored history, creating the CSV on first use.
# Only a missing file is handled; any other read/write problem propagates with its real error.
try:
    current_pl_odds = pd.read_csv(path_to_pl_odds)
except FileNotFoundError:
    # No history on disk yet: today's odds become the entire file
    updated_pl_odds = df_today_pl_odds.reset_index(drop=True)
else:
    updated_pl_odds = pd.concat([current_pl_odds, df_today_pl_odds], axis=0).reset_index(drop=True)
#display(updated_pl_odds)
updated_pl_odds.to_csv(path_to_pl_odds, header=True, index=False)

In [None]:
# Append today's O/U odds to the stored history, creating the CSV on first use.
# Only a missing file is handled; any other read/write problem propagates with its real error.
try:
    current_total_odds = pd.read_csv(path_to_total_odds)
except FileNotFoundError:
    # No history on disk yet: today's odds become the entire file
    updated_total_odds = df_today_total_odds.reset_index(drop=True)
else:
    updated_total_odds = pd.concat([current_total_odds, df_today_total_odds], axis=0).reset_index(drop=True)
#display(updated_total_odds)
updated_total_odds.to_csv(path_to_total_odds, header=True, index=False)