## Scrape odds continuously from DK
Currently, this notebook can only be used to scrape odds ON THE DAY OF the games and BEFORE they start. Each game can only be recorded ONE TIME; otherwise, inaccurate information will be recorded.

In [1]:
# Libraries
import numpy as np
import pandas as pd
import datetime as dt
from dateutil import tz
import requests
from bs4 import BeautifulSoup

In [2]:
# Variables to use in the rest of notebook
# CSV files that hold the recorded odds, one file per bet type:
# moneyline (ml), puck line (pl), and total (over/under).
path_to_ml_odds = '../data/odds/ml_odds.csv'
path_to_pl_odds = '../data/odds/pl_odds.csv'
path_to_total_odds = '../data/odds/total_odds.csv'
# DraftKings NHL game-lines page that the scraper requests
dk_hockey_main_url = 'https://sportsbook.draftkings.com/leagues/hockey/nhl?category=game-lines&subcategory=game'

In [13]:
# Function to return a dictionary of raw odds information scraped from DK (teams, game times, lines, and odds)
def retrieve_sportsbook_info(url):
    """Scrape the DK game-lines page and collect the raw values for today's games.

    Parameters
    ----------
    url : str
        Address of the DK page to request.

    Returns
    -------
    dict
        'datetime'   : datetime.datetime taken right before the request
        'game_times' : list of datetime.time, one entry per start-time cell
        'teams'      : list of team-name strings
        'lines'      : list of line strings (puck lines and O/U lines)
        'odds'       : list of odds strings

    Raises
    ------
    requests.RequestException
        If the request times out or the server answers with an HTTP error.
    ValueError
        If no table on the page carries the 'tomorrow' header.
    """
    # Record the current date and time
    dt_now = dt.datetime.now()

    # Record the HTML code from url.
    # The timeout keeps an unresponsive server from hanging the notebook, and
    # raise_for_status() stops an HTTP error page from being parsed as odds.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Each sportsbook table on the page separated in a list
    sportsbook_tables = soup.find_all(class_='sportsbook-table')

    # Isolate table for today's games
    # Note: If games for the day have NOT started yet, the word 'tomorrow' still implies 'today' on DK's website
    # Once games begin, the word 'today' actually corresponds to 'today' on DK's website
    today_table = None
    for table in sportsbook_tables:
        header_titles = [
            title.text.strip().lower()
            for title in table.find_all(class_='sportsbook-table-header__title')
        ]
        if 'tomorrow' in header_titles:
            # No break: when several tables match, the last one is used
            today_table = table

    # Fail with a clear message instead of an UnboundLocalError further down
    if today_table is None:
        raise ValueError(
            "No sportsbook table with a 'tomorrow' header was found at " + url
            + "; the games may have already started or the page layout may have changed."
        )

    # Provide the list of teams playing today given the HTML code
    today_teams = [team.text.strip() for team in today_table.find_all(class_='event-cell__name-text')]

    # Provide list of game times today (1 time per each team)
    from_zone = tz.tzutc()
    to_zone = tz.tzlocal()
    start_time_texts = [cell.text.strip() for cell in today_table.find_all(class_='event-cell__start-time')]
    # strptime() with only a clock time yields a datetime dated 1900-01-01
    utc_times = [
        dt.datetime.strptime(text, '%I:%M%p').replace(tzinfo=from_zone)
        for text in start_time_texts
    ]
    # NOTE(review): the fixed +1 hour presumably compensates for daylight saving time,
    # which the 1900-01-01 placeholder date cannot account for -- confirm before relying on it year-round
    today_times_adjusted = [
        (utc_time.astimezone(to_zone) + dt.timedelta(hours=1)).time()
        for utc_time in utc_times
    ]

    # Gathers puck line and O/U lines (ex: -1.5, 6.5, +1.5, 6.5, etc...)
    today_lines = [line.text for line in today_table.find_all(class_='sportsbook-outcome-cell__line')]

    # List of odds for today
    today_odds = [odds.text for odds in today_table.find_all(class_='sportsbook-outcome-cell__elements')]

    # Create dictionary of information regarding today
    today_info = {
        'datetime': dt_now,
        'game_times': today_times_adjusted,
        'teams': today_teams,
        'lines': today_lines,
        'odds': today_odds,
    }

    # Return dictionary
    return today_info

In [4]:
def get_ml_odds(sportsbook_recording):
    """Build the moneyline data frame from a sportsbook recording.

    `sportsbook_recording` is a dictionary with the keys 'datetime',
    'game_times', 'teams' and 'odds'. Every third entry of 'odds', starting
    at index 2, is taken as a moneyline price. The game date is set to the
    recording date and 'game_id' is left as NaN.
    """
    recorded_at = sportsbook_recording['datetime']
    recorded_on = recorded_at.date()

    # Scalars are broadcast by pandas to the length of the list columns
    columns = {
        'date_recorded': recorded_on,
        'time_recorded': recorded_at.strftime('%H:%M:%S'),
        'game_id': np.nan,
        'date_game': recorded_on,
        'time_game': sportsbook_recording['game_times'],
        'team': sportsbook_recording['teams'],
    }

    # Moneyline prices sit at positions 2, 5, 8, ... of the odds list
    columns['ml_odds'] = sportsbook_recording['odds'][2::3]

    return pd.DataFrame(columns)

In [5]:
def get_pl_odds(sportsbook_recording):
    """Build the puck-line data frame from a sportsbook recording.

    `sportsbook_recording` is a dictionary with the keys 'datetime',
    'game_times', 'teams', 'lines' and 'odds'. Even-indexed entries of
    'lines' are taken as puck lines and every third entry of 'odds',
    starting at index 0, as their prices. The game date is set to the
    recording date and 'game_id' is left as NaN.
    """
    recorded_at = sportsbook_recording['datetime']
    recorded_on = recorded_at.date()

    # Puck lines alternate with O/U lines; their prices lead each group of three odds
    puck_lines = sportsbook_recording['lines'][::2]
    puck_line_prices = sportsbook_recording['odds'][::3]

    # Scalars are broadcast by pandas to the length of the list columns
    columns = {
        'date_recorded': recorded_on,
        'time_recorded': recorded_at.strftime('%H:%M:%S'),
        'game_id': np.nan,
        'date_game': recorded_on,
        'time_game': sportsbook_recording['game_times'],
        'team': sportsbook_recording['teams'],
        'pl_line': puck_lines,
        'pl_odds': puck_line_prices,
    }

    return pd.DataFrame(columns)

In [6]:
def get_total_odds(sportsbook_recording):
    """Build the over/under (total) data frame from a sportsbook recording.

    `sportsbook_recording` is a dictionary with the keys 'datetime',
    'game_times', 'teams', 'lines' and 'odds'. Teams are read in pairs:
    the even-indexed team is labelled away and the odd-indexed team home,
    each repeated twice so both appear on the 'O' row and the 'U' row.
    Odd-indexed entries of 'lines' are the O/U lines and every third entry
    of 'odds', starting at index 1, their prices. The game date is set to
    the recording date and 'game_id' is left as NaN.
    """
    recorded_at = sportsbook_recording['datetime']
    recorded_on = recorded_at.date()
    teams = sportsbook_recording['teams']

    # Each team appears twice: once for the over row and once for the under row
    away_column = np.repeat(teams[::2], 2)
    home_column = np.repeat(teams[1::2], 2)

    # O/U lines (ex: 6.5, 6.5, 5.5, 5.5, 6, 6, etc...) and their prices
    ou_lines = sportsbook_recording['lines'][1::2]
    ou_prices = sportsbook_recording['odds'][1::3]

    # 'O' then 'U', once per pair of O/U lines
    bet_types = ['O', 'U'] * (len(ou_lines) // 2)

    # Scalars are broadcast by pandas to the length of the list columns
    columns = {
        'date_recorded': recorded_on,
        'time_recorded': recorded_at.strftime('%H:%M:%S'),
        'game_id': np.nan,
        'date_game': recorded_on,
        'time_game': sportsbook_recording['game_times'],
        'home': home_column,
        'away': away_column,
        'bet_type': bet_types,
        'total_line': ou_lines,
        'total_odds': ou_prices,
    }

    return pd.DataFrame(columns)

In [14]:
# Record a table from DK sportsbook
# This performs a live request to the DK page; the returned dictionary is the
# single input that the get_ml_odds / get_pl_odds / get_total_odds functions read.
sportsbook_recording = retrieve_sportsbook_info(dk_hockey_main_url)

In [15]:
# Create df's to use in updating the CSV files
# All three are built from the same recording, so they share the same
# date_recorded / time_recorded values.
df_today_ml_odds = get_ml_odds(sportsbook_recording)
df_today_pl_odds = get_pl_odds(sportsbook_recording)
df_today_total_odds = get_total_odds(sportsbook_recording)

In [19]:
# Sanity check
# Render each data frame so the scraped rows can be inspected before they are
# written to the CSV files.
display(df_today_ml_odds)
display(df_today_pl_odds)
display(df_today_total_odds)

Unnamed: 0,date_recorded,time_recorded,game_id,date_game,time_game,team,ml_odds
0,2023-04-10,13:51:57,,2023-04-10,20:00:00,MIN Wild,−240
1,2023-04-10,13:51:57,,2023-04-10,20:00:00,CHI Blackhawks,+200
2,2023-04-10,13:51:57,,2023-04-10,20:30:00,NSH Predators,+185
3,2023-04-10,13:51:57,,2023-04-10,20:30:00,CGY Flames,−215
4,2023-04-10,13:51:57,,2023-04-10,21:00:00,SEA Kraken,−230
5,2023-04-10,13:51:57,,2023-04-10,21:00:00,ARI Coyotes,+195
6,2023-04-10,13:51:57,,2023-04-10,21:30:00,VAN Canucks,+165
7,2023-04-10,13:51:57,,2023-04-10,21:30:00,LA Kings,−195


Unnamed: 0,date_recorded,time_recorded,game_id,date_game,time_game,team,pl_line,pl_odds
0,2023-04-10,13:51:57,,2023-04-10,20:00:00,MIN Wild,-1.5,+105
1,2023-04-10,13:51:57,,2023-04-10,20:00:00,CHI Blackhawks,1.5,−130
2,2023-04-10,13:51:57,,2023-04-10,20:30:00,NSH Predators,1.5,−155
3,2023-04-10,13:51:57,,2023-04-10,20:30:00,CGY Flames,-1.5,+125
4,2023-04-10,13:51:57,,2023-04-10,21:00:00,SEA Kraken,-1.5,+105
5,2023-04-10,13:51:57,,2023-04-10,21:00:00,ARI Coyotes,1.5,−130
6,2023-04-10,13:51:57,,2023-04-10,21:30:00,VAN Canucks,1.5,−155
7,2023-04-10,13:51:57,,2023-04-10,21:30:00,LA Kings,-1.5,+125


Unnamed: 0,date_recorded,time_recorded,game_id,date_game,time_game,home,away,bet_type,total_line,total_odds
0,2023-04-10,13:51:57,,2023-04-10,20:00:00,CHI Blackhawks,MIN Wild,O,5.5,−120
1,2023-04-10,13:51:57,,2023-04-10,20:00:00,CHI Blackhawks,MIN Wild,U,5.5,+100
2,2023-04-10,13:51:57,,2023-04-10,20:30:00,CGY Flames,NSH Predators,O,5.5,−110
3,2023-04-10,13:51:57,,2023-04-10,20:30:00,CGY Flames,NSH Predators,U,5.5,−110
4,2023-04-10,13:51:57,,2023-04-10,21:00:00,ARI Coyotes,SEA Kraken,O,6.5,−115
5,2023-04-10,13:51:57,,2023-04-10,21:00:00,ARI Coyotes,SEA Kraken,U,6.5,−105
6,2023-04-10,13:51:57,,2023-04-10,21:30:00,LA Kings,VAN Canucks,O,6.5,−105
7,2023-04-10,13:51:57,,2023-04-10,21:30:00,LA Kings,VAN Canucks,U,6.5,−115


### Write data frames to files or update existing ones

In [20]:
# Append today's moneyline rows to the stored history, or start the file on the first run.
# Only a missing file is handled here; any other error (unreadable CSV, bad path, ...)
# propagates unchanged so the real cause stays visible.
try:
    current_ml_odds = pd.read_csv(path_to_ml_odds)
except FileNotFoundError:
    # No history yet: today's rows become the whole file
    updated_ml_odds = df_today_ml_odds.copy()
else:
    updated_ml_odds = pd.concat([current_ml_odds, df_today_ml_odds], axis=0)
updated_ml_odds = updated_ml_odds.reset_index(drop=True)

# Rows are stored in pairs (the two teams of one game); an odd count cannot be numbered
if len(updated_ml_odds.index) % 2 != 0:
    raise ValueError(
        'Moneyline odds must contain two rows per game, but the combined data has '
        + str(len(updated_ml_odds.index)) + ' rows.'
    )

# Renumber every consecutive pair of rows 1, 2, 3, ... in file order
updated_ml_odds['game_id'] = np.repeat(np.arange(1, len(updated_ml_odds.index) // 2 + 1), 2)
updated_ml_odds.to_csv(path_to_ml_odds, header=True, index=False)

In [21]:
# Append today's puck-line rows to the stored history, or start the file on the first run.
# Only a missing file is handled here; any other error (unreadable CSV, bad path, ...)
# propagates unchanged so the real cause stays visible.
try:
    current_pl_odds = pd.read_csv(path_to_pl_odds)
except FileNotFoundError:
    # No history yet: today's rows become the whole file
    updated_pl_odds = df_today_pl_odds.copy()
else:
    updated_pl_odds = pd.concat([current_pl_odds, df_today_pl_odds], axis=0)
updated_pl_odds = updated_pl_odds.reset_index(drop=True)

# Rows are stored in pairs (the two teams of one game); an odd count cannot be numbered
if len(updated_pl_odds.index) % 2 != 0:
    raise ValueError(
        'Puck-line odds must contain two rows per game, but the combined data has '
        + str(len(updated_pl_odds.index)) + ' rows.'
    )

# Renumber every consecutive pair of rows 1, 2, 3, ... in file order
updated_pl_odds['game_id'] = np.repeat(np.arange(1, len(updated_pl_odds.index) // 2 + 1), 2)
updated_pl_odds.to_csv(path_to_pl_odds, header=True, index=False)

In [22]:
# Append today's over/under rows to the stored history, or start the file on the first run.
# Only a missing file is handled here; any other error (unreadable CSV, bad path, ...)
# propagates unchanged so the real cause stays visible.
try:
    current_total_odds = pd.read_csv(path_to_total_odds)
except FileNotFoundError:
    # No history yet: today's rows become the whole file
    updated_total_odds = df_today_total_odds.copy()
else:
    updated_total_odds = pd.concat([current_total_odds, df_today_total_odds], axis=0)
updated_total_odds = updated_total_odds.reset_index(drop=True)

# Rows are stored in pairs (the over row and the under row of one game); an odd count cannot be numbered
if len(updated_total_odds.index) % 2 != 0:
    raise ValueError(
        'Total odds must contain two rows per game, but the combined data has '
        + str(len(updated_total_odds.index)) + ' rows.'
    )

# Renumber every consecutive pair of rows 1, 2, 3, ... in file order
updated_total_odds['game_id'] = np.repeat(np.arange(1, len(updated_total_odds.index) // 2 + 1), 2)
updated_total_odds.to_csv(path_to_total_odds, header=True, index=False)